Skip to content

Commit 7ffacae

Browse files
authored
Merge pull request #4 from oxylabs/feature/new-output-types
Feature/new output types
2 parents 8e51397 + bd5dc1e commit 7ffacae

File tree

5 files changed

+44
-22
lines changed

5 files changed

+44
-22
lines changed

agentic_code_guide.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ Output (result):
7878

7979
```python
8080
class DataModel(BaseModel):
81-
type: Literal["json", "markdown", "html", "screenshot"]
81+
type: Literal["json", "markdown", "html", "screenshot", "csv"]
8282
content: dict[str, Any] | str | None
8383

8484
class BrowserAgentJob(BaseModel):
@@ -138,7 +138,7 @@ if __name__ == "__main__":
138138
Parameters:
139139

140140
- url (str): Target URL to scrape (required)
141-
- output_format (Literal["json", "markdown"]): Output format (default: "markdown")
141+
- output_format (Literal["json", "markdown", "csv", "screenshot"]): Output format (default: "markdown")
142142
- schema (dict | None): OpenAPI schema for structured extraction (required if output_format is "json" or "csv")
143143
- render_javascript (bool): Render JavaScript (default: False)
144144
- geo_location (str): Proxy location in ISO2 format
@@ -156,6 +156,8 @@ Output (result):
156156

157157
If output_format is "json", data will be a dictionary.
158158
If output_format is "markdown", data will be a string.
159+
If output_format is "csv", data will be a CSV-formatted string.
160+
If output_format is "screenshot", data will be a string.
159161

160162

161163
## Use Cases Examples

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "oxylabs-ai-studio"
3-
version = "0.2.14"
3+
version = "0.2.15"
44
description = "Oxylabs studio python sdk"
55
readme = "README.md"
66
keywords = ["oxylabs", "ai", "studio"]

src/oxylabs_ai_studio/apps/ai_crawler.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,16 @@ def crawl(
3131
self,
3232
url: str,
3333
user_prompt: str,
34-
output_format: Literal["json", "markdown"] = "markdown",
34+
output_format: Literal["json", "markdown", "csv"] = "markdown",
3535
schema: dict[str, Any] | None = None,
3636
render_javascript: bool = False,
3737
return_sources_limit: int = 25,
3838
geo_location: str | None = None,
3939
) -> AiCrawlerJob:
40-
if output_format == "json" and schema is None:
41-
raise ValueError("openapi_schema is required when output_format is json")
40+
if output_format in ["json", "csv"] and schema is None:
41+
raise ValueError(
42+
"openapi_schema is required when output_format is json or csv.",
43+
)
4244

4345
body = {
4446
"domain": url,
@@ -119,15 +121,17 @@ async def crawl_async(
119121
self,
120122
url: str,
121123
user_prompt: str = "",
122-
output_format: Literal["json", "markdown"] = "markdown",
124+
output_format: Literal["json", "markdown", "csv"] = "markdown",
123125
schema: dict[str, Any] | None = None,
124126
render_javascript: bool = False,
125127
return_sources_limit: int = 25,
126128
geo_location: str | None = None,
127129
) -> AiCrawlerJob:
128130
"""Async version of crawl."""
129-
if output_format == "json" and schema is None:
130-
raise ValueError("openapi_schema is required when output_format is json")
131+
if output_format in ["json", "csv"] and schema is None:
132+
raise ValueError(
133+
"openapi_schema is required when output_format is json or csv.",
134+
)
131135

132136
body = {
133137
"domain": url,

src/oxylabs_ai_studio/apps/ai_scraper.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,15 @@ def __init__(self, api_key: str | None = None):
3131
def scrape(
3232
self,
3333
url: str,
34-
output_format: Literal["json", "markdown"] = "markdown",
34+
output_format: Literal["json", "markdown", "csv", "screenshot"] = "markdown",
3535
schema: dict[str, Any] | None = None,
3636
render_javascript: bool = False,
3737
geo_location: str | None = None,
3838
) -> AiScraperJob:
39-
if output_format == "json" and schema is None:
40-
raise ValueError("openapi_schema is required when output_format is json")
39+
if output_format in ["json", "csv"] and schema is None:
40+
raise ValueError(
41+
"openapi_schema is required when output_format is json or csv.",
42+
)
4143

4244
body = {
4345
"url": url,
@@ -117,14 +119,16 @@ def generate_schema(self, prompt: str) -> dict[str, Any] | None:
117119
async def scrape_async(
118120
self,
119121
url: str,
120-
output_format: Literal["json", "markdown"] = "markdown",
122+
output_format: Literal["json", "markdown", "csv", "screenshot"] = "markdown",
121123
schema: dict[str, Any] | None = None,
122124
render_javascript: bool = False,
123125
geo_location: str | None = None,
124126
) -> AiScraperJob:
125127
"""Async version of scrape."""
126-
if output_format == "json" and schema is None:
127-
raise ValueError("openapi_schema is required when output_format is json")
128+
if output_format in ["json", "csv"] and schema is None:
129+
raise ValueError(
130+
"openapi_schema is required when output_format is json or csv.",
131+
)
128132

129133
body = {
130134
"url": url,

src/oxylabs_ai_studio/apps/browser_agent.py

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717

1818
class DataModel(BaseModel):
19-
type: Literal["json", "markdown", "html", "screenshot"]
19+
type: Literal["json", "markdown", "html", "screenshot", "csv"]
2020
content: dict[str, Any] | str | None
2121

2222

@@ -34,12 +34,16 @@ def run(
3434
self,
3535
url: str,
3636
user_prompt: str = "",
37-
output_format: Literal["json", "markdown", "html", "screenshot"] = "markdown",
37+
output_format: Literal[
38+
"json", "markdown", "html", "screenshot", "csv"
39+
] = "markdown",
3840
schema: dict[str, Any] | None = None,
3941
geo_location: str | None = None,
4042
) -> BrowserAgentJob:
41-
if output_format == "json" and schema is None:
42-
raise ValueError("openapi_schema is required when output_format is json")
43+
if output_format in ["json", "csv"] and schema is None:
44+
raise ValueError(
45+
"openapi_schema is required when output_format is json or csv.",
46+
)
4347

4448
body = {
4549
"url": url,
@@ -115,13 +119,21 @@ async def run_async(
115119
self,
116120
url: str,
117121
user_prompt: str = "",
118-
output_format: Literal["json", "markdown", "html", "screenshot"] = "markdown",
122+
output_format: Literal[
123+
"json",
124+
"markdown",
125+
"html",
126+
"screenshot",
127+
"csv",
128+
] = "markdown",
119129
schema: dict[str, Any] | None = None,
120130
geo_location: str | None = None,
121131
) -> BrowserAgentJob:
122132
"""Async version of run."""
123-
if output_format == "json" and schema is None:
124-
raise ValueError("openapi_schema is required when output_format is json")
133+
if output_format in ["json", "csv"] and schema is None:
134+
raise ValueError(
135+
"openapi_schema is required when output_format is json or csv.",
136+
)
125137

126138
body = {
127139
"url": url,

0 commit comments

Comments
 (0)