Skip to content

Commit bea07c1

Browse files
authored
Merge pull request #2 from oxylabs/feature/ai-map
Add ai-map app
2 parents d499d42 + af02d85 commit bea07c1

File tree

4 files changed

+177
-1
lines changed

4 files changed

+177
-1
lines changed

examples/map.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
"""Example: map job-ad pages on career.oxylabs.io with the AI Map app."""

from oxylabs_ai_studio.apps.ai_map import AiMap

# Named ``ai_map`` rather than ``map`` so the builtin ``map`` stays usable.
ai_map = AiMap(api_key="<API_KEY>")

# Keyword arguments forwarded unchanged to ``AiMap.map``.
payload = {
    "url": "https://career.oxylabs.io",
    "user_prompt": "job ad pages",
    "return_sources_limit": 10,
    "max_depth": 1,
    "geo_location": None,
    "render_javascript": False,
}
result = ai_map.map(**payload)
print(result.data)

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "oxylabs-ai-studio"
3-
version = "0.2.9"
3+
version = "0.2.10"
44
description = "Oxylabs studio python sdk"
55
readme = "README.md"
66
keywords = ["oxylabs", "ai", "studio"]

readme.md

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,5 +130,29 @@ print(result.data)
130130
- `return_content` (bool): Whether to return markdown contents in results (default: True)
131131
- `geo_location` (str): search proxy location in ISO2 format.
132132

133+
### Map (`AiMap.map`)
134+
```python
135+
from oxylabs_ai_studio.apps.ai_map import AiMap

# Named ``ai_map`` rather than ``map`` so the builtin ``map`` stays usable.
ai_map = AiMap(api_key="<API_KEY>")
payload = {
    "url": "https://career.oxylabs.io",
    "user_prompt": "job ad pages",
    "return_sources_limit": 10,
    "max_depth": 1,
    "geo_location": None,
    "render_javascript": False,
}
result = ai_map.map(**payload)
print(result.data)
149+
```
150+
**Parameters:**
151+
- `url` (str): Starting URL to crawl (**required**)
152+
- `user_prompt` (str): Natural language prompt to guide extraction (**required**)
153+
- `render_javascript` (bool): Render JavaScript (default: False)
154+
- `return_sources_limit` (int): Max number of sources to return (default: 25)
- `max_depth` (int): Maximum crawl depth from the starting URL (default: 3)
155+
- `geo_location` (str): Proxy location in ISO2 format (default: None).
156+
133157
---
134158
See the [examples](https://github.com/oxylabs/oxylabs-ai-studio-py/tree/main/examples) folder for usage examples of each method. Each method has a corresponding async version.
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
import asyncio
2+
import time
3+
from typing import Any
4+
5+
import httpx
6+
from pydantic import BaseModel
7+
8+
from oxylabs_ai_studio.client import OxyStudioAIClient
9+
from oxylabs_ai_studio.logger import get_logger
10+
11+
# Maximum number of status polls per run; once exhausted, map/map_async raise TimeoutError.
POLL_MAX_ATTEMPTS = 10
12+
# Seconds to sleep between two consecutive status polls.
POLL_INTERVAL_SECONDS = 3
13+
14+
# Module-level logger; used below to report that the user cancelled a mapping run.
logger = get_logger(__name__)
15+
16+
17+
# Result object built by AiMap.map and AiMap.map_async once a run reports "completed".
class AiMapJob(BaseModel):
18+
# Run identifier, taken from the "run_id" field of the job-creation response.
run_id: str
19+
# Optional "message" field of the final status response; None when the field is absent.
message: str | None = None
20+
# Payload fetched from /map/run/data. Declared without a default value.
data: dict[str, Any] | str | None
21+
22+
23+
class AiMap(OxyStudioAIClient):
    """AI Map app client.

    Creates a map job with ``POST /map``, polls ``GET /map/run`` until the
    run reports ``completed`` or ``failed``, and then downloads the result
    from ``GET /map/run/data``. ``map`` is the blocking entry point and
    ``map_async`` is its coroutine counterpart.

    NOTE(review): ``self.client`` and ``self.async_client()`` are provided by
    ``OxyStudioAIClient``; they are assumed to be httpx-style clients already
    configured with the base URL and API key -- confirm against the base class.
    """

    def __init__(self, api_key: str):
        """Initialise the underlying client with ``api_key``."""
        super().__init__(api_key=api_key)

    @staticmethod
    def _build_body(
        url: str,
        user_prompt: str,
        return_sources_limit: int,
        max_depth: int,
        geo_location: str | None,
        render_javascript: bool,
    ) -> dict[str, Any]:
        """Return the JSON payload shared by ``map`` and ``map_async``."""
        return {
            "url": url,
            "user_prompt": user_prompt,
            "return_sources_limit": return_sources_limit,
            "max_depth": max_depth,
            "geo_location": geo_location,
            # The request field is called "render_html"; the SDK exposes it
            # to callers as ``render_javascript``.
            "render_html": render_javascript,
        }

    def map(
        self,
        url: str,
        user_prompt: str,
        return_sources_limit: int = 25,
        max_depth: int = 3,
        geo_location: str | None = None,
        render_javascript: bool = False,
    ) -> AiMapJob:
        """Run a map job synchronously and return its result.

        Args:
            url: Starting URL to crawl.
            user_prompt: Natural language prompt describing the wanted pages.
            return_sources_limit: Max number of sources to return.
            max_depth: Depth limit sent to the API as ``max_depth``.
            geo_location: Proxy location in ISO2 format, or ``None``.
            render_javascript: Whether JavaScript should be rendered.

        Returns:
            An ``AiMapJob`` holding the run id, the optional status message
            and the data fetched for the completed run.

        Raises:
            Exception: If creating the job or polling it returns a non-200
                response, or the run reports the ``failed`` status.
            TimeoutError: If the run has not finished after
                ``POLL_MAX_ATTEMPTS`` polls.
            KeyboardInterrupt: Re-raised (after logging) when the user
                interrupts the polling loop.
        """
        body = self._build_body(
            url=url,
            user_prompt=user_prompt,
            return_sources_limit=return_sources_limit,
            max_depth=max_depth,
            geo_location=geo_location,
            render_javascript=render_javascript,
        )
        create_response = self.client.post(url="/map", json=body)
        if create_response.status_code != 200:
            raise Exception(
                f"Failed to create map job for {url}: {create_response.text}"
            )
        run_id = create_response.json()["run_id"]
        try:
            for _ in range(POLL_MAX_ATTEMPTS):
                get_response = self.client.get("/map/run", params={"run_id": run_id})
                if get_response.status_code != 200:
                    raise Exception(f"Failed to map {url}: {get_response.text}")
                status_body = get_response.json()
                if status_body["status"] == "completed":
                    return AiMapJob(
                        run_id=run_id,
                        message=status_body.get("message", None),
                        data=self._get_data(run_id=run_id),
                    )
                if status_body["status"] == "failed":
                    raise Exception(f"Failed to map {url}.")
                # Any other status means the run is still in progress.
                time.sleep(POLL_INTERVAL_SECONDS)
        except KeyboardInterrupt:
            logger.info("[Cancelled] Mapping was cancelled by user.")
            # ``from None`` hides the chained context in the traceback.
            raise KeyboardInterrupt from None
        # Other exceptions propagate unchanged; no catch-and-reraise needed.
        raise TimeoutError(f"Failed to map {url}: timeout.")

    def _get_data(self, run_id: str) -> dict[str, Any]:
        """Fetch the result payload of run ``run_id``.

        Returns the ``"data"`` field of the ``/map/run/data`` response, or an
        empty dict when that field is missing or falsy.

        Raises:
            Exception: If the endpoint answers with a non-200 status code.
        """
        get_response = self.client.get("/map/run/data", params={"run_id": run_id})
        if get_response.status_code != 200:
            raise Exception(f"Failed to get data for run {run_id}: {get_response.text}")
        return get_response.json().get("data", {}) or {}

    async def map_async(
        self,
        url: str,
        user_prompt: str,
        return_sources_limit: int = 25,
        max_depth: int = 3,
        geo_location: str | None = None,
        render_javascript: bool = False,
    ) -> AiMapJob:
        """Run a map job without blocking the event loop.

        Takes the same arguments, returns the same ``AiMapJob`` and raises
        the same exceptions as ``map``; requests go through an async client
        and the wait between polls uses ``asyncio.sleep``.
        """
        body = self._build_body(
            url=url,
            user_prompt=user_prompt,
            return_sources_limit=return_sources_limit,
            max_depth=max_depth,
            geo_location=geo_location,
            render_javascript=render_javascript,
        )
        async with self.async_client() as client:
            create_response = await client.post(url="/map", json=body)
            if create_response.status_code != 200:
                raise Exception(
                    f"Failed to create map job for {url}: {create_response.text}"
                )
            run_id = create_response.json()["run_id"]
            try:
                for _ in range(POLL_MAX_ATTEMPTS):
                    get_response = await client.get(
                        "/map/run", params={"run_id": run_id}
                    )
                    if get_response.status_code != 200:
                        raise Exception(f"Failed to map {url}: {get_response.text}")
                    status_body = get_response.json()
                    if status_body["status"] == "completed":
                        data = await self.get_data_async(client, run_id=run_id)
                        return AiMapJob(
                            run_id=run_id,
                            message=status_body.get("message", None),
                            data=data,
                        )
                    if status_body["status"] == "failed":
                        raise Exception(f"Failed to map {url}.")
                    # Any other status means the run is still in progress.
                    await asyncio.sleep(POLL_INTERVAL_SECONDS)
            except KeyboardInterrupt:
                logger.info("[Cancelled] Mapping was cancelled by user.")
                # ``from None`` hides the chained context in the traceback.
                raise KeyboardInterrupt from None
            # Other exceptions propagate unchanged; no catch-and-reraise needed.
            raise TimeoutError(f"Failed to map {url}: timeout.")

    async def get_data_async(
        self, client: httpx.AsyncClient, run_id: str
    ) -> dict[str, Any]:
        """Async counterpart of ``_get_data`` using the caller's ``client``.

        Returns the ``"data"`` field of the ``/map/run/data`` response, or an
        empty dict when that field is missing or falsy.

        Raises:
            Exception: If the endpoint answers with a non-200 status code.
        """
        get_response = await client.get("/map/run/data", params={"run_id": run_id})
        if get_response.status_code != 200:
            raise Exception(f"Failed to get data for run {run_id}: {get_response.text}")
        return get_response.json().get("data", {}) or {}

0 commit comments

Comments
 (0)