@@ -9,7 +9,7 @@
 from oxylabs_ai_studio.models import SchemaResponse
 
 CRAWLER_TIMEOUT_SECONDS = 60 * 10
-POLL_INTERVAL_SECONDS = 3
+POLL_INTERVAL_SECONDS = 5
 POLL_MAX_ATTEMPTS = CRAWLER_TIMEOUT_SECONDS // POLL_INTERVAL_SECONDS
 
 logger = get_logger(__name__)
@@ -51,7 +51,9 @@ def crawl(
             "geo_location": geo_location,
         }
         client = self.get_client()
-        create_response = client.post(url="/extract/run", json=body)
+        create_response = self.call_api(
+            client=client, url="/extract/run", method="POST", body=body
+        )
         if create_response.status_code != 200:
             raise Exception(
                 f"Failed to create crawl job for {url}: {create_response.text}"
@@ -61,14 +63,22 @@ def crawl(
         logger.info(f"Starting crawl for url: {url}. Job id: {run_id}.")
         try:
             for _ in range(POLL_MAX_ATTEMPTS):
-                get_response = client.get(
-                    "/extract/run/data", params={"run_id": run_id}
-                )
+                try:
+                    get_response = self.call_api(
+                        client=client,
+                        url="/extract/run/data",
+                        method="GET",
+                        params={"run_id": run_id},
+                    )
+                except Exception:
+                    time.sleep(POLL_INTERVAL_SECONDS)
+                    continue
                 if get_response.status_code == 202:
                     time.sleep(POLL_INTERVAL_SECONDS)
                     continue
                 if get_response.status_code != 200:
-                    raise Exception(f"Failed to crawl {url}: {get_response.text}")
+                    time.sleep(POLL_INTERVAL_SECONDS)
+                    continue
                 resp_body = get_response.json()
                 if resp_body["status"] == "processing":
                     time.sleep(POLL_INTERVAL_SECONDS)
@@ -80,7 +90,11 @@ def crawl(
                         data=resp_body["data"],
                     )
                 if resp_body["status"] == "failed":
-                    raise Exception(f"Failed to crawl {url}.")
+                    return AiCrawlerJob(
+                        run_id=run_id,
+                        message=resp_body.get("error_code", None),
+                        data=None,
+                    )
                 time.sleep(POLL_INTERVAL_SECONDS)
         except KeyboardInterrupt:
             logger.info("[Cancelled] Crawling was cancelled by user.")
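
Note on the sync polling change above: transient request exceptions and non-200 responses no longer abort the crawl; the loop sleeps for POLL_INTERVAL_SECONDS and retries until POLL_MAX_ATTEMPTS is exhausted. A minimal standalone sketch of the same pattern — `fetch_status` is a hypothetical stand-in for the SDK's `call_api`, returning an httpx-style response:

```python
import time
from typing import Any, Callable

POLL_INTERVAL_SECONDS = 5
POLL_MAX_ATTEMPTS = (60 * 10) // POLL_INTERVAL_SECONDS


def poll_until_done(fetch_status: Callable[[], Any]) -> dict[str, Any] | None:
    """Poll a job endpoint, treating transient errors as retryable."""
    for _ in range(POLL_MAX_ATTEMPTS):
        try:
            response = fetch_status()  # stand-in for self.call_api(...)
        except Exception:
            time.sleep(POLL_INTERVAL_SECONDS)  # network blip: wait and retry
            continue
        if response.status_code != 200:  # 202 = still queued; other codes retried too
            time.sleep(POLL_INTERVAL_SECONDS)
            continue
        body = response.json()
        if body["status"] != "processing":
            return body  # terminal state: "completed" or "failed"
        time.sleep(POLL_INTERVAL_SECONDS)
    return None  # timed out after CRAWLER_TIMEOUT_SECONDS
```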
@@ -90,7 +104,12 @@ def crawl(
     def generate_schema(self, prompt: str) -> dict[str, Any] | None:
         logger.info("Generating schema")
         body = {"user_prompt": prompt}
-        response = self.get_client().post(url="/extract/generate-params", json=body)
+        response = self.call_api(
+            client=self.get_client(),
+            url="/extract/generate-params",
+            method="POST",
+            body=body,
+        )
         if response.status_code != 200:
             raise Exception(f"Failed to generate schema: {response.text}")
         json_response: SchemaResponse = response.json()
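
Both sync call sites now go through `self.call_api` rather than invoking the httpx client directly. The wrapper's implementation is not part of this diff; assuming it only dispatches on the HTTP method and returns the raw response, it might look roughly like:

```python
import httpx


def call_api(
    client: httpx.Client,
    url: str,
    method: str,
    body: dict | None = None,
    params: dict | None = None,
) -> httpx.Response:
    # Hypothetical sketch only: the real call_api may add retries, timeouts,
    # or auth headers that this diff does not show.
    if method == "POST":
        return client.post(url=url, json=body)
    return client.get(url, params=params)
```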
@@ -121,7 +140,9 @@ async def crawl_async(
             "geo_location": geo_location,
         }
         async with self.async_client() as client:
-            create_response = await client.post(url="/extract/run", json=body)
+            create_response = await self.call_api_async(
+                client=client, url="/extract/run", method="POST", body=body
+            )
             if create_response.status_code != 200:
                 raise Exception(
                     f"Failed to create crawl job for {url}: {create_response.text}"
@@ -131,14 +152,22 @@ async def crawl_async(
             logger.info(f"Starting async crawl for url: {url}. Job id: {run_id}.")
             try:
                 for _ in range(POLL_MAX_ATTEMPTS):
-                    get_response = await client.get(
-                        "/extract/run/data", params={"run_id": run_id}
-                    )
+                    try:
+                        get_response = await self.call_api_async(
+                            client=client,
+                            url="/extract/run/data",
+                            method="GET",
+                            params={"run_id": run_id},
+                        )
+                    except Exception:
+                        await asyncio.sleep(POLL_INTERVAL_SECONDS)
+                        continue
                     if get_response.status_code == 202:
                         await asyncio.sleep(POLL_INTERVAL_SECONDS)
                         continue
                     if get_response.status_code != 200:
-                        raise Exception(f"Failed to crawl {url}: {get_response.text}")
+                        await asyncio.sleep(POLL_INTERVAL_SECONDS)
+                        continue
                     resp_body = get_response.json()
                     if resp_body["status"] == "processing":
                         await asyncio.sleep(POLL_INTERVAL_SECONDS)
@@ -150,7 +179,11 @@ async def crawl_async(
                             data=resp_body["data"],
                         )
                     if resp_body["status"] == "failed":
-                        raise Exception(f"Failed to crawl {url}.")
+                        return AiCrawlerJob(
+                            run_id=run_id,
+                            message=resp_body.get("error_code", None),
+                            data=None,
+                        )
                     await asyncio.sleep(POLL_INTERVAL_SECONDS)
             except KeyboardInterrupt:
                 logger.info("[Cancelled] Crawling was cancelled by user.")
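
The async variant mirrors the sync flow, swapping `time.sleep` for `await asyncio.sleep` so the event loop stays responsive between polls. A usage sketch — the `AiCrawler` class name, import path, and constructor are assumptions, since only `crawl_async` appears in this diff:

```python
import asyncio

from oxylabs_ai_studio.apps.ai_crawler import AiCrawler  # assumed import path


async def main() -> None:
    crawler = AiCrawler(api_key="YOUR_API_KEY")  # hypothetical constructor
    job = await crawler.crawl_async(
        url="https://example.com",
        user_prompt="Extract product names and prices",  # assumed parameter
    )
    # With this change, a failed run returns a job object instead of raising.
    print(job.data if job.data is not None else f"Failed: {job.message}")


asyncio.run(main())
```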
@@ -162,7 +195,9 @@ async def generate_schema_async(self, prompt: str) -> dict[str, Any] | None:
         logger.info("Generating schema (async)")
         body = {"user_prompt": prompt}
         async with self.async_client() as client:
-            response = await client.post(url="/extract/generate-params", json=body)
+            response = await self.call_api_async(
+                client=client, url="/extract/generate-params", method="POST", body=body
+            )
             if response.status_code != 200:
                 raise Exception(f"Failed to generate schema: {response.text}")
             json_response: SchemaResponse = response.json()
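
Callers should note the behavioural change in both `crawl` and `crawl_async`: a run that the backend marks `failed` no longer raises, but returns an `AiCrawlerJob` with `data=None` and the backend's `error_code` in `message`. Code that previously wrapped the call in `try/except` would now check the result instead, roughly:

```python
# Hypothetical caller-side handling; `crawler` as in the async sketch above.
job = crawler.crawl(url="https://example.com")
if job.data is not None:
    handle_results(job.data)  # placeholder for the caller's own processing
else:
    print(f"Crawl {job.run_id} failed: {job.message}")
```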