Skip to content

Commit e8fe07c

Browse files
committed
feat(wait): add idle and almost idle network
1 parent 664e8b0 commit e8fe07c

File tree

11 files changed

+95
-10
lines changed

11 files changed

+95
-10
lines changed

cli/Cargo.lock

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

cli/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "spider-cloud-cli"
3-
version = "0.1.80"
3+
version = "0.1.82"
44
edition = "2021"
55
authors = [ "j-mendez <jeff@spider.cloud>"]
66
description = "The Spider Cloud CLI for web crawling and scraping"

javascript/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@spider-cloud/spider-client",
3-
"version": "0.1.80",
3+
"version": "0.1.82",
44
"description": "Isomorphic Javascript SDK for Spider Cloud services",
55
"scripts": {
66
"test": "node --import tsx --test __tests__/*test.ts",

javascript/src/config.ts

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,13 +112,17 @@ interface Delay {
112112
/**
113113
* Represents the wait_for configuration.
114114
* @typedef {Object} WaitFor
115-
* @property {IdleNetwork} [idle_network] - Configuration to wait for network to be idle.
115+
* @property {IdleNetwork} [idle_network] - Configuration to wait for the network to be idle for a period.
116+
* @property {IdleNetwork} [idle_network0] - Configuration to wait for the network to be idle, with a max timeout.
117+
* @property {IdleNetwork} [almost_idle_network0] - Configuration to wait for the network to be almost idle, with a max timeout.
116118
* @property {Selector} [selector] - Configuration to wait for a CSS selector.
117119
* @property {Delay} [delay] - Configuration to wait for a delay.
118120
* @property {boolean} [page_navigations] - Whether to wait for page navigations.
119121
*/
120122
export interface WaitForConfiguration {
121123
idle_network?: IdleNetwork;
124+
idle_network0?: IdleNetwork;
125+
almost_idle_network0?: IdleNetwork;
122126
selector?: Selector;
123127
dom?: Selector;
124128
delay?: Delay;
@@ -310,6 +314,32 @@ export type Proxy =
310314
| "mobile"
311315
| "isp";
312316

317+
export type LinkRewriteReplace = {
318+
type: "replace";
319+
/**
320+
* Only apply when the link's host matches this value.
321+
* Optional key; null means "no host filter".
322+
*/
323+
host?: string | null;
324+
find: string;
325+
replace_with: string;
326+
};
327+
328+
// Link rewrite regex.
329+
export type LinkRewriteRegex = {
330+
type: "regex";
331+
/**
332+
* Only apply when the link's host matches this value.
333+
* Optional key; null means "no host filter".
334+
*/
335+
host?: string | null;
336+
pattern: string;
337+
replace_with: string;
338+
};
339+
340+
// The link rewrite rule.
341+
export type LinkRewriteRule = LinkRewriteReplace | LinkRewriteRegex;
342+
313343
/**
314344
* Represents the options available for making a spider request.
315345
*/
@@ -424,6 +454,8 @@ export interface SpiderParams {
424454
*/
425455
webhooks?: WebhookSettings;
426456

457+
link_rewrite?: LinkRewriteRule
458+
427459
/**
428460
* Specifies whether to use fingerprinting protection.
429461
*/

python/setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ def read_file(fname):
88

99
setup(
1010
name="spider_client",
11-
version="0.1.80",
11+
version="0.1.82",
1212
url="https://github.com/spider-rs/spider-clients/tree/main/python",
1313
license="MIT",
1414
author="Spider",

python/spider/async_spider.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,7 @@ def _prepare_headers(
267267
return {
268268
"Content-Type": content_type,
269269
"Authorization": f"Bearer {self.api_key}",
270-
"User-Agent": "AsyncSpider-Client/0.1.80",
270+
"User-Agent": "AsyncSpider-Client/0.1.82",
271271
}
272272

273273
async def _handle_error(self, response: ClientResponse, action: str) -> None:

python/spider/spider.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,7 @@ def _prepare_headers(self, content_type: str = "application/json"):
280280
return {
281281
"Content-Type": content_type,
282282
"Authorization": f"Bearer {self.api_key}",
283-
"User-Agent": f"Spider-Client/0.1.80",
283+
"User-Agent": f"Spider-Client/0.1.82",
284284
}
285285

286286
def _post_request(self, url: str, data, headers, stream=False):

python/spider/spider_types.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,8 @@ class DelayDict(TypedDict):
170170

171171
class WaitForDict(TypedDict, total=False):
172172
idle_network: Optional[IdleNetworkDict]
173+
idle_network0: Optional[IdleNetworkDict]
174+
almost_idle_network0: Optional[IdleNetworkDict]
173175
selector: Optional[SelectorDict]
174176
dom: Optional[SelectorDict]
175177
delay: Optional[DelayDict]
@@ -218,6 +220,23 @@ class Proxy(str, Enum):
218220
residential_core = "residential_core" # Balanced core plan
219221
residential_plus = "residential_plus" # Extended core pool
220222

223+
class LinkRewriteReplace(TypedDict):
224+
type: Literal["replace"]
225+
host: Optional[str]
226+
find: str
227+
replace_with: str
228+
229+
230+
class LinkRewriteRegex(TypedDict):
231+
type: Literal["regex"]
232+
host: Optional[str]
233+
pattern: str
234+
replace_with: str
235+
236+
237+
LinkRewriteRule = Union[LinkRewriteReplace, LinkRewriteRegex]
238+
239+
221240
class RequestParamsDict(TypedDict, total=False):
222241
# The URL to be crawled.
223242
url: Optional[str]
@@ -281,6 +300,9 @@ class RequestParamsDict(TypedDict, total=False):
281300
# The user agent string to be used for the request.
282301
user_agent: Optional[str]
283302

303+
# URL rewrite rule applied to every discovered link before it's crawled.
304+
link_rewrite: Optional[LinkRewriteRule]
305+
284306
# The two letter country code for the request geo-location.
285307
country_code: Optional[str]
286308

rust/Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

rust/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "spider-client"
3-
version = "0.1.80"
3+
version = "0.1.82"
44
edition = "2021"
55
authors = [ "j-mendez <jeff@spider.cloud>"]
66
description = "Spider Cloud client"

0 commit comments

Comments
 (0)