Skip to content

Commit 775d3ca

Browse files
committed
Make list methods of clients iterable
1 parent ff9817c commit 775d3ca

36 files changed

Lines changed: 1583 additions & 653 deletions

docs/02_concepts/08_pagination.mdx

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,10 @@ import ApiLink from '@site/src/components/ApiLink';
1212

1313
import PaginationAsyncExample from '!!raw-loader!./code/08_pagination_async.py';
1414
import PaginationSyncExample from '!!raw-loader!./code/08_pagination_sync.py';
15-
1615
import IterateItemsAsyncExample from '!!raw-loader!./code/08_iterate_items_async.py';
1716
import IterateItemsSyncExample from '!!raw-loader!./code/08_iterate_items_sync.py';
1817

18+
1919
Most methods named `list` or `list_something` in the Apify client return a <ApiLink to="class/ListPage">`ListPage`</ApiLink> object. This object provides a consistent interface for working with paginated data and includes the following properties:
2020

2121
- `items` - The main results you're looking for.
@@ -45,7 +45,7 @@ The <ApiLink to="class/ListPage">`ListPage`</ApiLink> interface offers several k
4545

4646
## Generator-based iteration
4747

48-
For most use cases, `iterate_items()` is the recommended way to process all items in a dataset. It handles pagination automatically using a Python generator, fetching items in batches behind the scenes so you don't need to manage offsets or limits yourself.
48+
You can also iterate directly over the return value of the `list` methods. The returned object handles pagination automatically, fetching items in batches behind the scenes so you don't need to manage offsets or limits yourself.
4949

5050
<Tabs>
5151
<TabItem value="AsyncExample" label="Async client" default>
@@ -60,6 +60,4 @@ For most use cases, `iterate_items()` is the recommended way to process all item
6060
</TabItem>
6161
</Tabs>
6262

63-
`iterate_items()` accepts the same filtering parameters as `list_items()` (`clean`, `fields`, `omit`, `unwind`, `skip_empty`, `skip_hidden`), so you can combine automatic pagination with data filtering.
64-
65-
Similarly, `KeyValueStoreClient` provides an `iterate_keys()` method for iterating over all keys in a key-value store without manual pagination.
63+
Similarly, you can iterate over the return value of `KeyValueStoreClient.list_keys()` to go through all keys in a key-value store without manual pagination. The older `iterate_keys()` method is deprecated.

docs/02_concepts/code/08_iterate_items_async.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,11 @@ async def main() -> None:
77
apify_client = ApifyClientAsync(TOKEN)
88
dataset_client = apify_client.dataset('dataset-id')
99

10-
# Iterate through all items automatically.
11-
async for item in dataset_client.iterate_items():
12-
print(item)
10+
# Define the pagination parameters
11+
limit = 1500 # Number of items in total
12+
offset = 100 # Starting offset
13+
14+
# Iterate through items automatically, lazily sending as many API calls
15+
# as needed and receiving items in chunks.
16+
async for item in dataset_client.list_items(limit=limit, offset=offset):
17+
print(item) # Process the item as needed

docs/02_concepts/code/08_iterate_items_sync.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,14 @@ def main() -> None:
77
apify_client = ApifyClient(TOKEN)
88
dataset_client = apify_client.dataset('dataset-id')
99

10-
# Iterate through all items automatically.
11-
for item in dataset_client.iterate_items():
12-
print(item)
10+
# Define the pagination parameters
11+
limit = 1500 # Number of items in total
12+
offset = 100 # Starting offset
13+
14+
# Iterate through items automatically, lazily sending as many API calls
15+
# as needed and receiving items in chunks.
16+
for item in dataset_client.list_items(limit=limit, offset=offset):
17+
print(item) # Process the item as needed
1318

1419

1520
if __name__ == '__main__':

docs/02_concepts/code/08_pagination_async.py

Lines changed: 8 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -10,26 +10,15 @@ async def main() -> None:
1010
dataset_client = apify_client.dataset('dataset-id')
1111

1212
# Define the pagination parameters
13-
limit = 1000 # Number of items per page
13+
limit = 1000 # Number of items to request from the API
1414
offset = 0 # Starting offset
15-
all_items = [] # List to store all fetched items
1615

17-
while True:
18-
# Fetch a page of items
19-
response = await dataset_client.list_items(limit=limit, offset=offset)
20-
items = response.items
21-
total = response.total
16+
# Send a single API call to fetch one page of items.
17+
# (the number of items returned by a single call can be limited by the API)
18+
paginated_items = await dataset_client.list_items(limit=limit, offset=offset)
2219

23-
print(f'Fetched {len(items)} items')
20+
# Inspect pagination metadata returned by API
21+
print(paginated_items.total)
2422

25-
# Add the fetched items to the complete list
26-
all_items.extend(items)
27-
28-
# Exit the loop if there are no more items to fetch
29-
if offset + limit >= total:
30-
break
31-
32-
# Increment the offset for the next page
33-
offset += limit
34-
35-
print(f'Overall fetched {len(all_items)} items')
23+
for item in paginated_items.items:
24+
print(item) # Process the item as needed

docs/02_concepts/code/08_pagination_sync.py

Lines changed: 8 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -10,26 +10,15 @@ def main() -> None:
1010
dataset_client = apify_client.dataset('dataset-id')
1111

1212
# Define the pagination parameters
13-
limit = 1000 # Number of items per page
13+
limit = 1000 # Number of items to request from the API
1414
offset = 0 # Starting offset
15-
all_items = [] # List to store all fetched items
1615

17-
while True:
18-
# Fetch a page of items
19-
response = dataset_client.list_items(limit=limit, offset=offset)
20-
items = response.items
21-
total = response.total
16+
# Send a single API call to fetch one page of items.
17+
# (the number of items returned by a single call can be limited by the API)
18+
paginated_items = dataset_client.list_items(limit=limit, offset=offset)
2219

23-
print(f'Fetched {len(items)} items')
20+
# Inspect pagination metadata returned by API
21+
print(paginated_items.total)
2422

25-
# Add the fetched items to the complete list
26-
all_items.extend(items)
27-
28-
# Exit the loop if there are no more items to fetch
29-
if offset + limit >= total:
30-
break
31-
32-
# Increment the offset for the next page
33-
offset += limit
34-
35-
print(f'Overall fetched {len(all_items)} items')
23+
for item in paginated_items.items:
24+
print(item) # Process the item as needed

scripts/_utils.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
(re.compile(r'\bSynchronous\b'), 'Asynchronous'),
2828
(re.compile(r'Retry a function'), 'Retry an async function'),
2929
(re.compile(r'Function to retry'), 'Async function to retry'),
30+
(re.compile(r'returned page also supports iteration: `for'), 'returned page also supports iteration: `async for'),
3031
]
3132
"""Patterns for converting sync docstrings to async docstrings."""
3233

src/apify_client/_resource_clients/actor_collection.py

Lines changed: 67 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,25 @@
1010
CreateActorRequest,
1111
DefaultRunOptions,
1212
ExampleRunInput,
13-
ListOfActors,
1413
ListOfActorsResponse,
1514
)
15+
from apify_client._pagination import (
16+
_LazyTask,
17+
build_get_iterator,
18+
build_get_iterator_async,
19+
)
20+
from apify_client._pagination_classes import (
21+
IterablePageOfActors,
22+
IterablePageOfActorsAsync,
23+
PageOfItems,
24+
)
1625
from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync
1726
from apify_client._utils import to_seconds
1827

1928
if TYPE_CHECKING:
2029
from datetime import timedelta
2130

31+
from apify_client._models_generated import ActorShort
2232
from apify_client._types import Timeout
2333

2434
_SORT_BY_TO_API: dict[str, str] = {
@@ -55,9 +65,12 @@ def list(
5565
desc: bool | None = None,
5666
sort_by: Literal['created_at', 'last_run_started_at'] | None = 'created_at',
5767
timeout: Timeout = 'medium',
58-
) -> ListOfActors:
68+
) -> IterablePageOfActors:
5969
"""List the Actors the user has created or used.
6070
71+
The returned page also supports iteration: `for item in client.list(...)` yields individual Actors
72+
and transparently fetches further pages from the API.
73+
6174
https://docs.apify.com/api/v2#/reference/actors/actor-collection/get-list-of-actors
6275
6376
Args:
@@ -72,8 +85,31 @@ def list(
7285
The list of available Actors matching the specified filters.
7386
"""
7487
api_sort_by = _SORT_BY_TO_API[sort_by] if sort_by is not None else None
75-
result = self._list(timeout=timeout, my=my, limit=limit, offset=offset, desc=desc, sortBy=api_sort_by)
76-
return ListOfActorsResponse.model_validate(result).data
88+
89+
def _callback(**kwargs: Any) -> PageOfItems[ActorShort]:
90+
result = self._list(timeout=timeout, my=my, sortBy=api_sort_by, **kwargs)
91+
data = ListOfActorsResponse.model_validate(result).data
92+
return PageOfItems(
93+
items=data.items,
94+
count=data.count,
95+
limit=data.limit,
96+
total=data.total,
97+
offset=data.offset,
98+
desc=data.desc,
99+
)
100+
101+
first_page = _callback(limit=limit, offset=offset, desc=desc)
102+
get_iterator = build_get_iterator(_callback, first_page, limit=limit, offset=offset, desc=desc)
103+
104+
return IterablePageOfActors(
105+
_get_iterator=get_iterator,
106+
items=first_page.items,
107+
count=first_page.count,
108+
limit=first_page.limit,
109+
total=first_page.total,
110+
offset=first_page.offset,
111+
desc=first_page.desc,
112+
)
77113

78114
def create(
79115
self,
@@ -192,7 +228,7 @@ def __init__(
192228
**kwargs,
193229
)
194230

195-
async def list(
231+
def list(
196232
self,
197233
*,
198234
my: bool | None = None,
@@ -201,9 +237,12 @@ async def list(
201237
desc: bool | None = None,
202238
sort_by: Literal['created_at', 'last_run_started_at'] | None = 'created_at',
203239
timeout: Timeout = 'medium',
204-
) -> ListOfActors:
240+
) -> IterablePageOfActorsAsync:
205241
"""List the Actors the user has created or used.
206242
243+
The returned page also supports iteration: `async for item in client.list(...)` yields individual Actors
244+
and transparently fetches further pages from the API.
245+
207246
https://docs.apify.com/api/v2#/reference/actors/actor-collection/get-list-of-actors
208247
209248
Args:
@@ -218,8 +257,28 @@ async def list(
218257
The list of available Actors matching the specified filters.
219258
"""
220259
api_sort_by = _SORT_BY_TO_API[sort_by] if sort_by is not None else None
221-
result = await self._list(timeout=timeout, my=my, limit=limit, offset=offset, desc=desc, sortBy=api_sort_by)
222-
return ListOfActorsResponse.model_validate(result).data
260+
261+
async def _callback(**kwargs: Any) -> PageOfItems[ActorShort]:
262+
result = await self._list(timeout=timeout, my=my, sortBy=api_sort_by, **kwargs)
263+
data = ListOfActorsResponse.model_validate(result).data
264+
return PageOfItems(
265+
items=data.items,
266+
count=data.count,
267+
limit=data.limit,
268+
total=data.total,
269+
offset=data.offset,
270+
desc=data.desc,
271+
)
272+
273+
fetch_first_page = _LazyTask(_callback(limit=limit, offset=offset, desc=desc))
274+
get_async_iterator = build_get_iterator_async(
275+
_callback, fetch_first_page, limit=limit, offset=offset, desc=desc
276+
)
277+
278+
return IterablePageOfActorsAsync(
279+
_awaitable_first_page=fetch_first_page,
280+
_get_async_iterator=get_async_iterator,
281+
)
223282

224283
async def create(
225284
self,

src/apify_client/_resource_clients/actor_env_var_collection.py

Lines changed: 46 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,17 @@
33
from typing import TYPE_CHECKING, Any
44

55
from apify_client._docs import docs_group
6-
from apify_client._models_generated import EnvVar, EnvVarResponse, ListOfEnvVars, ListOfEnvVarsResponse
6+
from apify_client._models_generated import EnvVar, EnvVarResponse, ListOfEnvVarsResponse
7+
from apify_client._pagination import (
8+
_LazyTask,
9+
build_get_iterator,
10+
build_get_iterator_async,
11+
)
12+
from apify_client._pagination_classes import (
13+
IterablePageOfEnvVars,
14+
IterablePageOfEnvVarsAsync,
15+
PageOfItemsOnlyTotal,
16+
)
717
from apify_client._resource_clients._resource_client import ResourceClient, ResourceClientAsync
818

919
if TYPE_CHECKING:
@@ -29,9 +39,12 @@ def __init__(
2939
**kwargs,
3040
)
3141

32-
def list(self, *, timeout: Timeout = 'short') -> ListOfEnvVars:
42+
def list(self, *, timeout: Timeout = 'short') -> IterablePageOfEnvVars:
3343
"""List the available Actor environment variables.
3444
45+
The returned page also supports iteration: `for item in client.list()` yields individual environment
46+
variables.
47+
3548
https://docs.apify.com/api/v2#/reference/actors/environment-variable-collection/get-list-of-environment-variables
3649
3750
Args:
@@ -40,8 +53,20 @@ def list(self, *, timeout: Timeout = 'short') -> ListOfEnvVars:
4053
Returns:
4154
The list of available Actor environment variables.
4255
"""
43-
result = self._list(timeout=timeout)
44-
return ListOfEnvVarsResponse.model_validate(result).data
56+
57+
def _callback(**kwargs: Any) -> PageOfItemsOnlyTotal[EnvVar]:
58+
result = self._list(timeout=timeout, **kwargs)
59+
data = ListOfEnvVarsResponse.model_validate(result).data
60+
return PageOfItemsOnlyTotal(items=data.items, total=data.total)
61+
62+
first_page = _callback()
63+
get_iterator = build_get_iterator(_callback, first_page)
64+
65+
return IterablePageOfEnvVars(
66+
_get_iterator=get_iterator,
67+
items=first_page.items,
68+
total=first_page.total,
69+
)
4570

4671
def create(
4772
self,
@@ -90,9 +115,12 @@ def __init__(
90115
**kwargs,
91116
)
92117

93-
async def list(self, *, timeout: Timeout = 'short') -> ListOfEnvVars:
118+
def list(self, *, timeout: Timeout = 'short') -> IterablePageOfEnvVarsAsync:
94119
"""List the available Actor environment variables.
95120
121+
The returned page also supports iteration: `async for item in client.list()` yields individual environment
122+
variables.
123+
96124
https://docs.apify.com/api/v2#/reference/actors/environment-variable-collection/get-list-of-environment-variables
97125
98126
Args:
@@ -101,8 +129,19 @@ async def list(self, *, timeout: Timeout = 'short') -> ListOfEnvVars:
101129
Returns:
102130
The list of available Actor environment variables.
103131
"""
104-
result = await self._list(timeout=timeout)
105-
return ListOfEnvVarsResponse.model_validate(result).data
132+
133+
async def _callback(**kwargs: Any) -> PageOfItemsOnlyTotal[EnvVar]:
134+
result = await self._list(timeout=timeout, **kwargs)
135+
data = ListOfEnvVarsResponse.model_validate(result).data
136+
return PageOfItemsOnlyTotal(items=data.items, total=data.total)
137+
138+
fetch_first_page = _LazyTask(_callback())
139+
get_async_iterator = build_get_iterator_async(_callback, fetch_first_page)
140+
141+
return IterablePageOfEnvVarsAsync(
142+
_awaitable_first_page=fetch_first_page,
143+
_get_async_iterator=get_async_iterator,
144+
)
106145

107146
async def create(
108147
self,

0 commit comments

Comments
 (0)