Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 116 additions & 9 deletions .github/workflows/_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,119 @@ jobs:
tests_concurrency: "1"

integration_tests:
name: Integration tests
uses: apify/workflows/.github/workflows/python_integration_tests.yaml@main
secrets: inherit
with:
python_versions: '["3.10", "3.14"]'
operating_systems: '["ubuntu-latest"]'
python_version_for_codecov: "3.14"
operating_system_for_codecov: ubuntu-latest
tests_concurrency: "16"
name: Integration tests (${{ matrix.python-version }}, ${{ matrix.os }})

if: >-
${{
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think these braces are necessary in this context, but I may be wrong.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is copied from the apify/workflows, and we know it works, so I would probably stay with that 🙂.

(github.event_name == 'pull_request' && github.event.pull_request.head.repo.owner.login == 'apify') ||
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What problem does the owner check solve? Tests don't run in forks, do they?

Copy link
Contributor Author

@vdusek vdusek Feb 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They don't, thanks to this condition (they are skipped). Otherwise, they would be executed, and they would fail.

(github.event_name == 'push' && github.ref == 'refs/heads/master')
}}

strategy:
matrix:
os: ["ubuntu-latest"]
python-version: ["3.10", "3.14"]

runs-on: ${{ matrix.os }}

env:
TESTS_CONCURRENCY: "16"

steps:
- name: Checkout repository
uses: actions/checkout@v6

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v6
with:
python-version: ${{ matrix.python-version }}

- name: Set up uv package manager
uses: astral-sh/setup-uv@v7
with:
python-version: ${{ matrix.python-version }}

- name: Install Python dependencies
run: uv run poe install-dev

- name: Run integration tests
run: uv run poe integration-tests-cov
env:
APIFY_TEST_USER_API_TOKEN: ${{ secrets.APIFY_TEST_USER_PYTHON_SDK_API_TOKEN }}
APIFY_TEST_USER_2_API_TOKEN: ${{ secrets.APIFY_TEST_USER_2_API_TOKEN }}

- name: Upload integration test coverage
if: >-
${{
matrix.os == 'ubuntu-latest' &&
matrix.python-version == '3.14' &&
env.CODECOV_TOKEN != ''
}}
uses: codecov/codecov-action@v5
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
with:
token: ${{ env.CODECOV_TOKEN }}
files: coverage-integration.xml
flags: integration

e2e_tests:
  # Runs the `e2e-tests-cov` poe task for every OS / Python version in the matrix and uploads
  # the resulting coverage report for a single matrix combination.
  name: E2E tests (${{ matrix.python-version }}, ${{ matrix.os }})

  # Run only for pull requests whose head repository is owned by `apify`, or for pushes to `master`.
  if: >-
    ${{
    (github.event_name == 'pull_request' && github.event.pull_request.head.repo.owner.login == 'apify') ||
    (github.event_name == 'push' && github.ref == 'refs/heads/master')
    }}

  runs-on: ${{ matrix.os }}

  strategy:
    # E2E tests build and run Actors on the platform. Run the matrix jobs one at a time to avoid
    # exceeding the platform's memory limits. A single job with 16 pytest workers still provides
    # good test parallelization while staying within platform constraints.
    max-parallel: 1
    matrix:
      os:
        - 'ubuntu-latest'
      python-version:
        - '3.10'
        - '3.14'

  env:
    # Number of pytest workers, consumed by the poe tasks via `--numprocesses`.
    TESTS_CONCURRENCY: '16'

  steps:
    - name: Checkout repository
      uses: actions/checkout@v6

    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v6
      with:
        python-version: ${{ matrix.python-version }}

    - name: Set up uv package manager
      uses: astral-sh/setup-uv@v7
      with:
        python-version: ${{ matrix.python-version }}

    - name: Install Python dependencies
      run: uv run poe install-dev

    - name: Run E2E tests
      env:
        APIFY_TEST_USER_API_TOKEN: ${{ secrets.APIFY_TEST_USER_PYTHON_SDK_API_TOKEN }}
        APIFY_TEST_USER_2_API_TOKEN: ${{ secrets.APIFY_TEST_USER_2_API_TOKEN }}
      run: uv run poe e2e-tests-cov

    # Upload coverage once per run (ubuntu-latest + Python 3.14), and only when a Codecov token
    # is configured. The token is exposed through `env` so the `if` expression can check it.
    - name: Upload E2E test coverage
      env:
        CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
      if: >-
        ${{
        matrix.os == 'ubuntu-latest' &&
        matrix.python-version == '3.14' &&
        env.CODECOV_TOKEN != ''
        }}
      uses: codecov/codecov-action@v5
      with:
        token: ${{ env.CODECOV_TOKEN }}
        files: coverage-e2e.xml
        flags: e2e
8 changes: 4 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -145,15 +145,13 @@ indent-style = "space"
"**/{tests}/*" = [
"D", # Everything from the pydocstyle
"INP001", # File {filename} is part of an implicit namespace package, add an __init__.py
"PLC0415", # `import` should be at the top-level of a file
"PLR2004", # Magic value used in comparison, consider replacing {value} with a constant variable
"S101", # Use of assert detected
"SLF001", # Private member accessed: `{name}`
"T20", # flake8-print
"TRY301", # Abstract `raise` to an inner function
"TID252", # Prefer absolute imports over relative imports from parent modules
]
"**/{tests}/{integration}/*" = [
"PLC0415", # `import` should be at the top-level of a file
"TRY301", # Abstract `raise` to an inner function
]
"**/{docs,website}/**" = [
"D", # Everything from the pydocstyle
Expand Down Expand Up @@ -234,6 +232,8 @@ unit-tests = "uv run pytest --numprocesses=${TESTS_CONCURRENCY:-auto} tests/unit
unit-tests-cov = "uv run pytest --numprocesses=${TESTS_CONCURRENCY:-auto} --cov=src/apify --cov-report=xml:coverage-unit.xml tests/unit"
integration-tests = "uv run pytest --numprocesses=${TESTS_CONCURRENCY:-auto} tests/integration"
integration-tests-cov = "uv run pytest --numprocesses=${TESTS_CONCURRENCY:-auto} --cov=src/apify --cov-report=xml:coverage-integration.xml tests/integration"
e2e-tests = "uv run pytest --numprocesses=${TESTS_CONCURRENCY:-auto} tests/e2e"
e2e-tests-cov = "uv run pytest --numprocesses=${TESTS_CONCURRENCY:-auto} --cov=src/apify --cov-report=xml:coverage-e2e.xml tests/e2e"
check-code = ["lint", "type-check", "unit-tests"]

[tool.poe.tasks.install-dev]
Expand Down
100 changes: 100 additions & 0 deletions tests/e2e/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# E2E tests

These tests build and run Actors using the Python SDK on the Apify platform. They are slower than integration tests (see [`tests/integration/`](../integration/)) because they need to build and deploy Actors.

When writing new tests, prefer integration tests if possible. Only write E2E tests when you need to test something that requires building and running an Actor on the platform.

## Running

```bash
export APIFY_TEST_USER_API_TOKEN=<your-token>
uv run poe e2e-tests
```

To run against a different environment, also set `APIFY_INTEGRATION_TESTS_API_URL`.

## Key fixtures

- **`apify_client_async`** — A session-scoped `ApifyClientAsync` instance configured with the test token and API URL.
- **`prepare_test_env`** / **`_isolate_test_environment`** (autouse) — Resets global state and sets `APIFY_LOCAL_STORAGE_DIR` to a temporary directory before each test.
- **`make_actor`** — Factory for creating temporary Actors on the Apify platform (built, then auto-deleted after the test).
- **`run_actor`** — Starts an Actor run and waits for completion (10 min timeout).

## How to write tests

### Creating an Actor from a Python function

You can create Actors straight from a Python function. This is great because the test Actor source code gets checked by the linter.

```python
async def test_something(
make_actor: MakeActorFunction,
run_actor: RunActorFunction,
) -> None:
async def main() -> None:
async with Actor:
print('Hello!')

actor = await make_actor(label='something', main_func=main)
run_result = await run_actor(actor)

assert run_result.status == 'SUCCEEDED'
```

The `src/main.py` file will be set to the function definition, prepended with `import asyncio` and `from apify import Actor`. You can add extra imports directly inside the function body.

### Creating an Actor from source files

Pass the `main_py` argument for a single-file Actor:

```python
async def test_something(
make_actor: MakeActorFunction,
run_actor: RunActorFunction,
) -> None:
expected_output = f'ACTOR_OUTPUT_{crypto_random_object_id(5)}'
main_py_source = f"""
import asyncio
from datetime import datetime
from apify import Actor
async def main():
async with Actor:
await Actor.set_value('OUTPUT', '{expected_output}')
"""

actor = await make_actor(label='something', main_py=main_py_source)
await run_actor(actor)

output_record = await actor.last_run().key_value_store().get_record('OUTPUT')
assert output_record is not None
assert output_record['value'] == expected_output
```

Or pass `source_files` for multi-file Actors:

```python
actor_source_files = {
'src/utils.py': """
from datetime import datetime, timezone
def get_current_datetime():
return datetime.now(timezone.utc)
""",
'src/main.py': """
import asyncio
from apify import Actor
from .utils import get_current_datetime
async def main():
async with Actor:
print('Hello! It is ' + str(get_current_datetime()))
""",
}
actor = await make_actor(label='something', source_files=actor_source_files)
```

### Assertions inside Actors

Test Actors run on the Apify platform rather than inside the pytest process, so pytest's introspection of assertion expressions is not available. When an assertion fails, only a bare `AssertionError` is shown. Always include explicit assertion messages:

```python
assert is_finished is False, f'is_finished={is_finished}'
```
File renamed without changes.
17 changes: 17 additions & 0 deletions tests/e2e/_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from __future__ import annotations

from crawlee._utils.crypto import crypto_random_object_id


def generate_unique_resource_name(label: str) -> str:
    """Build a unique resource name that embeds the given label.

    Underscores in the label are replaced with hyphens and a random ID is appended. The label must be
    short enough for the complete name to fit within 63 characters.

    Args:
        label: Human-readable part of the name.

    Returns:
        A name of the form `python-sdk-tests-<label>-generated-<random id>`.
    """
    template = 'python-sdk-tests-{}-generated-{}'
    max_name_length = 63
    random_id_length = 8

    # Whatever is left after the fixed template text and the random ID is available for the label.
    fixed_text_length = len(template.format('', ''))
    max_label_length = max_name_length - fixed_text_length - random_id_length

    normalized_label = label.replace('_', '-')
    assert len(normalized_label) <= max_label_length, (
        f'Max label length is {max_label_length}, but got {len(normalized_label)}'
    )

    random_id = crypto_random_object_id(random_id_length)
    return template.format(normalized_label, random_id)
84 changes: 79 additions & 5 deletions tests/integration/actor/conftest.py → tests/e2e/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,13 @@
from filelock import FileLock

from apify_client import ApifyClient, ApifyClientAsync
from apify_shared.consts import ActorJobStatus, ActorPermissionLevel, ActorSourceType
from apify_shared.consts import ActorJobStatus, ActorPermissionLevel, ActorSourceType, ApifyEnvVars
from crawlee import service_locator

from .._utils import generate_unique_resource_name
import apify._actor
from ._utils import generate_unique_resource_name
from apify._models import ActorRun
from apify.storage_clients._apify._alias_resolving import AliasResolver

if TYPE_CHECKING:
from collections.abc import Awaitable, Callable, Coroutine, Iterator, Mapping
Expand All @@ -26,7 +29,78 @@

_TOKEN_ENV_VAR = 'APIFY_TEST_USER_API_TOKEN'
_API_URL_ENV_VAR = 'APIFY_INTEGRATION_TESTS_API_URL'
_SDK_ROOT_PATH = Path(__file__).parent.parent.parent.parent.resolve()
_SDK_ROOT_PATH = Path(__file__).parent.parent.parent.resolve()


@pytest.fixture(scope='session')
def apify_token() -> str:
    """Return the Apify API token read from the environment.

    Raises:
        RuntimeError: If the token environment variable is unset or empty.
    """
    token = os.getenv(_TOKEN_ENV_VAR)
    if token:
        return token

    raise RuntimeError(f'{_TOKEN_ENV_VAR} environment variable is missing, cannot run tests!')


@pytest.fixture(scope='session')
def apify_client_async(apify_token: str) -> ApifyClientAsync:
    """Provide a session-scoped `ApifyClientAsync` built from the test token.

    The API URL is read from the environment; when the variable is not set, `None` is passed as `api_url`.
    """
    return ApifyClientAsync(apify_token, api_url=os.getenv(_API_URL_ENV_VAR))


@pytest.fixture
def prepare_test_env(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> Callable[[], None]:
    """Prepare the testing environment by resetting the global state before each test.

    This fixture ensures that the global state of the package is reset to a known baseline before each test runs.
    It also configures a temporary storage directory for test isolation.

    The fixture itself does not reset anything; it returns a callable that performs the reset when invoked.

    Args:
        monkeypatch: Test utility provided by pytest for patching.
        tmp_path: A unique temporary directory path provided by pytest for test isolation.

    Returns:
        A callable that prepares the test environment.
    """

    def _prepare_test_env() -> None:
        # Remove the `__wrapped__` attribute from the Actor object if it is present.
        # NOTE(review): presumably this discards a previously created underlying Actor instance - confirm.
        if hasattr(apify._actor.Actor, '__wrapped__'):
            delattr(apify._actor.Actor, '__wrapped__')

        # Mark the Actor as not initialized.
        apify._actor.Actor._is_initialized = False

        # Set the environment variable for the local storage directory to the temporary path.
        monkeypatch.setenv(ApifyEnvVars.LOCAL_STORAGE_DIR, str(tmp_path))

        # Reset the services in the service locator.
        service_locator._configuration = None
        service_locator._event_manager = None
        service_locator._storage_client = None
        service_locator.storage_instance_manager.clear_cache()

        # Reset the AliasResolver class state.
        AliasResolver._alias_map = {}
        AliasResolver._alias_init_lock = None

        # Verify that the test environment was set up correctly.
        assert os.environ.get(ApifyEnvVars.LOCAL_STORAGE_DIR) == str(tmp_path)

    return _prepare_test_env


@pytest.fixture(autouse=True)
def _isolate_test_environment(prepare_test_env: Callable[[], None]) -> None:
    """Reset the global state automatically before every test.

    As an autouse fixture it applies to all tests without being requested. It invokes the callable
    provided by the `prepare_test_env` fixture, so each test starts from a clean baseline and changes
    made by one test do not leak into the next.

    Args:
        prepare_test_env: Callable provided by the `prepare_test_env` fixture that performs the reset.
    """
    prepare_test_env()


@pytest.fixture(scope='session')
Expand Down Expand Up @@ -70,13 +144,13 @@ def sdk_wheel_path(tmp_path_factory: pytest.TempPathFactory, testrun_uid: str) -
def actor_base_source_files(sdk_wheel_path: Path) -> dict[str, str | bytes]:
"""Create a dictionary of the base source files for a testing Actor.

It takes the files from `tests/integration/actor_source_base`, builds the Apify SDK wheel from
It takes the files from `tests/e2e/actor_source_base`, builds the Apify SDK wheel from
the current codebase, and adds them all together in a dictionary.
"""
source_files: dict[str, str | bytes] = {}

# First read the actor_source_base files
actor_source_base_path = _SDK_ROOT_PATH / 'tests/integration/actor/actor_source_base'
actor_source_base_path = _SDK_ROOT_PATH / 'tests/e2e/actor_source_base'

for path in actor_source_base_path.glob('**/*'):
if not path.is_file():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from apify_shared.consts import ActorPermissionLevel
from crawlee._utils.crypto import crypto_random_object_id

from .._utils import generate_unique_resource_name
from ._utils import generate_unique_resource_name
from apify import Actor
from apify._models import ActorRun

Expand Down
Loading