diff --git a/README.md b/README.md index a34bacca..24cef390 100644 --- a/README.md +++ b/README.md @@ -77,7 +77,7 @@ async def main() -> None: # Extract the desired data. data = { - 'url': actor_input['url'], + 'url': request.url, 'title': soup.title.string, 'h1s': [h1.text for h1 in soup.find_all('h1')], 'h2s': [h2.text for h2 in soup.find_all('h2')], @@ -165,10 +165,11 @@ To create and run Python Actors locally, check the documentation for To see how you can use the Apify SDK with other popular libraries used for web scraping, check out our guides for using -[Requests and HTTPX](https://docs.apify.com/sdk/python/docs/guides/requests-and-httpx), -[Beautiful Soup](https://docs.apify.com/sdk/python/docs/guides/beautiful-soup), +[BeautifulSoup with HTTPX](https://docs.apify.com/sdk/python/docs/guides/beautifulsoup-httpx), +[Parsel with Impit](https://docs.apify.com/sdk/python/docs/guides/parsel-impit), [Playwright](https://docs.apify.com/sdk/python/docs/guides/playwright), [Selenium](https://docs.apify.com/sdk/python/docs/guides/selenium), +[Crawlee](https://docs.apify.com/sdk/python/docs/guides/crawlee), or [Scrapy](https://docs.apify.com/sdk/python/docs/guides/scrapy). ## Usage concepts diff --git a/docs/01_introduction/index.mdx b/docs/01_introduction/index.mdx index 066687c4..2e803979 100644 --- a/docs/01_introduction/index.mdx +++ b/docs/01_introduction/index.mdx @@ -1,9 +1,8 @@ --- id: introduction title: Overview -sidebar_label: Overview slug: /overview -description: 'The official library for creating Apify Actors in Python, providing tools for web scraping, automation, and data storage integration.' +description: The official library for creating Apify Actors in Python, providing tools for web scraping, automation, and data storage integration. 
--- import CodeBlock from '@theme/CodeBlock'; diff --git a/docs/01_introduction/quick-start.mdx b/docs/01_introduction/quick-start.mdx index 1e568c5b..da166da9 100644 --- a/docs/01_introduction/quick-start.mdx +++ b/docs/01_introduction/quick-start.mdx @@ -1,8 +1,7 @@ --- id: quick-start title: Quick start -sidebar_label: Quick start -description: 'Get started with the Apify SDK for Python by creating your first Actor and learning the basics.' +description: Get started with the Apify SDK for Python by creating your first Actor and learning the basics. --- Learn how to create and run Actors using the Apify SDK for Python. diff --git a/docs/02_concepts/01_actor_lifecycle.mdx b/docs/02_concepts/01_actor_lifecycle.mdx index bf07cf5d..5d2ebb28 100644 --- a/docs/02_concepts/01_actor_lifecycle.mdx +++ b/docs/02_concepts/01_actor_lifecycle.mdx @@ -1,6 +1,7 @@ --- id: actor-lifecycle title: Actor lifecycle +description: How an Apify Actor starts, runs, and shuts down, including context manager and manual control patterns. --- import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock'; diff --git a/docs/02_concepts/02_actor_input.mdx b/docs/02_concepts/02_actor_input.mdx index 3a4deabb..5a0e9ecc 100644 --- a/docs/02_concepts/02_actor_input.mdx +++ b/docs/02_concepts/02_actor_input.mdx @@ -1,6 +1,7 @@ --- id: actor-input title: Actor input +description: Read and validate input data passed to your Actor at runtime. --- import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock'; diff --git a/docs/02_concepts/03_storages.mdx b/docs/02_concepts/03_storages.mdx index 193ce36f..17fe6088 100644 --- a/docs/02_concepts/03_storages.mdx +++ b/docs/02_concepts/03_storages.mdx @@ -1,6 +1,7 @@ --- id: storages title: Working with storages +description: Use datasets, key-value stores, and request queues to persist Actor data. 
--- import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock'; diff --git a/docs/02_concepts/04_actor_events.mdx b/docs/02_concepts/04_actor_events.mdx index 9ce7fd59..83343f28 100644 --- a/docs/02_concepts/04_actor_events.mdx +++ b/docs/02_concepts/04_actor_events.mdx @@ -1,6 +1,7 @@ --- id: actor-events title: Actor events & state persistence +description: Handle platform events like state persistence and graceful shutdown in your Actors. --- import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock'; diff --git a/docs/02_concepts/05_proxy_management.mdx b/docs/02_concepts/05_proxy_management.mdx index 64420eee..8579d5da 100644 --- a/docs/02_concepts/05_proxy_management.mdx +++ b/docs/02_concepts/05_proxy_management.mdx @@ -1,6 +1,7 @@ --- id: proxy-management title: Proxy management +description: Configure Apify Proxy or your own proxies for IP rotation and session management. --- import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock'; @@ -39,7 +40,7 @@ All your proxy needs are managed by the [`ProxyConfiguration`](../../reference/c ### Apify proxy vs. your own proxies -The `ProxyConfiguration` class covers both Apify Proxy and custom proxy URLs, so that you can easily switch between proxy providers. However, some features of the class are available only to Apify Proxy users, mainly because Apify Proxy is what one would call a super-proxy. It's not a single proxy server, but an API endpoint that allows connectionthrough millions of different IP addresses. So the class essentially has two modes: Apify Proxy or Your proxy. +The `ProxyConfiguration` class covers both Apify Proxy and custom proxy URLs, so that you can easily switch between proxy providers. However, some features of the class are available only to Apify Proxy users, mainly because Apify Proxy is what one would call a super-proxy. It's not a single proxy server, but an API endpoint that allows connection through millions of different IP addresses. 
So the class essentially has two modes: Apify Proxy or Your proxy. The difference is easy to remember. Using the `proxy_url` or `new_url_function` arguments enables use of your custom proxy URLs, whereas all the other options are there to configure Apify Proxy. Visit the [Apify Proxy docs](https://docs.apify.com/proxy) for more info on how these parameters work. diff --git a/docs/02_concepts/06_interacting_with_other_actors.mdx b/docs/02_concepts/06_interacting_with_other_actors.mdx index 880cbb89..086081af 100644 --- a/docs/02_concepts/06_interacting_with_other_actors.mdx +++ b/docs/02_concepts/06_interacting_with_other_actors.mdx @@ -1,6 +1,7 @@ --- id: interacting-with-other-actors title: Interacting with other Actors +description: Start, call, and metamorph other Actors from within your Actor code. --- import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock'; @@ -40,9 +41,9 @@ The [`Actor.call_task`](../../reference/class/Actor#call_task) method starts an The [`Actor.metamorph`](../../reference/class/Actor#metamorph) operation transforms an Actor run into a run of another Actor with a new input. This feature is useful if you want to use another Actor to finish the work of your current Actor, instead of internally starting a new Actor run and waiting for its finish. With metamorph, you can easily create new Actors on top of existing ones, and give your users nicer input structure and user interface for the final Actor. For the users of your Actors, the metamorph operation is completely transparent; they will just see your Actor got the work done. -Internally, the system stops the container corresponding to the original Actor run and starts a new container using a different container image. All the default storages are preserved,and the new Actor input is stored under the `INPUT-METAMORPH-1` key in the same default key-value store. 
+Internally, the system stops the container corresponding to the original Actor run and starts a new container using a different container image. All the default storages are preserved, and the new Actor input is stored under the `INPUT-METAMORPH-1` key in the same default key-value store. -To make you Actor compatible with the metamorph operation, use [`Actor.get_input`](../../reference/class/Actor#get_input) instead of [`Actor.get_value('INPUT')`](../../reference/class/Actor#get_value) to read your Actor input. This method will fetch the input using the right key in a case of metamorphed run. +To make your Actor compatible with the metamorph operation, use [`Actor.get_input`](../../reference/class/Actor#get_input) instead of [`Actor.get_value('INPUT')`](../../reference/class/Actor#get_value) to read your Actor input. This method will fetch the input using the right key in the case of a metamorphed run. For example, imagine you have an Actor that accepts a hotel URL on input, and then internally uses the [`apify/web-scraper`](https://apify.com/apify/web-scraper) public Actor to scrape all the hotel reviews. The metamorphing code would look as follows: diff --git a/docs/02_concepts/07_webhooks.mdx b/docs/02_concepts/07_webhooks.mdx index 04697bbb..12823b63 100644 --- a/docs/02_concepts/07_webhooks.mdx +++ b/docs/02_concepts/07_webhooks.mdx @@ -1,6 +1,7 @@ --- id: webhooks title: Creating webhooks +description: Set up webhooks to trigger actions when Actor run events occur. 
--- import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock'; @@ -14,7 +15,7 @@ You can learn more in the [documentation for webhooks](https://docs.apify.com/pl ## Creating an ad-hoc webhook dynamically -Besides creating webhooks manually in Apify Console, or through the Apify API,you can also create [ad-hoc webhooks](https://docs.apify.com/platform/integrations/webhooks/ad-hoc-webhooks) dynamically from the code of your Actor using the [`Actor.add_webhook`](../../reference/class/Actor#add_webhook) method: +Besides creating webhooks manually in Apify Console, or through the Apify API, you can also create [ad-hoc webhooks](https://docs.apify.com/platform/integrations/webhooks/ad-hoc-webhooks) dynamically from the code of your Actor using the [`Actor.add_webhook`](../../reference/class/Actor#add_webhook) method: {WebhookExample} diff --git a/docs/02_concepts/08_access_apify_api.mdx b/docs/02_concepts/08_access_apify_api.mdx index ff6fefb1..c4d9bc35 100644 --- a/docs/02_concepts/08_access_apify_api.mdx +++ b/docs/02_concepts/08_access_apify_api.mdx @@ -1,6 +1,7 @@ --- id: access-apify-api title: Accessing Apify API +description: Use the built-in Apify API client to access platform features not covered by the SDK. 
--- import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock'; @@ -24,7 +25,7 @@ For example, to get the details of your user, you can use this snippet: ## Actor new client -If you want to create a completely new instance of the client, for example, to get a client for a different user or change the configuration of the client,you can use the [`Actor.new_client`](../../reference/class/Actor#new_client) method: +If you want to create a completely new instance of the client, for example, to get a client for a different user or change the configuration of the client, you can use the [`Actor.new_client`](../../reference/class/Actor#new_client) method: {ActorNewClientExample} diff --git a/docs/02_concepts/09_logging.mdx b/docs/02_concepts/09_logging.mdx index c1bd26c6..2579b6d8 100644 --- a/docs/02_concepts/09_logging.mdx +++ b/docs/02_concepts/09_logging.mdx @@ -1,6 +1,7 @@ --- id: logging title: Logging +description: Configure log levels, formatting, and log redirection between Actor runs. --- import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock'; @@ -14,7 +15,7 @@ The Apify SDK is logging useful information through the [`logging`](https://docs ## Automatic configuration -When you create an Actor from an Apify-provided template, either in Apify Console or through the Apify CLI, you do not have to configure the logger yourself. The template already contains initialization code for the logger,which sets the logger level to `DEBUG` and the log formatter to [`ActorLogFormatter`](../../reference/class/ActorLogFormatter). +When you create an Actor from an Apify-provided template, either in Apify Console or through the Apify CLI, you do not have to configure the logger yourself. The template already contains initialization code for the logger, which sets the logger level to `DEBUG` and the log formatter to [`ActorLogFormatter`](../../reference/class/ActorLogFormatter). 
## Manual configuration @@ -24,7 +25,7 @@ In Python's default behavior, if you don't configure the logger otherwise, only ### Configuring the log formatting -By default, only the log message is printed out to the output, without any formatting. To have a nicer output, with the log level printed in color, the messages nicely aligned, and extra log fields printed out,you can use the [`ActorLogFormatter`](../../reference/class/ActorLogFormatter) class from the `apify.log` module. +By default, only the log message is printed out to the output, without any formatting. To have a nicer output, with the log level printed in color, the messages nicely aligned, and extra log fields printed out, you can use the [`ActorLogFormatter`](../../reference/class/ActorLogFormatter) class from the `apify.log` module. ### Example log configuration @@ -40,7 +41,7 @@ This configuration will cause all levels of messages to be printed to the standa Here you can see how all the log levels would look like. -You can use the `extra` argument for all log levels, it's not specific to the warning level. When you use `Logger.exception`, there is no need to pass the Exception object to the log manually, it will automatiacally infer it from the current execution context and print the exception details. +You can use the `extra` argument for all log levels; it's not specific to the warning level. When you use `Logger.exception`, there is no need to pass the Exception object to the log manually; it will automatically infer it from the current execution context and print the exception details. {LoggerUsageExample} diff --git a/docs/02_concepts/10_configuration.mdx b/docs/02_concepts/10_configuration.mdx index 4d1e83e8..2cdecf2c 100644 --- a/docs/02_concepts/10_configuration.mdx +++ b/docs/02_concepts/10_configuration.mdx @@ -1,6 +1,7 @@ --- id: actor-configuration title: Actor configuration +description: Customize Actor behavior through the Configuration class or environment variables. 
--- import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock'; @@ -9,9 +10,9 @@ import ConfigExample from '!!raw-loader!roa-loader!./code/10_config.py'; The [`Actor`](../../reference/class/Actor) class gets configured using the [`Configuration`](../../reference/class/Configuration) class, which initializes itself based on the provided environment variables. -If you're using the Apify SDK in your Actors on the Apify platform, or Actors running locally through the Apify CLI, you don't need to configure the `Actor` class manually,unless you have some specific requirements, everything will get configured automatically. +If you're using the Apify SDK in your Actors on the Apify platform, or Actors running locally through the Apify CLI, you don't need to configure the `Actor` class manually. Unless you have some specific requirements, everything will get configured automatically. -If you need some special configuration, you can adjust it either through the `Configuration` class directly,or by setting environment variables when running the Actor locally. +If you need some special configuration, you can adjust it either through the `Configuration` class directly, or by setting environment variables when running the Actor locally. To see the full list of configuration options, check the `Configuration` class or the list of environment variables that the Actor understands. diff --git a/docs/02_concepts/code/07_webhook.py b/docs/02_concepts/code/07_webhook.py index 76c9153c..3dd48b13 100644 --- a/docs/02_concepts/code/07_webhook.py +++ b/docs/02_concepts/code/07_webhook.py @@ -1,13 +1,13 @@ import asyncio -from apify import Actor, Webhook +from apify import Actor, Webhook, WebhookEventType async def main() -> None: async with Actor: # Create a webhook that will be triggered when the Actor run fails. 
webhook = Webhook( - event_types=['ACTOR.RUN.FAILED'], # ty: ignore[invalid-argument-type] + event_types=[WebhookEventType.ACTOR_RUN_FAILED], request_url='https://example.com/run-failed', ) diff --git a/docs/02_concepts/code/07_webhook_preventing.py b/docs/02_concepts/code/07_webhook_preventing.py index 3ace707b..ec2334e3 100644 --- a/docs/02_concepts/code/07_webhook_preventing.py +++ b/docs/02_concepts/code/07_webhook_preventing.py @@ -1,13 +1,13 @@ import asyncio -from apify import Actor, Webhook +from apify import Actor, Webhook, WebhookEventType async def main() -> None: async with Actor: # Create a webhook that will be triggered when the Actor run fails. webhook = Webhook( - event_types=['ACTOR.RUN.FAILED'], # ty: ignore[invalid-argument-type] + event_types=[WebhookEventType.ACTOR_RUN_FAILED], request_url='https://example.com/run-failed', ) diff --git a/docs/03_guides/01_beautifulsoup_httpx.mdx b/docs/03_guides/01_beautifulsoup_httpx.mdx index 166261a0..54ee79b3 100644 --- a/docs/03_guides/01_beautifulsoup_httpx.mdx +++ b/docs/03_guides/01_beautifulsoup_httpx.mdx @@ -1,6 +1,7 @@ --- id: beautifulsoup-httpx title: Using BeautifulSoup with HTTPX +description: Build an Apify Actor that scrapes web pages using BeautifulSoup and HTTPX. --- import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock'; diff --git a/docs/03_guides/02_parsel_impit.mdx b/docs/03_guides/02_parsel_impit.mdx index b68efec4..c9469b3a 100644 --- a/docs/03_guides/02_parsel_impit.mdx +++ b/docs/03_guides/02_parsel_impit.mdx @@ -1,6 +1,7 @@ --- id: parsel-impit title: Using Parsel with Impit +description: Build an Apify Actor that scrapes web pages using Parsel selectors and the Impit HTTP client. 
--- import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock'; diff --git a/docs/03_guides/03_playwright.mdx b/docs/03_guides/03_playwright.mdx index 16de8b67..85fc6f00 100644 --- a/docs/03_guides/03_playwright.mdx +++ b/docs/03_guides/03_playwright.mdx @@ -1,6 +1,7 @@ --- id: playwright title: Using Playwright +description: Build an Apify Actor that scrapes dynamic web pages using Playwright browser automation. --- import Tabs from '@theme/Tabs'; diff --git a/docs/03_guides/04_selenium.mdx b/docs/03_guides/04_selenium.mdx index a7c9ed19..707321e5 100644 --- a/docs/03_guides/04_selenium.mdx +++ b/docs/03_guides/04_selenium.mdx @@ -1,6 +1,7 @@ --- id: selenium title: Using Selenium +description: Build an Apify Actor that scrapes dynamic web pages using Selenium WebDriver. --- import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock'; diff --git a/docs/03_guides/05_crawlee.mdx b/docs/03_guides/05_crawlee.mdx index f6050654..d190d142 100644 --- a/docs/03_guides/05_crawlee.mdx +++ b/docs/03_guides/05_crawlee.mdx @@ -1,6 +1,7 @@ --- id: crawlee title: Using Crawlee +description: Build Apify Actors using Crawlee's BeautifulSoupCrawler, ParselCrawler, or PlaywrightCrawler. --- import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock'; diff --git a/docs/03_guides/06_scrapy.mdx b/docs/03_guides/06_scrapy.mdx index ac9e5fa2..43ef59d4 100644 --- a/docs/03_guides/06_scrapy.mdx +++ b/docs/03_guides/06_scrapy.mdx @@ -1,6 +1,7 @@ --- id: scrapy title: Using Scrapy +description: Convert Scrapy spiders into Apify Actors with platform storage and proxy integration. 
--- import CodeBlock from '@theme/CodeBlock'; diff --git a/docs/03_guides/07_running_webserver.mdx b/docs/03_guides/07_running_webserver.mdx index 9c9ef474..8d03683c 100644 --- a/docs/03_guides/07_running_webserver.mdx +++ b/docs/03_guides/07_running_webserver.mdx @@ -1,6 +1,7 @@ --- id: running-webserver title: Running webserver +description: Run an HTTP server inside your Actor for monitoring or serving content during execution. --- import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock'; diff --git a/docs/04_upgrading/upgrading_to_v2.md b/docs/04_upgrading/upgrading_to_v2.md index 1fd1d111..2269cda4 100644 --- a/docs/04_upgrading/upgrading_to_v2.md +++ b/docs/04_upgrading/upgrading_to_v2.md @@ -1,6 +1,7 @@ --- id: upgrading-to-v2 title: Upgrading to v2 +description: Breaking changes and migration guide from Apify SDK v1.x to v2.0. --- This page summarizes the breaking changes between Apify Python SDK v1.x and v2.0. diff --git a/docs/04_upgrading/upgrading_to_v3.md b/docs/04_upgrading/upgrading_to_v3.md index 803db6d8..729ad68b 100644 --- a/docs/04_upgrading/upgrading_to_v3.md +++ b/docs/04_upgrading/upgrading_to_v3.md @@ -1,6 +1,7 @@ --- id: upgrading-to-v3 title: Upgrading to v3 +description: Breaking changes and migration guide from Apify SDK v2.x to v3.0. --- This page summarizes the breaking changes between Apify Python SDK v2.x and v3.0.