pax_global_header

comment=0b9d8da09dd2cb1b74ddf025107e6f584839fbff

scrapy-scrapy-20bf2c6/.git-blame-ignore-revs

# .git-blame-ignore-revs

# adding black formatter to all the code
e211ec0aa26ecae0da8ae55d064ea60e1efe4d0d

# reapplying black to the code with default line length
303f0a70fcf8067adf0a909c2096a5009162383a

# reapplying black again and removing line length on pre-commit black config
c5cdd0d30ceb68ccba04af0e71d1b8e6678e2962

scrapy-scrapy-20bf2c6/.gitattributes

tests/sample_data/** binary

scrapy-scrapy-20bf2c6/.github/ISSUE_TEMPLATE/bug_report.md

---
name: Bug report
about: Report a problem to help us improve
---

### Description

[Description of the issue]

### Steps to Reproduce

1. [First Step]
2. [Second Step]
3. [and so on...]

**Expected behavior:** [What you expect to happen]

**Actual behavior:** [What actually happens]

**Reproduces how often:** [What percentage of the time does it reproduce?]

### Versions

Please paste here the output of executing `scrapy version --verbose` in the command line.

### Additional context

Any additional information, configuration, data or output from commands that might be necessary to reproduce or understand the issue.

Please try not to include screenshots of code or the command line, paste the contents as text instead. You can use [GitHub Flavored Markdown](https://help.github.com/en/articles/creating-and-highlighting-code-blocks) to make the text look better.

scrapy-scrapy-20bf2c6/.github/ISSUE_TEMPLATE/feature_request.md

---
name: Feature request
about: Suggest an idea for an enhancement or new feature
---

## Summary

One paragraph explanation of the feature.

## Motivation

Why are we doing this? What use cases does it support? What is the expected outcome?

## Describe alternatives you've considered

A clear and concise description of the alternative solutions you've considered. Be sure to explain why Scrapy's existing customizability isn't suitable for this feature.

## Additional context

Any additional information about the feature request here.

scrapy-scrapy-20bf2c6/.github/ISSUE_TEMPLATE/question.md

---
name: Question / Help
about: Ask a question about Scrapy or ask for help with your Scrapy code.
---

Thanks for taking an interest in Scrapy!

The Scrapy GitHub issue tracker is not meant for questions or help. Please ask for help in the [Scrapy community resources](https://scrapy.org/community/) instead.

The GitHub issue tracker's purpose is to deal with bug reports and feature requests for the project itself.
scrapy-scrapy-20bf2c6/.github/workflows/checks.yml

```yaml
name: Checks
on:
  push:
    branches:
      - master
      - '[0-9]+.[0-9]+'
  pull_request:

concurrency:
  group: ${{github.workflow}}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  checks:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        include:
          - python-version: "3.13"
            env:
              TOXENV: pylint
          - python-version: "3.10"
            env:
              TOXENV: typing
          - python-version: "3.10"
            env:
              TOXENV: typing-tests
          - python-version: "3.13"  # Keep in sync with .readthedocs.yml
            env:
              TOXENV: docs
          - python-version: "3.13"
            env:
              TOXENV: twinecheck

    steps:
      - uses: actions/checkout@v5

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.python-version }}

      - name: Run check
        env: ${{ matrix.env }}
        run: |
          pip install -U tox
          tox

  pre-commit:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v5
      - uses: pre-commit/action@v3.0.1
```

scrapy-scrapy-20bf2c6/.github/workflows/publish.yml

```yaml
name: Publish
on:
  push:
    tags:
      - '[0-9]+.[0-9]+.[0-9]+'

concurrency:
  group: ${{github.workflow}}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  publish:
    name: Upload release to PyPI
    runs-on: ubuntu-latest
    environment:
      name: pypi
      url: https://pypi.org/p/Scrapy
    permissions:
      id-token: write
    steps:
      - uses: actions/checkout@v5
      - uses: actions/setup-python@v6
        with:
          python-version: "3.13"
      - run: |
          python -m pip install --upgrade build
          python -m build
      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
```

scrapy-scrapy-20bf2c6/.github/workflows/tests-macos.yml

```yaml
name: macOS
on:
  push:
    branches:
      - master
      - '[0-9]+.[0-9]+'
  pull_request:

concurrency:
  group: ${{github.workflow}}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  tests:
    runs-on: macos-latest
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.10", "3.11", "3.12", "3.13"]

    steps:
      - uses: actions/checkout@v5

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.python-version }}

      - name: Run tests
        run: |
          pip install -U tox
          tox -e py

      - name: Upload coverage report
        uses: codecov/codecov-action@v5

      - name: Upload test results
        if: ${{ !cancelled() }}
        uses: codecov/codecov-action@v5
        with:
          report_type: test_results
```

scrapy-scrapy-20bf2c6/.github/workflows/tests-ubuntu.yml

```yaml
name: Ubuntu
on:
  push:
    branches:
      - master
      - '[0-9]+.[0-9]+'
  pull_request:

concurrency:
  group: ${{github.workflow}}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  tests:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        include:
          - python-version: "3.10"
            env:
              TOXENV: py
          - python-version: "3.11"
            env:
              TOXENV: py
          - python-version: "3.12"
            env:
              TOXENV: py
          - python-version: "3.13"
            env:
              TOXENV: py
          - python-version: "3.13"
            env:
              TOXENV: default-reactor
          - python-version: pypy3.11
            env:
              TOXENV: pypy3

          # pinned deps
          - python-version: "3.10.19"
            env:
              TOXENV: pinned
          - python-version: "3.10.19"
            env:
              TOXENV: default-reactor-pinned
          - python-version: pypy3.11
            env:
              TOXENV: pypy3-pinned
          - python-version: "3.10.19"
            env:
              TOXENV: extra-deps-pinned
          - python-version: "3.10.19"
            env:
              TOXENV: botocore-pinned

          - python-version: "3.13"
            env:
              TOXENV: extra-deps
          - python-version: pypy3.11
            env:
              TOXENV: pypy3-extra-deps
          - python-version: "3.13"
            env:
              TOXENV: botocore
          - python-version: "3.13"
            env:
              TOXENV: mitmproxy

    steps:
      - uses: actions/checkout@v5

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install system libraries
        if: contains(matrix.python-version, 'pypy') || contains(matrix.env.TOXENV, 'pinned')
        run: |
          sudo apt-get update
          sudo apt-get install libxml2-dev libxslt-dev

      - name: Run tests
        env: ${{ matrix.env }}
        run: |
          pip install -U tox
          tox

      - name: Upload coverage report
        uses: codecov/codecov-action@v5

      - name: Upload test results
        if: ${{ !cancelled() }}
        uses: codecov/codecov-action@v5
        with:
          report_type: test_results
```

scrapy-scrapy-20bf2c6/.github/workflows/tests-windows.yml

```yaml
name: Windows
on:
  push:
    branches:
      - master
      - '[0-9]+.[0-9]+'
  pull_request:

concurrency:
  group: ${{github.workflow}}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  tests:
    runs-on: windows-latest
    strategy:
      fail-fast: false
      matrix:
        include:
          - python-version: "3.10"
            env:
              TOXENV: py
          - python-version: "3.11"
            env:
              TOXENV: py
          - python-version: "3.12"
            env:
              TOXENV: py
          - python-version: "3.13"
            env:
              TOXENV: py
          - python-version: "3.13"
            env:
              TOXENV: default-reactor

          # pinned deps
          - python-version: "3.10.11"
            env:
              TOXENV: pinned
          - python-version: "3.10.11"
            env:
              TOXENV: extra-deps-pinned

          - python-version: "3.13"
            env:
              TOXENV: extra-deps

    steps:
      - uses: actions/checkout@v5

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.python-version }}

      - name: Run tests
        env: ${{ matrix.env }}
        run: |
          pip install -U tox
          tox

      - name: Upload coverage report
        uses: codecov/codecov-action@v5

      - name: Upload test results
        if: ${{ !cancelled() }}
        uses: codecov/codecov-action@v5
        with:
          report_type: test_results
```

scrapy-scrapy-20bf2c6/.gitignore

```
/.vagrant
/scrapy.iml
*.pyc
_trial_temp*
dropin.cache
docs/build
*egg-info
.tox/
venv/
.venv/
build/
dist/
.idea/
.vscode/
htmlcov/
.pytest_cache/
.coverage
.coverage.*
coverage.*
*.junit.xml
test-output.*
.cache/
.mypy_cache/
/tests/keys/localhost.crt
/tests/keys/localhost.key

# Windows
Thumbs.db

# OSX miscellaneous
.DS_Store
```

scrapy-scrapy-20bf2c6/.pre-commit-config.yaml

```yaml
exclude: |
  (?x)(
    ^docs/_static|
    ^docs/_tests|
    ^tests/sample_data
  )
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
  rev: v0.14.2
  hooks:
  - id: ruff-check
    args: [ --fix ]
  - id: ruff-format
- repo: https://github.com/adamchainz/blacken-docs
  rev: 1.20.0
  hooks:
  - id: blacken-docs
    additional_dependencies:
    - black==25.9.0
- repo: https://github.com/pre-commit/pre-commit-hooks
  rev: v6.0.0
  hooks:
  - id: end-of-file-fixer
  - id: trailing-whitespace
- repo: https://github.com/sphinx-contrib/sphinx-lint
  rev: v1.0.2
  hooks:
  - id: sphinx-lint
```

scrapy-scrapy-20bf2c6/.readthedocs.yml

```yaml
version: 2
formats: all
sphinx:
  configuration: docs/conf.py
  fail_on_warning: true
build:
  os: ubuntu-24.04
  tools:
    # For available versions, see:
    # https://docs.readthedocs.io/en/stable/config-file/v2.html#build-tools-python
    python: "3.13"  # Keep in sync with .github/workflows/checks.yml
python:
  install:
    - requirements: docs/requirements.txt
    - path: .
```
scrapy-scrapy-20bf2c6/AUTHORS

Scrapy was brought to life by Shane Evans while hacking a scraping framework
prototype for Mydeco (mydeco.com). It soon became maintained, extended and
improved by Insophia (insophia.com), with the initial sponsorship of Mydeco
to bootstrap the project. In mid-2011, Scrapinghub (now Zyte) became the new
official maintainer.

Here is the list of the primary authors & contributors:

 * Pablo Hoffman
 * Daniel Graña
 * Martin Olveyra
 * Gabriel García
 * Michael Cetrulo
 * Artem Bogomyagkov
 * Damian Canabal
 * Andres Moreira
 * Ismael Carnales
 * Matías Aguirre
 * German Hoffmann
 * Anibal Pacheco
 * Bruno Deferrari
 * Shane Evans
 * Ezequiel Rivero
 * Patrick Mezard
 * Rolando Espinoza
 * Ping Yin
 * Lucian Ursu
 * Shuaib Khan
 * Didier Deshommes
 * Vikas Dhiman
 * Jochen Maes
 * Darian Moody
 * Jordi Lonch
 * Zuhao Wan
 * Steven Almeroth
 * Tom Mortimer-Jones
 * Chris Tilden
 * Alexandr N Zamaraev
 * Emanuel Schorsch
 * Michal Danilak
 * Natan Lao
 * Hasnain Lakhani
 * Pedro Faustino
 * Alex Cepoi
 * Ilya Baryshev
 * Libor Nenadál
 * Jae-Myoung Yu
 * Vladislav Poluhin
 * Marc Abramowitz
 * Valentin-Costel Hăloiu
 * Jason Yeo
 * Сергей Прохоров
 * Simon Ratne
 * Julien Duponchelle
 * Jochen Maes
 * Vikas Dhiman
 * Juan Picca
 * Nicolás Ramírez

scrapy-scrapy-20bf2c6/CODE_OF_CONDUCT.md

# Contributor Covenant Code of Conduct

## Our Pledge

We as members, contributors, and leaders pledge to make participation in our
community a harassment-free experience for everyone, regardless of age, body
size, visible or invisible disability, ethnicity, sex characteristics, gender
identity and expression, level of experience, education, socio-economic status,
nationality, personal appearance, race, caste, color, religion, or sexual
identity and orientation.

We pledge to act and interact in ways that contribute to an open, welcoming,
diverse, inclusive, and healthy community.

## Our Standards

Examples of behavior that contributes to a positive environment for our
community include:

* Demonstrating empathy and kindness toward other people
* Being respectful of differing opinions, viewpoints, and experiences
* Giving and gracefully accepting constructive feedback
* Accepting responsibility and apologizing to those affected by our mistakes,
  and learning from the experience
* Focusing on what is best not just for us as individuals, but for the overall
  community

Examples of unacceptable behavior include:

* The use of sexualized language or imagery, and sexual attention or advances
  of any kind
* Trolling, insulting or derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or email address,
  without their explicit permission
* Other conduct which could reasonably be considered inappropriate in a
  professional setting

## Enforcement Responsibilities

Community leaders are responsible for clarifying and enforcing our standards
of acceptable behavior and will take appropriate and fair corrective action in
response to any behavior that they deem inappropriate, threatening, offensive,
or harmful.

Community leaders have the right and responsibility to remove, edit, or reject
comments, commits, code, wiki edits, issues, and other contributions that are
not aligned to this Code of Conduct, and will communicate reasons for
moderation decisions when appropriate.

## Scope

This Code of Conduct applies within all community spaces, and also applies
when an individual is officially representing the community in public spaces.
Examples of representing our community include using an official e-mail
address, posting via an official social media account, or acting as an
appointed representative at an online or offline event.

## Enforcement

Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported to the community leaders responsible for enforcement at
opensource@zyte.com. All complaints will be reviewed and investigated promptly
and fairly.

All community leaders are obligated to respect the privacy and security of the
reporter of any incident.

## Enforcement Guidelines

Community leaders will follow these Community Impact Guidelines in determining
the consequences for any action they deem in violation of this Code of Conduct:

### 1. Correction

**Community Impact**: Use of inappropriate language or other behavior deemed
unprofessional or unwelcome in the community.

**Consequence**: A private, written warning from community leaders, providing
clarity around the nature of the violation and an explanation of why the
behavior was inappropriate. A public apology may be requested.

### 2. Warning

**Community Impact**: A violation through a single incident or series of
actions.

**Consequence**: A warning with consequences for continued behavior. No
interaction with the people involved, including unsolicited interaction with
those enforcing the Code of Conduct, for a specified period of time. This
includes avoiding interactions in community spaces as well as external
channels like social media. Violating these terms may lead to a temporary or
permanent ban.

### 3. Temporary Ban

**Community Impact**: A serious violation of community standards, including
sustained inappropriate behavior.

**Consequence**: A temporary ban from any sort of interaction or public
communication with the community for a specified period of time. No public or
private interaction with the people involved, including unsolicited
interaction with those enforcing the Code of Conduct, is allowed during this
period. Violating these terms may lead to a permanent ban.

### 4. Permanent Ban

**Community Impact**: Demonstrating a pattern of violation of community
standards, including sustained inappropriate behavior, harassment of an
individual, or aggression toward or disparagement of classes of individuals.

**Consequence**: A permanent ban from any sort of public interaction within
the community.

## Attribution

This Code of Conduct is adapted from the [Contributor Covenant][homepage],
version 2.1, available at
[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].

Community Impact Guidelines were inspired by
[Mozilla's code of conduct enforcement ladder][Mozilla CoC].

For answers to common questions about this code of conduct, see the FAQ at
[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
[https://www.contributor-covenant.org/translations][translations].

[homepage]: https://www.contributor-covenant.org
[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
[Mozilla CoC]: https://github.com/mozilla/diversity
[FAQ]: https://www.contributor-covenant.org/faq
[translations]: https://www.contributor-covenant.org/translations

scrapy-scrapy-20bf2c6/CONTRIBUTING.md

The guidelines for contributing are available here:
https://docs.scrapy.org/en/master/contributing.html

Please do not abuse the issue tracker for support questions.
If your issue topic can be rephrased to "How to ...?", please use the
support channels to get it answered: https://scrapy.org/community/

scrapy-scrapy-20bf2c6/INSTALL.md

For information about installing Scrapy see:

* [Local docs](docs/intro/install.rst)
* [Online docs](https://docs.scrapy.org/en/latest/intro/install.html)

scrapy-scrapy-20bf2c6/LICENSE

Copyright (c) Scrapy developers.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    1. Redistributions of source code must retain the above copyright notice,
       this list of conditions, and the following disclaimer.

    2. Redistributions in binary form must reproduce the above copyright
       notice, this list of conditions, and the following disclaimer in the
       documentation and/or other materials provided with the distribution.

    3. Neither the name of Scrapy nor the names of its contributors may be
       used to endorse or promote products derived from this software without
       specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

scrapy-scrapy-20bf2c6/NEWS

See docs/news.rst
scrapy-scrapy-20bf2c6/README.rst

|logo|

.. |logo| image:: https://raw.githubusercontent.com/scrapy/scrapy/master/docs/_static/logo.svg
   :target: https://scrapy.org
   :alt: Scrapy
   :width: 480px

|version| |python_version| |ubuntu| |macos| |windows| |coverage| |conda| |deepwiki|

.. |version| image:: https://img.shields.io/pypi/v/Scrapy.svg
   :target: https://pypi.org/pypi/Scrapy
   :alt: PyPI Version

.. |python_version| image:: https://img.shields.io/pypi/pyversions/Scrapy.svg
   :target: https://pypi.org/pypi/Scrapy
   :alt: Supported Python Versions

.. |ubuntu| image:: https://github.com/scrapy/scrapy/workflows/Ubuntu/badge.svg
   :target: https://github.com/scrapy/scrapy/actions?query=workflow%3AUbuntu
   :alt: Ubuntu

.. |macos| image:: https://github.com/scrapy/scrapy/workflows/macOS/badge.svg
   :target: https://github.com/scrapy/scrapy/actions?query=workflow%3AmacOS
   :alt: macOS

.. |windows| image:: https://github.com/scrapy/scrapy/workflows/Windows/badge.svg
   :target: https://github.com/scrapy/scrapy/actions?query=workflow%3AWindows
   :alt: Windows

.. |coverage| image:: https://img.shields.io/codecov/c/github/scrapy/scrapy/master.svg
   :target: https://codecov.io/github/scrapy/scrapy?branch=master
   :alt: Coverage report

.. |conda| image:: https://anaconda.org/conda-forge/scrapy/badges/version.svg
   :target: https://anaconda.org/conda-forge/scrapy
   :alt: Conda Version

.. |deepwiki| image:: https://deepwiki.com/badge.svg
   :target: https://deepwiki.com/scrapy/scrapy
   :alt: Ask DeepWiki

Scrapy_ is a web scraping framework to extract structured data from websites.
It is cross-platform, and requires Python 3.10+. It is maintained by Zyte_
(formerly Scrapinghub) and `many other contributors`_.

.. _many other contributors: https://github.com/scrapy/scrapy/graphs/contributors
.. _Scrapy: https://scrapy.org/
.. _Zyte: https://www.zyte.com/

Install with:

.. code:: bash

    pip install scrapy

And follow the documentation_ to learn how to use it.

.. _documentation: https://docs.scrapy.org/en/latest/

If you wish to contribute, see Contributing_.

.. _Contributing: https://docs.scrapy.org/en/master/contributing.html
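The README above stops at ``pip install scrapy`` and defers usage to the documentation. For a concrete taste of the API it refers to, here is a minimal, self-contained spider in the style of the official Scrapy tutorial — the target site and field names are illustrative and not part of this repository:

```python
import scrapy


class QuotesSpider(scrapy.Spider):
    """A minimal spider: fetch a page and yield structured items."""

    name = "quotes"
    # Illustrative target (the site used by the official tutorial);
    # replace with a site you are allowed to crawl.
    start_urls = ["https://quotes.toscrape.com/"]

    def parse(self, response):
        # CSS selectors extract structured data from the response body.
        for quote in response.css("div.quote"):
            yield {
                "text": quote.css("span.text::text").get(),
                "author": quote.css("small.author::text").get(),
            }
```

Saved as ``quotes_spider.py``, it can be run without a full project via ``scrapy runspider quotes_spider.py -O quotes.json``.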
scrapy-scrapy-20bf2c6/SECURITY.md

# Security Policy

## Supported Versions

| Version  | Supported          |
| -------- | ------------------ |
| 2.14.x   | :white_check_mark: |
| < 2.14.x | :x:                |

## Reporting a Vulnerability

Please report the vulnerability using
https://github.com/scrapy/scrapy/security/advisories/new.

scrapy-scrapy-20bf2c6/codecov.yml

```yaml
comment:
  layout: "header, diff, tree"

coverage:
  status:
    project: false
```

scrapy-scrapy-20bf2c6/conftest.py

```python
from __future__ import annotations

import importlib
from pathlib import Path
from typing import TYPE_CHECKING

import pytest
from twisted.web.http import H2_ENABLED

from scrapy.utils.reactor import set_asyncio_event_loop_policy
from tests.keys import generate_keys
from tests.mockserver.http import MockServer

if TYPE_CHECKING:
    from collections.abc import Generator


def _py_files(folder):
    return (str(p) for p in Path(folder).rglob("*.py"))


collect_ignore = [
    # may need extra deps
    "docs/_ext",
    # not a test, but looks like a test
    "scrapy/utils/testproc.py",
    "scrapy/utils/testsite.py",
    "tests/ftpserver.py",
    "tests/mockserver.py",
    "tests/pipelines.py",
    "tests/spiders.py",
    # contains scripts to be run by tests/test_crawler.py::AsyncCrawlerProcessSubprocess
    *_py_files("tests/AsyncCrawlerProcess"),
    # contains scripts to be run by tests/test_crawler.py::AsyncCrawlerRunnerSubprocess
    *_py_files("tests/AsyncCrawlerRunner"),
    # contains scripts to be run by tests/test_crawler.py::CrawlerProcessSubprocess
    *_py_files("tests/CrawlerProcess"),
    # contains scripts to be run by tests/test_crawler.py::CrawlerRunnerSubprocess
    *_py_files("tests/CrawlerRunner"),
]

base_dir = Path(__file__).parent
ignore_file_path = base_dir / "tests" / "ignores.txt"
with ignore_file_path.open(encoding="utf-8") as reader:
    for line in reader:
        file_path = line.strip()
        if file_path and file_path[0] != "#":
            collect_ignore.append(file_path)

if not H2_ENABLED:
    collect_ignore.extend(
        (
            "scrapy/core/downloader/handlers/http2.py",
            *_py_files("scrapy/core/http2"),
        )
    )


@pytest.fixture(scope="session")
def mockserver() -> Generator[MockServer]:
    with MockServer() as mockserver:
        yield mockserver


@pytest.fixture(scope="session")
def reactor_pytest(request) -> str:
    return request.config.getoption("--reactor")


def pytest_configure(config):
    if config.getoption("--reactor") == "asyncio":
        # Needed on Windows to switch from proactor to selector for Twisted
        # reactor compatibility. If we decide to run tests with both, we will
        # need to add a new option and check it here.
        set_asyncio_event_loop_policy()


def pytest_runtest_setup(item):
    # Skip tests based on reactor markers
    reactor = item.config.getoption("--reactor")
    if item.get_closest_marker("only_asyncio") and reactor != "asyncio":
        pytest.skip("This test is only run with --reactor=asyncio")
    if item.get_closest_marker("only_not_asyncio") and reactor == "asyncio":
        pytest.skip("This test is only run without --reactor=asyncio")

    # Skip tests requiring optional dependencies
    optional_deps = [
        "uvloop",
        "botocore",
        "boto3",
        "mitmproxy",
    ]
    for module in optional_deps:
        if item.get_closest_marker(f"requires_{module}"):
            try:
                importlib.import_module(module)
            except ImportError:
                pytest.skip(f"{module} is not installed")


# Generate localhost certificate files, needed by some tests
generate_keys()
```
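The ``conftest.py`` above wires up three test-suite conventions: ``only_asyncio``/``only_not_asyncio`` markers tied to the ``--reactor`` command-line option, ``requires_<module>`` markers that skip tests when an optional dependency is missing, and a session-scoped ``mockserver`` fixture. As a hypothetical illustration (these test names and bodies are not from the repository), a test module would hook into them like this:

```python
import pytest


@pytest.mark.only_asyncio
def test_asyncio_only_behavior():
    """Collected everywhere, but skipped by pytest_runtest_setup
    unless the suite runs with --reactor=asyncio."""


@pytest.mark.requires_botocore
def test_s3_related_feature():
    """Skipped automatically when botocore cannot be imported
    (see the optional_deps loop in conftest.py)."""


def test_against_mock_server(mockserver):
    """Receives the session-scoped MockServer instance yielded
    by the mockserver fixture defined above."""
```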
scrapy-scrapy-20bf2c6/docs/Makefile

```makefile
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS    ?=
SPHINXBUILD   ?= sphinx-build
SOURCEDIR     = .
BUILDDIR      = build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
```

scrapy-scrapy-20bf2c6/docs/README.rst

:orphan:

======================================
Scrapy documentation quick start guide
======================================

This file provides a quick guide on how to compile the Scrapy documentation.

Setup the environment
---------------------

To compile the documentation you need the Sphinx Python library. To install it
and all its dependencies run the following command from this dir::

    pip install -r requirements.txt

Compile the documentation
-------------------------

To compile the documentation (to classic HTML output) run the following
command from this dir::

    make html

Documentation will be generated (in HTML format) inside the ``build/html``
dir.

View the documentation
----------------------

To view the documentation run the following command::

    make htmlview

This command will fire up your default browser and open the main page of your
(previously generated) HTML documentation.

Start over
----------

To clean up all generated documentation files and start from scratch run::

    make clean

Keep in mind that this command won't touch any documentation source files.

Recreating documentation on the fly
-----------------------------------

There is a way to recreate the doc automatically when you make changes; you
need to install watchdog (``pip install watchdog``) and then use::

    make watch

Alternative method using tox
----------------------------

To compile the documentation to HTML run the following command::

    tox -e docs

Documentation will be generated (in HTML format) inside the
``.tox/docs/tmp/html`` dir.
scrapy-scrapy-20bf2c6/docs/_ext/scrapydocs.py

```python
# pylint: disable=import-error
from collections.abc import Sequence
from operator import itemgetter
from typing import Any, TypedDict

from docutils import nodes
from docutils.nodes import Element, General, Node, document
from docutils.parsers.rst import Directive
from sphinx.application import Sphinx
from sphinx.util.nodes import make_refnode


class SettingData(TypedDict):
    docname: str
    setting_name: str
    refid: str


class SettingslistNode(General, Element):
    pass


class SettingsListDirective(Directive):
    def run(self) -> Sequence[Node]:
        return [SettingslistNode()]


def is_setting_index(node: Node) -> bool:
    if node.tagname == "index" and node["entries"]:  # type: ignore[index,attr-defined]
        # index entries for setting directives look like:
        # [('pair', 'SETTING_NAME; setting', 'std:setting-SETTING_NAME', '')]
        entry_type, info, _ = node["entries"][0][:3]  # type: ignore[index]
        return entry_type == "pair" and info.endswith("; setting")
    return False


def get_setting_name_and_refid(node: Node) -> tuple[str, str]:
    """Extract setting name from directive index node"""
    _, info, refid = node["entries"][0][:3]  # type: ignore[index]
    return info.replace("; setting", ""), refid


def collect_scrapy_settings_refs(app: Sphinx, doctree: document) -> None:
    env = app.builder.env
    if not hasattr(env, "scrapy_all_settings"):
        emptyList: list[SettingData] = []
        env.scrapy_all_settings = emptyList  # type: ignore[attr-defined]
    for node in doctree.findall(is_setting_index):
        setting_name, refid = get_setting_name_and_refid(node)
        env.scrapy_all_settings.append(  # type: ignore[attr-defined]
            SettingData(
                docname=env.docname,
                setting_name=setting_name,
                refid=refid,
            )
        )


def make_setting_element(
    setting_data: SettingData, app: Sphinx, fromdocname: str
) -> Any:
    refnode = make_refnode(
        app.builder,
        fromdocname,
        todocname=setting_data["docname"],
        targetid=setting_data["refid"],
        child=nodes.Text(setting_data["setting_name"]),
    )
    p = nodes.paragraph()
    p += refnode
    item = nodes.list_item()
    item += p
    return item


def replace_settingslist_nodes(
    app: Sphinx, doctree: document, fromdocname: str
) -> None:
    env = app.builder.env
    for node in doctree.findall(SettingslistNode):
        settings_list = nodes.bullet_list()
        settings_list.extend(
            [
                make_setting_element(d, app, fromdocname)
                for d in sorted(env.scrapy_all_settings, key=itemgetter("setting_name"))  # type: ignore[attr-defined]
                if fromdocname != d["docname"]
            ]
        )
        node.replace_self(settings_list)


def source_role(
    name, rawtext, text: str, lineno, inliner, options=None, content=None
) -> tuple[list[Any], list[Any]]:
    ref = "https://github.com/scrapy/scrapy/blob/master/" + text
    node = nodes.reference(rawtext, text, refuri=ref, **options)
    return [node], []


def issue_role(
    name, rawtext, text: str, lineno, inliner, options=None, content=None
) -> tuple[list[Any], list[Any]]:
    ref = "https://github.com/scrapy/scrapy/issues/" + text
    node = nodes.reference(rawtext, "issue " + text, refuri=ref)
    return [node], []


def commit_role(
    name, rawtext, text: str, lineno, inliner, options=None, content=None
) -> tuple[list[Any], list[Any]]:
    ref = "https://github.com/scrapy/scrapy/commit/" + text
    node = nodes.reference(rawtext, "commit " + text, refuri=ref)
    return [node], []


def rev_role(
    name, rawtext, text: str, lineno, inliner, options=None, content=None
) -> tuple[list[Any], list[Any]]:
    ref = "http://hg.scrapy.org/scrapy/changeset/" + text
    node = nodes.reference(rawtext, "r" + text, refuri=ref)
    return [node], []


def setup(app: Sphinx) -> None:
    app.add_crossref_type(
        directivename="setting",
        rolename="setting",
        indextemplate="pair: %s; setting",
    )
    app.add_crossref_type(
        directivename="signal",
        rolename="signal",
        indextemplate="pair: %s; signal",
    )
    app.add_crossref_type(
        directivename="command",
        rolename="command",
        indextemplate="pair: %s; command",
    )
    app.add_crossref_type(
        directivename="reqmeta",
        rolename="reqmeta",
        indextemplate="pair: %s; reqmeta",
    )
    app.add_role("source", source_role)
    app.add_role("commit", commit_role)
    app.add_role("issue", issue_role)
    app.add_role("rev", rev_role)
    app.add_node(SettingslistNode)
    app.add_directive("settingslist", SettingsListDirective)
    app.connect("doctree-read", collect_scrapy_settings_refs)
    app.connect("doctree-resolved", replace_settingslist_nodes)
```

scrapy-scrapy-20bf2c6/docs/_ext/scrapyfixautodoc.py

```python
"""
Must be included after 'sphinx.ext.autodoc'. Fixes unwanted 'alias of' behavior.
https://github.com/sphinx-doc/sphinx/issues/4422
"""

# pylint: disable=import-error
from sphinx.application import Sphinx


def maybe_skip_member(app: Sphinx, what, name: str, obj, skip: bool, options) -> bool:
    if not skip:
        # autodoc was generating a text "alias of" for the following members
        return name in {"default_item_class", "default_selector_class"}
    return skip


def setup(app: Sphinx) -> None:
    app.connect("autodoc-skip-member", maybe_skip_member)
```
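Sphinx only loads extensions that are importable, so the docs' ``conf.py`` (not included in this listing) has to make ``docs/_ext`` importable and list the two modules above in ``extensions``. A sketch of that standard pattern — an assumption about the setup, not the actual ``conf.py`` contents:

```python
# Hypothetical excerpt of docs/conf.py; the real file is not part of this archive.
import sys
from pathlib import Path

# Make docs/_ext importable so Sphinx can find the local extensions.
sys.path.append(str(Path(__file__).parent / "_ext"))

extensions = [
    "sphinx.ext.autodoc",
    # Custom roles (:issue:, :commit:, :source:, :rev:), crossref types
    # (:setting:, :signal:, ...) and the settingslist directive.
    "scrapydocs",
    # Must come after sphinx.ext.autodoc (see its module docstring).
    "scrapyfixautodoc",
]
```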
scrapy-scrapy-20bf2c6/docs/_static/custom.css

```css
/* Move lists closer to their introducing paragraph */
.rst-content .section ol p,
.rst-content .section ul p {
    margin-bottom: 0px;
}

.rst-content p + ol,
.rst-content p + ul {
    margin-top: -18px; /* Compensates margin-top: 24px of p */
}

.rst-content dl p + ol,
.rst-content dl p + ul {
    margin-top: -6px; /* Compensates margin-top: 12px of p */
}

/* override some styles in sphinx-rtd-dark-mode/static/dark_mode_css/general.css */
.theme-switcher {
    right: 0.4em !important;
    top: 0.6em !important;
    -webkit-box-shadow: 0px 3px 14px 4px rgba(0, 0, 0, 0.30) !important;
    box-shadow: 0px 3px 14px 4px rgba(0, 0, 0, 0.30) !important;
    height: 2em !important;
    width: 2em !important;
}

/* place the toggle button for dark mode at the bottom right corner on small screens */
@media (max-width: 768px) {
    .theme-switcher {
        right: 0.4em !important;
        bottom: 2.6em !important;
        top: auto !important;
    }
}

/* persist blue color at the top left used in default rtd theme */
html[data-theme="dark"] .wy-side-nav-search,
html[data-theme="dark"] .wy-nav-top {
    background-color: #1d577d !important;
}

/* all the styles below used to present API objects nicely in dark mode */
html[data-theme="dark"] .sig.sig-object {
    border-left-color: #3e4446 !important;
    background-color: #202325 !important
}

html[data-theme="dark"] .sig-name,
html[data-theme="dark"] .sig-prename,
html[data-theme="dark"] .property,
html[data-theme="dark"] .sig-param,
html[data-theme="dark"] .sig-paren,
html[data-theme="dark"] .sig-return-icon,
html[data-theme="dark"] .sig-return-typehint,
html[data-theme="dark"] .optional {
    color: #e8e6e3 !important
}
```

scrapy-scrapy-20bf2c6/docs/_static/logo.svg

[SVG image data omitted: the Scrapy logo asset; its contents were not preserved in this archive dump.]

scrapy-scrapy-20bf2c6/docs/_static/selectors-sample1.html

[HTML sample page used by the selectors documentation; its contents were not preserved in this archive dump.]