././@PaxHeader0000000000000000000000000000003300000000000010211 xustar0027 mtime=1746550323.936221 parfive-2.2.0/0000755000175100001660000000000015006437064012557 5ustar00runnerdocker././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550314.0 parfive-2.2.0/.codecov.yaml0000644000175100001660000000011015006437052015130 0ustar00runnerdockercoverage: status: project: default: threshold: 0.2% ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550314.0 parfive-2.2.0/.coveragerc0000644000175100001660000000125615006437052014701 0ustar00runnerdocker[run] omit = parfive/__init__* parfive/_dev/* parfive/*setup* parfive/conftest.py parfive/tests/* parfive/version.py */parfive/__init__* */parfive/_dev/* */parfive/*setup* */parfive/conftest.py */parfive/tests/* */parfive/version.py [report] exclude_lines = # Have to re-enable the standard pragma pragma: no cover # Don't complain about packages we have installed except ImportError # Don't complain if tests don't hit assertions raise AssertionError raise NotImplementedError # Don't complain about script hooks def main\(.*\): # Ignore branches that don't pertain to this version of Python pragma: py{ignore_python_version} ././@PaxHeader0000000000000000000000000000003300000000000010211 xustar0027 mtime=1746550323.929221 parfive-2.2.0/.github/0000755000175100001660000000000015006437064014117 5ustar00runnerdocker././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550314.0 parfive-2.2.0/.github/FUNDING.yml0000644000175100001660000000012015006437052015722 0ustar00runnerdocker# These are supported funding model platforms github: Cadair liberapay: Cadair ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550314.0 parfive-2.2.0/.github/release-drafter.yml0000644000175100001660000000107415006437052017706 0ustar00runnerdockerexclude-contributors: - 'pre-commit-ci[bot]' name-template: 'v$NEXT_MINOR_VERSION' tag-template: 'v$NEXT_MINOR_VERSION' categories: - title: 'Breaking Changes' labels: - 'breaking' - title: 'Enhancements' labels: - 'enhancement' - title: 'Bug Fixes' labels: - 'bug' - title: 'Documentation and code quality' labels: - 'documentation' - title: 'Misc/Internal Changes' labels: - 'misc' exclude-labels: - "no changelog entry" change-template: '- $TITLE @$AUTHOR (#$NUMBER)' template: | ## Changes $CHANGES ././@PaxHeader0000000000000000000000000000003300000000000010211 xustar0027 mtime=1746550323.929221 parfive-2.2.0/.github/workflows/0000755000175100001660000000000015006437064016154 5ustar00runnerdocker././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550314.0 parfive-2.2.0/.github/workflows/ci_workflows.yml0000644000175100001660000000203515006437052021404 0ustar00runnerdockername: CI on: push: branches: - 'main' - '*.*' - '!*backport*' tags: - 'v*' - '!*dev*' - '!*pre*' - '!*post*' pull_request: # Allow manual runs through the web UI workflow_dispatch: concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true jobs: tests: uses: OpenAstronomy/github-actions-workflows/.github/workflows/tox.yml@v1 with: envs: | - linux: py39 - macos: py310 - windows: py311 - linux: py312 - linux: py313 - linux: py313-devdeps - linux: py312-conda toxdeps: tox-conda - linux: mypy pytest: false coverage: 'codecov' secrets: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} publish: needs: tests uses: OpenAstronomy/github-actions-workflows/.github/workflows/publish_pure_python.yml@v1 
with: test_extras: tests,ftp test_command: pytest --pyargs parfive secrets: pypi_token: ${{ secrets.pypi_token }} ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550314.0 parfive-2.2.0/.github/workflows/release-drafter.yml0000644000175100001660000000043415006437052021742 0ustar00runnerdockername: Release Drafter on: push: branches: - main jobs: update_release_draft: permissions: contents: write runs-on: ubuntu-latest steps: - uses: release-drafter/release-drafter@v6 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550314.0 parfive-2.2.0/.gitignore0000644000175100001660000000101415006437052014540 0ustar00runnerdocker*.py[cod] .eggs/** # C extensions *.so # Packages *.egg *.egg-info dist build eggs parts bin var sdist develop-eggs .installed.cfg lib lib64 __pycache__ # Installer logs pip-log.txt # Unit test / coverage reports .coverage .tox nosetests.xml # Translations *.mo # Mr Developer .mr.developer.cfg .project .pydevproject docs/_build/ docs/api/ htmlcov/ .vscode/ .history pip-wheel-metadata/ parfive/tests/.ipynb_checkpoints/ parfive/tests/predicted-sunspot-radio-flux.txt parfive/_version.py coverage.xml *undo-tree* ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550314.0 parfive-2.2.0/.pre-commit-config.yaml0000644000175100001660000000114015006437052017031 0ustar00runnerdocker# See https://pre-commit.com for more information # See https://pre-commit.com/hooks.html for more hooks repos: # This should be before any formatting hooks like isort - repo: https://github.com/astral-sh/ruff-pre-commit rev: "v0.11.8" hooks: - id: ruff args: ["--fix"] - id: ruff-format - repo: https://github.com/pre-commit/pre-commit-hooks rev: v5.0.0 hooks: - id: check-ast - id: check-case-conflict - id: trailing-whitespace - id: check-yaml - id: debug-statements - id: check-added-large-files - id: end-of-file-fixer - id: mixed-line-ending ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550314.0 parfive-2.2.0/.readthedocs.yml0000644000175100001660000000064115006437052015643 0ustar00runnerdocker# .readthedocs.yml # Read the Docs configuration file # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details version: 2 build: os: ubuntu-22.04 tools: python: "3.10" apt_packages: - graphviz sphinx: builder: html configuration: docs/conf.py fail_on_warning: true python: install: - method: pip path: . 
extra_requirements: - docs - ftp ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550314.0 parfive-2.2.0/.ruff.toml0000644000175100001660000000430015006437052014466 0ustar00runnerdockertarget-version = "py39" line-length = 110 exclude = [ ".git,", "__pycache__", "parfive/version.py", ] [lint] select = [ "E", "F", "W", "UP", "PT", "BLE", "A", "C4", "ICN", "G", "INP", "Q", "RSE", "PIE", "T20", "RET", "TID", "PTH", "PD", "PLC", "PLE", "FLY", "I", "NPY", "PERF", "RUF", ] extend-ignore = [ # pycodestyle (E, W) "E501", # ignore line length will use a formatter instead "E702", "E711", "E741", "F405", "F403", "F821", "F841", # pyupgrade (UP) "UP038", # Use | in isinstance - not compatible with models and is slower # pytest (PT) "PT001", # Always use pytest.fixture() "PT023", # Always use () on pytest decorators # Returns (RET) "RET502", # Do not implicitly return None in function able to return non-None value "RET503", # Missing explicit return at the end of function able to return non-None value # flake8-pie (PIE) "PIE808", # Disallow passing 0 as the first argument to range # flake8-use-pathlib (PTH) "PTH123", # open() should be replaced by Path.open() # Ruff (RUF) "RUF003", # Ignore ambiguous quote marks, doesn't allow ' in comments "RUF012", # Mutable class attributes should be annotated with `typing.ClassVar` "RUF013", # PEP 484 prohibits implicit `Optional` "RUF015", # Prefer `next(iter(...))` over single element slice ] [lint.per-file-ignores] "setup.py" = [ "INP001", # File is part of an implicit namespace package. ] "conftest.py" = [ "INP001", # File is part of an implicit namespace package. ] "docs/conf.py" = [ "E402", # Module imports not at top of file "A001", "BLE001", "T201", "I", ] "docs/*.py" = [ "INP001", # File is part of an implicit namespace package. ] "__init__.py" = [ "E402", # Module level import not at top of cell "F401", # Unused import "F403", # from {name} import * used; unable to detect undefined names "F405", # {name} may be undefined, or defined from star imports ] "test_*.py" = [ "E402", # Module level import not at top of cell "B011", "D", "E402", "PGH001", "S101", ] "parfive/main.py" = [ "T201", # Allow print in a script ] [lint.pydocstyle] convention = "numpy" [lint.isort] known-first-party = ["parfive"] ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550314.0 parfive-2.2.0/LICENSE0000644000175100001660000000204715006437052013564 0ustar00runnerdockerCopyright (c) 2017-2020 Stuart Mumford Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
././@PaxHeader0000000000000000000000000000003300000000000010211 xustar0027 mtime=1746550323.936221 parfive-2.2.0/PKG-INFO0000644000175100001660000001272415006437064013662 0ustar00runnerdockerMetadata-Version: 2.4 Name: parfive Version: 2.2.0 Summary: A HTTP and FTP parallel file downloader. Author-email: Stuart Mumford License: Copyright (c) 2017-2020 Stuart Mumford Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. Project-URL: repository, https://parfive.readthedocs.io/ Classifier: Development Status :: 5 - Production/Stable Classifier: Programming Language :: Python :: 3 Classifier: Programming Language :: Python :: 3.9 Classifier: Programming Language :: Python :: 3.10 Classifier: Programming Language :: Python :: 3.11 Classifier: Programming Language :: Python :: 3.12 Classifier: Programming Language :: Python :: 3.13 Requires-Python: >=3.9 Description-Content-Type: text/x-rst License-File: LICENSE Requires-Dist: tqdm>=4.27.0 Requires-Dist: aiohttp Provides-Extra: ftp Requires-Dist: aioftp>=0.17.1; extra == "ftp" Provides-Extra: tests Requires-Dist: pytest; extra == "tests" Requires-Dist: pytest-localserver; extra == "tests" Requires-Dist: pytest-asyncio; extra == "tests" Requires-Dist: pytest-socket; extra == "tests" Requires-Dist: pytest-cov; extra == "tests" Requires-Dist: aiofiles; extra == "tests" Provides-Extra: docs Requires-Dist: sphinx; extra == "docs" Requires-Dist: sphinx-automodapi; extra == "docs" Requires-Dist: sphinx-autodoc-typehints; extra == "docs" Requires-Dist: sphinx-contributors; extra == "docs" Requires-Dist: sphinx-book-theme; extra == "docs" Dynamic: license-file Parfive ======= .. image:: https://img.shields.io/pypi/v/parfive.svg :target: https://pypi.python.org/pypi/parfive :alt: Latest PyPI version A parallel file downloader using asyncio. parfive can handle downloading multiple files in parallel as well as downloading each file in a number of chunks. Usage ----- .. image:: https://asciinema.org/a/EuALahgkiicWHGmrfFsZSLz81.svg :alt: asciicast demo of parfive :target: https://asciinema.org/a/EuALahgkiicWHGmrfFsZSLz81 parfive works by creating a downloader object, appending files to it and then running the download. parfive has a synchronous API, but uses asyncio to paralellise downloading the files. A simple example is:: from parfive import Downloader dl = Downloader() dl.enqueue_file("http://data.sunpy.org/sample-data/predicted-sunspot-radio-flux.txt", path="./") files = dl.download() Parfive also bundles a CLI. 
The following example will download the two files concurrently.:: $ parfive 'http://212.183.159.230/5MB.zip' 'http://212.183.159.230/10MB.zip' $ parfive --help usage: parfive [-h] [--max-conn MAX_CONN] [--overwrite] [--no-file-progress] [--directory DIRECTORY] [--print-filenames] URLS [URLS ...] Parfive, the python asyncio based downloader positional arguments: URLS URLs of files to be downloaded. optional arguments: -h, --help show this help message and exit --max-conn MAX_CONN Number of maximum connections. --overwrite Overwrite if the file exists. --no-file-progress Show progress bar for each file. --directory DIRECTORY Directory to which downloaded files are saved. --print-filenames Print successfully downloaded files's names to stdout. Results ^^^^^^^ ``parfive.Downloader.download`` returns a ``parfive.Results`` object, which is a list of the filenames that have been downloaded. It also tracks any files which failed to download. Handling Errors ^^^^^^^^^^^^^^^ If files fail to download, the urls and the response from the server are stored in the ``Results`` object returned by ``parfive.Downloader``. These can be used to inform users about the errors. (Note, the progress bar will finish in an incomplete state if a download fails, i.e. it will show ``4/5 Files Downloaded``). The ``Results`` object is a list with an extra attribute ``errors``, this property returns a list of named tuples, where these named tuples contains the ``.url`` and the ``.response``, which is a ``aiohttp.ClientResponse`` or a ``aiohttp.ClientError`` object. Installation ------------ parfive is available on PyPI, you can install it with pip:: pip install parfive or if you want to use FTP downloads:: pip install parfive[ftp] Requirements ^^^^^^^^^^^^ - Python 3.9 or above - aiohttp - tqdm - aioftp (for downloads over FTP) Licence ------- MIT Licensed Authors ------- `parfive` was written by `Stuart Mumford `__. ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550314.0 parfive-2.2.0/README.rst0000644000175100001660000000570415006437052014251 0ustar00runnerdockerParfive ======= .. image:: https://img.shields.io/pypi/v/parfive.svg :target: https://pypi.python.org/pypi/parfive :alt: Latest PyPI version A parallel file downloader using asyncio. parfive can handle downloading multiple files in parallel as well as downloading each file in a number of chunks. Usage ----- .. image:: https://asciinema.org/a/EuALahgkiicWHGmrfFsZSLz81.svg :alt: asciicast demo of parfive :target: https://asciinema.org/a/EuALahgkiicWHGmrfFsZSLz81 parfive works by creating a downloader object, appending files to it and then running the download. parfive has a synchronous API, but uses asyncio to paralellise downloading the files. A simple example is:: from parfive import Downloader dl = Downloader() dl.enqueue_file("http://data.sunpy.org/sample-data/predicted-sunspot-radio-flux.txt", path="./") files = dl.download() Parfive also bundles a CLI. The following example will download the two files concurrently.:: $ parfive 'http://212.183.159.230/5MB.zip' 'http://212.183.159.230/10MB.zip' $ parfive --help usage: parfive [-h] [--max-conn MAX_CONN] [--overwrite] [--no-file-progress] [--directory DIRECTORY] [--print-filenames] URLS [URLS ...] Parfive, the python asyncio based downloader positional arguments: URLS URLs of files to be downloaded. optional arguments: -h, --help show this help message and exit --max-conn MAX_CONN Number of maximum connections. --overwrite Overwrite if the file exists. 
--no-file-progress Show progress bar for each file. --directory DIRECTORY Directory to which downloaded files are saved. --print-filenames Print successfully downloaded files's names to stdout. Results ^^^^^^^ ``parfive.Downloader.download`` returns a ``parfive.Results`` object, which is a list of the filenames that have been downloaded. It also tracks any files which failed to download. Handling Errors ^^^^^^^^^^^^^^^ If files fail to download, the urls and the response from the server are stored in the ``Results`` object returned by ``parfive.Downloader``. These can be used to inform users about the errors. (Note, the progress bar will finish in an incomplete state if a download fails, i.e. it will show ``4/5 Files Downloaded``). The ``Results`` object is a list with an extra attribute ``errors``, this property returns a list of named tuples, where these named tuples contains the ``.url`` and the ``.response``, which is a ``aiohttp.ClientResponse`` or a ``aiohttp.ClientError`` object. Installation ------------ parfive is available on PyPI, you can install it with pip:: pip install parfive or if you want to use FTP downloads:: pip install parfive[ftp] Requirements ^^^^^^^^^^^^ - Python 3.9 or above - aiohttp - tqdm - aioftp (for downloads over FTP) Licence ------- MIT Licensed Authors ------- `parfive` was written by `Stuart Mumford `__. ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1746550323.9302208 parfive-2.2.0/docs/0000755000175100001660000000000015006437064013507 5ustar00runnerdocker././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550314.0 parfive-2.2.0/docs/Makefile0000644000175100001660000000110515006437052015141 0ustar00runnerdocker# Minimal makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build SOURCEDIR = . BUILDDIR = _build # Put it first so that "make" without argument is like "make help". help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) .PHONY: help Makefile # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550314.0 parfive-2.2.0/docs/conf.py0000644000175100001660000000775315006437052015017 0ustar00runnerdocker""" Configuration file for the Sphinx documentation builder. isort:skip_file """ # flake8: NOQA: E402 # -- stdlib imports ------------------------------------------------------------ from parfive import __version__ import datetime from packaging.version import Version # -- Project information ------------------------------------------------------- project = "Parfive" author = "Stuart Mumford and Contributors" copyright = f"{datetime.datetime.now().year}, {author}" # The full version, including alpha/beta/rc tags release = __version__ parfive_version = Version(__version__) is_release = not (parfive_version.is_prerelease or parfive_version.is_devrelease) # -- General configuration ----------------------------------------------------- # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. 
extensions = [ "sphinx.ext.autodoc", "sphinx.ext.coverage", "sphinx.ext.doctest", "sphinx.ext.inheritance_diagram", "sphinx.ext.intersphinx", "sphinx.ext.mathjax", "sphinx.ext.napoleon", "sphinx.ext.todo", "sphinx.ext.viewcode", "sphinx_autodoc_typehints", # must be loaded after napoleon "sphinx_automodapi.automodapi", "sphinx_automodapi.smart_resolver", "sphinx_contributors", ] # Add any paths that contain templates here, relative to this directory. # templates_path = ['_templates'] # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied # directly to the root of the documentation. html_extra_path = ["robots.txt"] exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: source_suffix = ".rst" # The master toctree document. master_doc = "index" # The reST default role (used for this markup: `text`) to use for all # documents. Set to the "smart" one. default_role = "obj" # Disable having a separate return type row napoleon_use_rtype = False # Disable google style docstrings napoleon_google_docstring = False # Type Hint Config typehints_fully_qualified = False typehints_use_rtype = napoleon_use_rtype typehints_defaults = "comma" # -- Options for intersphinx extension ----------------------------------------- # Example configuration for intersphinx: refer to the Python standard library. intersphinx_mapping = { "python": ("https://docs.python.org/", None), "aiohttp": ("https://docs.aiohttp.org/en/stable", None), "aioftp": ("https://aioftp.readthedocs.io/", None), } # -- Options for HTML output --------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. html_theme = "sphinx_book_theme" html_theme_options = { "home_page_in_toc": True, "repository_url": "https://github.com/Cadair/parfive", "use_repository_button": True, "use_issues_button": True, "use_download_button": False, } # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ["static"] html_css_files = [ "css/contributors.css", ] html_js_files = [ ( "//gc.zgo.at/count.js", {"async": "async", "data-goatcounter": "https://parfive.goatcounter.com/count"}, ) ] # Render inheritance diagrams in SVG graphviz_output_format = "svg" graphviz_dot_args = [ "-Nfontsize=10", "-Nfontname=Helvetica Neue, Helvetica, Arial, sans-serif", "-Efontsize=10", "-Efontname=Helvetica Neue, Helvetica, Arial, sans-serif", "-Gfontsize=10", "-Gfontname=Helvetica Neue, Helvetica, Arial, sans-serif", ] ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550314.0 parfive-2.2.0/docs/index.rst0000644000175100001660000001344615006437052015355 0ustar00runnerdocker.. currentmodule:: parfive .. _parfive: ======= Parfive ======= Parfive is a small library for downloading files, its objective is to provide a simple API for queuing files for download and then providing excellent feedback to the user about the in progress downloads. 
It also aims to provide a clear interface for inspecting any failed downloads. The parfive package was motivated by the needs of `SunPy's `__ ``net`` submodule, but should be generally applicable to anyone who wants a user friendly way of downloading multiple files in parallel. Parfive uses asyncio to support downloading multiple files in parallel, and to support downloading a single file in multiple parallel chunks. Parfive supports downloading files over either HTTP or FTP using `aiohttp `__ and `aioftp `__ (``aioftp`` is an optional dependency, which does not need to be installed to download files over HTTP). Parfive provides both a function and coroutine interface, so that it can be used from both synchronous and asynchronous code. It also has opt-in support for using `aiofiles `__ to write downloaded data to disk using a separate thread pool, which may be useful if you are using parfive from within an asyncio application. Installation ------------ parfive can be installed via pip:: pip install parfive or with FTP support:: pip install parfive[ftp] or with conda from conda-forge:: conda install -c conda-forge parfive or from `GitHub `__. Usage ----- Parfive works by creating a downloader object, queuing downloads with it and then running the download. A simple example is:: from parfive import Downloader dl = Downloader() dl.enqueue_file("http://data.sunpy.org/sample-data/predicted-sunspot-radio-flux.txt", path="./") files = dl.download() It's also possible to download a list of URLs to a single destination using the `Downloader.simple_download ` method:: from parfive import Downloader files = Downloader.simple_download(['http://212.183.159.230/5MB.zip' 'http://212.183.159.230/10MB.zip'], path="./") Parfive also bundles a CLI. The following example will download the two files concurrently:: $ parfive 'http://212.183.159.230/5MB.zip' 'http://212.183.159.230/10MB.zip' $ parfive --help usage: parfive [-h] [--max-conn MAX_CONN] [--overwrite] [--no-file-progress] [--directory DIRECTORY] [--print-filenames] URLS [URLS ...] Parfive, the python asyncio based downloader positional arguments: URLS URLs of files to be downloaded. optional arguments: -h, --help show this help message and exit --max-conn MAX_CONN Number of maximum connections. --overwrite Overwrite if the file exists. --no-file-progress Show progress bar for each file. --directory DIRECTORY Directory to which downloaded files are saved. --print-filenames Print successfully downloaded files's names to stdout. Options and Customisation ------------------------- Parfive aims to support as many use cases as possible, and therefore has a number of options. There are two main points where you can customise the behaviour of the downloads, in the initialiser to `parfive.Downloader` or when adding a URL to the download queue with `~parfive.Downloader.enqueue_file`. The arguments to the ``Downloader()`` constructor affect all files transferred, and the arguments to ``enqueue_file()`` apply to only that file. By default parfive will transfer 5 files in parallel and, if supported by the remote server, chunk those files and download 5 chunks simultaneously. This behaviour is controlled by the ``max_conn=`` and ``max_splits=`` keyword arguments. Further configuration of the ``Downloader`` instance is done by passing in a `parfive.SessionConfig` object as the ``config=`` keyword argument to ``Downloader()``. See the documentation of that class for more details. 
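For example, a minimal sketch of passing a custom configuration (the ``User-Agent`` value and the ``max_conn`` setting below are purely illustrative)::

    from parfive import Downloader, SessionConfig

    # Custom headers and per-file progress behaviour are session-wide settings
    config = SessionConfig(headers={"User-Agent": "my-tool/1.0"}, file_progress=False)
    dl = Downloader(max_conn=2, config=config)
    dl.enqueue_file("http://data.sunpy.org/sample-data/predicted-sunspot-radio-flux.txt", path="./")
    files = dl.download()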
Keyword arguments to `~parfive.Downloader.enqueue_file` are passed through to either `aiohttp.ClientSession.get` for HTTP downloads or `aioftp.Client` for FTP downloads. This gives you many per-file options such as headers, authentication, ssl options etc. Parfive API ----------- .. automodapi:: parfive :no-heading: :no-main-docstr: Environment Variables --------------------- Parfive reads the following environment variables, note that as of version 2.0 all environment variables are read at the point where the ``Downloader()`` class is instantiated. * ``PARFIVE_SINGLE_DOWNLOAD`` - If set to ``"True"`` this variable sets ``max_conn`` and ``max_splits`` to one; meaning that no parallelisation of the downloads will occur. * ``PARFIVE_DISABLE_RANGE`` - If set to ``"True"`` this variable will set ``max_splits`` to one; meaning that each file downloaded will only have one concurrent connection, although multiple files may be downloaded simultaneously. * ``PARFIVE_OVERWRITE_ENABLE_AIOFILES`` - If set to ``"True"`` and aiofiles is installed in the system, aiofiles will be used to write files to disk. * ``PARFIVE_DEBUG`` - If set to ``"True"`` will configure the built-in Python logger to log to stderr and set parfive, aiohttp and aioftp to debug levels. * ``PARFIVE_HIDE_PROGESS`` - If set to ``"True"`` no progress bars will be shown. * ``PARFIVE_TOTAL_TIMEOUT`` - Overrides the default aiohttp ``total`` timeout value (unless set in Python). * ``PARFIVE_SOCK_READ_TIMEOUT`` - Overrides the default aiohttp ``sock_read`` timeout value (unless set in Python). Contributors ------------ .. contributors:: Cadair/parfive :avatars: :exclude: pre-commit-ci[bot] :order: ASC Changelog --------- See `GitHub Releases `__ for the release history and changelog. ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550314.0 parfive-2.2.0/docs/robots.txt0000644000175100001660000000032015006437052015550 0ustar00runnerdockerUser-agent: * Allow: /*/latest/ Allow: /en/latest/ # Fallback for bots that don't understand wildcards Allow: /*/stable/ Allow: /en/stable/ # Fallback for bots that don't understand wildcards Disallow: / ././@PaxHeader0000000000000000000000000000003300000000000010211 xustar0027 mtime=1746550323.926221 parfive-2.2.0/docs/static/0000755000175100001660000000000015006437064014776 5ustar00runnerdocker././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1746550323.9302208 parfive-2.2.0/docs/static/css/0000755000175100001660000000000015006437064015566 5ustar00runnerdocker././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550314.0 parfive-2.2.0/docs/static/css/contributors.css0000644000175100001660000000061415006437052021033 0ustar00runnerdocker.sphinx-contributors img { border-radius: 50%; } .sphinx-contributors_list { padding-left: 0; } .sphinx-contributors_list__item { padding-right: 0.75em; padding-left: 0.75em; } .sphinx-contributors--avatars .sphinx-contributors_contributor__image { max-width: 100px; } .sphinx-contributors_contributor { width: initial; } .sphinx-contributors { width: initial; } ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550314.0 parfive-2.2.0/mypy.ini0000644000175100001660000000073515006437052014260 0ustar00runnerdocker[mypy] plugins = pydantic.mypy [mypy-parfive.utils] disallow_untyped_defs = True # Ignore the autogenerated version file [mypy-parfive._version] ignore_missing_imports = True [mypy-tqdm.*] ignore_missing_imports = True 
[mypy-pytest_localserver.*] ignore_missing_imports = True [mypy-aioftp.*] ignore_missing_imports = True [mypy-aiohttp.*] ignore_missing_imports = True [mypy-pytest.*] ignore_missing_imports = True [mypy-pytest_socket.*] ignore_missing_imports = True ././@PaxHeader0000000000000000000000000000003300000000000010211 xustar0027 mtime=1746550323.932221 parfive-2.2.0/parfive/0000755000175100001660000000000015006437064014213 5ustar00runnerdocker././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550314.0 parfive-2.2.0/parfive/__init__.py0000644000175100001660000000101715006437052016320 0ustar00runnerdocker""" ******* parfive ******* A parallel file downloader using asyncio. * Documentation: https://parfive.readthedocs.io/en/stable/ * Source code: https://github.com/Cadair/parfive """ import logging as _logging from .config import SessionConfig from .downloader import Downloader from .results import Results __all__ = ["Downloader", "Results", "SessionConfig", "__version__", "log"] try: from ._version import version as __version__ except ImportError: __version__ = "unknown" log = _logging.getLogger("parfive") ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550323.0 parfive-2.2.0/parfive/_version.py0000644000175100001660000000077715006437063016423 0ustar00runnerdocker# file generated by setuptools-scm # don't change, don't track in version control __all__ = ["__version__", "__version_tuple__", "version", "version_tuple"] TYPE_CHECKING = False if TYPE_CHECKING: from typing import Tuple from typing import Union VERSION_TUPLE = Tuple[Union[int, str], ...] else: VERSION_TUPLE = object version: str __version__: str __version_tuple__: VERSION_TUPLE version_tuple: VERSION_TUPLE __version__ = version = '2.2.0' __version_tuple__ = version_tuple = (2, 2, 0) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550314.0 parfive-2.2.0/parfive/config.py0000644000175100001660000001722115006437052016032 0ustar00runnerdockerimport importlib.util import os import platform import warnings from collections.abc import Iterable from typing import Callable, Optional, Union try: from typing import Literal # Added in Python 3.8 except ImportError: from typing_extensions import Literal # type: ignore from dataclasses import dataclass, field import aiohttp import parfive from parfive.utils import ParfiveUserWarning __all__ = ["DownloaderConfig", "SessionConfig"] def _default_headers(): return { "User-Agent": f"parfive/{parfive.__version__}" f" aiohttp/{aiohttp.__version__}" f" python/{platform.python_version()}" } def _default_aiohttp_session(config: "SessionConfig") -> aiohttp.ClientSession: """ The aiohttp session with the kwargs stored by this config. Notes ----- `aiohttp.ClientSession` expects to be instantiated in a asyncio context where it can get a running loop. """ return aiohttp.ClientSession(headers=config.headers, requote_redirect_url=False) @dataclass class EnvConfig: """ Configuration read from environment variables. 
""" # Session scoped env vars serial_mode: bool = field(default=False, init=False) disable_range: bool = field(default=False, init=False) hide_progress: bool = field(default=False, init=False) debug_logging: bool = field(default=False, init=False) timeout_total: float = field(default=0, init=False) timeout_sock_read: float = field(default=90, init=False) override_use_aiofiles: bool = field(default=False, init=False) def __post_init__(self): self.serial_mode = "PARFIVE_SINGLE_DOWNLOAD" in os.environ self.disable_range = "PARFIVE_DISABLE_RANGE" in os.environ self.hide_progress = "PARFIVE_HIDE_PROGRESS" in os.environ self.debug_logging = "PARFIVE_DEBUG" in os.environ self.timeout_total = float(os.environ.get("PARFIVE_TOTAL_TIMEOUT", 0)) self.timeout_sock_read = float(os.environ.get("PARFIVE_SOCK_READ_TIMEOUT", 90)) self.override_use_aiofiles = "PARFIVE_OVERWRITE_ENABLE_AIOFILES" in os.environ @dataclass class SessionConfig: """ Configuration options for `parfive.Downloader`. """ http_proxy: Optional[str] = None """ The URL of a proxy to use for HTTP requests. Will default to the value of the ``HTTP_PROXY`` env var. """ https_proxy: Optional[str] = None """ The URL of a proxy to use for HTTPS requests. Will default to the value of the ``HTTPS_PROXY`` env var. """ headers: Optional[dict[str, str]] = field(default_factory=_default_headers) """ Headers to be passed to all requests made by this session. These headers are passed to the `aiohttp.ClientSession` along with ``aiohttp_session_kwargs``. The default value for headers is setting the user agent to a string with the version of parfive, aiohttp and Python. To use aiohttp's default headers set this argument to an empty dictionary. """ chunksize: float = 1024 """ The default chunksize to be used for transfers over HTTP. """ file_progress: bool = True """ If `True` (the default) a progress bar will be shown (if any progress bars are shown) for every file, in addition for one showing progress of downloading all file. """ notebook: Union[bool, None] = None """ Override automatic detection of Jupyter notebook for drawing progress bars. If `None` `tqdm` will automatically detect if it can draw rich notebook progress bars. If `False` or `True` notebook mode will be forced off or on. """ log_level: Optional[str] = None """ If not `None` configure the logger to log to stderr with this log level. """ use_aiofiles: Optional[bool] = False """ Enables using `aiofiles` to write files to disk in their own thread pool. The default value is `False`. This argument will be overridden by the ``PARFIVE_OVERWRITE_ENABLE_AIOFILES`` environment variable. If `aiofiles` can not be imported then this will be set to `False`. """ timeouts: Optional[aiohttp.ClientTimeout] = None """ The `aiohttp.ClientTimeout` object to control the timeouts used for all HTTP requests. By default the ``total`` timeout is set to `0` (never timeout) and the ``sock_read`` timeout is set to `90` seconds. These defaults can also be overridden by the ``PARFIVE_TOTAL_TIMEOUT`` and ``PARFIVE_SOCK_READ_TIMEOUT`` environment variables. """ aiohttp_session_generator: Optional[Callable[["SessionConfig"], aiohttp.ClientSession]] = None """ A function to override the generation of the `aiohttp.ClientSession` object. Due to the fact that this session needs to be instantiated inside the asyncio context this option is a function. This function takes one argument which is the instance of this ``SessionConfig`` class. 
It is expected that you pass the ``.headers`` attribute of the config instance through to the ``headers=`` keyword argument of the session you instantiate. """ env: EnvConfig = field(default_factory=EnvConfig) done_callbacks: Iterable[Callable[[str, str, Optional[Exception]], None]] = () """ A list of functions to be called when a download is completed. The signature of the function to be called is `f(filepath: str, url: str, error: Optional[Exception])`. If successful, error will be None, else the occured exception or asyncio.CancelledError. """ @staticmethod def _aiofiles_importable(): return importlib.util.find_spec("aiofiles") is not None def _compute_aiofiles(self, use_aiofiles): use_aiofiles = use_aiofiles or self.env.override_use_aiofiles if use_aiofiles and not self._aiofiles_importable(): warnings.warn( "Can not use aiofiles even though use_aiofiles is set to True as aiofiles can not be imported.", ParfiveUserWarning, ) use_aiofiles = False return use_aiofiles def __post_init__(self): if self.timeouts is None: timeouts = { "total": self.env.timeout_total, "sock_read": self.env.timeout_sock_read, } self.timeouts = aiohttp.ClientTimeout(**timeouts) if self.http_proxy is None: self.http_proxy = os.environ.get("HTTP_PROXY", None) if self.https_proxy is None: self.https_proxy = os.environ.get("HTTPS_PROXY", None) if self.use_aiofiles is not None: self.use_aiofiles = self._compute_aiofiles(self.use_aiofiles) if self.env.debug_logging: self.log_level = "DEBUG" @dataclass class DownloaderConfig: """ Hold all downloader session state. """ max_conn: int = 5 max_splits: int = 5 progress: bool = True overwrite: Union[bool, Literal["unique"]] = False config: Optional[SessionConfig] = field(default_factory=SessionConfig) env: EnvConfig = field(default_factory=EnvConfig) def __post_init__(self): if self.config is None: self.config = SessionConfig() self.max_conn = 1 if self.env.serial_mode else self.max_conn self.max_splits = 1 if self.env.serial_mode or self.env.disable_range else self.max_splits self.progress = False if self.env.hide_progress else self.progress if self.progress is False: self.file_progress = False def __getattr__(self, __name: str): return getattr(self.config, __name) def aiohttp_client_session(self): if self.config.aiohttp_session_generator is None: return _default_aiohttp_session(self.config) return self.config.aiohttp_session_generator(self.config) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550314.0 parfive-2.2.0/parfive/conftest.py0000644000175100001660000000124615006437052016412 0ustar00runnerdockerfrom functools import partial import pytest from parfive.tests.localserver import MultiPartTestServer, SimpleTestServer, error_on_paths @pytest.fixture def namedserver(): server = SimpleTestServer() server.start_server() yield server server.stop_server() @pytest.fixture def testserver(): server = SimpleTestServer(callback=partial(error_on_paths, ["testfile_2.txt"])) server.start_server() yield server server.stop_server() @pytest.fixture def multipartserver(): """ A server that can handle multi-part file downloads """ server = MultiPartTestServer() server.start_server() yield server server.stop_server() ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550314.0 parfive-2.2.0/parfive/downloader.py0000644000175100001660000011062115006437052016721 0ustar00runnerdockerimport asyncio import contextlib import logging import os import pathlib import signal import threading import urllib.parse import warnings 
from functools import partial, reduce from typing import Any, Callable, Literal, Optional, Union import aiohttp from tqdm import tqdm as tqdm_std from tqdm.auto import tqdm as tqdm_auto import parfive from .config import DownloaderConfig, SessionConfig from .results import Results from .utils import ( ChecksumMismatch, FailedDownload, MultiPartDownloadError, Token, _QueueList, cancel_task, check_file_hash, default_name, get_filepath, get_ftp_size, get_http_size, remove_file, run_task_in_thread, session_head_or_get, validate_checksum_format, ) try: import aioftp except ImportError: # pragma: nocover aioftp = None __all__ = ["Downloader"] class Downloader: """ Download files in parallel. Parameters ---------- max_conn The number of parallel download slots. max_splits The maximum number of splits to use to download a file (server dependent). progress If `True` show a main progress bar showing how many of the total files have been downloaded. If `False`, no progress bars will be shown at all. overwrite Determine how to handle downloading if a file already exists with the same name. If `False` the file download will be skipped and the path returned to the existing file, if `True` the file will be downloaded and the existing file will be overwritten, if `'unique'` the filename will be modified to be unique. config A config object containing more complex settings for this ``Downloader`` instance. """ def __init__( self, max_conn: int = 5, max_splits: int = 5, progress: bool = True, overwrite: Union[bool, Literal["unique"]] = False, config: Optional[SessionConfig] = None, ): self.config = DownloaderConfig( max_conn=max_conn, max_splits=max_splits, progress=progress, overwrite=overwrite, config=config, ) self._init_queues() # Configure progress bars self.tqdm = tqdm_auto if self.config.notebook is not None: if self.config.notebook is True: from tqdm.notebook import tqdm as tqdm_notebook self.tqdm = tqdm_notebook elif self.config.notebook is False: self.tqdm = tqdm_std else: raise ValueError("The notebook keyword argument should be one of None, True or False.") self._configure_logging() def _init_queues(self): # Setup queues self.http_queue = _QueueList() self.ftp_queue = _QueueList() def _generate_tokens(self): # Create a Queue with max_conn tokens queue = asyncio.Queue(maxsize=self.config.max_conn) for i in range(self.config.max_conn): queue.put_nowait(Token(i + 1)) return queue def _configure_logging(self): # pragma: no cover if self.config.log_level is None: return sh = logging.StreamHandler() sh.setLevel(self.config.log_level) formatter = logging.Formatter("%(name)s - %(levelname)s - %(message)s") sh.setFormatter(formatter) parfive.log.addHandler(sh) parfive.log.setLevel(self.config.log_level) aiohttp_logger = logging.getLogger("aiohttp.client") aioftp_logger = logging.getLogger("aioftp.client") aioftp_logger.addHandler(sh) aioftp_logger.setLevel(self.config.log_level) aiohttp_logger.addHandler(sh) aiohttp_logger.setLevel(self.config.log_level) parfive.log.debug("Configured parfive to run with debug logging...") @property def queued_downloads(self): """ The total number of files already queued for download. """ return len(self.http_queue) + len(self.ftp_queue) def enqueue_file( self, url: str, path: Union[str, os.PathLike, None] = None, filename: Union[str, Callable[[str, Optional[aiohttp.ClientResponse]], os.PathLike], None] = None, overwrite: Union[bool, Literal["unique"], None] = None, checksum: Union[str, bool, None] = None, **kwargs, ): """ Add a file to the download queue. 
Parameters ---------- url The URL to retrieve. path The directory to retrieve the file into, if `None` defaults to the current directory. filename The filename to save the file as. Can also be a callable which takes two arguments the url and the response object from opening that URL, and returns the filename. (Note, for FTP downloads the response will be ``None``.) If `None` the HTTP headers will be read for the filename, or the last segment of the URL will be used. overwrite Determine how to handle downloading if a file already exists with the same name. If `False` the file download will be skipped and the path returned to the existing file (if any checksum also matches, see below), if `True` the file will be downloaded and the existing file will be overwritten, if `'unique'` the filename will be modified to be unique. If `None` the value set when constructing the `~parfive.Downloader` object will be used. checksum Provide a checksum, or request one from the server, to compare to any existing file and verify the downloaded file. This option can either be a string or `True`, if `True` the checksum provided by the server (if any) will be used, and if it is a string the it should be of the form ``=``. Valid algorithms are any provided by `hashlib` but the HTTP spec allows ``sha-512``, ``sha-256``, ``md5`` or ``sha``. If ``True`` then where provided by the server (in the ``Repr-Digest`` or ``Content-Digest`` headers) the checksum will be validated against the checksum returned by the server. When ``overwrite=False`` and ``checksum=`` is set the checksum will also be used to determine if any local file should be overwritten. If the checksum is `True` then the server provided checksum will be used to compare to the local file and download skipped if it matches. If the checksum is explicitly passed, the download will fail early if the server provides a checksum for the file which doesn't match the one provided by the user. kwargs : `dict` Extra keyword arguments are passed to `aiohttp.ClientSession.request` or `aioftp.Client.context` depending on the protocol. """ overwrite = overwrite or self.config.overwrite if isinstance(checksum, str): validate_checksum_format(checksum) if path is None and filename is None: raise ValueError("Either path or filename must be specified.") if path is None: path = "./" path = pathlib.Path(path) filepath: Callable[[str, Optional[aiohttp.ClientResponse]], os.PathLike] if not filename: filepath = partial(default_name, path) elif callable(filename): filepath = filename else: # Define a function because get_file expects a callback def filepath(url, resp): return path / filename scheme = urllib.parse.urlparse(url).scheme if scheme in ("http", "https"): get_file = partial( self._get_http, url=url, filepath_partial=filepath, overwrite=overwrite, checksum=checksum, **kwargs, ) self.http_queue.append(get_file) elif scheme == "ftp": if aioftp is None: raise ValueError("The aioftp package must be installed to download over FTP.") get_file = partial( self._get_ftp, url=url, filepath_partial=filepath, overwrite=overwrite, **kwargs ) self.ftp_queue.append(get_file) else: raise ValueError("URL must start with either 'http' or 'ftp'.") @staticmethod def _add_shutdown_signals(loop, task): if os.name == "nt": return if threading.current_thread() != threading.main_thread(): warnings.warn( "This download has been started in a thread which is not the main thread. 
You will not be able to interrupt the download.", UserWarning, ) return for sig in (signal.SIGINT, signal.SIGTERM): loop.add_signal_handler(sig, task.cancel) def _run_in_loop(self, coro): """ Take a coroutine and figure out where to run it and how to cancel it. """ try: loop = asyncio.get_running_loop() except RuntimeError: loop = None # If we already have a loop and it's already running then we should # make a new loop (as we are probably in a Jupyter Notebook) should_run_in_thread = loop and loop.is_running() # If we don't already have a loop, make a new one if should_run_in_thread or loop is None: loop = asyncio.new_event_loop() # Wrap up the coroutine in a task so we can cancel it later task = loop.create_task(coro) # Add handlers for shutdown signals self._add_shutdown_signals(loop, task) # Execute the task if should_run_in_thread: return run_task_in_thread(loop, task) return loop.run_until_complete(task) async def run_download(self): """ Download all files in the queue. Returns ------- `parfive.Results` A list of files downloaded. """ tasks = set() with self._get_main_pb(self.queued_downloads) as main_pb: try: if len(self.http_queue): tasks.add(asyncio.create_task(self._run_http_download(main_pb))) if len(self.ftp_queue): tasks.add(asyncio.create_task(self._run_ftp_download(main_pb))) dl_results = await asyncio.gather(*tasks, return_exceptions=True) except asyncio.CancelledError: for task in tasks: task.cancel() dl_results = await asyncio.gather(*tasks, return_exceptions=True) finally: return self._format_results(dl_results, main_pb) def _format_results(self, retvals, main_pb): # Squash all nested lists into a single flat list if retvals and isinstance(retvals[0], list): retvals = list(reduce(list.__add__, retvals)) errors = sum([isinstance(i, FailedDownload) for i in retvals]) if errors: total_files = self.queued_downloads message = f"{errors}/{total_files} files failed to download. Please check `.errors` for details" if main_pb: main_pb.write(message) else: parfive.log.info(message) results = Results() # Iterate through the results and store any failed download errors in # the errors list of the results object. for res in retvals: if isinstance(res, FailedDownload): results.add_error(res.filepath_partial, res.url, res.exception) parfive.log.info("%s failed to download with exception\n%s", res.url, res.exception) elif isinstance(res, Exception): raise res else: requested_url, filepath = res results.append(path=filepath, url=requested_url) return results def download(self): """ Download all files in the queue. Returns ------- `parfive.Results` A list of files downloaded. Notes ----- This is a synchronous version of `~parfive.Downloader.run_download`, an `asyncio` event loop will be created to run the download (in it's own thread if a loop is already running). """ return self._run_in_loop(self.run_download()) def retry(self, results: Results): """ Retry any failed downloads in a results object. .. note:: This will start a new event loop. Parameters ---------- results : `parfive.Results` A previous results object, the ``.errors`` property will be read and the downloads retried. Returns ------- `parfive.Results` A modified version of the input ``results`` with all the errors from this download attempt and any new files appended to the list of file paths. 
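        Examples
        --------
        A minimal sketch of retrying any failed downloads (the URL is illustrative)::

            dl = Downloader()
            dl.enqueue_file("http://example.com/data.bin", path="./")
            results = dl.download()
            if results.errors:
                results = dl.retry(results)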
""" # Reset the queues self._init_queues() for err in results.errors: self.enqueue_file(err.url, filename=err.filepath_partial) new_res = self.download() # Append paths and urls overwrite errors results += new_res results._urls += new_res.urls results._errors = new_res.errors return results @classmethod def simple_download(cls, urls, *, path="./", overwrite=None): """ Download a series of URLs to a single destination. Parameters ---------- urls : iterable A sequence of URLs to download. path : `pathlib.Path`, optional The destination directory for the downloaded files. Defaults to the current directory. overwrite: `bool`, optional Overwrite the files at the destination directory. If `False` the URL will not be downloaded if a file with the corresponding filename already exists. Returns ------- `parfive.Results` A list of files downloaded. """ dl = cls() for url in urls: dl.enqueue_file(url, path=path, overwrite=overwrite) return dl.download() def _get_main_pb(self, total): """ Return the tqdm instance if we want it, else return a contextmanager that just returns None. """ if self.config.progress: return self.tqdm(total=total, unit="file", desc="Files Downloaded", position=0) return contextlib.contextmanager(lambda: iter([None]))() async def _run_http_download(self, main_pb): async with self.config.aiohttp_client_session() as session: futures = await self._run_from_queue( self.http_queue.generate_queue(), self._generate_tokens(), main_pb, session=session, ) try: # Wait for all the coroutines to finish done, _ = await asyncio.wait(futures) except asyncio.CancelledError: for task in futures: task.cancel() return await asyncio.gather(*futures, return_exceptions=True) async def _run_ftp_download(self, main_pb): futures = await self._run_from_queue( self.ftp_queue.generate_queue(), self._generate_tokens(), main_pb, ) try: # Wait for all the coroutines to finish done, _ = await asyncio.wait(futures) except asyncio.CancelledError: for task in futures: task.cancel() return await asyncio.gather(*futures, return_exceptions=True) async def _run_from_queue(self, queue, tokens, main_pb, *, session=None): futures = [] try: while not queue.empty(): get_file = await queue.get() token = await tokens.get() file_pb = self.tqdm if self.config.file_progress else False future = asyncio.create_task(get_file(session, token=token, file_pb=file_pb)) def callback(token, future, main_pb): try: tokens.put_nowait(token) # Update the main progressbar if main_pb and not future.exception(): main_pb.update(1) except asyncio.CancelledError: return future.add_done_callback(partial(callback, token, main_pb=main_pb)) futures.append(future) except asyncio.CancelledError: for task in futures: task.cancel() return futures async def _get_http( self, session: aiohttp.ClientSession, *, url: str, filepath_partial: Callable, chunksize: Union[int, None] = None, file_pb: Union[tqdm_std, bool, None] = None, token: Token, overwrite: Union[bool, Literal["unique"]], max_splits: Union[int, None] = None, checksum: Union[str, bool, None] = None, **kwargs: dict[str, Any], ) -> tuple[str, str]: """ Read the file from the given url into the filename given by ``filepath_partial``. Parameters ---------- session The `aiohttp.ClientSession` to use to retrieve the files. url The url to retrieve. filepath_partial A function to call which returns the filepath to save the url to. Takes two arguments ``resp, url``. chunksize The number of bytes to read into the file at a time. file_pb Should progress bars be displayed for each file downloaded. 
token A token for this download slot. overwrite Overwrite the file if it already exists. max_splits Number of maximum concurrent connections per file. checksum If given the downloaded file will be verified against the given checksum. The format of the checksum string should be ``=``, valid algorithms are ``sha-512``, ``sha-256``, ``md5`` or ``sha``. If ``True`` then where provided by the server (in the ``Repr-Digest`` or ``Content-Digest`` headers) the checksum will be validated against the checksum returned by the server. kwargs Extra keyword arguments are passed to `aiohttp.ClientSession.get`. Returns ------- url The URL downloaded path The name of the file saved. """ if chunksize is None: chunksize = 1024 if max_splits is None: max_splits = self.config.max_splits # Define filepath and writer here as we use them in the except block filepath = writer = None tasks: list[asyncio.Task] = [] try: scheme = urllib.parse.urlparse(url).scheme if scheme == "http": kwargs["proxy"] = self.config.http_proxy elif scheme == "https": kwargs["proxy"] = self.config.https_proxy request_headers = {} if checksum: checksum_priority = { "sha-512": 8, "sha-256": 7, "sha": 3, "md5": 2, } # If the user has specified a checksum format as the # server for that one with highest priority. if isinstance(checksum, str): user_alg = checksum.split("=")[0] checksum_priority[user_alg] = 10 # As we are doing a HEAD / GET request for the whole # file, the Repr-Digest and Content-Digest headers # should be the same, so use both. request_headers["Want-Repr-Digest"] = request_headers["Want-Content-Digest"] = ", ".join( [f"{k}={v}" for k, v in checksum_priority.items()] ) async with session_head_or_get( session, url, headers=request_headers, timeout=self.config.timeouts, **kwargs ) as resp: parfive.log.debug( "%s request made to %s with headers=%s", resp.request_info.method, resp.request_info.url, resp.request_info.headers, ) parfive.log.debug( "%s Response received from %s with headers=%s", resp.status, resp.request_info.url, resp.headers, ) if resp.status < 200 or resp.status >= 300: raise FailedDownload(filepath_partial, url, resp) filepath, use_local_file_if_possible = get_filepath(filepath_partial(resp, url), overwrite) # Get the expected checksum from the headers header_checksum: Union[str, None] = resp.headers.get( "Repr-Digest", resp.headers.get("Content-Digest", None) ) if checksum is True: checksum = header_checksum if checksum is None: parfive.log.info( "Expected server to provide checksum for url '%s' but none returned.", url ) if use_local_file_if_possible: if isinstance(checksum, str): with filepath.open(mode="rb") as fobj: checksum_matches = check_file_hash(fobj, checksum, accept_invalid_checksum=True) if checksum_matches: parfive.log.debug( "File %s already exists, checksum matches and overwrite is False; skipping download.", filepath, ) return url, str(filepath) else: parfive.log.debug( "File %s already exists and overwrite is False; skipping download.", filepath, ) return url, str(filepath) if isinstance(checksum, str) and header_checksum is not None: try: header_alg, header_chk = validate_checksum_format(header_checksum) user_alg, user_chk = validate_checksum_format(checksum) if header_alg == user_alg: if header_chk != user_chk: raise FailedDownload( filepath, url, ChecksumMismatch( "Server provided checksum and user provided checksum do not match, download skipped" ), ) else: parfive.log.info( "Not comparing user provided checksum to server provided checksum" " as algorithms do not match (got %s from the 
server).", header_alg, ) except ValueError as e: parfive.log.info( "Failed to compare user checksum to server checksum due to error: %s", e ) if callable(file_pb): file_pb = file_pb( position=token.n, unit="B", unit_scale=True, desc=filepath.name, leave=False, total=get_http_size(resp), ) else: file_pb = None # This queue will contain the downloaded chunks and their offsets # as tuples: (offset, chunk) downloaded_chunk_queue: asyncio.Queue = asyncio.Queue() writer = asyncio.create_task(self._write_worker(downloaded_chunk_queue, file_pb, filepath)) if ( not self.config.env.disable_range and max_splits and resp.headers.get("Accept-Ranges", None) == "bytes" and "Content-length" in resp.headers ): content_length = int(resp.headers["Content-length"]) split_length = max(1, content_length // max_splits) ranges: list[list[Union[int, str]]] = [ [start, start + split_length] for start in range(0, content_length, split_length) ] # let the last part download everything ranges[-1][1] = "" tasks.extend( asyncio.create_task( self._http_download_worker( session, url, chunksize, _range, downloaded_chunk_queue, **kwargs, ) ) for _range in ranges ) else: tasks.append( asyncio.create_task( self._http_download_worker( session, url, chunksize, None, downloaded_chunk_queue, **kwargs, ) ) ) # Close the initial request here before we start transferring data. # run all the download workers await asyncio.gather(*tasks) # join() waits till all the items in the queue have been processed await downloaded_chunk_queue.join() with filepath.open(mode="rb") as fobj: if isinstance(checksum, str) and not check_file_hash( fobj, checksum, accept_invalid_checksum=True ): raise FailedDownload( filepath, url, ChecksumMismatch("Downloaded checksum doesn't match.") ) for callback in self.config.done_callbacks: callback(filepath, url, None) return url, str(filepath) except (Exception, asyncio.CancelledError) as e: for task in tasks: task.cancel() # We have to cancel the writer here before we try and remove the # file so it's closed (otherwise windows gets angry). if writer is not None: await cancel_task(writer) # Set writer to None so we don't cancel it twice. writer = None # If filepath is None then the exception occurred before the request # computed the filepath, so we have no file to cleanup if filepath is not None: remove_file(filepath) for callback in self.config.done_callbacks: callback(filepath, url, e) raise FailedDownload(filepath_partial, url, e) finally: if writer is not None: writer.cancel() if isinstance(file_pb, self.tqdm): file_pb.close() async def _write_worker(self, queue, file_pb, filepath): """ Worker for writing the downloaded chunk to the file. The downloaded chunk is put into a asyncio Queue by a download worker. This worker gets the chunk from the queue and write it to the file using the specified offset of the chunk. Parameters ---------- queue: `asyncio.Queue` Queue for chunks file_pb : `tqdm.tqdm` or `False` Should progress bars be displayed for each file downloaded. filepath: `pathlib.Path` Path to the which the file should be downloaded. 
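# Usage sketch for the checksum handling implemented above: an explicit
# "<algorithm>=<hex digest>" string is verified after the download completes,
# while checksum=True asks the server for a Repr-Digest/Content-Digest header
# and validates against that. The URLs and digest values are placeholders.
import parfive

dl = parfive.Downloader()
dl.enqueue_file(
    "http://example.com/testfile.fits",
    path="./data",
    checksum="sha-256=a1c58cd340e3bd33f94524076f1fa5cf9a7f13c59d5272a9d4bc0b5bc436d9b3",
)
dl.enqueue_file("http://example.com/other.fits", path="./data", checksum=True)
results = dl.download()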
""" if self.config.use_aiofiles: await self._async_write_worker(queue, file_pb, filepath) else: await self._blocking_write_worker(queue, file_pb, filepath) async def _async_write_worker(self, queue, file_pb, filepath): import aiofiles async with aiofiles.open(filepath, mode="wb") as f: while True: offset, chunk = await queue.get() await f.seek(offset) await f.write(chunk) await f.flush() # Update the progressbar for file if file_pb is not None: file_pb.update(len(chunk)) queue.task_done() async def _blocking_write_worker(self, queue, file_pb, filepath): with open(filepath, "wb") as f: while True: offset, chunk = await queue.get() f.seek(offset) f.write(chunk) f.flush() # Update the progressbar for file if file_pb is not None: file_pb.update(len(chunk)) queue.task_done() async def _http_download_worker(self, session, url, chunksize, http_range, queue, **kwargs): """ Worker for downloading chunks from http urls. This function downloads the chunk from the specified http range and puts the chunk in the asyncio Queue. If no range is specified, then the whole file is downloaded via chunks and put in the queue. Parameters ---------- session : `aiohttp.ClientSession` The `aiohttp.ClientSession` to use to retrieve the files. url : `str` The url to retrieve. chunksize : `int` The number of bytes to read into the file at a time. http_range: (`int`, `int`) or `None` Start and end bytes of the file. In None, then no `Range` header is specified in request and the whole file will be downloaded. queue: `asyncio.Queue` Queue to put the download chunks. kwargs : `dict` Extra keyword arguments are passed to `aiohttp.ClientSession.get`. """ headers = kwargs.pop("headers", {}) if http_range: headers["Range"] = "bytes={}-{}".format(*http_range) # init offset to start of range offset, _ = http_range else: offset = 0 async with session.get(url, timeout=self.config.timeouts, headers=headers, **kwargs) as resp: parfive.log.debug( "%s request made for download to %s with headers=%s", resp.request_info.method, resp.request_info.url, resp.request_info.headers, ) parfive.log.debug( "%s Response received from %s with headers=%s", resp.status, resp.request_info.url, resp.headers, ) if resp.status < 200 or resp.status >= 300: raise MultiPartDownloadError(resp) while True: chunk = await resp.content.read(chunksize) if not chunk: break await queue.put((offset, chunk)) offset += len(chunk) async def _get_ftp( self, session=None, *, url, filepath_partial, file_pb=None, token, overwrite, **kwargs, ): """ Read the file from the given url into the filename given by ``filepath_partial``. Parameters ---------- session : `None` A placeholder for API compatibility with ``_get_http`` url : `str` The url to retrieve. filepath_partial : `callable` A function to call which returns the filepath to save the url to. Takes two arguments ``resp, url``. file_pb : `tqdm.tqdm` or `False` Should progress bars be displayed for each file downloaded. token : `parfive.downloader.Token` A token for this download slot. overwrite : `bool` Whether to overwrite the file if it already exists. kwargs : `dict` Extra keyword arguments are passed to `aioftp.Client.context`. Returns ------- `str` The name of the file saved. 
""" filepath = writer = None parse = urllib.parse.urlparse(url) try: async with aioftp.Client.context(parse.hostname, **kwargs) as client: parfive.log.debug("Connected to ftp server %s", parse.hostname) if parse.username and parse.password: parfive.log.debug("Explicitly Logging in with %s:%s", parse.username, parse.password) await client.login(parse.username, parse.password) # This has to be done before we start streaming the file: filepath, skip = get_filepath(filepath_partial(None, url), overwrite) if skip: parfive.log.debug( "File %s already exists and overwrite is False; skipping download.", filepath, ) return url, str(filepath) if callable(file_pb): total_size = await get_ftp_size(client, parse.path) file_pb = file_pb( position=token.n, unit="B", unit_scale=True, desc=filepath.name, leave=False, total=total_size, ) else: file_pb = None parfive.log.debug("Downloading file %s from %s", parse.path, parse.hostname) async with client.download_stream(parse.path) as stream: downloaded_chunks_queue = asyncio.Queue() download_workers = [] writer = asyncio.create_task( self._write_worker(downloaded_chunks_queue, file_pb, filepath) ) download_workers.append( asyncio.create_task(self._ftp_download_worker(stream, downloaded_chunks_queue)) ) await asyncio.gather(*download_workers) await downloaded_chunks_queue.join() for callback in self.config.done_callbacks: callback(filepath, url, None) return url, str(filepath) except (Exception, asyncio.CancelledError) as e: if writer is not None: await cancel_task(writer) writer = None # If filepath is None then the exception occurred before the request # computed the filepath, so we have no file to cleanup if filepath is not None: remove_file(filepath) filepath = None for callback in self.config.done_callbacks: callback(filepath, url, e) raise FailedDownload(filepath_partial, url, e) finally: # Just make sure we close the file. if writer is not None: writer.cancel() if isinstance(file_pb, self.tqdm): file_pb.close() async def _ftp_download_worker(self, stream, queue): """ Similar to `Downloader._http_download_worker`. See that function's documentation for more info. Parameters ---------- stream: `aioftp.StreamIO` Stream of the file to be downloaded. queue: `asyncio.Queue` Queue to put the download chunks. """ offset = 0 async for chunk in stream.iter_by_block(): # Write this chunk to the output file. 
await queue.put((offset, chunk)) offset += len(chunk) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550314.0 parfive-2.2.0/parfive/main.py0000644000175100001660000000527715006437052015521 0ustar00runnerdockerimport argparse import sys from parfive import Downloader, SessionConfig, __version__ def main(): args = parse_args(sys.argv[1:]) run_parfive(args) def run_parfive(args): log_level = "DEBUG" if args.verbose else None config = SessionConfig(file_progress=not args.no_file_progress, log_level=log_level) downloader = Downloader( max_conn=args.max_conn, max_splits=args.max_splits, progress=not args.no_progress, overwrite=args.overwrite, config=config, ) for url in args.urls: downloader.enqueue_file(url, path=args.directory) results = downloader.download() if args.print_filenames: for i in results: print(i) err_str = "" for err in results.errors: err_str += f"{err.url} \t {err.exception}\n" if err_str: print(err_str, file=sys.stderr) sys.exit(1) sys.exit(0) def parse_args(args): parser = argparse.ArgumentParser(description="Parfive: A parallel file downloader written in Python.") parser.add_argument("urls", metavar="URLS", type=str, nargs="+", help="URLs of files to be downloaded.") parser.add_argument("--max-conn", type=int, default=5, help="Maximum number of parallel file downloads.") parser.add_argument( "--max-splits", type=int, default=5, help="Maximum number of parallel connections per file (only used if supported by the server).", ) parser.add_argument( "--directory", type=str, default="./", help="Directory to which downloaded files are saved." ) parser.add_argument( "--overwrite", action="store_const", const=True, default=False, help="Overwrite if the file exists.", ) parser.add_argument( "--no-progress", action="store_const", const=True, default=False, dest="no_progress", help="Show progress indicators during download.", ) parser.add_argument( "--no-file-progress", action="store_const", const=True, default=False, dest="no_file_progress", help="Show progress bar for each file.", ) parser.add_argument( "--print-filenames", action="store_const", const=True, default=False, dest="print_filenames", help="Print successfully downloaded files's names to stdout.", ) parser.add_argument( "--verbose", action="store_const", const=True, default=False, help="Log debugging output while transferring the files.", ) parser.add_argument("--version", action="version", version=f"%(prog)s {__version__}") return parser.parse_args(args) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550314.0 parfive-2.2.0/parfive/results.py0000644000175100001660000000572715006437052016276 0ustar00runnerdockerfrom collections import UserList, namedtuple import aiohttp from .utils import FailedDownload __all__ = ["Results"] class Error(namedtuple("error", ("filepath_partial", "url", "exception"))): def __str__(self): filepath_partial = "" if isinstance(self.filepath_partial, str): filepath_partial = f"{self.filepath_partial},\n" return filepath_partial + f"{self.url},\n{self.exception}" def __repr__(self): return f"{object.__repr__(self)}\n{self}" class Results(UserList): """ The results of a download from `parfive.Downloader.download`. This object contains the filenames of successful downloads as well, a list of all urls requested in the `~parfive.Results.urls` property and a list of any errors encountered in the `~parfive.Results.errors` property. 
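# Usage sketch for the command-line entry point defined in parfive/main.py
# above, driven programmatically the same way the test suite does. The URL is
# a placeholder; note that run_parfive() exits the interpreter via sys.exit()
# once the downloads finish.
from parfive.main import parse_args, run_parfive

args = parse_args(["--max-conn", "10", "--no-file-progress", "http://example.com/file.fits"])
# run_parfive(args)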
""" def __init__(self, *args, errors=None, urls=None): super().__init__(*args) self._errors = errors or [] self._urls = urls or [] def _get_nice_resp_repr(self, response): # This is a modified version of aiohttp.ClientResponse.__repr__ if isinstance(response, aiohttp.ClientResponse): ascii_encodable_url = str(response.url) if response.reason: ascii_encodable_reason = response.reason.encode("ascii", "backslashreplace").decode("ascii") else: ascii_encodable_reason = response.reason return f"" return repr(response) def __str__(self): out = super().__repr__() if self.errors: out += "\nErrors:\n" for error in self.errors: if isinstance(error, FailedDownload): resp = self._get_nice_resp_repr(error.exception) out += f"(url={error.url}, response={resp})\n" else: out += f"({error!r})" return out def __repr__(self): out = object.__repr__(self) out += "\n" out += str(self) return out def append(self, *, path, url): super().append(path) self._urls.append(url) def add_error(self, filename, url, exception): """ Add an error to the results. """ if isinstance(exception, aiohttp.ClientResponse): exception._headers = None self._errors.append(Error(filename, url, exception)) @property def errors(self): """ A list of errors encountered during the download. The errors are represented as a tuple containing ``(filepath, url, exception)`` where ``filepath`` is a function for generating a filepath, ``url`` is the url to be downloaded and ``exception`` is the error raised during download. """ return self._errors @property def urls(self): """ A list of requested urls. """ return self._urls ././@PaxHeader0000000000000000000000000000003300000000000010211 xustar0027 mtime=1746550323.934221 parfive-2.2.0/parfive/tests/0000755000175100001660000000000015006437064015355 5ustar00runnerdocker././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550314.0 parfive-2.2.0/parfive/tests/__init__.py0000644000175100001660000000000015006437052017451 0ustar00runnerdocker././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550314.0 parfive-2.2.0/parfive/tests/localserver.py0000644000175100001660000000717615006437052020260 0ustar00runnerdockerimport abc from collections import defaultdict from pytest_localserver.http import WSGIServer class BaseTestServer(abc.ABC): """ A pytest-localserver server which allows you to customise it's responses. Parameters ---------- callback A callable with signature ``(request_number, environ, start_response)``. If the callback returns anything other than `None` it is assumed that the callback has handled the WSGI request. If the callback returns `None` then `default_request_handler` is returned which will handle the WSGI request. 
""" def __init__(self, callback=None): self.requests = [] self.server = WSGIServer(application=self.request_handler) self.callback = callback self.requests_by_method = defaultdict(lambda: 0) def callback_handler(self, environ, start_response): if self.callback is not None: return self.callback(self, environ, start_response) def request_handler(self, environ, start_response): self.requests.append(environ) callback_return = self.callback_handler(environ, start_response) self.requests_by_method[environ["REQUEST_METHOD"]] += 1 if callback_return: return callback_return return self.default_request_handler(environ, start_response) @abc.abstractmethod def default_request_handler(self, environ, start_response): return def start_server(self): self.server.start() def stop_server(self): self.server.stop() @property def url(self): return self.server.url class SimpleTestServer(BaseTestServer): def default_request_handler(self, environ, start_response): status = "200 OK" response_headers = [ ("Content-type", "text/plain"), ("Content-Disposition", f"attachment; filename={environ['PATH_INFO'].strip('/')}"), ] start_response(status, response_headers) return [b"Hello world!\n"] class MultiPartTestServer(BaseTestServer): def default_request_handler(self, environ, start_response): content = b"a" * 100 bytes_end = content_length = len(content) bytes_start = 0 http_range = environ.get("HTTP_RANGE", None) if http_range: http_range = http_range.split("bytes=")[1] bytes_start = int(http_range.split("-")[0]) bytes_end = http_range.split("-")[1] if not bytes_end: bytes_end = content_length bytes_end = int(bytes_end) content_length = bytes_end - bytes_start status = "200 OK" response_headers = [ ("Content-type", "text/plain"), ("Content-Length", content_length), ("Accept-Ranges", "bytes"), ("Content-Disposition", "attachment; filename=testfile.txt"), ] start_response(status, response_headers) part = content[bytes_start:bytes_end] return [part] def error_on_paths(paths, server, environ, start_response): if (path := environ["PATH_INFO"].strip("/")) in paths: # Once we error on a GET request serve it next time if environ["REQUEST_METHOD"] == "GET": paths.remove(path) status = "404" response_headers = [("Content-type", "text/plain")] start_response(status, response_headers) return [b""] def error_on_nth_get_request(n, server, environ, start_response): if server.requests_by_method["GET"] == n: status = "404" response_headers = [("Content-type", "text/plain")] start_response(status, response_headers) return [b""] ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550314.0 parfive-2.2.0/parfive/tests/simple_download_test.ipynb0000644000175100001660000002402715006437052022641 0ustar00runnerdocker{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import parfive" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "dl = parfive.Downloader()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "dl.enqueue_file(\"http://data.sunpy.org/sample-data/predicted-sunspot-radio-flux.txt\", path=\"./\")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "nbreg": { "diff_ignore": [ "/outputs/0/data/" ] } }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "db630dac73074c248fb6ecc163e1fd75", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, description='Files Downloaded', max=1.0, 
style=ProgressStyle(descripti…" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "files = dl.download()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "\n", "['predicted-sunspot-radio-flux.txt']" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "files" ] } ], "metadata": { "celltoolbar": "Edit Metadata", "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" }, "nbreg": { "diff_ignore": [ "/metadata/widgets" ] }, "widgets": { "application/vnd.jupyter.widget-state+json": { "state": { "0cc46587e5314f8a95e5373eed4af735": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "Files Downloaded: 100%", "description_tooltip": null, "layout": "IPY_MODEL_d3f4f43178564251aa135d08e87f47db", "max": 1, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_ef5d56b1823d442c829e821516cdbff0", "value": 1 } }, "4a1f3a7b8acd4d1daeebd13033a5ba05": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "658c594e4c7f44e986c5e8d346e290ef": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "720f662a1e20406ba71816e33cbdbe30": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", 
"_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "c5a8b53e793f4b328543dd9fedd64b6f": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_658c594e4c7f44e986c5e8d346e290ef", "placeholder": "​", "style": "IPY_MODEL_4a1f3a7b8acd4d1daeebd13033a5ba05", "value": " 1/1 [00:00<00:00, 10.57file/s]" } }, "d3f4f43178564251aa135d08e87f47db": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "db630dac73074c248fb6ecc163e1fd75": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_0cc46587e5314f8a95e5373eed4af735", "IPY_MODEL_c5a8b53e793f4b328543dd9fedd64b6f" ], "layout": "IPY_MODEL_720f662a1e20406ba71816e33cbdbe30" } }, "ef5d56b1823d442c829e821516cdbff0": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": 
"1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "initial" } } }, "version_major": 2, "version_minor": 0 } } }, "nbformat": 4, "nbformat_minor": 4 } ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550314.0 parfive-2.2.0/parfive/tests/test_aiofiles.py0000644000175100001660000000305615006437052020562 0ustar00runnerdockerimport os from unittest.mock import patch import pytest import parfive from parfive import Downloader from parfive.config import SessionConfig from parfive.utils import ParfiveUserWarning @pytest.mark.parametrize("use_aiofiles", [True, False]) def test_enable_aiofiles_constructor(use_aiofiles): dl = Downloader(config=parfive.SessionConfig(use_aiofiles=use_aiofiles)) assert dl.config.use_aiofiles == use_aiofiles, f"expected={use_aiofiles}, got={dl.config.use_aiofiles}" @patch.dict(os.environ, {"PARFIVE_OVERWRITE_ENABLE_AIOFILES": "some_value_to_enable_it"}) @pytest.mark.parametrize("use_aiofiles", [True, False]) def test_enable_aiofiles_env_overwrite_always_enabled(use_aiofiles): dl = Downloader(config=parfive.SessionConfig(use_aiofiles=use_aiofiles)) assert dl.config.use_aiofiles is True @patch("parfive.config.SessionConfig._aiofiles_importable", lambda self: False) def test_enable_no_aiofiles(): with pytest.warns(ParfiveUserWarning): dl = Downloader(config=parfive.SessionConfig(use_aiofiles=True)) assert dl.config.use_aiofiles is False dl = Downloader(config=parfive.SessionConfig(use_aiofiles=False)) assert dl.config.use_aiofiles is False def test_aiofiles_session_config(): c = SessionConfig(use_aiofiles=True) assert c.use_aiofiles is True @patch("parfive.config.SessionConfig._aiofiles_importable", lambda self: False) def test_aiofiles_session_config_no_aiofiles_warn(): with pytest.warns(ParfiveUserWarning): c = SessionConfig(use_aiofiles=True) assert c.use_aiofiles is False ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550314.0 parfive-2.2.0/parfive/tests/test_config.py0000644000175100001660000000226115006437052020231 0ustar00runnerdockerimport ssl import aiohttp from parfive.config import SessionConfig from parfive.downloader import Downloader def test_session_config_defaults(): c = SessionConfig() assert c.aiohttp_session_generator is None assert isinstance(c.timeouts, aiohttp.ClientTimeout) assert c.timeouts.total == 0 assert c.timeouts.sock_read == 90 assert c.http_proxy is None assert c.https_proxy is None assert c.chunksize == 1024 assert c.use_aiofiles is False assert len(c.done_callbacks) == 0 assert isinstance(c.headers, dict) assert "User-Agent" in c.headers assert "parfive" in c.headers["User-Agent"] def test_session_config_env_defaults(): c = SessionConfig() assert c.env.serial_mode is False assert c.env.disable_range is False assert c.env.hide_progress is False assert c.env.timeout_total == 0 assert c.env.timeout_sock_read == 90 def test_ssl_context(): # Assert that the unpickalable SSL context object doesn't anger the # dataclass gods def gen(config): return aiohttp.ClientSession(context=ssl.create_default_context()) c = SessionConfig(aiohttp_session_generator=gen) d = Downloader(config=c) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550314.0 parfive-2.2.0/parfive/tests/test_downloader.py0000644000175100001660000005711415006437052021131 0ustar00runnerdockerimport os import platform import threading from pathlib import Path from unittest import mock from unittest.mock import MagicMock, patch import aiohttp import pytest 
from aiohttp import ClientConnectorError, ClientTimeout import parfive from parfive.config import SessionConfig from parfive.downloader import Downloader, FailedDownload, Results, Token from parfive.utils import ChecksumMismatch, check_file_hash skip_windows = pytest.mark.skipif(platform.system() == "Windows", reason="Windows.") def validate_test_file(f): assert len(f) == 1 assert Path(f[0]).name == "testfile.fits" with Path(f[0]).open(mode="rb") as fobj: assert check_file_hash( fobj, "sha-256=a1c58cd340e3bd33f94524076f1fa5cf9a7f13c59d5272a9d4bc0b5bc436d9b3" ) def test_setup(): dl = Downloader() assert isinstance(dl, Downloader) assert len(dl.http_queue) == 0 assert len(dl.ftp_queue) == 0 assert dl._generate_tokens().qsize() == 5 def test_download(httpserver, tmpdir): tmpdir = str(tmpdir) httpserver.serve_content( "SIMPLE = T", headers={"Content-Disposition": "attachment; filename=testfile.fits"} ) dl = Downloader() dl.enqueue_file(httpserver.url, path=Path(tmpdir), max_splits=None) assert dl.queued_downloads == 1 f = dl.download() assert len(f.urls) == 1 assert f.urls[0] == httpserver.url validate_test_file(f) def test_simple_download(httpserver, tmpdir): tmpdir = str(tmpdir) httpserver.serve_content( "SIMPLE = T", headers={"Content-Disposition": "attachment; filename=testfile.fits"} ) f = Downloader.simple_download([httpserver.url], path=Path(tmpdir)) validate_test_file(f) def test_changed_max_conn(httpserver, tmpdir): # Check that changing max_conn works after creating Downloader tmpdir = str(tmpdir) httpserver.serve_content( "SIMPLE = T", headers={"Content-Disposition": "attachment; filename=testfile.fits"} ) dl = Downloader(max_conn=4) dl.enqueue_file(httpserver.url, path=Path(tmpdir), max_splits=None) dl.max_conn = 3 f = dl.download() validate_test_file(f) @pytest.mark.asyncio @pytest.mark.parametrize("use_aiofiles", [True, False]) async def test_async_download(httpserver, tmpdir, use_aiofiles): httpserver.serve_content( "SIMPLE = T", headers={"Content-Disposition": "attachment; filename=testfile.fits"} ) dl = Downloader(config=SessionConfig(use_aiofiles=use_aiofiles)) dl.enqueue_file(httpserver.url, path=Path(tmpdir), max_splits=None) assert dl.queued_downloads == 1 f = await dl.run_download() validate_test_file(f) def test_download_ranged_http(httpserver, tmpdir): tmpdir = str(tmpdir) httpserver.serve_content( "SIMPLE = T", headers={"Content-Disposition": "attachment; filename=testfile.fits"} ) dl = Downloader() dl.enqueue_file(httpserver.url, path=Path(tmpdir)) assert dl.queued_downloads == 1 f = dl.download() validate_test_file(f) def test_regression_download_ranged_http(httpserver, tmpdir): tmpdir = str(tmpdir) httpserver.serve_content( "S", headers={ "Content-Disposition": "attachment; filename=testfile.fits", "Accept-Ranges": "bytes", }, ) dl = Downloader() dl.enqueue_file(httpserver.url, path=Path(tmpdir)) assert dl.queued_downloads == 1 f = dl.download() assert len(f.errors) == 0, f.errors def test_download_partial(httpserver, tmpdir): tmpdir = str(tmpdir) httpserver.serve_content("SIMPLE = T") dl = Downloader() dl.enqueue_file(httpserver.url, filename=lambda resp, url: Path(tmpdir) / "filename") f = dl.download() assert len(f) == 1 # strip the http:// assert "filename" in f[0] def test_empty_download(tmpdir): dl = Downloader() f = dl.download() assert len(f) == 0 def test_download_filename(httpserver, tmpdir): httpserver.serve_content("SIMPLE = T") fname = "testing123" filename = str(tmpdir.join(fname)) with open(filename, "w") as fh: fh.write("SIMPLE = T") dl = 
Downloader() dl.enqueue_file(httpserver.url, filename=filename, chunksize=200) f = dl.download() assert isinstance(f, Results) assert len(f) == 1 assert f[0] == filename def test_download_no_overwrite(httpserver, tmpdir): httpserver.serve_content("SIMPLE = T") fname = "testing123" filename = str(tmpdir.join(fname)) with open(filename, "w") as fh: fh.write("Hello world") dl = Downloader() dl.enqueue_file(httpserver.url, filename=filename, chunksize=200) f = dl.download() assert isinstance(f, Results) assert len(f) == 1 assert f[0] == filename with open(filename) as fh: # If the contents is the same as when we wrote it, it hasn't been # overwritten assert fh.read() == "Hello world" def test_download_overwrite(httpserver, tmpdir): httpserver.serve_content("SIMPLE = T") fname = "testing123" filename = str(tmpdir.join(fname)) with open(filename, "w") as fh: fh.write("Hello world") dl = Downloader(overwrite=True) dl.enqueue_file(httpserver.url, filename=filename, chunksize=200) f = dl.download() assert isinstance(f, Results) assert len(f) == 1 assert f[0] == filename with open(filename) as fh: assert fh.read() == "SIMPLE = T" def test_download_unique(httpserver, tmpdir): httpserver.serve_content("SIMPLE = T") fname = "testing123" filename = str(tmpdir.join(fname)) filenames = [filename, filename + ".fits", filename + ".fits.gz"] dl = Downloader(overwrite="unique") # Write files to both the target filenames. for fn in filenames: with open(fn, "w") as fh: fh.write("Hello world") dl.enqueue_file(httpserver.url, filename=fn, chunksize=200) f = dl.download() assert isinstance(f, Results) assert len(f) == len(filenames) for fn in f: assert fn not in filenames assert f"{fname}.1" in fn def test_retrieve_some_content(testserver, tmpdir): """ Test that the downloader handles errors properly. 
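# Sketch of the three overwrite modes exercised by the tests above: False
# keeps an existing file untouched, True replaces it, and "unique" downloads
# to a new name such as "testing123.1". The URL and path are placeholders.
from parfive import Downloader

for mode in (False, True, "unique"):
    dl = Downloader(overwrite=mode)
    dl.enqueue_file("http://example.com/testing123", path="./data")
    dl.download()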
""" tmpdir = str(tmpdir) dl = Downloader() nn = 5 for i in range(nn): dl.enqueue_file(testserver.url + f"/testfile_{i}.txt", path=tmpdir) f = dl.download() assert len(f) == nn - 1 assert len(f.errors) == 1 def test_no_progress(httpserver, tmpdir, capsys): tmpdir = str(tmpdir) httpserver.serve_content("SIMPLE = T") dl = Downloader(progress=False) dl.enqueue_file(httpserver.url, path=tmpdir) dl.download() # Check that there was not stdout captured = capsys.readouterr().out assert not captured def throwerror(*args, **kwargs): raise ValueError("Out of Cheese.") @patch("parfive.downloader.default_name", throwerror) def test_raises_other_exception(httpserver, tmpdir): tmpdir = str(tmpdir) httpserver.serve_content("SIMPLE = T") dl = Downloader() dl.enqueue_file(httpserver.url, path=tmpdir) res = dl.download() assert isinstance(res.errors[0].exception, ValueError) def test_token(): t = Token(5) assert "5" in repr(t) assert "5" in str(t) def test_failed_download(): err = FailedDownload("wibble", "bbc.co.uk", "running away") assert "bbc.co.uk" in repr(err) assert "bbc.co.uk" in repr(err) assert "running away" in str(err) assert "running away" in str(err) def test_results(): res = Results() res.append(path="hello", url="aurl") assert res[0] == "hello" assert res.urls[0] == "aurl" res.add_error("wibble", "notaurl", "out of cheese") assert "notaurl" in repr(res) assert "hello" in repr(res) assert "out of cheese" in repr(res) def test_notaurl(tmpdir): tmpdir = str(tmpdir) dl = Downloader(progress=False) dl.enqueue_file("http://notaurl.wibble/file", path=tmpdir) f = dl.download() assert len(f.errors) == 1 assert isinstance(f.errors[0].exception, aiohttp.ClientConnectionError) def test_wrongscheme(tmpdir): tmpdir = str(tmpdir) dl = Downloader(progress=False) with pytest.raises(ValueError, match="URL must start with either"): dl.enqueue_file("webcal://notaurl.wibble/file", path=tmpdir) def test_retry(tmpdir, testserver): tmpdir = str(tmpdir) dl = Downloader() nn = 5 for i in range(nn): dl.enqueue_file(testserver.url + f"/testfile_{i}.txt", path=tmpdir) f = dl.download() assert len(f) == nn - 1 assert len(f.errors) == 1 f2 = dl.retry(f) assert len(f2) == nn assert len(f2.errors) == 0 assert [f"testfile_{k}.txt" for k in ["0", "1", "3", "4", "2"]] == [f.split("/")[-1] for f in f2.urls] assert "testfile_0.txt" == Path(f2[0]).name assert "testfile_4.txt" == Path(f2[-2]).name assert "testfile_2.txt" == Path(f2[-1]).name def test_empty_retry(): f = Results() dl = Downloader() dl.retry(f) def test_done_callback_error(tmp_path, testserver): def done_callback(filepath, url, error): if error is not None: (tmp_path / "callback.error").touch() dl = Downloader(config=SessionConfig(done_callbacks=[done_callback])) dl.enqueue_file(testserver.url + "/testfile_2.txt", path=tmp_path) f = dl.download() assert len(f.errors) == 1 assert (tmp_path / "callback.error").exists() @skip_windows @pytest.mark.allow_hosts(True) def test_ftp(tmpdir): tmpdir = str(tmpdir) dl = Downloader() dl.enqueue_file("ftp://ftp.swpc.noaa.gov/pub/warehouse/2011/2011_SRS.tar.gz", path=tmpdir) dl.enqueue_file("ftp://ftp.swpc.noaa.gov/pub/warehouse/2011/2013_SRS.tar.gz", path=tmpdir) dl.enqueue_file("ftp://ftp.swpc.noaa.gov/pub/_SRS.tar.gz", path=tmpdir) dl.enqueue_file("ftp://notaserver/notafile.fileL", path=tmpdir) f = dl.download() assert len(f) == 1 assert len(f.errors) == 3 # I don't know of an alternative server which makes any sense to test this with @pytest.mark.skip("Remote server offline") @skip_windows @pytest.mark.allow_hosts(True) def 
test_ftp_pasv_command(tmpdir): tmpdir = str(tmpdir) dl = Downloader() dl.enqueue_file( "ftp://ftp.ngdc.noaa.gov/STP/swpc_products/daily_reports/solar_region_summaries/2002/04/20020414SRS.txt", path=tmpdir, passive_commands=["pasv"], ) assert dl.queued_downloads == 1 f = dl.download() assert len(f) == 1 assert len(f.errors) == 0 @skip_windows @pytest.mark.allow_hosts(True) def test_ftp_http(tmpdir, httpserver): tmpdir = str(tmpdir) httpserver.serve_content("SIMPLE = T") dl = Downloader() dl.enqueue_file("ftp://ftp.swpc.noaa.gov/pub/warehouse/2011/2011_SRS.tar.gz", path=tmpdir) dl.enqueue_file("ftp://ftp.swpc.noaa.gov/pub/warehouse/2011/2013_SRS.tar.gz", path=tmpdir) dl.enqueue_file("ftp://ftp.swpc.noaa.gov/pub/_SRS.tar.gz", path=tmpdir) dl.enqueue_file("ftp://notaserver/notafile.fileL", path=tmpdir) dl.enqueue_file(httpserver.url, path=tmpdir) dl.enqueue_file("http://noaurl.notadomain/noafile", path=tmpdir) assert dl.queued_downloads == 6 f = dl.download() assert len(f) == 2 assert len(f.errors) == 4 def test_default_user_agent(httpserver, tmpdir): tmpdir = str(tmpdir) httpserver.serve_content( "SIMPLE = T", headers={"Content-Disposition": "attachment; filename=testfile.fits"} ) dl = Downloader() dl.enqueue_file(httpserver.url, path=Path(tmpdir), max_splits=None) assert dl.queued_downloads == 1 dl.download() assert "User-Agent" in httpserver.requests[0].headers assert ( httpserver.requests[0].headers["User-Agent"] == f"parfive/{parfive.__version__} aiohttp/{aiohttp.__version__} python/{platform.python_version()}" ) def test_custom_user_agent(httpserver, tmpdir): tmpdir = str(tmpdir) httpserver.serve_content( "SIMPLE = T", headers={"Content-Disposition": "attachment; filename=testfile.fits"} ) dl = Downloader(config=SessionConfig(headers={"User-Agent": "test value 299792458"})) dl.enqueue_file(httpserver.url, path=Path(tmpdir), max_splits=None) assert dl.queued_downloads == 1 dl.download() assert "User-Agent" in httpserver.requests[0].headers assert httpserver.requests[0].headers["User-Agent"] == "test value 299792458" @patch.dict(os.environ, {"HTTP_PROXY": "http_proxy_url", "HTTPS_PROXY": "https_proxy_url"}) @pytest.mark.parametrize( ("url", "proxy"), [ ("http://test.example.com", "http_proxy_url"), ("https://test.example.com", "https_proxy_url"), ], ) def test_proxy_passed_as_kwargs_to_get(tmpdir, url, proxy): with mock.patch("aiohttp.client.ClientSession._request", new_callable=mock.MagicMock) as patched: dl = Downloader() dl.enqueue_file(url, path=Path(tmpdir), max_splits=None) assert dl.queued_downloads == 1 dl.download() assert patched.called, "`ClientSession._request` not called" assert list(patched.call_args) == [ ("HEAD", url), { "allow_redirects": False, "timeout": ClientTimeout(total=0, connect=None, sock_read=90, sock_connect=None), "proxy": proxy, "headers": {}, }, ] def test_http_callback_success(httpserver, tmpdir): # Test callback on successful download httpserver.serve_content( "SIMPLE = T", headers={"Content-Disposition": "attachment; filename=testfile.fits"} ) cb = MagicMock() dl = Downloader(config=SessionConfig(done_callbacks=[cb])) dl.enqueue_file(httpserver.url, path=tmpdir, max_splits=None) assert dl.queued_downloads == 1 dl.download() assert cb.call_count == 1 cb_path, cb_url, cb_status = cb.call_args[0] assert cb_path == tmpdir / "testfile.fits" assert httpserver.url == cb_url assert cb_status is None def test_http_callback_fail(tmpdir): # Test callback on failed download cb = MagicMock() dl = Downloader(config=SessionConfig(done_callbacks=[cb])) url = 
"http://127.0.0.1/myfile.txt" dl.enqueue_file(url, path=tmpdir, max_splits=None) assert dl.queued_downloads == 1 dl.download() assert cb.call_count == 1 cb_path, cb_url, cb_status = cb.call_args[0] assert cb_path is None assert url == cb_url # Returns 404 on windows on GHA, which triggers FailedDownload assert isinstance(cb_status, (ClientConnectorError, FailedDownload)) @pytest.mark.allow_hosts(True) def test_ftp_callback_success(tmpdir): cb = MagicMock() dl = Downloader(config=SessionConfig(done_callbacks=[cb])) url = "ftp://ftp.swpc.noaa.gov/pub/warehouse/2011/2011_SRS.tar.gz" dl.enqueue_file(url, path=str(tmpdir)) assert dl.queued_downloads == 1 dl.download() assert cb.call_count == 1 cb_path, cb_url, cb_status = cb.call_args[0] assert cb_path == tmpdir / "2011_SRS.tar.gz" assert url == cb_url assert cb_status is None @mock.patch("aioftp.Client.context", side_effect=ConnectionRefusedError()) def test_ftp_callback_error(tmpdir): # Download should fail as not marked with allowed hosts cb = MagicMock() dl = Downloader(config=SessionConfig(done_callbacks=[cb])) url = "ftp://127.0.0.1/nosuchfile.txt" dl.enqueue_file(url, path=str(tmpdir)) assert dl.queued_downloads == 1 dl.download() assert cb.call_count == 1 cb_path, cb_url, cb_status = cb.call_args[0] assert cb_path is None assert cb_url == url assert isinstance(cb_status, ConnectionRefusedError) class CustomThread(threading.Thread): def __init__(self, *args, **kwargs): self.result = None super().__init__(*args, **kwargs) def run(self): try: self.result = self._target(*self._args, **self._kwargs) finally: del self._target, self._args, self._kwargs @skip_windows def test_download_out_of_main_thread(httpserver, tmpdir, recwarn): tmpdir = str(tmpdir) httpserver.serve_content( "SIMPLE = T", headers={"Content-Disposition": "attachment; filename=testfile.fits"} ) dl = Downloader() dl.enqueue_file(httpserver.url, path=Path(tmpdir), max_splits=None) thread = CustomThread(target=dl.download) thread.start() thread.join() validate_test_file(thread.result) # We use recwarn here as for some reason pytest.warns did not reliably pickup this warning. assert len(recwarn) > 0 assert any( "This download has been started in a thread which is not the main thread. You will not be able to interrupt the download." 
== w.message.args[0] for w in recwarn ) def test_checksum_want_headers(httpserver, tmpdir): tmpdir = str(tmpdir) httpserver.serve_content( "SIMPLE = T", headers={ "Content-Disposition": "attachment; filename=testfile.fits", "Repr-Digest": "sha-256=a1c58cd340e3bd33f94524076f1fa5cf9a7f13c59d5272a9d4bc0b5bc436d9b3", }, ) dl = Downloader() dl.enqueue_file(httpserver.url, path=Path(tmpdir), max_splits=None, checksum=True) dl.enqueue_file(httpserver.url, path=Path(tmpdir), max_splits=None, checksum=False) assert dl.queued_downloads == 2 f = dl.download() assert len(f.urls) == 2 assert f.urls[0] == httpserver.url validate_test_file(f[0:1]) first_headers = httpserver.requests[0].headers assert "Want-Repr-Digest" in first_headers assert "Want-Content-Digest" in first_headers # Two requests a file second_headers = httpserver.requests[2].headers assert "Want-Repr-Digest" not in second_headers assert "Want-Content-Digest" not in second_headers def test_checksum_invalid(httpserver, tmpdir): tmpdir = str(tmpdir) httpserver.serve_content( "SIMPLE = T", headers={ "Content-Disposition": "attachment; filename=testfile.fits", "Content-Digest": "sha-256=XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", }, ) dl = Downloader() dl.enqueue_file(httpserver.url, path=Path(tmpdir), max_splits=None, checksum=True) assert dl.queued_downloads == 1 f = dl.download() assert len(f.errors) == 1 exception = f.errors[0].exception assert isinstance(exception, FailedDownload) assert "checksum doesn't match" in str(exception) def test_early_fail_download_checksum_mismatch(httpserver, tmpdir): tmpdir = str(tmpdir) httpserver.serve_content( "SIMPLE = T", headers={ "Content-Disposition": "attachment; filename=testfile.fits", "Repr-Digest": "sha-256=INVALID", }, ) dl = Downloader() dl.enqueue_file( httpserver.url, path=Path(tmpdir), max_splits=None, checksum="sha-256=a1c58cd340e3bd33f94524076f1fa5cf9a7f13c59d5272a9d4bc0b5bc436d9b3", ) assert dl.queued_downloads == 1 f = dl.download() assert len(f.errors) == 1 exc = f.errors[0].exception assert isinstance(exc, FailedDownload) assert isinstance(exc.exception, ChecksumMismatch) assert "Server provided checksum and user provided checksum do not match, download skipped" in str( exc.exception ) def test_server_user_algorithm_mismatch(httpserver, tmp_path, caplog): caplog.set_level("INFO") httpserver.serve_content( "SIMPLE = T", headers={ "Content-Disposition": "attachment; filename=testfile.fits", "Repr-Digest": "sha-512=INVALID", }, ) dl = Downloader() dl.enqueue_file( httpserver.url, path=tmp_path, max_splits=None, checksum="sha-256=a1c58cd340e3bd33f94524076f1fa5cf9a7f13c59d5272a9d4bc0b5bc436d9b3", ) assert dl.queued_downloads == 1 f = dl.download() assert len(f.errors) == 0 assert any( "Not comparing user provided checksum to server provided checksum as algorithms do not match (got sha512 from the server)." 
in m for m in caplog.messages ) first_headers = httpserver.requests[0].headers assert "Want-Repr-Digest" in first_headers assert "Want-Content-Digest" in first_headers assert "sha-256=10" in first_headers["Want-Repr-Digest"] assert "sha-256=10" in first_headers["Want-Content-Digest"] def test_explicit_checksum(namedserver, tmpdir): tmpdir = str(tmpdir) dl = Downloader() dl.enqueue_file( namedserver.url + "/testfile.txt", path=Path(tmpdir), max_splits=None, checksum="sha-256=0ba904eae8773b70c75333db4de2f3ac45a8ad4ddba1b242f0b3cfc199391dd8", ) dl.enqueue_file( namedserver.url + "/testfile.txt", path=Path(tmpdir), max_splits=None, checksum="sha-256=XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", ) assert dl.queued_downloads == 2 f = dl.download() assert len(f) == 1 assert f.urls[0] == namedserver.url + "/testfile.txt" assert len(f.errors) == 1 exception = f.errors[0].exception assert isinstance(exception, FailedDownload) assert "checksum doesn't match" in str(exception) def test_file_exists_checksum(httpserver, tmpdir, caplog): caplog.set_level("DEBUG") tmpdir = str(tmpdir) httpserver.serve_content( "SIMPLE = T", headers={ "Content-Disposition": "attachment; filename=testfile.fits", "Repr-Digest": "sha-256=a1c58cd340e3bd33f94524076f1fa5cf9a7f13c59d5272a9d4bc0b5bc436d9b3", }, ) dl = Downloader() dl.enqueue_file(httpserver.url, path=Path(tmpdir), max_splits=None, checksum=True) assert dl.queued_downloads == 1 f = dl.download() assert len(f) == 1 dl = Downloader() dl.enqueue_file(httpserver.url, path=Path(tmpdir), max_splits=None, checksum=True) assert dl.queued_downloads == 1 f = dl.download() assert len(f) == 1 # Assert that only three requests have been made, thereby checking # that the file wasn't re-downloaded the second time assert len(httpserver.requests) == 3 assert ( "testfile.fits already exists, checksum matches and overwrite is False; skipping download." 
in caplog.messages[-1] ) def test_no_server_checksum(httpserver, tmpdir, caplog): caplog.set_level("DEBUG") tmpdir = str(tmpdir) httpserver.serve_content( "SIMPLE = T", headers={ "Content-Disposition": "attachment; filename=testfile.fits", }, ) dl = Downloader() dl.enqueue_file(httpserver.url, path=Path(tmpdir), max_splits=None, checksum=True) assert dl.queued_downloads == 1 f = dl.download() assert len(f) == 1 assert any("Expected server to provide checksum for url" in msg for msg in caplog.messages) def test_invalid_checksum_enqueue(): dl = Downloader() with pytest.raises(ValueError, match="checksum 'wibble' should be of the format ="): dl.enqueue_file("", checksum="wibble") with pytest.raises(ValueError, match="checksum type 'nope' is not supported"): dl.enqueue_file("", checksum="nope=wibble") with pytest.raises(ValueError, match="checksum type 'nope' is not supported"): check_file_hash("", "nope=wibble") @pytest.mark.parametrize("checksum", ["nope=wibble", "wibble"]) def test_invalid_server_checksum(httpserver, tmpdir, caplog, checksum): caplog.set_level("ERROR") tmpdir = str(tmpdir) httpserver.serve_content( "SIMPLE = T", headers={ "Content-Disposition": "attachment; filename=testfile.fits", "Content-Digest": checksum, }, ) dl = Downloader() dl.enqueue_file(httpserver.url, path=Path(tmpdir), max_splits=None, checksum=True) assert dl.queued_downloads == 1 f = dl.download() assert len(f.errors) == 0 assert "Got invalid checksum:" in caplog.messages[0] ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550314.0 parfive-2.2.0/parfive/tests/test_downloader_multipart.py0000644000175100001660000000240515006437052023223 0ustar00runnerdockerfrom functools import partial from parfive import Downloader from parfive.tests.localserver import error_on_nth_get_request from parfive.utils import MultiPartDownloadError def test_multipart(multipartserver, tmp_path): dl = Downloader(progress=False) max_splits = 5 dl.enqueue_file(multipartserver.url, path=tmp_path, max_splits=max_splits) files = dl.download() # Verify we transferred all the content with open(files[0], "rb") as fobj: assert fobj.read() == b"a" * 100 # Assert that we made the expected number of requests assert len(multipartserver.requests) == max_splits + 1 assert "HTTP_RANGE" not in multipartserver.requests[0] for split_req in multipartserver.requests[1:]: assert "HTTP_RANGE" in split_req def test_multipart_with_error(multipartserver, tmp_path): multipartserver.callback = partial(error_on_nth_get_request, 3) dl = Downloader(progress=False) max_splits = 5 dl.enqueue_file(multipartserver.url, path=tmp_path, max_splits=max_splits) files = dl.download() assert len(files) == 0 assert len(files.errors) == 1 assert isinstance(files.errors[0].exception, MultiPartDownloadError) expected_file = tmp_path / "testfile.txt" assert not expected_file.exists() ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550314.0 parfive-2.2.0/parfive/tests/test_main.py0000644000175100001660000000410415006437052017706 0ustar00runnerdockerfrom pathlib import Path import pytest from parfive.main import parse_args, run_parfive REQUIRED_ARGUMENTS = ["test_url"] def test_no_url(): with pytest.raises(SystemExit): parse_args(["--overwrite"]) def helper(args, name, expected): args = parse_args(REQUIRED_ARGUMENTS + args) assert getattr(args, name) == expected def test_overwrite(): helper(["--overwrite"], "overwrite", True) helper([], "overwrite", False) def test_max_conn(): helper(["--max-conn", "10"], 
"max_conn", 10) helper([], "max_conn", 5) def test_max_splits(): helper(["--max-splits", "10"], "max_splits", 10) helper([], "max_splits", 5) def test_no_file_progress(): helper(["--no-file-progress"], "no_file_progress", True) helper([], "no_file_progress", False) def test_no_progress(): helper(["--no-progress"], "no_progress", True) helper([], "no_progress", False) def test_print_filenames(): helper(["--print-filenames"], "print_filenames", True) helper([], "print_filenames", False) def test_directory(): helper(["--directory", "/tmp"], "directory", "/tmp") helper([], "directory", "./") def test_verbose(): helper(["--verbose"], "verbose", True) helper([], "verbose", False) @pytest.fixture def test_url(multipartserver): return multipartserver.url @pytest.mark.parametrize( "args", [ [], ["--no-progress"], ["--print-filenames"], ["--verbose"], ], ) def test_run_cli_success(args, test_url, capsys): cliargs = parse_args([*args, test_url]) with pytest.raises(SystemExit) as exit_exc: run_parfive(cliargs) assert exit_exc.value.code == 0 cap_out = capsys.readouterr() if "--print-filenames" in args: assert "testfile.txt" in cap_out.out else: assert "testfile.txt" not in cap_out.out if "--no-progress" in args: assert "Files Downloaded:" not in cap_out.err else: assert "Files Downloaded:" in cap_out.err if "--verbose" in args: assert "DEBUG" in cap_out.err Path("testfile.txt").unlink() ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550314.0 parfive-2.2.0/parfive/tests/test_utils.py0000644000175100001660000000347115006437052020130 0ustar00runnerdockerimport aiohttp import pytest from parfive.utils import session_head_or_get def deny_head(server, environ, start_response): if environ["REQUEST_METHOD"] != "GET": status = "405" response_headers = [("Content-type", "text/plain")] start_response(status, response_headers) return [b""] def head_302(server, environ, start_response): if environ["REQUEST_METHOD"] != "GET": status = "302" response_headers = [("Content-type", "text/plain")] start_response(status, response_headers) return [b""] @pytest.mark.asyncio async def test_head_or_get(namedserver): url = namedserver.url async with aiohttp.ClientSession() as session: async with session_head_or_get( session, url, ) as resp: assert resp.ok assert resp.method == "HEAD" assert resp.status == 200 namedserver.callback = deny_head async with aiohttp.ClientSession() as session: async with session_head_or_get( session, url, ) as resp: assert resp.ok assert resp.method == "GET" assert resp.status == 200 @pytest.mark.asyncio async def test_head_302(namedserver): # blame jsoc url = namedserver.url async with aiohttp.ClientSession() as session: async with session_head_or_get( session, url, ) as resp: assert resp.ok assert resp.method == "HEAD" assert resp.status == 200 namedserver.callback = head_302 async with aiohttp.ClientSession() as session: async with session_head_or_get( session, url, ) as resp: assert resp.ok assert resp.method == "GET" assert resp.status == 200 ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550314.0 parfive-2.2.0/parfive/utils.py0000644000175100001660000002356415006437052015734 0ustar00runnerdockerimport asyncio import hashlib import io import os import pathlib import typing import warnings from collections.abc import AsyncIterator, Generator from concurrent.futures import ThreadPoolExecutor from contextlib import asynccontextmanager from itertools import count from pathlib import Path from typing import TYPE_CHECKING, 
Any, Callable, Literal, TypeVar, Union import aiohttp import parfive if TYPE_CHECKING: import aioftp __all__ = [ "FailedDownload", "Token", "cancel_task", "default_name", "remove_file", ] # Copied out of CPython under PSF Licence 2 def _parseparam(s: str) -> Generator[str, None, None]: while s[:1] == ";": s = s[1:] end = s.find(";") while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2: end = s.find(";", end + 1) if end < 0: end = len(s) f = s[:end] yield f.strip() s = s[end:] def parse_header(line: str) -> tuple[str, dict[str, str]]: """Parse a Content-type like header. Return the main content-type and a dictionary of options. """ parts = _parseparam(";" + line) key = parts.__next__() pdict = {} for p in parts: i = p.find("=") if i >= 0: name = p[:i].strip().lower() value = p[i + 1 :].strip() if len(value) >= 2 and value[0] == value[-1] == '"': value = value[1:-1] value = value.replace("\\\\", "\\").replace('\\"', '"') pdict[name] = value return key, pdict def default_name(path: os.PathLike, resp: aiohttp.ClientResponse, url: str) -> os.PathLike: url_filename = url.split("/")[-1] if resp: cdheader = resp.headers.get("Content-Disposition", None) if cdheader: value, params = parse_header(cdheader) name = params.get("filename", url_filename) else: name = url_filename else: name = url_filename return pathlib.Path(path) / name def run_task_in_thread(loop: asyncio.BaseEventLoop, coro: asyncio.Task) -> Any: """ This function returns the asyncio Future after running the loop in a thread. This makes the return value of this function the same as the return of ``loop.run_until_complete``. """ with ThreadPoolExecutor(max_workers=1) as aio_pool: try: future = aio_pool.submit(loop.run_until_complete, coro) except KeyboardInterrupt: future.cancel() return future.result() async def get_ftp_size(client: "aioftp.Client", filepath: os.PathLike) -> int: """ Given an `aioftp.ClientSession` object get the expected size of the file, return ``None`` if the size can not be determined. """ try: size = await client.stat(filepath) size = size.get("size", None) except Exception: # noqa BLE001 parfive.log.info("Failed to get size of FTP file", exc_info=True) size = None return int(size) if size else size def get_http_size(resp: aiohttp.ClientResponse) -> Union[int, str, None]: size = resp.headers.get("content-length", None) return int(size) if size else size def replacement_filename(path: os.PathLike) -> Path: # type: ignore[return] """ Given a path generate a unique filename. """ path = pathlib.Path(path) if not path.exists(): return path suffix = "".join(path.suffixes) for c in count(start=1): if suffix: name, _ = path.name.split(suffix) else: name = path.name new_name = f"{name}.{c}{suffix}" new_path = path.parent / new_name if not new_path.exists(): return new_path def get_filepath(filepath: os.PathLike, overwrite: Union[bool, Literal["unique"]]) -> tuple[Path, bool]: """ Get the filepath to download to and ensure dir exists. 
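# Sketch of the unique-filename rule implemented by replacement_filename
# above, which is what overwrite="unique" relies on: "testfile.fits" becomes
# "testfile.1.fits", then "testfile.2.fits", and so on. The path is a
# placeholder.
from pathlib import Path
from parfive.utils import replacement_filename

target = Path("./data/testfile.fits")
# Returns the path unchanged if it does not exist yet, otherwise the first
# numbered variant that does not exist.
print(replacement_filename(target))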
Returns ------- `pathlib.Path`, `bool` """ filepath = pathlib.Path(filepath) if filepath.exists(): if not overwrite: return filepath, True if overwrite == "unique": filepath = replacement_filename(filepath) if not filepath.parent.exists(): filepath.parent.mkdir(parents=True) return filepath, False class MultiPartDownloadError(Exception): def __init__(self, response: aiohttp.ClientResponse) -> None: self.response = response class FailedDownload(Exception): def __init__(self, filepath_partial: Union[Path, Callable], url: str, exception: BaseException) -> None: self.filepath_partial = filepath_partial self.url = url self.exception = exception super().__init__() def __repr__(self) -> str: out = super().__repr__() out += f"\n {self.url} {self.exception}" return out def __str__(self) -> str: return f"Download Failed: {self.url} with error {self.exception!s}" class ChecksumMismatch(Exception): """Used when a checksum doesn't match.""" class Token: def __init__(self, n: int) -> None: self.n = n def __repr__(self) -> str: return super().__repr__() + f"n = {self.n}" def __str__(self) -> str: return f"Token {self.n}" _T = TypeVar("_T") class _QueueList(list[_T]): """ A list, with an extra method that empties the list and puts it into a `asyncio.Queue`. Creating the queue can only be done inside a running asyncio loop. """ def generate_queue(self, maxsize: int = 0) -> asyncio.Queue: queue: asyncio.Queue = asyncio.Queue(maxsize=maxsize) for item in self: queue.put_nowait(item) self.clear() return queue class ParfiveUserWarning(UserWarning): """ Raised for not-quite errors. """ class ParfiveFutureWarning(FutureWarning): """ Raised for future changes to the parfive API. """ def remove_file(filepath: os.PathLike) -> None: """ Remove the file from the disk, if it exists """ filepath = Path(filepath) try: filepath.unlink(missing_ok=True) except Exception as remove_exception: # noqa BLE001 warnings.warn( f"Failed to delete possibly incomplete file {filepath} {remove_exception}", ParfiveUserWarning, ) async def cancel_task(task: asyncio.Task) -> bool: """ Call cancel on a task and then wait for it to exit. Return True if the task was cancelled, False otherwise. """ task.cancel() try: await task except asyncio.CancelledError: return True return task.cancelled() try: # Python 3.11 added file_digest from hashlib import file_digest except ImportError: import hashlib # Copied from the stdlib @typing.no_type_check def file_digest(fileobj, digest, /, *, _bufsize=2**18): """ Hash the contents of a file-like object. Returns a digest object. *fileobj* must be a file-like object opened for reading in binary mode. It accepts file objects from open(), io.BytesIO(), and SocketIO objects. The function may bypass Python's I/O and use the file descriptor *fileno* directly. *digest* must either be a hash algorithm name as a *str*, a hash constructor, or a callable that returns a hash object. """ # On Linux we could use AF_ALG sockets and sendfile() to archive zero-copy # hashing with hardware acceleration. if isinstance(digest, str): digestobj = hashlib.new(digest) else: digestobj = digest() if hasattr(fileobj, "getbuffer"): # io.BytesIO object, use zero-copy buffer digestobj.update(fileobj.getbuffer()) return digestobj # Only binary files implement readinto(). 
if not (hasattr(fileobj, "readinto") and hasattr(fileobj, "readable") and fileobj.readable()): raise ValueError(f"'{fileobj!r}' is not a file-like object in binary reading mode.") # binary file, socket.SocketIO object # Note: socket I/O uses different syscalls than file I/O. buf = bytearray(_bufsize) # Reusable buffer to reduce allocations. view = memoryview(buf) while True: size = fileobj.readinto(buf) if size is None: raise BlockingIOError("I/O operation would block.") if size == 0: break # EOF digestobj.update(view[:size]) return digestobj def validate_checksum_format(checksum: str) -> tuple[str, str]: if "=" not in checksum: raise ValueError(f"checksum '{checksum}' should be of the format <algorithm>=<checksum>") chk_alg, checksum = checksum.split("=") # Normalise the algorithm name to not have "-" as that might have wider support chk_alg = chk_alg.replace("-", "") if chk_alg not in hashlib.algorithms_available: raise ValueError(f"checksum type '{chk_alg}' is not supported.") return chk_alg, checksum def check_file_hash(fileobj: io.BufferedReader, checksum: str, accept_invalid_checksum: bool = False) -> bool: """ Verify that the contents of fileobj match the checksum provided by ``checksum``. """ try: chk_alg, checksum = validate_checksum_format(checksum) except ValueError as e: if not accept_invalid_checksum: raise parfive.log.error("Got invalid checksum: %s", e) # Allow invalid checksums to match return True computed_file_hash = file_digest(fileobj, chk_alg).hexdigest() return computed_file_hash == checksum @asynccontextmanager async def session_head_or_get(session: aiohttp.ClientSession, url: str, **kwargs: dict) -> AsyncIterator: """ Try to make a HEAD request to the resource and fall back to a GET request if that fails. """ async with session.head(url, **kwargs) as resp: if resp.status == 200: yield resp return async with session.get(url, **kwargs) as resp: yield resp ././@PaxHeader0000000000000000000000000000003300000000000010211 xustar0027 mtime=1746550323.935221 parfive-2.2.0/parfive.egg-info/0000755000175100001660000000000015006437064015705 5ustar00runnerdocker././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550323.0 parfive-2.2.0/parfive.egg-info/PKG-INFO0000644000175100001660000001272415006437063017007 0ustar00runnerdockerMetadata-Version: 2.4 Name: parfive Version: 2.2.0 Summary: A HTTP and FTP parallel file downloader. Author-email: Stuart Mumford <stuart@cadair.com> License: Copyright (c) 2017-2020 Stuart Mumford Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Project-URL: repository, https://parfive.readthedocs.io/ Classifier: Development Status :: 5 - Production/Stable Classifier: Programming Language :: Python :: 3 Classifier: Programming Language :: Python :: 3.9 Classifier: Programming Language :: Python :: 3.10 Classifier: Programming Language :: Python :: 3.11 Classifier: Programming Language :: Python :: 3.12 Classifier: Programming Language :: Python :: 3.13 Requires-Python: >=3.9 Description-Content-Type: text/x-rst License-File: LICENSE Requires-Dist: tqdm>=4.27.0 Requires-Dist: aiohttp Provides-Extra: ftp Requires-Dist: aioftp>=0.17.1; extra == "ftp" Provides-Extra: tests Requires-Dist: pytest; extra == "tests" Requires-Dist: pytest-localserver; extra == "tests" Requires-Dist: pytest-asyncio; extra == "tests" Requires-Dist: pytest-socket; extra == "tests" Requires-Dist: pytest-cov; extra == "tests" Requires-Dist: aiofiles; extra == "tests" Provides-Extra: docs Requires-Dist: sphinx; extra == "docs" Requires-Dist: sphinx-automodapi; extra == "docs" Requires-Dist: sphinx-autodoc-typehints; extra == "docs" Requires-Dist: sphinx-contributors; extra == "docs" Requires-Dist: sphinx-book-theme; extra == "docs" Dynamic: license-file Parfive ======= .. image:: https://img.shields.io/pypi/v/parfive.svg :target: https://pypi.python.org/pypi/parfive :alt: Latest PyPI version A parallel file downloader using asyncio. parfive can handle downloading multiple files in parallel as well as downloading each file in a number of chunks. Usage ----- .. image:: https://asciinema.org/a/EuALahgkiicWHGmrfFsZSLz81.svg :alt: asciicast demo of parfive :target: https://asciinema.org/a/EuALahgkiicWHGmrfFsZSLz81 parfive works by creating a downloader object, appending files to it and then running the download. parfive has a synchronous API, but uses asyncio to parallelise downloading the files. A simple example is:: from parfive import Downloader dl = Downloader() dl.enqueue_file("http://data.sunpy.org/sample-data/predicted-sunspot-radio-flux.txt", path="./") files = dl.download() Parfive also bundles a CLI. The following example will download the two files concurrently:: $ parfive 'http://212.183.159.230/5MB.zip' 'http://212.183.159.230/10MB.zip' $ parfive --help usage: parfive [-h] [--max-conn MAX_CONN] [--overwrite] [--no-file-progress] [--directory DIRECTORY] [--print-filenames] URLS [URLS ...] Parfive, the python asyncio based downloader positional arguments: URLS URLs of files to be downloaded. optional arguments: -h, --help show this help message and exit --max-conn MAX_CONN Number of maximum connections. --overwrite Overwrite if the file exists. --no-file-progress Do not show a progress bar for each file. --directory DIRECTORY Directory to which downloaded files are saved. --print-filenames Print successfully downloaded files' names to stdout. Results ^^^^^^^ ``parfive.Downloader.download`` returns a ``parfive.Results`` object, which is a list of the filenames that have been downloaded. It also tracks any files which failed to download. Handling Errors ^^^^^^^^^^^^^^^ If files fail to download, the URLs and the responses from the server are stored in the ``Results`` object returned by ``parfive.Downloader``. These can be used to inform users about the errors. (Note: the progress bar will finish in an incomplete state if a download fails, i.e. it will show ``4/5 Files Downloaded``).
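A minimal sketch of inspecting a completed download (the variable names ``results``, ``filename`` and ``error`` are illustrative only; the ``errors`` attribute is described in more detail below)::

    from parfive import Downloader

    dl = Downloader()
    dl.enqueue_file("http://data.sunpy.org/sample-data/predicted-sunspot-radio-flux.txt", path="./")
    results = dl.download()

    # Successful downloads: the Results object behaves like a list of filenames.
    for filename in results:
        print(filename)

    # Failed downloads: each entry in .errors carries the failed .url and the
    # server .response (an aiohttp response or error object).
    for error in results.errors:
        print(f"{error.url} failed with {error.response}")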
The ``Results`` object is a list with an extra attribute ``errors``; this property returns a list of named tuples, where each named tuple contains the ``.url`` and the ``.response``, which is an ``aiohttp.ClientResponse`` or an ``aiohttp.ClientError`` object. Installation ------------ parfive is available on PyPI; you can install it with pip:: pip install parfive or if you want to use FTP downloads:: pip install parfive[ftp] Requirements ^^^^^^^^^^^^ - Python 3.9 or above - aiohttp - tqdm - aioftp (for downloads over FTP) Licence ------- MIT Licensed Authors ------- `parfive` was written by `Stuart Mumford `__. ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550323.0 parfive-2.2.0/parfive.egg-info/SOURCES.txt0000644000175100001660000000200415006437063017564 0ustar00runnerdocker.codecov.yaml .coveragerc .gitignore .pre-commit-config.yaml .readthedocs.yml .ruff.toml LICENSE README.rst mypy.ini pyproject.toml pytest.ini setup.py tox.ini .github/FUNDING.yml .github/release-drafter.yml .github/workflows/ci_workflows.yml .github/workflows/release-drafter.yml docs/Makefile docs/conf.py docs/index.rst docs/robots.txt docs/static/css/contributors.css parfive/__init__.py parfive/_version.py parfive/config.py parfive/conftest.py parfive/downloader.py parfive/main.py parfive/results.py parfive/utils.py parfive.egg-info/PKG-INFO parfive.egg-info/SOURCES.txt parfive.egg-info/dependency_links.txt parfive.egg-info/entry_points.txt parfive.egg-info/not-zip-safe parfive.egg-info/requires.txt parfive.egg-info/top_level.txt parfive/tests/__init__.py parfive/tests/localserver.py parfive/tests/simple_download_test.ipynb parfive/tests/test_aiofiles.py parfive/tests/test_config.py parfive/tests/test_downloader.py parfive/tests/test_downloader_multipart.py parfive/tests/test_main.py parfive/tests/test_utils.py././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550323.0 parfive-2.2.0/parfive.egg-info/dependency_links.txt0000644000175100001660000000000115006437063021752 0ustar00runnerdocker ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550323.0 parfive-2.2.0/parfive.egg-info/entry_points.txt0000644000175100001660000000005615006437063021203 0ustar00runnerdocker[console_scripts] parfive = parfive.main:main ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550323.0 parfive-2.2.0/parfive.egg-info/not-zip-safe0000644000175100001660000000000115006437063020132 0ustar00runnerdocker ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550323.0 parfive-2.2.0/parfive.egg-info/requires.txt0000644000175100001660000000033715006437063020307 0ustar00runnerdockertqdm>=4.27.0 aiohttp [docs] sphinx sphinx-automodapi sphinx-autodoc-typehints sphinx-contributors sphinx-book-theme [ftp] aioftp>=0.17.1 [tests] pytest pytest-localserver pytest-asyncio pytest-socket pytest-cov aiofiles ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550323.0 parfive-2.2.0/parfive.egg-info/top_level.txt0000644000175100001660000000001015006437063020425 0ustar00runnerdockerparfive ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550314.0 parfive-2.2.0/pyproject.toml0000644000175100001660000000263315006437052015474 0ustar00runnerdocker[build-system] requires = [ "setuptools>=62.1", "setuptools_scm[toml]>=8.0.0", "wheel", ] build-backend = "setuptools.build_meta" [project] name = "parfive" description = "A HTTP and
FTP parallel file downloader." requires-python = ">=3.9" readme = { file = "README.rst", content-type = "text/x-rst" } license = { file = "LICENSE" } authors = [ { name = "Stuart Mumford", email = "stuart@cadair.com" }, ] dependencies = [ "tqdm >=4.27.0", "aiohttp", ] dynamic = ["version"] classifiers = [ "Development Status :: 5 - Production/Stable", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", ] [project.scripts] parfive = "parfive.main:main" [project.optional-dependencies] ftp = [ "aioftp >=0.17.1", ] tests = [ "pytest", "pytest-localserver", "pytest-asyncio", "pytest-socket", "pytest-cov", "aiofiles", ] docs = [ "sphinx", "sphinx-automodapi", "sphinx-autodoc-typehints", "sphinx-contributors", "sphinx-book-theme", ] [project.urls] repository = "https://parfive.readthedocs.io/" [tool.setuptools] zip-safe = false include-package-data = true [tool.setuptools.packages.find] include = ["parfive*"] [tool.setuptools_scm] version_file = "parfive/_version.py" ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550314.0 parfive-2.2.0/pytest.ini0000644000175100001660000000070515006437052014607 0ustar00runnerdocker[pytest] minversion = 7.0 testpaths = parfive docs asyncio_mode = strict asyncio_default_fixture_loop_scope = function addopts = --allow-hosts=127.0.0.1,::1 filterwarnings = # Turn all warnings into errors so they do not pass silently. error # Do not fail on pytest config issues (i.e. missing plugins) but do show them always::pytest.PytestConfigWarning # TODO: fixme always::pytest.PytestUnraisableExceptionWarning ././@PaxHeader0000000000000000000000000000003300000000000010211 xustar0027 mtime=1746550323.936221 parfive-2.2.0/setup.cfg0000644000175100001660000000004615006437064014400 0ustar00runnerdocker[egg_info] tag_build = tag_date = 0 ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550314.0 parfive-2.2.0/setup.py0000644000175100001660000000007415006437052014267 0ustar00runnerdocker#!/usr/bin/env python from setuptools import setup setup() ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1746550314.0 parfive-2.2.0/tox.ini0000644000175100001660000000362015006437052014070 0ustar00runnerdocker[tox] envlist = py{39,310,311,312} py311-conda py312-devdeps codestyle build_docs isolated_build = True [testenv] setenv = PYTEST_COMMAND = pytest -vvv -s -raR --pyargs parfive --cov-report=xml --cov=parfive --cov-config={toxinidir}/.coveragerc {toxinidir}/docs {posargs} pass_env = # A variable to tell tests we are on a CI system CI # Custom compiler locations (such as ccache) CC # Location of locales (needed by sphinx on some systems) LOCALE_ARCHIVE # If the user has set a LC override we should follow it LC_ALL extras = ftp tests deps = devdeps: aiohttp>=0.0.dev0 commands = pip freeze --all --no-input {env:PYTEST_COMMAND} [testenv:build_docs] changedir = docs description = Invoke sphinx-build to build the HTML docs # Be verbose about the extras rather than using dev for clarity extras = ftp docs commands = sphinx-build --color -W --keep-going -b html -d _build/.doctrees . 
_build/html {posargs} python -c 'import pathlib; print("Documentation available under file://\{0\}".format(pathlib.Path(r"{toxinidir}") / "docs" / "_build" / "index.html"))' [testenv:codestyle] skip_install = true description = Run all style and file checks with pre-commit deps = pre-commit commands = pre-commit install-hooks pre-commit run --color always --all-files --show-diff-on-failure [testenv:mypy] skip_install = true description = Run mypy deps = mypy types-aiofiles pydantic commands = mypy -p parfive # This env requires tox-conda. [testenv:py{38,39,310,311,312}-conda] extras = deps = conda_deps = # core deps tqdm aiohttp # ftp aioftp # tests pytest pytest-localserver pytest-asyncio pytest-socket pytest-cov aiofiles conda_channels = conda-forge install_command = pip install --no-deps {opts} {packages} commands = conda list {env:PYTEST_COMMAND} {posargs}