pax_global_header00006660000000000000000000000064151100705570014513gustar00rootroot0000000000000052 comment=7d8105fbc494060c4faf8e633cc80e71511582e3 zarr-python-3.1.5/000077500000000000000000000000001511007055700140165ustar00rootroot00000000000000zarr-python-3.1.5/.git-blame-ignore-revs000066400000000000000000000002321511007055700201130ustar00rootroot00000000000000# lint codebase with black and ruff 4e348d6b80c96da461fd866576c971b8a659ba15 # migrate from black to ruff format 22cea005629913208a85799372e045f353744add zarr-python-3.1.5/.git_archival.txt000066400000000000000000000002171511007055700172710ustar00rootroot00000000000000node: 7d8105fbc494060c4faf8e633cc80e71511582e3 node-date: 2025-11-21T15:04:31+01:00 describe-name: v3.1.5 ref-names: HEAD -> main, tag: v3.1.5 zarr-python-3.1.5/.gitattributes000066400000000000000000000001341511007055700167070ustar00rootroot00000000000000*.py linguist-language=python *.ipynb linguist-documentation .git_archival.txt export-subst zarr-python-3.1.5/.github/000077500000000000000000000000001511007055700153565ustar00rootroot00000000000000zarr-python-3.1.5/.github/CODEOWNERS000066400000000000000000000000701511007055700167460ustar00rootroot00000000000000zarr/_storage/absstore.py @zarr-developers/azure-team zarr-python-3.1.5/.github/CONTRIBUTING.md000066400000000000000000000002621511007055700176070ustar00rootroot00000000000000Contributing ============ Please see the [project documentation](https://zarr.readthedocs.io/en/stable/developers/contributing.html) for information about contributing to Zarr. zarr-python-3.1.5/.github/ISSUE_TEMPLATE/000077500000000000000000000000001511007055700175415ustar00rootroot00000000000000zarr-python-3.1.5/.github/ISSUE_TEMPLATE/bug_report.yml000066400000000000000000000051331511007055700224360ustar00rootroot00000000000000name: Bug Report description: Report incorrect behaviour in the library. labels: ["bug"] body: - type: markdown attributes: value: | Please provide the following information. - type: input id: Zarr-version attributes: label: Zarr version description: Value of ``zarr.__version__`` placeholder: v2.10.2, v2.11.3, v2.12.0, etc. validations: required: true - type: input id: Numcodecs-version attributes: label: Numcodecs version description: Value of ``numcodecs.__version__`` placeholder: v0.8.1, v0.9.0, v0.10.0, etc. validations: required: true - type: input id: Python-version attributes: label: Python Version description: Version of Python interpreter placeholder: 3.10, 3.11, 3.12 etc. validations: required: true - type: input id: OS attributes: label: Operating System description: Operating System placeholder: (Linux/Windows/Mac) validations: required: true - type: input id: installation attributes: label: Installation description: How was Zarr installed? placeholder: e.g., "using pip into virtual environment", or "using conda" validations: required: true - type: textarea id: description attributes: label: Description description: Explain why the current behavior is a problem, what the expected output/behaviour is, and why the expected output/behaviour is a better solution. validations: required: true - type: textarea id: reproduce attributes: label: Steps to reproduce description: Minimal, reproducible code sample. Must list dependencies in [inline script metadata](https://packaging.python.org/en/latest/specifications/inline-script-metadata/#example). When put in a file named `issue.py` calling `uv run issue.py` should show the issue. 
value: | ```python # /// script # requires-python = ">=3.11" # dependencies = [ # "zarr@git+https://github.com/zarr-developers/zarr-python.git@main", # ] # /// # # This script automatically imports the development branch of zarr to check for issues import zarr # your reproducer code # zarr.print_debug_info() ``` validations: required: true - type: textarea id: additional-output attributes: label: Additional output description: If you think it might be relevant, please provide the output from ``pip freeze`` or ``conda env export`` depending on which was used to install Zarr. zarr-python-3.1.5/.github/ISSUE_TEMPLATE/config.yml000066400000000000000000000012221511007055700215260ustar00rootroot00000000000000blank_issues_enabled: true contact_links: - name: Propose a new Zarr specification feature url: https://github.com/zarr-developers/zarr-specs about: A new feature for the Zarr storage specification should be opened on the zarr-specs repository. - name: Discuss something on ZulipChat url: https://ossci.zulipchat.com/ about: For questions like "How do I do X with Zarr?", consider posting your question to our developer chat. - name: Discuss something on GitHub Discussions url: https://github.com/zarr-developers/zarr-python/discussions about: For questions like "How do I do X with Zarr?", you can move to GitHub Discussions. zarr-python-3.1.5/.github/ISSUE_TEMPLATE/documentation.yml000066400000000000000000000012021511007055700231300ustar00rootroot00000000000000name: Documentation Improvement description: Report missing or wrong documentation. Alternatively, you can just open a pull request with the suggested change. title: "DOC: " labels: [documentation, help wanted] body: - type: textarea attributes: label: Describe the issue linked to the documentation description: > Please provide a description of what documentation you believe needs to be fixed/improved. validations: required: true - type: textarea attributes: label: Suggested fix for documentation description: > Please explain the suggested fix and why it's better than the existing documentation. zarr-python-3.1.5/.github/ISSUE_TEMPLATE/feature_request.yml000066400000000000000000000005111511007055700234640ustar00rootroot00000000000000name: Feature Request description: Request a new feature for zarr-python # labels: [] body: - type: textarea attributes: label: Describe the new feature you'd like description: > Please provide a description of what new feature or functionality you'd like to see in zarr-python. validations: required: true zarr-python-3.1.5/.github/ISSUE_TEMPLATE/release-checklist.md000066400000000000000000000077731511007055700234700ustar00rootroot00000000000000--- name: Zarr-Python release checklist about: Checklist for a new Zarr-Python release. [For project maintainers only!] title: Release Zarr-Python vX.Y.Z labels: release-checklist assignees: '' --- **Release**: [v3.x.x](https://github.com/zarr-developers/zarr-python/milestones/?) **Scheduled Date**: 20YY/MM/DD **Priority PRs/issues to complete prior to release** - [ ] Priority pull request #X **Before release**: - [ ] Make sure the release branch (e.g., `3.1.x`) is up to date with any backports. - [ ] Make sure that all pull requests which will be included in the release have been properly documented as changelog files in the [`changes/` directory](https://github.com/zarr-developers/zarr-python/tree/main/changes). - [ ] Run ``towncrier build --version x.y.z`` to create the changelog, and commit the result to the release branch. 
- [ ] Check [SPEC 0](https://scientific-python.org/specs/spec-0000/#support-window) to see if the minimum supported version of Python or NumPy needs bumping. - [ ] Check to ensure that: - [ ] Deprecated workarounds/codes/tests are removed. Run `grep "# TODO" **/*.py` to find all potential TODOs. - [ ] All tests pass in the ["Tests" workflow](https://github.com/zarr-developers/zarr-python/actions/workflows/test.yml). - [ ] All tests pass in the ["GPU Tests" workflow](https://github.com/zarr-developers/zarr-python/actions/workflows/gpu_test.yml). - [ ] All tests pass in the ["Hypothesis" workflow](https://github.com/zarr-developers/zarr-python/actions/workflows/hypothesis.yaml). - [ ] Check that downstream libraries work well (maintainers can make executive decisions about whether all checks are required for this release). - [ ] numcodecs - [ ] Xarray (@jhamman @dcherian @TomNicholas) - Zarr's upstream compatibility is tested via the [Upstream Dev CI worklow](https://github.com/pydata/xarray/actions/workflows/upstream-dev-ci.yaml). - Click on the most recent workflow and check that the `upstream-dev` job has run and passed. `upstream-dev` is not run on all all workflow runs. - Check that the expected version of Zarr-Python was tested using the `Version Info` step of the `upstream-dev` job. - If testing on a branch other than `main` is needed, open a PR modifying https://github.com/pydata/xarray/blob/90ee30943aedba66a37856b2332a41264e288c20/ci/install-upstream-wheels.sh#L56 and add the `run-upstream` label. - [ ] Titiler.Xarray (@maxrjones) - [Modify dependencies](https://github.com/developmentseed/titiler/blob/main/src/titiler/xarray/pyproject.toml) for titiler.xarray. - Modify triggers for running [the test workflow](https://github.com/developmentseed/titiler/blob/61549f2de07b20cca8fb991cfcdc89b23e18ad05/.github/workflows/ci.yml#L5-L7). - Push the branch to the repository and check for the actions for any failures. **Release**: - [ ] Go to https://github.com/zarr-developers/zarr-python/releases. - [ ] Click "Draft a new release". - [ ] Choose a version number prefixed with a `v` (e.g. `v0.0.0`). For pre-releases, include the appropriate suffix (e.g. `v0.0.0a1` or `v0.0.0rc2`). - [ ] Set the target branch to the release branch (e.g., `3.1.x`) - [ ] Set the description of the release to: `See release notes https://zarr.readthedocs.io/en/stable/release-notes.html#release-0-0-0`, replacing the correct version numbers. For pre-release versions, the URL should omit the pre-release suffix, e.g. "a1" or "rc1". - [ ] Click on "Generate release notes" to auto-fill the description. - [ ] Make a release by clicking the 'Publish Release' button, this will automatically create a tag too. - [ ] Verify that release workflows succeeded. - [ ] The latest version is correct on [PyPI](https://pypi.org/project/zarr/). - [ ] The stable version is correct on [ReadTheDocs](https://zarr.readthedocs.io/en/stable/). **After release**: - [ ] Review and merge the pull request on the conda-forge [zarr-feedstock](https://github.com/conda-forge/zarr-feedstock) that will be automatically generated. 
--- - [ ] Party :tada: zarr-python-3.1.5/.github/PULL_REQUEST_TEMPLATE.md000066400000000000000000000005571511007055700211660ustar00rootroot00000000000000[Description of PR] TODO: * [ ] Add unit tests and/or doctests in docstrings * [ ] Add docstrings and API docs for any new/modified user-facing classes and functions * [ ] New/modified features documented in `docs/user-guide/*.md` * [ ] Changes documented as a new file in `changes/` * [ ] GitHub Actions have all passed * [ ] Test coverage is 100% (Codecov passes) zarr-python-3.1.5/.github/dependabot.yml000066400000000000000000000011471511007055700202110ustar00rootroot00000000000000--- version: 2 updates: # Updates for main - package-ecosystem: "github-actions" directory: "/" schedule: interval: "weekly" groups: actions: patterns: - "*" # Updates for support/v2 branch - package-ecosystem: "pip" directory: "/" target-branch: "support/v2" schedule: interval: "weekly" groups: requirements: patterns: - "*" - package-ecosystem: "github-actions" directory: "/" target-branch: "support/v2" schedule: interval: "weekly" groups: actions: patterns: - "*" zarr-python-3.1.5/.github/labeler.yml000066400000000000000000000001431511007055700175050ustar00rootroot00000000000000needs release notes: - all: - changed-files: - all-globs-to-all-files: '!changes/*.md' zarr-python-3.1.5/.github/workflows/000077500000000000000000000000001511007055700174135ustar00rootroot00000000000000zarr-python-3.1.5/.github/workflows/check_changelogs.yml000066400000000000000000000007001511007055700234020ustar00rootroot00000000000000name: Check changelog entries on: pull_request: jobs: check-changelogs: name: Check changelog entries runs-on: ubuntu-latest steps: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: Install uv uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2 - name: Check changelog entries run: uv run --no-sync python ci/check_changelog_entries.py zarr-python-3.1.5/.github/workflows/gpu_test.yml000066400000000000000000000046211511007055700217730ustar00rootroot00000000000000# This workflow will install Python dependencies, run tests and lint with a variety of Python versions # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions name: GPU Test on: push: branches: [ main, 3.1.x ] pull_request: branches: [ main, 3.1.x ] workflow_dispatch: env: LD_LIBRARY_PATH: /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64 concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true jobs: test: name: py=${{ matrix.python-version }}, np=${{ matrix.numpy-version }}, deps=${{ matrix.dependency-set }} runs-on: gpu-runner strategy: matrix: python-version: ['3.11'] numpy-version: ['2.2'] dependency-set: ["minimal"] steps: - uses: actions/checkout@v5 with: fetch-depth: 0 # grab all branches and tags # - name: cuda-toolkit # uses: Jimver/cuda-toolkit@v0.2.16 # id: cuda-toolkit # with: # cuda: '12.4.1' - name: Set up CUDA run: | wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.1-1_all.deb sudo dpkg -i cuda-keyring_1.1-1_all.deb sudo apt-get update sudo apt-get -y install cuda-toolkit-12-6 echo "/usr/local/cuda/bin" >> $GITHUB_PATH - name: GPU check run: | nvidia-smi echo $PATH echo $LD_LIBRARY_PATH nvcc -V - name: Set up Python uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} cache: 'pip' - name: Install Hatch and CuPy run: | python -m pip install --upgrade pip pip 
install hatch - name: Set Up Hatch Env run: | hatch env create gputest.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} hatch env run -e gputest.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} list-env - name: Run Tests run: | hatch env run --env gputest.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} run-coverage-gpu - name: Upload coverage uses: codecov/codecov-action@13ce06bfc6bbe3ecf90edbbf1bc32fe5978ca1d3 # v5.3.1 with: token: ${{ secrets.CODECOV_TOKEN }} verbose: true # optional (default = false) zarr-python-3.1.5/.github/workflows/hypothesis.yaml000066400000000000000000000060461511007055700225040ustar00rootroot00000000000000name: Slow Hypothesis CI on: push: branches: [main, 3.1.x] pull_request: branches: [main, 3.1.x] types: [opened, reopened, synchronize, labeled] schedule: - cron: "0 0 * * *" # Daily “At 00:00” UTC workflow_dispatch: # allows you to trigger manually env: FORCE_COLOR: 3 jobs: hypothesis: name: Slow Hypothesis Tests runs-on: "ubuntu-latest" defaults: run: shell: bash -l {0} strategy: matrix: python-version: ['3.12'] numpy-version: ['2.2'] dependency-set: ["optional"] steps: - uses: actions/checkout@v5 - name: Set HYPOTHESIS_PROFILE based on trigger run: | if [[ "${{ github.event_name }}" == "schedule" || "${{ github.event_name }}" == "workflow_dispatch" ]]; then echo "HYPOTHESIS_PROFILE=nightly" >> $GITHUB_ENV else echo "HYPOTHESIS_PROFILE=ci" >> $GITHUB_ENV fi - name: Set up Python uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} cache: 'pip' - name: Install Hatch run: | python -m pip install --upgrade pip pip install hatch - name: Set Up Hatch Env run: | hatch env create test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} hatch env run -e test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} list-env # https://github.com/actions/cache/blob/main/tips-and-workarounds.md#update-a-cache - name: Restore cached hypothesis directory id: restore-hypothesis-cache uses: actions/cache/restore@v4 with: path: .hypothesis/ key: cache-hypothesis-${{ runner.os }}-${{ github.run_id }} restore-keys: | cache-hypothesis- - name: Run slow Hypothesis tests if: success() id: status run: | echo "Using Hypothesis profile: $HYPOTHESIS_PROFILE" hatch env run --env test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} run-hypothesis # explicitly save the cache so it gets updated, also do this even if it fails. 
- name: Save cached hypothesis directory id: save-hypothesis-cache if: always() && steps.status.outcome != 'skipped' uses: actions/cache/save@v4 with: path: .hypothesis/ key: cache-hypothesis-${{ runner.os }}-${{ github.run_id }} - name: Upload coverage uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} verbose: true # optional (default = false) - name: Generate and publish the report if: | failure() && steps.status.outcome == 'failure' && github.event_name == 'schedule' && github.repository_owner == 'zarr-developers' uses: scientific-python/issue-from-pytest-log-action@v1 with: log-path: output-${{ matrix.python-version }}-log.jsonl issue-title: "Nightly Hypothesis tests failed" issue-label: "topic-hypothesis" zarr-python-3.1.5/.github/workflows/issue-metrics.yml000066400000000000000000000022431511007055700227330ustar00rootroot00000000000000name: Monthly issue metrics on: workflow_dispatch: schedule: - cron: '3 2 1 * *' permissions: contents: read jobs: build: name: issue metrics runs-on: ubuntu-latest permissions: issues: write pull-requests: read steps: - name: Get dates for last month shell: bash run: | # Calculate the first day of the previous month first_day=$(date -d "last month" +%Y-%m-01) # Calculate the last day of the previous month last_day=$(date -d "$first_day +1 month -1 day" +%Y-%m-%d) #Set an environment variable with the date range echo "$first_day..$last_day" echo "last_month=$first_day..$last_day" >> "$GITHUB_ENV" - name: Run issue-metrics tool uses: github/issue-metrics@v3 env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} SEARCH_QUERY: 'repo:zarr-developers/zarr-python is:issue created:${{ env.last_month }} -reason:"not planned"' - name: Create issue uses: peter-evans/create-issue-from-file@v6 with: title: Monthly issue metrics report token: ${{ secrets.GITHUB_TOKEN }} content-filepath: ./issue_metrics.md zarr-python-3.1.5/.github/workflows/needs_release_notes.yml000066400000000000000000000007501511007055700241460ustar00rootroot00000000000000name: "Pull Request Labeler" on: - pull_request_target jobs: labeler: if: ${{ github.event.pull_request.user.login != 'dependabot[bot]' }} && ${{ github.event.pull_request.user.login != 'pre-commit-ci[bot]' }} permissions: contents: read pull-requests: write runs-on: ubuntu-latest steps: - uses: actions/labeler@634933edcd8ababfe52f92936142cc22ac488b1b # v6.0.1 with: repo-token: ${{ secrets.GITHUB_TOKEN }} sync-labels: true zarr-python-3.1.5/.github/workflows/nightly_wheels.yml000066400000000000000000000016001511007055700231600ustar00rootroot00000000000000name: Nightly Wheels on: schedule: # Run nightly at 2 AM UTC - cron: '0 2 * * *' workflow_dispatch: jobs: build_and_upload_nightly: name: Build and upload nightly wheels runs-on: ubuntu-latest steps: - uses: actions/checkout@v5 with: submodules: true fetch-depth: 0 - uses: actions/setup-python@v6 name: Install Python with: python-version: '3.13' - name: Install build dependencies run: | python -m pip install --upgrade pip pip install hatch - name: Build wheel and sdist run: hatch build - name: Upload nightly wheels uses: scientific-python/upload-nightly-action@b36e8c0c10dbcfd2e05bf95f17ef8c14fd708dbf with: artifacts_path: dist anaconda_nightly_upload_token: ${{ secrets.ANACONDA_ORG_UPLOAD_TOKEN }} zarr-python-3.1.5/.github/workflows/releases.yml000066400000000000000000000026001511007055700217370ustar00rootroot00000000000000name: Wheels on: [push, pull_request] jobs: build_artifacts: name: Build wheel on ubuntu-latest runs-on: ubuntu-latest strategy: fail-fast: false 
steps: - uses: actions/checkout@v5 with: submodules: true fetch-depth: 0 - uses: actions/setup-python@v6 name: Install Python with: python-version: '3.11' - name: Install PyBuild run: | python -m pip install --upgrade pip pip install hatch - name: Build wheel and sdist run: hatch build - uses: actions/upload-artifact@v5 with: name: releases path: dist test_dist_pypi: needs: [build_artifacts] runs-on: ubuntu-latest steps: - uses: actions/download-artifact@v6 with: name: releases path: dist - name: test run: | ls ls dist upload_pypi: needs: [build_artifacts] runs-on: ubuntu-latest if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/v') steps: - uses: actions/download-artifact@v6 with: name: releases path: dist - uses: pypa/gh-action-pypi-publish@v1.13.0 with: user: __token__ password: ${{ secrets.pypi_password }} # To test: repository_url: https://test.pypi.org/legacy/ zarr-python-3.1.5/.github/workflows/test.yml000066400000000000000000000111061511007055700211140ustar00rootroot00000000000000# This workflow will install Python dependencies, run tests and lint with a variety of Python versions # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions name: Test on: push: branches: [ main, 3.1.x ] pull_request: branches: [ main, 3.1.x ] workflow_dispatch: concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true jobs: test: name: os=${{ matrix.os }}, py=${{ matrix.python-version }}, np=${{ matrix.numpy-version }}, deps=${{ matrix.dependency-set }} strategy: matrix: python-version: ['3.11', '3.12', '3.13'] numpy-version: ['1.26', '2.2'] dependency-set: ["minimal", "optional"] os: ["ubuntu-latest"] include: - python-version: '3.11' numpy-version: '1.26' dependency-set: 'optional' os: 'macos-latest' - python-version: '3.13' numpy-version: '2.2' dependency-set: 'optional' os: 'macos-latest' - python-version: '3.11' numpy-version: '1.26' dependency-set: 'optional' os: 'windows-latest' - python-version: '3.13' numpy-version: '2.2' dependency-set: 'optional' os: 'windows-latest' runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v5 with: fetch-depth: 0 # grab all branches and tags - name: Set up Python uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} cache: 'pip' - name: Install Hatch run: | python -m pip install --upgrade pip pip install hatch - name: Set Up Hatch Env run: | hatch env create test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} hatch env run -e test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} list-env - name: Run Tests env: HYPOTHESIS_PROFILE: ci run: | hatch env run --env test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} run-coverage - name: Upload coverage if: ${{ matrix.dependency-set == 'optional' && matrix.os == 'ubuntu-latest' }} uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} verbose: true # optional (default = false) test-upstream-and-min-deps: name: py=${{ matrix.python-version }}-${{ matrix.dependency-set }} runs-on: ubuntu-latest strategy: matrix: python-version: ['3.11', "3.13"] dependency-set: ["upstream", "min_deps"] exclude: - python-version: "3.13" dependency-set: min_deps - python-version: "3.11" dependency-set: upstream steps: - uses: actions/checkout@v5 with: fetch-depth: 0 - name: Set up Python uses: actions/setup-python@v6 with: python-version: ${{ 
matrix.python-version }} cache: 'pip' - name: Install Hatch run: | python -m pip install --upgrade pip pip install hatch - name: Set Up Hatch Env run: | hatch env create ${{ matrix.dependency-set }} hatch env run -e ${{ matrix.dependency-set }} list-env - name: Run Tests run: | hatch env run --env ${{ matrix.dependency-set }} run-coverage - name: Upload coverage uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} verbose: true # optional (default = false) doctests: name: doctests runs-on: ubuntu-latest steps: - uses: actions/checkout@v5 with: fetch-depth: 0 # required for hatch version discovery, which is needed for numcodecs.zarr3 - name: Set up Python uses: actions/setup-python@v6 with: python-version: '3.13' cache: 'pip' - name: Install Hatch run: | python -m pip install --upgrade pip pip install hatch - name: Set Up Hatch Env run: | hatch run doctest:pip list - name: Run Tests run: | hatch run doctest:test test-complete: name: Test complete needs: [ test, test-upstream-and-min-deps, doctests ] if: always() runs-on: ubuntu-latest steps: - name: Check failure if: | contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') run: exit 1 - name: Success run: echo Success! zarr-python-3.1.5/.gitignore000066400000000000000000000017571511007055700160200ustar00rootroot00000000000000# Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] # C extensions *.so # Distribution / packaging .Python env/ .venv/ build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ *.egg-info/ .installed.cfg *.egg # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .coverage .coverage.* .cache coverage.xml *,cover # Translations *.mo *.pot # Django stuff: *.log # Documentation site/ docs/_build/ docs/data data data.zip # PyBuilder target/ # PyCharm .idea # Jupyter .ipynb_checkpoints/ # VCS versioning src/zarr/_version.py # emacs *~ # VSCode .vscode/ # test data #*.zarr #*.zip #example* #doesnotexist #test_sync* data/* src/fixture/ fixture/ junit.xml .DS_Store tests/.hypothesis .hypothesis/ zarr/version.py zarr.egg-info/ zarr-python-3.1.5/.pre-commit-config.yaml000066400000000000000000000032611511007055700203010ustar00rootroot00000000000000ci: autoupdate_commit_msg: "chore: update pre-commit hooks" autoupdate_schedule: "monthly" autofix_commit_msg: "style: pre-commit fixes" autofix_prs: false default_stages: [pre-commit, pre-push] repos: - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.14.3 hooks: - id: ruff-check args: ["--fix", "--show-fixes"] - id: ruff-format - repo: https://github.com/codespell-project/codespell rev: v2.4.1 hooks: - id: codespell args: ["-L", "fo,ihs,kake,te", "-S", "fixture"] - repo: https://github.com/pre-commit/pre-commit-hooks rev: v6.0.0 hooks: - id: check-yaml exclude: mkdocs.yml - id: trailing-whitespace - repo: https://github.com/pre-commit/mirrors-mypy rev: v1.18.2 hooks: - id: mypy files: src|tests additional_dependencies: # Package dependencies - packaging - donfig - numcodecs - google-crc32c>=1.5 - numpy==2.1 # until https://github.com/numpy/numpy/issues/28034 is resolved - typing_extensions - universal-pathlib - obstore>=0.5.1 # Tests - pytest - hypothesis - s3fs - repo: https://github.com/scientific-python/cookie rev: 2025.10.20 hooks: - id: sp-repo-review - repo: 
https://github.com/pre-commit/pygrep-hooks rev: v1.10.0 hooks: - id: rst-directive-colons - id: rst-inline-touching-normal - repo: https://github.com/numpy/numpydoc rev: v1.9.0 hooks: - id: numpydoc-validation - repo: https://github.com/twisted/towncrier rev: 25.8.0 hooks: - id: towncrier-check zarr-python-3.1.5/.pyup.yml000066400000000000000000000005131511007055700156130ustar00rootroot00000000000000# pyup.io config file # see https://pyup.io/docs/configuration/ for all available options schedule: every month requirements: - requirements_dev_minimal.txt: pin: True update: all - requirements_dev_numpy.txt: pin: True update: all - requirements_dev_optional.txt: pin: True update: all zarr-python-3.1.5/.readthedocs.yaml000066400000000000000000000007251511007055700172510ustar00rootroot00000000000000version: 2 build: os: ubuntu-22.04 tools: python: "3.12" jobs: pre_build: - | if [ "$READTHEDOCS_VERSION_TYPE" != "tag" ]; then towncrier build --version Unreleased --yes; fi build: html: - mkdocs build --strict --site-dir $READTHEDOCS_OUTPUT/html mkdocs: configuration: mkdocs.yml python: install: - method: pip path: . extra_requirements: - docs - remote zarr-python-3.1.5/FUNDING.yml000066400000000000000000000001031511007055700156250ustar00rootroot00000000000000github: [numfocus] custom: ['https://numfocus.org/donate-to-zarr'] zarr-python-3.1.5/LICENSE.txt000066400000000000000000000021441511007055700156420ustar00rootroot00000000000000The MIT License (MIT) Copyright (c) 2015-2025 Zarr Developers Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. zarr-python-3.1.5/README.md000066400000000000000000000144341511007055700153030ustar00rootroot00000000000000

# Zarr
## What is it?

Zarr is a Python package providing an implementation of compressed, chunked, N-dimensional arrays, designed for use in parallel computing. See the [documentation](https://zarr.readthedocs.io) for more information.

## Main Features

- [**Create**](https://zarr.readthedocs.io/en/stable/user-guide/arrays.html#creating-an-array) N-dimensional arrays with any NumPy `dtype`.
- [**Chunk arrays**](https://zarr.readthedocs.io/en/stable/user-guide/performance.html#chunk-optimizations) along any dimension.
- [**Compress**](https://zarr.readthedocs.io/en/stable/user-guide/arrays.html#compressors) and/or filter chunks using any NumCodecs codec.
- [**Store arrays**](https://zarr.readthedocs.io/en/stable/user-guide/storage.html) in memory, on disk, inside a zip file, on S3, etc...
- [**Read**](https://zarr.readthedocs.io/en/stable/user-guide/arrays.html#reading-and-writing-data) an array [**concurrently**](https://zarr.readthedocs.io/en/stable/user-guide/performance.html#parallel-computing-and-synchronization) from multiple threads or processes.
- [**Write**](https://zarr.readthedocs.io/en/stable/user-guide/arrays.html#reading-and-writing-data) to an array concurrently from multiple threads or processes.
- Organize arrays into hierarchies via [**groups**](https://zarr.readthedocs.io/en/stable/quickstart.html#hierarchical-groups).

## Where to get it

Zarr can be installed from PyPI using `pip`:

```bash
pip install zarr
```

or via `conda`:

```bash
conda install -c conda-forge zarr
```

For more details, including how to install from source, see the [installation documentation](https://zarr.readthedocs.io/en/stable/index.html#installation).

zarr-python-3.1.5/TEAM.md

## Active core-developers

- @joshmoore (Josh Moore)
- @jni (Juan Nunez-Iglesias)
- @rabernat (Ryan Abernathey)
- @jhamman (Joe Hamman)
- @d-v-b (Davis Bennett)
- @jakirkham (jakirkham)
- @martindurant (Martin Durant)
- @normanrz (Norman Rzepka)
- @dstansby (David Stansby)
- @dcherian (Deepak Cherian)
- @TomAugspurger (Tom Augspurger)

## Emeritus core-developers

- @alimanfoo (Alistair Miles)
- @shoyer (Stephan Hoyer)
- @ryan-williams (Ryan Williams)
- @jrbourbeau (James Bourbeau)
- @mzjp2 (Zain Patel)
- @grlee77 (Gregory Lee)

## Former core-developers

- @jeromekelleher (Jerome Kelleher)
- @tjcrone (Tim Crone)
- @funkey (Jan Funke)
- @shikharsg
- @Carreau (Matthias Bussonnier)
- @dazzag24
- @WardF (Ward Fisher)

zarr-python-3.1.5/bench/compress_normal.py

import sys
import timeit

import blosc
import line_profiler
import numpy as np

import zarr

if __name__ == "__main__":
    sys.path.insert(0, "..")

    # setup
    a = np.random.normal(2000, 1000, size=200000000).astype("u2")
    z = zarr.empty_like(
        a,
        chunks=1000000,
        compression="blosc",
        compression_opts={"cname": "lz4", "clevel": 5, "shuffle": 2},
    )
    print(z)
    print("*" * 79)

    # time
    t = timeit.repeat("z[:] = a", repeat=10, number=1, globals=globals())
    print(t)
    print(min(t))
    print(z)

    # profile
    profile = line_profiler.LineProfiler(blosc.compress)
    profile.run("z[:] = a")
    profile.print_stats()

    print("*" * 79)

    # time
    t = timeit.repeat("z[:]", repeat=10, number=1, globals=globals())
    print(t)
    print(min(t))

    # profile
    profile = line_profiler.LineProfiler(blosc.decompress)
    profile.run("z[:]")
    profile.print_stats()
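The benchmark above is written against the legacy zarr-python 2 creation API (`zarr.empty_like` with `compression`/`compression_opts`). As a rough orientation, a minimal sketch of the same write benchmark against the zarr-python 3 style API follows; the names used here (`zarr.create_array`, `zarr.codecs.BloscCodec`, `zarr.storage.MemoryStore`, the `"bitshuffle"` spelling) are assumptions and should be checked against the installed version rather than read as the project's own benchmark.

```python
# Sketch only: a zarr-python 3 style version of the write benchmark above.
# API names (create_array, BloscCodec, MemoryStore) are assumed, not verified here.
import timeit

import numpy as np

import zarr
from zarr.codecs import BloscCodec
from zarr.storage import MemoryStore

# same synthetic data as the original benchmark
a = np.random.normal(2000, 1000, size=200_000_000).astype("u2")

z = zarr.create_array(
    store=MemoryStore(),
    shape=a.shape,
    chunks=(1_000_000,),
    dtype=a.dtype,
    # shuffle=2 in the old blosc options corresponds to bit-shuffle
    compressors=BloscCodec(cname="lz4", clevel=5, shuffle="bitshuffle"),
)

# time whole-array writes, as in the original script
t = timeit.repeat("z[:] = a", repeat=10, number=1, globals=globals())
print(min(t))
```

As in the original script, each repeat writes the full array once, so the minimum over the repeats approximates best-case compression throughput.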
zarr-python-3.1.5/bench/compress_normal.txt000066400000000000000000000252341511007055700210470ustar00rootroot00000000000000zarr.core.Array((200000000,), uint16, chunks=(1000000,), order=C) compression: blosc; compression_opts: {'clevel': 5, 'cname': 'lz4', 'shuffle': 2} nbytes: 381.5M; nbytes_stored: 294; ratio: 1360544.2; initialized: 0/200 store: builtins.dict ******************************************************************************* [0.27119584499996563, 0.2855067059999783, 0.2887747180002407, 0.3058794240005227, 0.3139041080003153, 0.3021271820007314, 0.31543190899992624, 0.31403100900024583, 0.3272544129995367, 0.31834129100025166] 0.27119584499996563 zarr.core.Array((200000000,), uint16, chunks=(1000000,), order=C) compression: blosc; compression_opts: {'clevel': 5, 'cname': 'lz4', 'shuffle': 2} nbytes: 381.5M; nbytes_stored: 314.1M; ratio: 1.2; initialized: 200/200 store: builtins.dict Timer unit: 1e-06 s Total time: 0.297223 s File: /home/aliman/code/github/alimanfoo/zarr/zarr/blosc.pyx Function: compress at line 137 Line # Hits Time Per Hit % Time Line Contents ============================================================== 137 def compress(source, char* cname, int clevel, int shuffle): 138 """Compress data in a numpy array. 139 140 Parameters 141 ---------- 142 source : array-like 143 Data to be compressed. 144 cname : bytes 145 Name of compression library to use. 146 clevel : int 147 Compression level. 148 shuffle : int 149 Shuffle filter. 150 151 Returns 152 ------- 153 dest : bytes-like 154 Compressed data. 155 156 """ 157 158 cdef: 159 char *source_ptr 160 char *dest_ptr 161 Py_buffer source_buffer 162 size_t nbytes, cbytes, itemsize 163 200 506 2.5 0.2 array.array char_array_template = array.array('b', []) 164 array.array dest 165 166 # setup source buffer 167 200 458 2.3 0.2 PyObject_GetBuffer(source, &source_buffer, PyBUF_ANY_CONTIGUOUS) 168 200 119 0.6 0.0 source_ptr = source_buffer.buf 169 170 # setup destination 171 200 239 1.2 0.1 nbytes = source_buffer.len 172 200 103 0.5 0.0 itemsize = source_buffer.itemsize 173 200 2286 11.4 0.8 dest = array.clone(char_array_template, nbytes + BLOSC_MAX_OVERHEAD, 174 zero=False) 175 200 129 0.6 0.0 dest_ptr = dest.data.as_voidptr 176 177 # perform compression 178 200 1734 8.7 0.6 if _get_use_threads(): 179 # allow blosc to use threads internally 180 200 167 0.8 0.1 compressor_set = blosc_set_compressor(cname) 181 200 94 0.5 0.0 if compressor_set < 0: 182 raise ValueError('compressor not supported: %r' % cname) 183 200 288570 1442.8 97.1 with nogil: 184 cbytes = blosc_compress(clevel, shuffle, itemsize, nbytes, 185 source_ptr, dest_ptr, 186 nbytes + BLOSC_MAX_OVERHEAD) 187 188 else: 189 with nogil: 190 cbytes = blosc_compress_ctx(clevel, shuffle, itemsize, nbytes, 191 source_ptr, dest_ptr, 192 nbytes + BLOSC_MAX_OVERHEAD, cname, 193 0, 1) 194 195 # release source buffer 196 200 616 3.1 0.2 PyBuffer_Release(&source_buffer) 197 198 # check compression was successful 199 200 120 0.6 0.0 if cbytes <= 0: 200 raise RuntimeError('error during blosc compression: %d' % cbytes) 201 202 # resize after compression 203 200 1896 9.5 0.6 array.resize(dest, cbytes) 204 205 200 186 0.9 0.1 return dest ******************************************************************************* [0.24293352799941204, 0.2324290420001489, 0.24935673900017719, 0.25716222699975333, 0.24246313799994823, 0.23272456500035332, 0.2636815870000646, 0.2576046349995522, 0.2781278639995435, 0.23824110699933954] 0.2324290420001489 Timer unit: 1e-06 s Total time: 0.240178 s 
File: /home/aliman/code/github/alimanfoo/zarr/zarr/blosc.pyx Function: decompress at line 75 Line # Hits Time Per Hit % Time Line Contents ============================================================== 75 def decompress(source, dest): 76 """Decompress data. 77 78 Parameters 79 ---------- 80 source : bytes-like 81 Compressed data, including blosc header. 82 dest : array-like 83 Object to decompress into. 84 85 Notes 86 ----- 87 Assumes that the size of the destination buffer is correct for the size of 88 the uncompressed data. 89 90 """ 91 cdef: 92 int ret 93 char *source_ptr 94 char *dest_ptr 95 Py_buffer source_buffer 96 array.array source_array 97 Py_buffer dest_buffer 98 size_t nbytes 99 100 # setup source buffer 101 200 573 2.9 0.2 if PY2 and isinstance(source, array.array): 102 # workaround fact that array.array does not support new-style buffer 103 # interface in PY2 104 release_source_buffer = False 105 source_array = source 106 source_ptr = source_array.data.as_voidptr 107 else: 108 200 112 0.6 0.0 release_source_buffer = True 109 200 144 0.7 0.1 PyObject_GetBuffer(source, &source_buffer, PyBUF_ANY_CONTIGUOUS) 110 200 98 0.5 0.0 source_ptr = source_buffer.buf 111 112 # setup destination buffer 113 200 552 2.8 0.2 PyObject_GetBuffer(dest, &dest_buffer, 114 PyBUF_ANY_CONTIGUOUS | PyBUF_WRITEABLE) 115 200 100 0.5 0.0 dest_ptr = dest_buffer.buf 116 200 84 0.4 0.0 nbytes = dest_buffer.len 117 118 # perform decompression 119 200 1856 9.3 0.8 if _get_use_threads(): 120 # allow blosc to use threads internally 121 200 235286 1176.4 98.0 with nogil: 122 ret = blosc_decompress(source_ptr, dest_ptr, nbytes) 123 else: 124 with nogil: 125 ret = blosc_decompress_ctx(source_ptr, dest_ptr, nbytes, 1) 126 127 # release buffers 128 200 754 3.8 0.3 if release_source_buffer: 129 200 326 1.6 0.1 PyBuffer_Release(&source_buffer) 130 200 165 0.8 0.1 PyBuffer_Release(&dest_buffer) 131 132 # handle errors 133 200 128 0.6 0.1 if ret <= 0: 134 raise RuntimeError('error during blosc decompression: %d' % ret) zarr-python-3.1.5/changes/000077500000000000000000000000001511007055700154265ustar00rootroot00000000000000zarr-python-3.1.5/changes/.gitignore000066400000000000000000000000141511007055700174110ustar00rootroot00000000000000!.gitignore zarr-python-3.1.5/changes/README.md000066400000000000000000000005751511007055700167140ustar00rootroot00000000000000Writing a changelog entry ------------------------- Please put a new file in this directory named `xxxx..md`, where - `xxxx` is the pull request number associated with this entry - `` is one of: - feature - bugfix - doc - removal - misc Inside the file, please write a short description of what you have changed, and how it impacts users of `zarr-python`. zarr-python-3.1.5/ci/000077500000000000000000000000001511007055700144115ustar00rootroot00000000000000zarr-python-3.1.5/ci/check_changelog_entries.py000066400000000000000000000033111511007055700215760ustar00rootroot00000000000000""" Check changelog entries have the correct filename structure. 
""" import sys from pathlib import Path VALID_CHANGELOG_TYPES = ["feature", "bugfix", "doc", "removal", "misc"] CHANGELOG_DIRECTORY = (Path(__file__).parent.parent / "changes").resolve() def is_int(s: str) -> bool: try: int(s) except ValueError: return False else: return True if __name__ == "__main__": print(f"Looking for changelog entries in {CHANGELOG_DIRECTORY}") entries = CHANGELOG_DIRECTORY.glob("*") entries = [e for e in entries if e.name not in [".gitignore", "README.md"]] print(f"Found {len(entries)} entries") print() bad_suffix = [e for e in entries if e.suffix != ".md"] bad_issue_no = [e for e in entries if not is_int(e.name.split(".")[0])] bad_type = [e for e in entries if e.name.split(".")[1] not in VALID_CHANGELOG_TYPES] if len(bad_suffix) or len(bad_issue_no) or len(bad_type): if len(bad_suffix): print("Changelog entries without .md suffix") print("-------------------------------------") print("\n".join([p.name for p in bad_suffix])) print() if len(bad_issue_no): print("Changelog entries without integer issue number") print("----------------------------------------------") print("\n".join([p.name for p in bad_issue_no])) print() if len(bad_type): print("Changelog entries without valid type") print("------------------------------------") print("\n".join([p.name for p in bad_type])) print(f"Valid types are: {VALID_CHANGELOG_TYPES}") print() sys.exit(1) sys.exit(0) zarr-python-3.1.5/codecov.yml000066400000000000000000000010161511007055700161610ustar00rootroot00000000000000coverage: status: patch: default: target: auto informational: true project: default: target: auto threshold: 0.1 codecov: notify: after_n_builds: 10 # Wait for all 10 reports before updating the status wait_for_ci: yes comment: layout: "diff, files" behavior: default require_changes: true # if true: only post the comment if coverage changes branches: # branch names that can post comment - "main" github_checks: annotations: false zarr-python-3.1.5/docs/000077500000000000000000000000001511007055700147465ustar00rootroot00000000000000zarr-python-3.1.5/docs/_static/000077500000000000000000000000001511007055700163745ustar00rootroot00000000000000zarr-python-3.1.5/docs/_static/logo1.png000066400000000000000000001416141511007055700201320ustar00rootroot00000000000000PNG  IHDR[~gAMA a cHRMz&u0`:pQ<bKGD pHYs+tIME ~IDATxwUչ߾wfl%[4jb {5Qaf`W5k F{G`Ez{^}ff; 0{v9oﯬ $H A $H A $H A $H A $H A $H A $H A $H A $H A $H A $H A $H A $H A $H A $H A $H A $H A $H A $H A $H A $H A $H A $H A $H A $H A $H A $H A ga lL4y L{pfD(4dUw3AU8$ɃI7 ejkJ 9 ] *Ab@ՠwAڌ퀓Dž4/`o#Uc`ȌA׭!$HRӨTaf)Ik4j`1QL JAbO/m7 ŗe$2+k#~~  QXF _W\ qH?R}d䝜0b"ua Qv@cP+wSM$!=&^43HCPsr~kT?9 /]5":]o!1 k1寗 !p1p m)kztNc& d`$C!-ֲH$@zUw A Χ}xE@:,룭jB0tKa9Sc(FoLF!mT]6KW4%H\HN kՠAU";: GU7[qpr@y'Q!!Ev6G|5M9.Ћp"8x9l&nJꑜT6r)Gxc\v8cW*qڛxp|`bcpzl(hsB[{ow3x{lOtvO>9ÙqHRPu-#|tsh < !xv΢~u!fK0'3e)ISE%Lt&ѸNEb0L).Fx2QD*S^[ryc >g+B`u7ҳ S>PV5p)T/x /ƛLϰi`0r񉪋K 46"<uL˓Ǩ0s& 3HCԁ,ZkI}RԱ`"EΏ-Fp{&܊Xg^23 UyE8ZcGp{0<1 BTat\RzTߓ.UD.!P)&Py* {O!I"Eb,3& ȃr&CHK<^\Ro`#nfp#ASaP=ӧ(PQ\LAN;hPՋnC8  E!gR<8C^WO0a^쟠?cTOF_oN\q ݇ [:K$!2E,H"N"4E{710 T6S*TDjJ2C;4qsa>ǘqW4 c>.cؐ)4q".0<хL>=|E7&z I\68l !ޣ0h#` \[WN⽪su1q6*i=<r֔x8v0`H` KqH,&ÈbQ0tYླྀNy: 9նgԅ5xħ{} ģ51G#` %Pg\FO-btkS4Z  `ıPq}CB`HJ LyFqb40 BuQ8tGH:̮){|x_đ@:5!<Sg< 'qbZek(k݇.6] J)ظj?Lz87`2PK&cSR$Pѯ@rI8qp*KBj)%"$<~N8Jϱ _bN([v|8>W}"Ӂv5@b\(ln=8[H89Ip+0 Qd5OH#x ~";[cJyT^PN7&5 "1Qy~ L5!݀ÄNClEKrdx>n: )8R;g j,0e`l |\VA`ԠH&,N};=:j"0y^S8TRG`*N6ϘƬ.gnz.nl}q"_b"K,ac-<LQfU `Ya\~\4 xy{0,?Ӊ| ^|Cg0{YQzy8 xgp\tK>-4N3m1h6pn:O?!>n&%\sL;עa'`#;yx8`WD~qYQfc1oÆ >OLYj,1傁L:?I&ݏEݺg̥`Z׌#:#zȆ 
zarr-python-3.1.5/docs/_static/logo_bw.png
[binary PNG image data omitted]
{Kl% ],( 9:K/~#LQlS^#rN;܆IԅᣱLyDs fG#Ryck ωߝTuo#Hujap4?ʢ1x[US!9/v9`ivOȟEl@`YU=Z={ß\Vnc\DV.m֪QD61~M\ӛ&"},/d?Ezy9oZnݱ03"X7Tl2= MTLPvϊNhvAgbk8=slLLe~[UDd# PPڕU˪zQD>-/qY6U1CmY,$7q=bǼ\t9 S IVՋQՇׯ9^\馷st˥4J!~$aڙ<n}fC8zQWTul¿|VDK1j@oL?"r'N7>dk6:MD>+`zמCYV~0ض9O(716w6똧G9$uIT>`x&"ϩ.)Ɋ:U}BU=H0RDfqc;;T^xex+"?tиmr.3MdѠ51pR^JPD=svR¤sQ>aYLwbmq SsCuPNC_3SUbovtX;׀2WgfJjɧ'_׾Lm U%c !; mӷnem)cWx+܄ֈ!"TY[S;U}-眡RUB0]jn""C05f^4INS6j-tMIU?ݕcDj,] 'I_d8Xs, 9cމr v9ޙC,-7鐜er(0F51yWj4̟<<2feU㨝ȸΉ|SYpۤ 3֟V{=<1,dHm/? 4`]w`A;P}:u~$uӁ"  pNH9gL߶i㯍r0qNJir. ލǗvS4ߩ :ԟI5Mu[zo,lg|Pl UBH15)=g^UO)* nƶƹ V\UO:B?TZ5_ñ08*lz[t/sB+~4йn7")!/}AYdXqwMBé^ $cgqr/ȭ"RLwIZ_Hܷz r1Ꝫ%$d :YD@/GWxڀ^D vo^,:'uCOE%\l4>>4էM?|!A Ys<: e_U}.ώ-hבX,w:8F;@ΡC )p\."hy*ޯK&`1"7u">[rH,.'0yZON+,A͌&ǰTuFgYʭuj)瓱ޔ5ZrE`lpɄ1X`ap^9uZ?EUPʩzx( F(ux#1qsbtʐ\mOlGX~TzUY.`y[U g鿈ccٶDp,DYr5Xn!͈8fHBΠxHc SSLI^X<r>XY{,S0='ՠ«U=cmU'ۖ*P19oay޾'4i7ƿ77XkJLy`q 7zj,vu$q&yu~c 0IJu8Z/EA; kV A\=̉yDq) >|OEm}MlK\>SfsbiI?ckn6np$`; +^Ԓzr 'Q|R0Db'l0 s_%:$^H>W)gs5)@,F1kt"X NWUr Qm( x3Xvv SiFQ }04l낼>1sh:SǶz0{a/:gg6s"S!vf.@?ph4&n~w ͎Ss C7Lt/.54i=c%o Ixɩ_)6ű'0% IDAT#ð߷9KDFtoo<*m2N\QC0/'iJ}bf2"Iw}n@1lE=x8 :>'5wΙSo\ 7BHZϩՕ z~~} &aa 1ف`{ÛCvcx$n zFEmu.Y'OֹSN^vaʶv4ZM+1>w $fd{X`nI/1a_ѹ6z\@!]"_ ႣR8j2Ap:pF/-dHiܤGyyfǢ:̡~7mȦF<rG@$rǜXv\ 5 5 c-ژ` oC4,J)|sMQ/CY;@Jd=$U P{Y^_%\S:'!;z3ECbF= C15 9fR@1\sc7HMQ$@V֘,<@0ծ З75c5k>Π%I|L.FotYww5s}c1Liiw^?B-m"+(`a [?A,n|65L~ҹ/ >2Z)>$횢]8t3Z DɬVIiCRELHzy3x 7gӤ_+`oܗrcc 8cI$,]qZ`ie1q .'1:LWY {ݢmD|b>SkحY$b`6IrxqB .)=4X5vPQ>8rфB_\tB^aWz&$j+qG$Ĥ6{~puh|)F#Ra .V`I,tfkyXXӀM3ֹ%mQY$]{ S;7*Apl]ȭÝ2UJP j;"b~0H. ^baEmL&۸@5Z$DL8vyqژ sZ:>b65y Koy\Ӄ?P  B?@=9rXG$;X@\YƦcIj % ?6Hi|-L=MIYaܾo@d,%m"e;Sj<U5 9$zOw:9H7|AV}M6uj`Tu)ouHlG! ,/px@jn>iT u4Lw1f^wm8:>L.-,~8l܃vVvJN:=hɓm=cN'"{ xG=dԍ- RWMQ?ek{'M`.|a5Lډ]ʩS--!y@P }KMJJw\A$;Sˆ19HP5-Ap|V,CxX%^w-8m>8υ:otð#(v`y,l[خlF`WUB:b❽@ϳo ys`>-{*(?!a+lhuX1/i)5$wn׻ygLp/&M}raicrmh6ha,[Е^gWy[`m:סqAC,Emw_ G_m9^:ݍ:w  rP IR/,V (ՂI7Y8Y1?nM^M̨=_/? 5ޝ]ڣvA:5KD ߐѣ@6~l!ݮVGiIxpDM-9} z}"8>.T|0lgZ,+@zp7},Km &c9Buq\Fo`hK s Em#NX[ F) `.yjMٝ׷)yR}G;O`ޯr/ ׇc|,&^# nG6}- <|Z z3{%9v?IIkaPj>>R@пL@3f OM;? 1 <oos<\D~Zs!!q`nH1^,ݜl( hVƬ~BzSו/zN県̓y:3:h&Lr}g_H/6vBR[+C<isb>!ߙdd#g2j126y AҞ_f Y_ǼVe-BJs XYg~dm@e!5?;QU!$S>`*Ȯ>U%@azFP4x> n$6>4NmJC>rަ_Z=J8bǀp-s@01c19O]JNF] Jfyh a!Do>{H_:s!jc4S9(,Z ز< 4I=(|!gGa`~rg&0u,#V6`TZ/6&$/zޙUO$[a_Ƒn̍vRAb>B do8KӔ>G6>SMcr"`zL*azLͲ,ALAbAEa;es u,.MkŔTf\ A!HVvCj3!!OD,@pNJ/ 7a{|nCo2^V=a.|jAp4̮/OsChH2w yv1,khHNS1oq Kz4+azC?``M,㢟 _P}<^TY fd ?̉ޚ'R,Z5|<灎KSf !# q=H>΋r]Ӏ;jwoApFM27va`);%xka^eJ֠aA|\zţk8|Fie6*Qv>T[ Ga\tt}p;nL-92@;6DEXyTxhStY99jxtX.Z_CByBi˧LQbI`B"p0{%mPql}F?AÈ}% --,|6vgs`ѫ{NMoppZ@i ~>t ^< qtިu09xNT^d2ps6Ki'F@qz/3yM'ȟC;sQ-vu2o >8i"ӱE:E3=z..qYf@ Gb}xՑFN?kM>ϲkbrLnp*nhev`>1=qoQ;gE\Q7b0}z:D<`ǐFFB & eJ;S$ ZtNHN ]f%' xK6A#e 4gt}O_B}@`.҄:F?97~dVxT*RH8c'28=1w@lW+fk(ރ@")cev`]}e*KVb =PSDlQ;!Xj a@N, ac@1 WƖ@9gǂT[:~2 hLbhL=jҨ27;< \rkdBw2,12Ņg:0ƂI&%ȨQyѹ濍@@&!& PeX,Ѧ/{1Cw6jΞ#c">lr ziF*[r֜|g*6~,UtkjIb-^vϔl2Sk._ |AmO4_tlԇp|lkq >hp?fy5`v0%-t6i!@O (cpR=@:25c+Iks7_@sWB8JI2c0OxB!Ā:-݇Es  N>WjaG[߁ 8/$X880nA }pw_3Z<C~@ijjwyo$jv>N@1vEu~tWƂEOƈ3E`W/{9@?NM#rOGG%@pf<UQ09m)s\:q^-y1;X8]@p׬mv؏PSK6[78>;>wtaMLv,6rtk$ح*3۷g?뻡0+M>-aғw'ΦN [q*G,ƍgC;+'=Rv`tpͳ5@>NmꁏβԦ3V*2NR,Vt?g@ [ITs <BV Y3} iP(yD7l׌v(LG5yK,nX3p^2]F:'{hU-Bmd76= /|)&$ ?vAOꝌsd]"Va}7!}.{OTgKm +;^K4! IM 6vyAP$Aot\@#7V@KF3c}"ى@~#N,U>R)91a 3 \}ׇ(l}1JP{d1,˽-ޙ//:P( `/I7f 6 ˛Xg?U;^ ޻+5C؝Z`h6z/j =yn ~_ s+ *wk[!d Pl 3v6aX^wϩATOa:a0AKqS,yxUji@$p6qF?$벯0Vh&`.|Ϫt ,mz$Ir=GCXm0/,A=Ob*@+x޷q1_Un H*`T! 
4eHLI!Vtd˫!z|ј`L;NU3NVꙪ111K;eTOٺY~ EjleyUN{+)LTUK2/𜈜'"KDOQ1ϜK#hY/\fT%JI|szVRQg),a/39QU uIh<!;}rKOva"܉YVSߧ]SI$T|U; C8]D5S^@0DtY "܅0|X^?TPtjS2\gCy3/gw#!Q pS},)hG!ZDҗ@/>!he(RzVq|+ y Vt; *i[T5U=R3b@p;`po@Iq*(?/ (zJ!)8$`vB@)Kt bK2Cy-þwA+ ,ͷ8@X<: NN=$cTPvvxȻ6^+)KΨ6B]@ N3T0@0l"2|::۵*/f1) 4 niPy_fm{.ʮUIi$ J @/ fxm&"bju#҃:K#YEd$XP([WI+a솒NC@_GD2&IDAT~R X5X[U.L%v"ҿ3ƅ6RտVc2>{b w; 1*X#C| u^x*K hPv"2qY\mCKf5+h/; u~pB.!zT obpv?Fp^U},B@DȞl =<:.`KU]WUt%YT9Uй8 /"NjȂSU"`a ~UX"p vc1PJ*UFc*aO3mG]2vn1PU{+lb6oC8@X;?=8A-F "sXʇ0Pn6QT2I`gsc0NDYP0k܉`B%=#p/f9F Tu3U_̅9Dl~R,╪Q͊PUoWխ1k01&+@t2 @Dv0Gr;u*-NuTuku<UZxsœJz. Dd9GAznU?*S 7Cpۋo2 l ,r=nw~% 9;/mMu+DdVU鸾<DdX{XD(U}+sL`?3ډ+[,%w"ӱ-Ѫd+d0 RJ ,?cT3^LDvf%S|FKDdj=ŗ\7uLmM4TvJ fߋaY!x@\)$7KDdML=}'h0ފKUu|vK?:qlT QI%^RK<`F; !"ñ~wL>p!pڳ+EDd>,oQ,ӁX;EERHWP3|ʇW(U}]#.}nc * _DrQYs6aLtǩޡJ:@m/LwSU})UR`pIT޾,Ӂ׈7V6 HW.J*) ORXU}ȾUҾ 1o1ATVI%e~h,2NRY%K534UPp*PIl>lhR* "" cʷJ*pR@,/".MTէ[%TRI#i1SnKSJJ(ITYZ2 ̃%G;M_%$J ^x g<C%TRI^2ˀ/m,Z-#_%TRI4#"]/ epl/sTRI%386 hY/\$:ZTR ^xiuPJ*?""CP c6 X2(K/ TT*rKGaia40܎!._TRIdf/3x |K>ZUo(cTRI2 /H۱ WֱJ*T Xe/ 8X\B% rVfj UX|'p){%T26`/qJ燀+@)4 R$c'PB% i` 4[aQT2U^T*R@gRA""|.\3w:URI%TНT@Ёpq*@¤`mH]l?~zET7iUhRAFqpN_EJz%K* (Xpׁ_;EJ )LDd5 ,Yv}nrK%Ys;.pTPɀfG* (*۩0(U}ȾURI#iW@I"",CNQ7[%x``JLDdla"pZd*y- P0}B̆Jq=dfppbG* ""KPT"V T@0HDDvf>.VЧ p`,w'?aǿA̴y!MN lH1/;knc bah ;*14mj166hC?{S:~哪[om7s {OK+`58$k}u 5pLw.ҤFJ>A @3( WM>D'.}smR] @(-af7w l".}smR @;)-cf7 Zw,$Bƀ3Xh r-A.E`o˹633b$3ES0\OI> vSZF!lz}FAx8Cؒsm; LG!I AX,w7d Lc@@J~5 Qw'd @fdfo<=nkr ̅B Rr4Vl IJx7 RdNl/*`8:ǁ!w+Z`I+2+ ̋ \ Ǥqk6 B R*4 Cx5׺N^R'R.'K .l k 3XG7Rd^)38C8>'ӴqF B }QpBN쩬 AIv]&H_]o6l5@E!(| ա.m(2h T!ci'^?wgG=(g dQV>Zv!ϸ&HV%w.4@HU(R ӽ6 UHLsxɶRt\ F_AxNAH9i  HR9+  uHm FQwߙk]S)R7 N (pI}ls?3KZRRwl3 L]'ĥ+@jA!3 =;AxUi 3;xbـ3z@F!1D.crtK\F 4B ef+G`6ϙ O( 4F Owuߛ Hk٩2J`wg!`RT6Ruqx0=`p~mD6 @L!2SN;gxH(zfv @?v[()[ IENDB`zarr-python-3.1.5/docs/_static/logo_horizontal.svg000066400000000000000000000351061511007055700223330ustar00rootroot00000000000000 zarr-python-3.1.5/docs/api/000077500000000000000000000000001511007055700155175ustar00rootroot00000000000000zarr-python-3.1.5/docs/api/zarr/000077500000000000000000000000001511007055700164755ustar00rootroot00000000000000zarr-python-3.1.5/docs/api/zarr/abc/000077500000000000000000000000001511007055700172225ustar00rootroot00000000000000zarr-python-3.1.5/docs/api/zarr/abc/buffer.md000066400000000000000000000002321511007055700210120ustar00rootroot00000000000000--- title: buffer --- ::: zarr.abc options: show_root_heading: true show_root_toc_entry: true members: false ::: zarr.abc.buffer zarr-python-3.1.5/docs/api/zarr/abc/codec.md000066400000000000000000000000511511007055700206150ustar00rootroot00000000000000--- title: codec --- ::: zarr.abc.codec zarr-python-3.1.5/docs/api/zarr/abc/index.md000066400000000000000000000011131511007055700206470ustar00rootroot00000000000000## Abstract base classes - **[buffer](./buffer.md)** - Providing access to underlying memory via [buffers](https://docs.python.org/3/c-api/buffer.html) - **[codec](./codec.md)** - Expressing [zarr codecs](https://zarr-specs.readthedocs.io/en/latest/v3/core/index.html#chunk-encoding) - **[metadata](./metadata.md)** - Creating metadata classes compatible with the Zarr API - **[numcodec](./numcodec.md)** - Protocols and classes for modeling codec interface used by numcodecs - **[store](./store.md)** - ABC for implementing Zarr stores and managing getting and setting bytes in a storezarr-python-3.1.5/docs/api/zarr/abc/metadata.md000066400000000000000000000000571511007055700213260ustar00rootroot00000000000000--- title: metadata --- 
::: zarr.abc.metadata zarr-python-3.1.5/docs/api/zarr/abc/numcodec.md000066400000000000000000000000571511007055700213430ustar00rootroot00000000000000--- title: numcodec --- ::: zarr.abc.numcodec zarr-python-3.1.5/docs/api/zarr/abc/store.md000066400000000000000000000000511511007055700206740ustar00rootroot00000000000000--- title: store --- ::: zarr.abc.store zarr-python-3.1.5/docs/api/zarr/api/000077500000000000000000000000001511007055700172465ustar00rootroot00000000000000zarr-python-3.1.5/docs/api/zarr/api/asynchronous.md000066400000000000000000000000661511007055700223250ustar00rootroot00000000000000--- title: asynchronous --- ::: zarr.api.asynchronouszarr-python-3.1.5/docs/api/zarr/api/index.md000066400000000000000000000002131511007055700206730ustar00rootroot00000000000000--- title: API --- Zarr provides both an [async](./asynchronous.md) and a [sync](./synchronous.md) API. See those pages for more details. zarr-python-3.1.5/docs/api/zarr/api/synchronous.md000066400000000000000000000002421511007055700221600ustar00rootroot00000000000000--- title: synchronous --- ::: zarr.api options: show_root_heading: true show_root_toc_entry: true members: false ::: zarr.api.synchronouszarr-python-3.1.5/docs/api/zarr/array.md000066400000000000000000000000431511007055700201320ustar00rootroot00000000000000::: zarr.Array ::: zarr.AsyncArray zarr-python-3.1.5/docs/api/zarr/buffer/000077500000000000000000000000001511007055700177465ustar00rootroot00000000000000zarr-python-3.1.5/docs/api/zarr/buffer/cpu.md000066400000000000000000000000241511007055700210530ustar00rootroot00000000000000::: zarr.buffer.cpu zarr-python-3.1.5/docs/api/zarr/buffer/gpu.md000066400000000000000000000000241511007055700210570ustar00rootroot00000000000000::: zarr.buffer.gpu zarr-python-3.1.5/docs/api/zarr/buffer/index.md000066400000000000000000000002251511007055700213760ustar00rootroot00000000000000Zarr provides buffer classes for both the [cpu](./cpu.md) and [gpu](./gpu.md). Generic buffer functionality is also detailed below. 
::: zarr.buffer zarr-python-3.1.5/docs/api/zarr/codecs.md000066400000000000000000000000471511007055700202600ustar00rootroot00000000000000--- title: codecs --- ::: zarr.codecs zarr-python-3.1.5/docs/api/zarr/codecs/000077500000000000000000000000001511007055700177355ustar00rootroot00000000000000zarr-python-3.1.5/docs/api/zarr/codecs/numcodecs.md000066400000000000000000000000641511007055700222370ustar00rootroot00000000000000--- title: numcodecs --- ::: zarr.codecs.numcodecs zarr-python-3.1.5/docs/api/zarr/config.md000066400000000000000000000000471511007055700202650ustar00rootroot00000000000000--- title: config --- ::: zarr.config zarr-python-3.1.5/docs/api/zarr/convenience.md000066400000000000000000000002261511007055700213130ustar00rootroot00000000000000--- title: convenience --- ::: zarr.consolidate_metadata ::: zarr.copy ::: zarr.copy_all ::: zarr.copy_store ::: zarr.print_debug_info ::: zarr.tree zarr-python-3.1.5/docs/api/zarr/create.md000066400000000000000000000004471511007055700202670ustar00rootroot00000000000000--- title: create --- ::: zarr.array ::: zarr.create ::: zarr.create_array ::: zarr.create_group ::: zarr.create_hierarchy ::: zarr.empty ::: zarr.empty_like ::: zarr.full ::: zarr.full_like ::: zarr.from_array ::: zarr.group ::: zarr.ones ::: zarr.ones_like ::: zarr.zeros ::: zarr.zeros_like zarr-python-3.1.5/docs/api/zarr/deprecated/000077500000000000000000000000001511007055700205755ustar00rootroot00000000000000zarr-python-3.1.5/docs/api/zarr/deprecated/convenience.md000066400000000000000000000000241511007055700234070ustar00rootroot00000000000000::: zarr.conveniencezarr-python-3.1.5/docs/api/zarr/deprecated/creation.md000066400000000000000000000000221511007055700227150ustar00rootroot00000000000000::: zarr.creation zarr-python-3.1.5/docs/api/zarr/dtype.md000066400000000000000000000000451511007055700201430ustar00rootroot00000000000000--- title: dtype --- ::: zarr.dtype zarr-python-3.1.5/docs/api/zarr/errors.md000066400000000000000000000000461511007055700203330ustar00rootroot00000000000000--- title: errors --- ::: zarr.errorszarr-python-3.1.5/docs/api/zarr/group.md000066400000000000000000000000431511007055700201500ustar00rootroot00000000000000::: zarr.Group ::: zarr.AsyncGroup zarr-python-3.1.5/docs/api/zarr/index.md000066400000000000000000000046601511007055700201340ustar00rootroot00000000000000# API Reference Complete reference documentation for the Zarr-Python API. 
::: zarr options: show_root_heading: true show_root_toc_entry: true members: false ## Core API ### Essential Classes and Functions - **[Array](array.md)** - The main Zarr array class for N-dimensional data - **[Group](group.md)** - Hierarchical organization of arrays and subgroups - **[Create](create.md)** - Functions for creating new arrays and groups - **[Open](open.md)** - Opening existing Zarr stores and arrays ### Data Operations - **[Load](load.md)** - Loading data from Zarr stores - **[Save](save.md)** - Saving data to Zarr format - **[Convenience](convenience.md)** - High-level convenience functions ### Data Types and Configuration - **[Data Types](dtype.md)** - Supported NumPy data types and type handling - **[Configuration](config.md)** - Runtime configuration and settings ## Storage and Compression - **[Codecs](codecs.md)** - Compression and filtering codecs - **[Storage](storage.md)** - Storage backend implementations and interfaces - **[Registry](registry.md)** - Codec and storage backend registry ## API Variants Zarr-Python provides both synchronous and asynchronous APIs: - **[Async API](./api/asynchronous.md)** - Asynchronous operations for concurrent access - **[Sync API](./api/synchronous.md)** - Synchronous operations for simple usage ## Abstract Base Classes The ABC module defines interfaces for extending Zarr: - **[Codec ABC](abc/codec.md)** - Interface for custom compression codecs - **[Metadata ABC](abc/metadata.md)** - Interface for metadata handling - **[Store ABC](abc/store.md)** - Interface for custom storage backends ## Utilities - **[Errors](errors.md)** - Exception classes and error handling - **[Testing](testing/index.md)** - Utilities for testing Zarr-based code ## Migration and Compatibility - **[Deprecated Functions](deprecated/convenience.md)** - Legacy convenience functions - **[Deprecated Creation](deprecated/creation.md)** - Legacy array creation functions These deprecated modules are maintained for backward compatibility but should be avoided in new code. 
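For new code, prefer the top-level functions listed above under [Create](create.md) and [Open](open.md) instead of the deprecated modules. As a minimal sketch (the store path and array parameters here are purely illustrative):

```python
import zarr

# Create a new array with the supported top-level API...
z = zarr.create_array(store="data/example.zarr", shape=(100, 100), chunks=(10, 10), dtype="f4")

# ...and reopen it later, rather than reaching for the legacy creation/convenience modules.
z = zarr.open_array("data/example.zarr", mode="r")
```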
## Getting Help - Check the [User Guide](../../user-guide/index.md) for tutorials and examples - Browse function signatures and docstrings in the API reference - Report issues on [GitHub](https://github.com/zarr-developers/zarr-python) - Join discussions on the [Zarr community forum](https://github.com/zarr-developers/community) zarr-python-3.1.5/docs/api/zarr/load.md000066400000000000000000000000431511007055700177330ustar00rootroot00000000000000--- title: load --- ::: zarr.load zarr-python-3.1.5/docs/api/zarr/metadata.md000066400000000000000000000001101511007055700205670ustar00rootroot00000000000000--- title: metadata --- ::: zarr.metadata ::: zarr.metadata.migrate_v3 zarr-python-3.1.5/docs/api/zarr/open.md000066400000000000000000000001711511007055700177570ustar00rootroot00000000000000--- title: open --- ::: zarr.open ::: zarr.open_array ::: zarr.open_consolidated ::: zarr.open_group ::: zarr.open_like zarr-python-3.1.5/docs/api/zarr/registry.md000066400000000000000000000000521511007055700206640ustar00rootroot00000000000000--- title: registry --- ::: zarr.registryzarr-python-3.1.5/docs/api/zarr/save.md000066400000000000000000000001131511007055700177500ustar00rootroot00000000000000--- title: save --- ::: zarr.save ::: zarr.save_array ::: zarr.save_group zarr-python-3.1.5/docs/api/zarr/storage.md000066400000000000000000000001401511007055700204560ustar00rootroot00000000000000--- title: storage --- ## Attributes ::: zarr.storage.StoreLike ## Classes ::: zarr.storage zarr-python-3.1.5/docs/api/zarr/testing/000077500000000000000000000000001511007055700201525ustar00rootroot00000000000000zarr-python-3.1.5/docs/api/zarr/testing/buffer.md000066400000000000000000000000431511007055700217420ustar00rootroot00000000000000## Buffer ::: zarr.testing.buffer zarr-python-3.1.5/docs/api/zarr/testing/conftest.md000066400000000000000000000000471511007055700223220ustar00rootroot00000000000000## Conftest ::: zarr.testing.conftest zarr-python-3.1.5/docs/api/zarr/testing/index.md000066400000000000000000000003241511007055700216020ustar00rootroot00000000000000--- title: testing --- See the following sub-modules: - [buffer](./buffer.md) - [conftest](./conftest.md) - [stateful](./stateful.md) - [store](./store.md) - [strategies](./strategies.md) - [utils](./utils.md) zarr-python-3.1.5/docs/api/zarr/testing/stateful.md000066400000000000000000000000471511007055700223240ustar00rootroot00000000000000## Stateful ::: zarr.testing.stateful zarr-python-3.1.5/docs/api/zarr/testing/store.md000066400000000000000000000000421511007055700216240ustar00rootroot00000000000000 ## Store ::: zarr.testing.store zarr-python-3.1.5/docs/api/zarr/testing/strategies.md000066400000000000000000000000541511007055700226450ustar00rootroot00000000000000 ## Strategies ::: zarr.testing.strategies zarr-python-3.1.5/docs/api/zarr/testing/utils.md000066400000000000000000000000411511007055700216270ustar00rootroot00000000000000## Utils ::: zarr.testing.utils zarr-python-3.1.5/docs/contributing.md000066400000000000000000000352151511007055700200050ustar00rootroot00000000000000# Contributing Zarr is a community maintained project. We welcome contributions in the form of bug reports, bug fixes, documentation, enhancement proposals and more. This page provides information on how best to contribute. ## Asking for help If you have a question about how to use Zarr, please post your question on StackOverflow using the ["zarr" tag](https://stackoverflow.com/questions/tagged/zarr). 
If you don't get a response within a day or two, feel free to raise a [GitHub issue](https://github.com/zarr-developers/zarr-python/issues/new) including a link to your StackOverflow question. We will try to respond to questions as quickly as possible, but please bear in mind that there may be periods where we have limited time to answer questions due to other commitments. ## Bug reports If you find a bug, please raise a [GitHub issue](https://github.com/zarr-developers/zarr-python/issues/new). Please include the following items in a bug report: 1. A minimal, self-contained snippet of Python code reproducing the problem. You can format the code nicely using markdown, e.g.: ```python import zarr g = zarr.group() # etc. ``` 2. An explanation of why the current behaviour is wrong/not desired, and what you expect instead. 3. Information about the version of Zarr, along with versions of dependencies and the Python interpreter, and installation information. The version of Zarr can be obtained from the `zarr.__version__` property. Please also state how Zarr was installed, e.g., "installed via pip into a virtual environment", or "installed using conda". Information about other packages installed can be obtained by executing `pip freeze` (if using pip to install packages) or `conda env export` (if using conda to install packages) from the operating system command prompt. The version of the Python interpreter can be obtained by running a Python interactive session, e.g.: ```console python ``` ```ansi Python 3.12.7 | packaged by conda-forge | (main, Oct 4 2024, 15:57:01) [Clang 17.0.6 ] on darwin ``` ## Enhancement proposals If you have an idea about a new feature or some other improvement to Zarr, please raise a [GitHub issue](https://github.com/zarr-developers/zarr-python/issues/new) first to discuss. We very much welcome ideas and suggestions for how to improve Zarr, but please bear in mind that we are likely to be conservative in accepting proposals for new features. The reasons for this are that we would like to keep the Zarr code base lean and focused on a core set of functionalities, and available time for development, review and maintenance of new features is limited. But if you have a great idea, please don't let that stop you from posting it on GitHub, just please don't be offended if we respond cautiously. ## Contributing code and/or documentation ### Forking the repository The Zarr source code is hosted on GitHub at the following location: * [https://github.com/zarr-developers/zarr-python](https://github.com/zarr-developers/zarr-python) You will need your own fork to work on the code. Go to the link above and hit the ["Fork"](https://github.com/zarr-developers/zarr-python/fork) button. Then clone your fork to your local machine: ```bash git clone git@github.com:your-user-name/zarr-python.git cd zarr-python git remote add upstream git@github.com:zarr-developers/zarr-python.git ``` ### Creating a development environment To work with the Zarr source code, it is recommended to use [hatch](https://hatch.pypa.io/latest/index.html) to create and manage development environments. Hatch will automatically install all Zarr dependencies using the same versions as are used by the core developers and continuous integration services. 
Assuming you have a Python 3 interpreter already installed, and you have cloned the Zarr source code and your current working directory is the root of the repository, you can do something like the following: ```bash pip install hatch hatch env show # list all available environments ``` To verify that your development environment is working, you can run the unit tests for one of the test environments, e.g.: ```bash hatch env run --env test.py3.12-2.2-optional run-pytest ``` ### Creating a branch Before you do any new work or submit a pull request, please open an issue on GitHub to report the bug or propose the feature you'd like to add. It's best to synchronize your fork with the upstream repository, then create a new, separate branch for each piece of work you want to do. E.g.: ```bash git checkout main git fetch upstream git checkout -b shiny-new-feature upstream/main git push -u origin shiny-new-feature ``` This changes your working directory to the 'shiny-new-feature' branch. Keep any changes in this branch specific to one bug or feature so it is clear what the branch brings to Zarr. To update this branch with latest code from Zarr, you can retrieve the changes from the main branch and perform a rebase: ```bash git fetch upstream git rebase upstream/main ``` This will replay your commits on top of the latest Zarr git main. If this leads to merge conflicts, these need to be resolved before submitting a pull request. Alternatively, you can merge the changes in from upstream/main instead of rebasing, which can be simpler: ```bash git pull upstream main ``` Again, any conflicts need to be resolved before submitting a pull request. ### Running the test suite Zarr includes a suite of unit tests. The simplest way to run the unit tests is to activate your development environment (see [creating a development environment](#creating-a-development-environment) above) and invoke: ```bash hatch env run --env test.py3.12-2.2-optional run-pytest ``` All tests are automatically run via GitHub Actions for every pull request and must pass before code can be accepted. Test coverage is also collected automatically via the Codecov service. > **Note:** Previous versions of Zarr-Python made extensive use of doctests. These tests were not maintained during the 3.0 refactor but may be brought back in the future. See issue #2614 for more details. ### Code standards - using pre-commit All code must conform to the PEP8 standard. Regarding line length, lines up to 100 characters are allowed, although please try to keep under 90 wherever possible. `Zarr` uses a set of `pre-commit` hooks and the `pre-commit` bot to format, type-check, and prettify the codebase. `pre-commit` can be installed locally by running: ```bash python -m pip install pre-commit ``` The hooks can be installed locally by running: ```bash pre-commit install ``` This would run the checks every time a commit is created locally. These checks will also run on every commit pushed to an open PR, resulting in some automatic styling fixes by the `pre-commit` bot. The checks will by default only run on the files modified by a commit, but the checks can be triggered for all the files by running: ```bash pre-commit run --all-files ``` If you would like to skip the failing checks and push the code for further discussion, use the `--no-verify` option with `git commit`. ### Test coverage > **Note:** Test coverage for Zarr-Python 3 is currently not at 100%. This is a known issue and help is welcome to bring test coverage back to 100%. 
See issue #2613 for more details.

Zarr strives to maintain 100% test coverage under the latest Python stable release. Both unit tests and docstring doctests are included when computing coverage. Running:

```bash
hatch env run --env test.py3.12-2.2-optional run-coverage
```

will automatically run the test suite with coverage and produce an XML coverage report. This should be 100% before code can be accepted into the main code base.

You can also generate an HTML coverage report by running:

```bash
hatch env run --env test.py3.12-2.2-optional run-coverage-html
```

When submitting a pull request, coverage will also be collected across all supported Python versions via the Codecov service, and will be reported back within the pull request. Codecov coverage must also be 100% before code can be accepted.

### Documentation

Docstrings for user-facing classes and functions should follow the [numpydoc](https://numpydoc.readthedocs.io/en/stable/format.html#docstring-standard) standard, including sections for Parameters and Examples. All examples should run and pass as doctests under Python 3.11.

Zarr uses mkdocs for documentation, hosted on readthedocs.org. Documentation is written in the Markdown markup language (.md files) in the `docs` folder. The documentation consists of both prose and API documentation. All user-facing classes and functions are included in the API documentation, under the `docs/api` folder using the [mkdocstrings](https://mkdocstrings.github.io/) extension. Add any new public functions or classes to the relevant markdown file in `docs/api/*.md`. Any new features or important usage information should be included in the user-guide (`docs/user-guide`). Any changes should also be included as a new file in the `changes` directory.

The documentation can be built locally by running:

```bash
hatch --env docs run build
```

The resulting built documentation will be available in the `docs/_build/html` folder.

Hatch can also be used to serve a continuously updating version of the documentation during development at [http://0.0.0.0:8000/](http://0.0.0.0:8000/). This can be done by running:

```bash
hatch --env docs run serve
```

### Changelog

zarr-python uses [towncrier](https://towncrier.readthedocs.io/en/stable/tutorial.html) to manage release notes. Most pull requests should include at least one news fragment describing the changes. To add a release note, you'll need the GitHub issue or pull request number and the type of your change (`feature`, `bugfix`, `doc`, `removal`, `misc`). With that, run `towncrier create` with your development environment, which will prompt you for the issue number, change type, and the news text:

```bash
towncrier create
```

Alternatively, you can manually create the files in the `changes` directory using the naming convention `{issue-number}.{change-type}.md`. See the [towncrier](https://towncrier.readthedocs.io/en/stable/tutorial.html) docs for more.

## Merging pull requests

Pull requests submitted by an external contributor should be reviewed and approved by at least one core developer before being merged. Ideally, pull requests submitted by a core developer should be reviewed and approved by at least one other core developer before being merged.

Pull requests should not be merged until all CI checks have passed (GitHub Actions, Codecov) against code that has had the latest main merged in.

Before merging, the milestone must be set to decide whether a PR will be in the next patch, minor, or major release.
The next section explains which types of changes go in each release.

## Compatibility and versioning policies

### Versioning

Versions of this library are identified by a triplet of integers with the form `<major>.<minor>.<patch>`, for example `3.0.4`. A release of `zarr-python` is associated with a new version identifier. That new identifier is generated by incrementing exactly one of the components of the previous version identifier by 1. When incrementing the `major` component of the version identifier, the `minor` and `patch` components are reset to 0. When incrementing the `minor` component, the `patch` component is reset to 0.

Releases are classified by the library changes contained in that release. This classification determines which component of the version identifier is incremented on release.

* **major** releases (for example, `2.18.0` -> `3.0.0`) are for changes that will require extensive adaptation efforts from many users and downstream projects. For example, breaking changes to widely-used user-facing APIs should only be applied in a major release. Users and downstream projects should carefully consider the impact of a major release before adopting it. In advance of a major release, developers should communicate the scope of the upcoming changes, and help users prepare for them.
* **minor** releases (for example, `3.0.0` -> `3.1.0`) are for changes that do not require significant effort from most users or downstream projects to respond to. API changes are possible in minor releases if the burden on users imposed by those changes is sufficiently small. For example, a recently released API may need fixes or refinements that are breaking, but low impact due to the recency of the feature. Such API changes are permitted in a minor release. Minor releases are safe for most users and downstream projects to adopt.
* **patch** releases (for example, `3.1.0` -> `3.1.1`) are for changes that contain no breaking or behaviour changes for downstream projects or users. Examples of changes suitable for a patch release are bugfixes and documentation improvements. Users should always feel safe upgrading to the latest patch release.

Note that this versioning scheme is not consistent with [Semantic Versioning](https://semver.org/). Contrary to SemVer, the Zarr library may release breaking changes in `minor` releases, or even `patch` releases under exceptional circumstances. But we should strive to avoid doing so. A better model for our versioning scheme is [Intended Effort Versioning](https://jacobtomlinson.dev/effver/), or "EffVer". The guiding principle of EffVer is to categorize releases based on the *expected effort required to upgrade to that release*.

Zarr developers should make changes as smooth as possible for users. This means making backwards-compatible changes wherever possible. When a backwards-incompatible change is necessary, users should be notified well in advance, e.g. via informative deprecation warnings.

### Data format compatibility

The Zarr library is an implementation of a file format standard defined externally -- see the [Zarr specifications website](https://zarr-specs.readthedocs.io) for the list of Zarr file format specifications. If an existing Zarr format version changes, or a new version of the Zarr format is released, then the Zarr library will generally require changes. It is very likely that a new Zarr format will require extensive breaking changes to the Zarr library, and so support for a new Zarr format in the Zarr library will almost certainly come in a new `major` release.
When the Zarr library adds support for a new Zarr format, there may be a period of accelerated changes as developers refine newly added APIs and deprecate old APIs. In such a transitional phase, breaking changes may be more frequent than usual.

## Release procedure

Open an issue on GitHub announcing the release using the release checklist template: [https://github.com/zarr-developers/zarr-python/issues/new?template=release-checklist.md](https://github.com/zarr-developers/zarr-python/issues/new?template=release-checklist.md). The release checklist includes all steps necessary for the release.zarr-python-3.1.5/docs/index.md000066400000000000000000000055011511007055700164000ustar00rootroot00000000000000
# Zarr-Python

**Useful links**: [Source Repository](https://github.com/zarr-developers/zarr-python) | [Issue Tracker](https://github.com/zarr-developers/zarr-python/issues) | [Developer Chat](https://ossci.zulipchat.com/) | [Zarr specifications](https://zarr-specs.readthedocs.io)

Zarr is a powerful library for storage of n-dimensional arrays, supporting chunking, compression, and various backends, making it a versatile choice for scientific and large-scale data. Zarr-Python is a Python library for reading and writing Zarr groups and arrays. Highlights include:

* Specification support for both Zarr format 2 and 3.
* Create and read from N-dimensional arrays using NumPy-like semantics.
* Flexible storage enables reading and writing from local, cloud and in-memory stores.
* High performance: Enables fast I/O with support for asynchronous I/O and multi-threading.
* Extensible: Customizable with user-defined codecs and stores.

## Installation

Zarr requires Python 3.11 or higher. You can install it via `pip`:

```bash
pip install zarr
```

or `conda`:

```bash
conda install --channel conda-forge zarr
```

## Navigating the documentation
- [:material-clock-fast:{ .lg .middle } __Quick start__](quick-start.md) --- New to Zarr? Check out the quick start guide. It contains a brief introduction to Zarr's main concepts and links to additional tutorials. - [:material-book-open:{ .lg .middle } __User guide__](user-guide/installation.md) --- A detailed guide for how to use Zarr-Python. - [:material-api:{ .lg .middle } __API Reference__](api/zarr/open.md) --- The reference guide contains a detailed description of the functions, modules, and objects included in Zarr. The reference describes how the methods work and which parameters can be used. It assumes that you have an understanding of the key concepts. - [:material-account-group:{ .lg .middle } __Contributor's Guide__](contributing.md) --- Want to contribute to Zarr? We welcome contributions in the form of bug reports, bug fixes, documentation, enhancement proposals and more. The contributing guidelines will guide you through the process of improving Zarr.
## Project Status More information about the Zarr format can be found on the [main website](https://zarr.dev). If you are using Zarr-Python, we would [love to hear about it](https://github.com/zarr-developers/community/issues/19). ### Funding and Support The project is fiscally sponsored by [NumFOCUS](https://numfocus.org/), a US 501(c)(3) public charity, and development has been supported by the [MRC Centre for Genomics and Global Health](https://github.com/cggh/) and the [Chan Zuckerberg Initiative](https://chanzuckerberg.com/). [Donate to Zarr](https://numfocus.org/donate-to-zarr) to support the project! zarr-python-3.1.5/docs/overrides/000077500000000000000000000000001511007055700167505ustar00rootroot00000000000000zarr-python-3.1.5/docs/overrides/main.html000066400000000000000000000003051511007055700205600ustar00rootroot00000000000000 {% extends "base.html" %} {% block outdated %} You're not viewing the latest version. Click here to go to latest. {% endblock %} zarr-python-3.1.5/docs/overrides/stylesheets/000077500000000000000000000000001511007055700213245ustar00rootroot00000000000000zarr-python-3.1.5/docs/overrides/stylesheets/extra.css000066400000000000000000000112671511007055700231700ustar00rootroot00000000000000:root { --gradient-start: #e58077; --gradient-mid-1: #e57a77; --gradient-mid-2: #e46876; --gradient-mid-3: #e34b75; --gradient-mid-4: #e12374; --gradient-mid-5: #e01073; --gradient-end: #bb1085; /* Primary theme colors --md-primary-fg-color: #e34b75; --md-primary-fg-color--light: #e57a77; --md-primary-fg-color--dark: #bb1085; /* Accent colors */ --md-accent-fg-color: #e01073; --md-accent-fg-color--transparent: rgba(224, 16, 115, 0.1); /* Text colors that work well with the palette */ --md-text-color: #333333; --md-text-color--light: #666666; } /* Dark mode color adjustments */ [data-md-color-scheme="slate"] { --md-primary-fg-color: #e57a77; --md-primary-fg-color--light: #e58077; --md-primary-fg-color--dark: #bb1085; --md-accent-fg-color: #e46876; --md-accent-fg-color--transparent: rgba(228, 104, 118, 0.1); } /* Header styling with gradient background */ .md-header { background: linear-gradient( 135deg, var(--gradient-start) 0%, var(--gradient-mid-1) 16.66%, var(--gradient-mid-2) 33.33%, var(--gradient-mid-3) 50%, var(--gradient-mid-4) 66.66%, var(--gradient-mid-5) 83.33%, var(--gradient-end) 100% ); box-shadow: 0 2px 8px rgba(187, 16, 133, 0.15); } /* Ensure header text is readable over gradient */ .md-header__title, .md-header__button, .md-header .md-icon { color: white; } /* Search box styling */ .md-search__input { background-color: rgba(255, 255, 255, 0.15); border: 1px solid rgba(255, 255, 255, 0.2); color: white; } .md-search__input::placeholder { color: rgba(255, 255, 255, 0.7); } /* Navigation tabs */ .md-tabs { background: linear-gradient( 90deg, var(--gradient-mid-3) 0%, var(--gradient-mid-4) 50%, var(--gradient-mid-5) 100% ); } .md-tabs__link { color: rgba(255, 255, 255, 0.9); } .md-tabs__link--active, .md-tabs__link:hover { color: white; opacity: 1; } /* Sidebar navigation */ .md-nav__link--active { color: var(--md-primary-fg-color); font-weight: 500; } .md-nav__link:hover { color: var(--md-accent-fg-color); } /* Code blocks */ .highlight { border-left: 4px solid var(--md-accent-fg-color); background-color: rgba(228, 104, 118, 0.05); } /* Admonitions */ .md-typeset .admonition.note { border-color: var(--md-primary-fg-color); } .md-typeset .admonition.note > .admonition-title { background-color: rgba(227, 75, 117, 0.1); border-color: var(--md-primary-fg-color); } 
.md-typeset .admonition.tip { border-color: var(--gradient-mid-1); } .md-typeset .admonition.tip > .admonition-title { background-color: rgba(229, 122, 119, 0.1); border-color: var(--gradient-mid-1); } .md-typeset .admonition.warning { border-color: var(--gradient-end); } .md-typeset .admonition.warning > .admonition-title { background-color: rgba(187, 16, 133, 0.1); border-color: var(--gradient-end); } /* Links */ .md-content a { color: var(--md-accent-fg-color); } .md-content a:hover { color: var(--gradient-end); } /* Table of contents */ .md-nav--secondary .md-nav__link--active { color: var(--md-accent-fg-color); border-left: 2px solid var(--md-accent-fg-color); padding-left: calc(1rem - 2px); } /* Footer */ .md-footer { background-color: var(--gradient-end); } /* Buttons and interactive elements */ .md-button { background: linear-gradient(135deg, var(--md-primary-fg-color), var(--md-accent-fg-color)); border: none; color: white; transition: all 0.3s ease; } .md-button:hover { transform: translateY(-2px); box-shadow: 0 4px 12px rgba(187, 16, 133, 0.3); } /* Scrollbar styling */ ::-webkit-scrollbar { width: 8px; } ::-webkit-scrollbar-track { background: rgba(187, 16, 133, 0.1); } ::-webkit-scrollbar-thumb { background: linear-gradient( 180deg, var(--md-primary-fg-color), var(--md-accent-fg-color) ); border-radius: 4px; } ::-webkit-scrollbar-thumb:hover { background: linear-gradient( 180deg, var(--md-accent-fg-color), var(--gradient-end) ); } /* Search results highlighting */ .md-search-result__title { color: var(--md-primary-fg-color); } .md-search-result__teaser mark { background-color: rgba(224, 16, 115, 0.2); color: var(--gradient-end); } .md-header__button.md-logo img, .md-header__button.md-logo svg { height: 42px !important; /* Increase from default ~24px */ width: auto !important; max-height: none !important; padding: 0 0 0 16px !important; /* Keep left padding, remove others */ margin: 0 !important; /* Remove any margin */ } /* Also remove padding from the logo button container except left */ .md-header__button.md-logo { padding: 0 0 0 8px !important; /* Keep some left padding on container */ margin: 0 !important; min-width: auto !important; } zarr-python-3.1.5/docs/quick-start.md000066400000000000000000000113611511007055700175410ustar00rootroot00000000000000This section will help you get up and running with the Zarr library in Python to efficiently manage and analyze multi-dimensional arrays. ### Creating an Array To get started, you can create a simple Zarr array: ```python exec="true" session="quickstart" import shutil shutil.rmtree('data', ignore_errors=True) import numpy as np from pprint import pprint import io import warnings warnings.filterwarnings( "ignore", message="Numcodecs codecs are not in the Zarr version 3 specification*", category=UserWarning ) np.random.seed(0) ``` ```python exec="true" session="quickstart" source="above" result="ansi" import zarr import numpy as np # Create a 2D Zarr array z = zarr.create_array( store="data/example-1.zarr", shape=(100, 100), chunks=(10, 10), dtype="f4" ) # Assign data to the array z[:, :] = np.random.random((100, 100)) print(z.info) ``` Here, we created a 2D array of shape `(100, 100)`, chunked into blocks of `(10, 10)`, and filled it with random floating-point data. This array was written to a `LocalStore` in the `data/example-1.zarr` directory. #### Compression and Filters Zarr supports data compression and filters. 
For example, to use Blosc compression: ```python exec="true" session="quickstart" source="above" result="code" # Create a 2D Zarr array with Blosc compression z = zarr.create_array( store="data/example-2.zarr", shape=(100, 100), chunks=(10, 10), dtype="f4", compressors=zarr.codecs.BloscCodec( cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.shuffle ) ) # Assign data to the array z[:, :] = np.random.random((100, 100)) print(z.info) ``` This compresses the data using the Blosc codec with shuffle enabled for better compression. ### Hierarchical Groups Zarr allows you to create hierarchical groups, similar to directories: ```python exec="true" session="quickstart" source="above" result="ansi" # Create nested groups and add arrays root = zarr.group("data/example-3.zarr") foo = root.create_group(name="foo") bar = root.create_array( name="bar", shape=(100, 10), chunks=(10, 10), dtype="f4" ) spam = foo.create_array(name="spam", shape=(10,), dtype="i4") # Assign values bar[:, :] = np.random.random((100, 10)) spam[:] = np.arange(10) # print the hierarchy print(root.tree()) ``` This creates a group with two datasets: `foo` and `bar`. #### Batch Hierarchy Creation Zarr provides tools for creating a collection of arrays and groups with a single function call. Suppose we want to copy existing groups and arrays into a new storage backend: ```python exec="true" session="quickstart" source="above" result="html" # Create nested groups and add arrays root = zarr.group("data/example-4.zarr", attributes={'name': 'root'}) foo = root.create_group(name="foo") bar = root.create_array( name="bar", shape=(100, 10), chunks=(10, 10), dtype="f4" ) nodes = {'': root.metadata} | {k: v.metadata for k,v in root.members()} # Report nodes output = io.StringIO() pprint(nodes, stream=output, width=60, depth=3) result = output.getvalue() print(result) # Create new hierarchy from nodes new_nodes = dict(zarr.create_hierarchy(store=zarr.storage.MemoryStore(), nodes=nodes)) new_root = new_nodes[''] assert new_root.attrs == root.attrs ``` Note that [`zarr.create_hierarchy`][] will only initialize arrays and groups -- copying array data must be done in a separate step. ### Persistent Storage Zarr supports persistent storage to disk or cloud-compatible backends. While examples above utilized a [`zarr.storage.LocalStore`][], a number of other storage options are available. 
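For example, data can be kept entirely in memory using a [`zarr.storage.MemoryStore`][]. A minimal sketch (the array shape, chunking, and dtype below are just illustrative):

```python
# Keep the array in memory rather than writing it to disk
store = zarr.storage.MemoryStore()
z = zarr.create_array(store=store, shape=(100, 100), chunks=(10, 10), dtype="f4")
z[:, :] = np.random.random((100, 100))
```

An in-memory store is convenient for testing and for small, temporary datasets.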
Zarr integrates seamlessly with cloud object storage such as Amazon S3 and Google Cloud Storage using external libraries like [s3fs](https://s3fs.readthedocs.io) or [gcsfs](https://gcsfs.readthedocs.io): ```python import s3fs z = zarr.create_array("s3://example-bucket/foo", mode="w", shape=(100, 100), chunks=(10, 10), dtype="f4") z[:, :] = np.random.random((100, 100)) ``` A single-file store can also be created using the [`zarr.storage.ZipStore`][]: ```python exec="true" session="quickstart" source="above" # Store the array in a ZIP file store = zarr.storage.ZipStore("data/example-5.zip", mode="w") z = zarr.create_array( store=store, shape=(100, 100), chunks=(10, 10), dtype="f4" ) # write to the array z[:, :] = np.random.random((100, 100)) # the ZipStore must be explicitly closed store.close() ``` To open an existing array from a ZIP file: ```python exec="true" session="quickstart" source="above" result="code" # Open the ZipStore in read-only mode store = zarr.storage.ZipStore("data/example-5.zip", read_only=True) z = zarr.open_array(store, mode='r') # read the data as a NumPy Array print(z[:]) ``` Read more about Zarr's storage options in the [User Guide](user-guide/index.md). zarr-python-3.1.5/docs/release-notes.md000066400000000000000000001150551511007055700200450ustar00rootroot00000000000000# Release notes # zarr 3.1.5 (2025-11-21) ## Bugfixes - Fix formatting errors in the release notes section of the docs. ([#3594](https://github.com/zarr-developers/zarr-python/issues/3594)) ## 3.1.4 (2025-11-20) ### Features - The `Array` class can now also be parametrized in the same manner as the `AsyncArray` class, allowing Zarr format v2 and v3 `Array`s to be distinguished. New types have been added to `zarr.types` to help with this. ([#3304](https://github.com/zarr-developers/zarr-python/issues/3304)) - Adds `zarr.experimental.cache_store.CacheStore`, a `Store` that implements caching by combining two other `Store` instances. See the [docs page](https://zarr.readthedocs.io/en/latest/user-guide/experimental#cachestore) for more information about this feature. ([#3366](https://github.com/zarr-developers/zarr-python/issues/3366)) - Adds a `zarr.experimental` module for unstable user-facing features. ([#3490](https://github.com/zarr-developers/zarr-python/issues/3490)) - Add a `array.target_shard_size_bytes` to [`zarr.config`][] to allow users to set a maximum number of bytes per-shard when `shards="auto"` in, for example, [`zarr.create_array`][]. ([#3547](https://github.com/zarr-developers/zarr-python/issues/3547)) - Make `async_array` on the [`zarr.Array`][] class public (`_async_array` will remain untouched, but its stability is not guaranteed). ([#3556](https://github.com/zarr-developers/zarr-python/issues/3556)) ### Bugfixes - Fix a bug that prevented `PCodec` from being properly resolved when loading arrays using that compressor. ([#3483](https://github.com/zarr-developers/zarr-python/issues/3483)) - Fixed a bug that prevented Zarr Python from opening Zarr V3 array metadata documents that contained extra keys with permissible values (dicts with a `"must_understand"` key set to `"false"`). ([#3530](https://github.com/zarr-developers/zarr-python/issues/3530)) - Fixed a bug where the `"consolidated_metadata"` key was written to metadata documents even when consolidated metadata was not used, resulting in invalid metadata documents. ([#3535](https://github.com/zarr-developers/zarr-python/issues/3535)) - Improve write performance to large shards by up to 10x. 
([#3560](https://github.com/zarr-developers/zarr-python/issues/3560)) ### Improved Documentation - Use mkdocs-material for Zarr-Python documentation ([#3118](https://github.com/zarr-developers/zarr-python/issues/3118)) - Document different values of StoreLike with examples in the user guide. ([#3303](https://github.com/zarr-developers/zarr-python/issues/3303)) - Reorganize the top-level `examples` directory to give each example its own sub-directory. Adds content to the docs for each example. ([#3502](https://github.com/zarr-developers/zarr-python/issues/3502)) - Updated 3.0 Migration Guide to include function signature change to zarr.Array.resize function. ([#3536](https://github.com/zarr-developers/zarr-python/issues/3536)) ### Misc - [#3515](https://github.com/zarr-developers/zarr-python/issues/3515), [#3532](https://github.com/zarr-developers/zarr-python/issues/3532), [#3533](https://github.com/zarr-developers/zarr-python/issues/3533), [#3553](https://github.com/zarr-developers/zarr-python/issues/3553) ## zarr 3.1.3 (2025-09-18) ### Features - Add a command-line interface to migrate v2 Zarr metadata to v3. Corresponding functions are also provided under zarr.metadata. ([#1798](https://github.com/zarr-developers/zarr-python/issues/1798)) - Add obstore implementation of delete_dir. ([#3310](https://github.com/zarr-developers/zarr-python/issues/3310)) - Adds a registry for chunk key encodings for extensibility. This allows users to implement a custom `ChunkKeyEncoding`, which can be registered via `register_chunk_key_encoding` or as an entry point under `zarr.chunk_key_encoding`. ([#3436](https://github.com/zarr-developers/zarr-python/issues/3436)) - Trying to open a group at a path where an array already exists now raises a helpful error. ([#3444](https://github.com/zarr-developers/zarr-python/issues/3444)) ### Bugfixes - Prevents creation of groups (.create_group) or arrays (.create_array) as children of an existing array. ([#2582](https://github.com/zarr-developers/zarr-python/issues/2582)) - Fix a bug preventing `ones_like`, `full_like`, `empty_like`, `zeros_like` and `open_like` functions from accepting an explicit specification of array attributes like shape, dtype, chunks etc. The functions `full_like`, `empty_like`, and `open_like` now also more consistently infer a `fill_value` parameter from the provided array. ([#2992](https://github.com/zarr-developers/zarr-python/issues/2992)) - LocalStore now uses atomic writes, which should prevent some cases of corrupted data. ([#3411](https://github.com/zarr-developers/zarr-python/issues/3411)) - Fix a potential race condition when using `zarr.create_array` with the `data` parameter set to a NumPy array. Previously Zarr was iterating over the newly created array with a granularity that was too low. Now Zarr chooses a granularity that matches the size of the stored objects for that array. ([#3422](https://github.com/zarr-developers/zarr-python/issues/3422)) - Fix ChunkGrid definition (broken in 3.1.2) ([#3425](https://github.com/zarr-developers/zarr-python/issues/3425)) - Ensure syntax like `root['/subgroup']` works equivalently to `root['subgroup']` when using consolidated metadata. ([#3428](https://github.com/zarr-developers/zarr-python/issues/3428)) - Creating a new group with `zarr.group` no longer errors. This fixes a regression introduced in version 3.1.2. 
([#3431](https://github.com/zarr-developers/zarr-python/issues/3431)) - Setting `fill_value` to a float like `0.0` when the data type of the array is an integer is a common mistake. This change lets Zarr Python read arrays with this erroneous metadata, although Zarr Python will not create such arrays. ([#3448](https://github.com/zarr-developers/zarr-python/issues/3448)) ### Deprecations and Removals - The `Store.set_partial_writes` method, which was not used by Zarr-Python, has been removed. `store.supports_partial_writes` is now always `False`. ([#2859](https://github.com/zarr-developers/zarr-python/issues/2859)) ### Misc - [#3376](https://github.com/zarr-developers/zarr-python/issues/3376), [#3390](https://github.com/zarr-developers/zarr-python/issues/3390), [#3403](https://github.com/zarr-developers/zarr-python/issues/3403), [#3449](https://github.com/zarr-developers/zarr-python/issues/3449) ## 3.1.2 (2025-08-25) ### Features - Added support for async vectorized and orthogonal indexing. ([#3083](https://github.com/zarr-developers/zarr-python/issues/3083)) - Make config param optional in init_array ([#3391](https://github.com/zarr-developers/zarr-python/issues/3391)) ### Bugfixes - Ensure that -0.0 is not considered equal to 0.0 when checking if all the values in a chunk are equal to an array's fill value. ([#3144](https://github.com/zarr-developers/zarr-python/issues/3144)) - Fix a bug in `create_array` caused by iterating over chunk-aligned regions instead of shard-aligned regions when writing data. Additionally, the behavior of `nchunks_initialized` has been adjusted. This function consistently reports the number of chunks present in stored objects, even when the array uses the sharding codec. ([#3299](https://github.com/zarr-developers/zarr-python/issues/3299)) - Opening an array or group with `mode="r+"` will no longer create new arrays or groups. ([#3307](https://github.com/zarr-developers/zarr-python/issues/3307)) - Added `zarr.errors.ArrayNotFoundError`, which is raised when attempting to open a zarr array that does not exist, and `zarr.errors.NodeNotFoundError`, which is raised when failing to open an array or a group in a context where either an array or a group was expected. ([#3367](https://github.com/zarr-developers/zarr-python/issues/3367)) - Ensure passing `config` is handled properly when `open`ing an existing array. ([#3378](https://github.com/zarr-developers/zarr-python/issues/3378)) - Raise a Zarr-specific error class when a codec can't be found by name when deserializing the given codecs. This avoids hiding this error behind a "not part of a zarr hierarchy" warning. ([#3395](https://github.com/zarr-developers/zarr-python/issues/3395)) ### Misc - [#3098](https://github.com/zarr-developers/zarr-python/issues/3098), [#3288](https://github.com/zarr-developers/zarr-python/issues/3288), [#3318](https://github.com/zarr-developers/zarr-python/issues/3318), [#3368](https://github.com/zarr-developers/zarr-python/issues/3368), [#3371](https://github.com/zarr-developers/zarr-python/issues/3371), [#3372](https://github.com/zarr-developers/zarr-python/issues/3372), [#3374](https://github.com/zarr-developers/zarr-python/issues/3374) ## 3.1.1 (2025-07-28) ### Features - Add lightweight implementations of `.getsize()` and `.getsize_prefix()` for ObjectStore. ([#3227](https://github.com/zarr-developers/zarr-python/issues/3227)) ### Bugfixes - Creating a Zarr format 2 array with the `order` keyword argument no longer raises a warning. 
([#3112](https://github.com/zarr-developers/zarr-python/issues/3112)) - Fixed the error message when passing both `config` and `write_empty_chunks` arguments to reflect the current behaviour (`write_empty_chunks` takes precedence). ([#3112](https://github.com/zarr-developers/zarr-python/issues/3112)) - Creating a Zarr format 3 array with the `order` argument now consistently ignores this argument and raises a warning. ([#3112](https://github.com/zarr-developers/zarr-python/issues/3112)) - When using [`from_array`][zarr.api.asynchronous.from_array] to copy a Zarr format 2 array to a Zarr format 3 array, if the memory order of the input array is `"F"` a warning is raised and the order ignored. This is because Zarr format 3 arrays are always stored in "C" order. ([#3112](https://github.com/zarr-developers/zarr-python/issues/3112)) - The `config` argument to [`zarr.create`][zarr.create] (and functions that create arrays) is now used - previously it had no effect. ([#3112](https://github.com/zarr-developers/zarr-python/issues/3112)) - Ensure that all abstract methods of [`ZDType`][zarr.core.dtype.ZDType] raise a `NotImplementedError` when invoked. ([#3251](https://github.com/zarr-developers/zarr-python/issues/3251)) - Register 'gpu' marker with pytest for downstream StoreTests. ([#3258](https://github.com/zarr-developers/zarr-python/issues/3258)) - Expand the range of types accepted by `parse_data_type` to include strings and Sequences. - Move the functionality of `zarr.core.dtype.parse_data_type` to a new function called `zarr.dtype.parse_dtype`. This change ensures that nomenclature is consistent across the codebase. `zarr.core.dtype.parse_data_type` remains, so this change is not breaking. ([#3264](https://github.com/zarr-developers/zarr-python/issues/3264)) - Fix a regression introduced in 3.1.0 that prevented `inf`, `-inf`, and `nan` values from being stored in `attributes`. ([#3280](https://github.com/zarr-developers/zarr-python/issues/3280)) - Fixes [`Group.nmembers()`][zarr.Group.nmembers] ignoring depth when using consolidated metadata. ([#3287](https://github.com/zarr-developers/zarr-python/issues/3287)) ### Improved Documentation - Expand the data type docs to include a demonstration of the `parse_data_type` function. Expand the docstring for the `parse_data_type` function. ([#3249](https://github.com/zarr-developers/zarr-python/issues/3249)) - Add a section on codecs to the migration guide. ([#3273](https://github.com/zarr-developers/zarr-python/issues/3273)) ### Misc - Remove warnings about vlen-utf8 and vlen-bytes codecs ([#3268](https://github.com/zarr-developers/zarr-python/issues/3268)) ## 3.1.0 (2025-07-14) ### Features - Ensure that invocations of `create_array` use consistent keyword arguments, with consistent defaults. [`zarr.api.synchronous.create_array`][] now takes a `write_data` keyword argument The `Group.create_array` method takes `data` and `write_data` keyword arguments. The functions [`zarr.api.asynchronous.create`][], [`zarr.api.asynchronous.create_array`] and the methods `Group.create_array`, `Group.array`, had the default `fill_value` changed from `0` to the `DEFAULT_FILL_VALUE` value, which instructs Zarr to use the default scalar value associated with the array's data type as the fill value. These are all functions or methods for array creation that mirror, wrap or are wrapped by, another function that already has a default `fill_value` set to `DEFAULT_FILL_VALUE`. 
This change is necessary to make these functions consistent across the entire codebase, but as this changes default values, new data might have a different fill value than expected after this change. For data types where 0 is meaningful, like integers or floats, the default scalar is 0, so this change should not be noticeable. For data types where 0 is ambiguous, like fixed-length unicode strings, the default fill value might be different after this change. Users who were relying on how Zarr interpreted `0` as a non-numeric scalar value should set their desired fill value explicitly after this change. - Added public API for Buffer ABCs and implementations. Use `zarr.buffer` to access buffer implementations, and `zarr.abc.buffer` for the interface to implement new buffer types. Users previously importing buffer from `zarr.core.buffer` should update their imports to use `zarr.buffer`. As a reminder, all of `zarr.core` is considered a private API that's not covered by zarr-python's versioning policy. ([#2871](https://github.com/zarr-developers/zarr-python/issues/2871)) - Adds zarr-specific data type classes. This change adds a `ZDType` base class for Zarr V2 and Zarr V3 data types. Child classes are defined for each NumPy data type. Each child class defines routines for `JSON` serialization. New data types can be created and registered dynamically. Prior to this change, Zarr Python had two streams for handling data types. For Zarr V2 arrays, we used NumPy data type identifiers. For Zarr V3 arrays, we used a fixed set of string enums. Both of these systems proved hard to extend. This change is largely internal, but it does change the type of the `dtype` and `data_type` fields on the `ArrayV2Metadata` and `ArrayV3Metadata` classes. Previously, `ArrayV2Metadata.dtype` was a NumPy `dtype` object, and `ArrayV3Metadata.data_type` was an internally-defined `enum`. After this change, both `ArrayV2Metadata.dtype` and `ArrayV3Metadata.data_type` are instances of `ZDType`. A NumPy data type can be generated from a `ZDType` via the `ZDType.to_native_dtype()` method. The internally-defined Zarr V3 `enum` class is gone entirely, but the `ZDType.to_json(zarr_format=3)` method can be used to generate either a string, or dictionary that has a string `name` field, that represents the string value previously associated with that `enum`. For more on this new feature, see the [documentation](user-guide/data_types.md) ([#2874](https://github.com/zarr-developers/zarr-python/issues/2874)) - Added `NDBuffer.empty` method for faster ndbuffer initialization. ([#3191](https://github.com/zarr-developers/zarr-python/issues/3191)) - The minimum version of NumPy has increased to 1.26. ([#3226](https://github.com/zarr-developers/zarr-python/issues/3226)) - Add an alternate `from_array_metadata_and_store` constructor to `CodecPipeline`. ([#3233](https://github.com/zarr-developers/zarr-python/issues/3233)) ### Bugfixes - Fixes a variety of issues related to string data types. - Brings the `VariableLengthUTF8` data type Zarr V3 identifier in alignment with Zarr Python 3.0.8 - Disallows creation of 0-length fixed-length data types - Adds a regression test for the `VariableLengthUTF8` data type that checks against version 3.0.8 - Allows users to request the `VariableLengthUTF8` data type with `str`, `"str"`, or `"string"`. ([#3170](https://github.com/zarr-developers/zarr-python/issues/3170)) - Add human readable size for No. 
bytes stored to `info_complete` ([#3190](https://github.com/zarr-developers/zarr-python/issues/3190)) - Restores the ability to create a Zarr V2 array with a `null` fill value by introducing a new class `DefaultFillValue`, and setting the default value of the `fill_value` parameter in array creation routines to an instance of `DefaultFillValue`. For Zarr V3 arrays, `None` will act as an alias for a `DefaultFillValue` instance, thus preserving compatibility with existing code. ([#3198](https://github.com/zarr-developers/zarr-python/issues/3198)) - Fix the type of `ArrayV2Metadata.codec` to constrain it to `numcodecs.abc.Codec | None`. Previously the type was more permissive, allowing objects that can be parsed into Codecs (e.g., the codec name). The constructor of `ArrayV2Metadata` still allows the permissive input when creating new objects. ([#3232](https://github.com/zarr-developers/zarr-python/issues/3232)) ### Improved Documentation - Add a self-contained example of data type extension to the `examples` directory, and expanded the documentation for data types. ([#3157](https://github.com/zarr-developers/zarr-python/issues/3157)) - Add a description on how to create a RemoteStore of a specific filesystem to the `Remote Store` section in `docs/user-guide/storage.md`. State in the docstring of `FsspecStore.from_url` that the filesystem type is inferred from the URL scheme. It should help a user handling the case when the type of FsspecStore doesn't match the URL scheme. ([#3212](https://github.com/zarr-developers/zarr-python/issues/3212)) ### Deprecations and Removals - Removes default chunk encoding settings (filters, serializer, compressors) from the global configuration object. This removal is justified on the basis that storing chunk encoding settings in the config required a brittle, confusing, and inaccurate categorization of array data types, which was particularly unsuitable after the recent addition of new data types that didn't fit naturally into the pre-existing categories. The default chunk encoding is the same (Zstandard compression, and the required object codecs for variable length data types), but the chunk encoding is now generated by functions that cannot be reconfigured at runtime. Users who relied on setting the default chunk encoding via the global configuration object should instead specify the desired chunk encoding explicitly when creating an array. This change also adds an extra validation step to the creation of Zarr V2 arrays, which ensures that arrays with a `VariableLengthUTF8` or `VariableLengthBytes` data type cannot be created without the correct "object codec". ([#3228](https://github.com/zarr-developers/zarr-python/issues/3228)) - Removes support for passing keyword-only arguments positionally to the following functions and methods: `save_array`, `open`, `group`, `open_group`, `create`, `get_basic_selection`, `set_basic_selection`, `get_orthogonal_selection`, `set_orthogonal_selection`, `get_mask_selection`, `set_mask_selection`, `get_coordinate_selection`, `set_coordinate_selection`, `get_block_selection`, `set_block_selection`, `Group.create_array`, `Group.empty`, `Group.zeroes`, `Group.ones`, `Group.empty_like`, `Group.full`, `Group.zeros_like`, `Group.ones_like`, `Group.full_like`, `Group.array`. Prior to this change, passing a keyword-only argument positionally to one of these functions or methods would raise a deprecation warning. That warning is now gone. Passing keyword-only arguments to these functions and methods positionally is now an error. 
## 3.0.10 (2025-07-03) ### Bugfixes - Removed an unnecessary check from `_fsspec._make_async` that would raise an exception when creating a read-only store backed by a local file system with `auto_mkdir` set to `False`. ([#3193](https://github.com/zarr-developers/zarr-python/issues/3193)) - Add missing import for AsyncFileSystemWrapper for _make_async in _fsspec.py ([#3195](https://github.com/zarr-developers/zarr-python/issues/3195)) ## 3.0.9 (2025-06-30) ### Features - Add `zarr.storage.FsspecStore.from_mapper()` so that `zarr.open()` supports stores of type `fsspec.mapping.FSMap`. ([#2774](https://github.com/zarr-developers/zarr-python/issues/2774)) - Implemented `move` for `LocalStore` and `ZipStore`. This allows users to move the store to a different root path. ([#3021](https://github.com/zarr-developers/zarr-python/issues/3021)) - Added `zarr.errors.GroupNotFoundError`, which is raised when attempting to open a group that does not exist. ([#3066](https://github.com/zarr-developers/zarr-python/issues/3066)) - Adds `fill_value` to the list of attributes displayed in the output of the `AsyncArray.info()` method. ([#3081](https://github.com/zarr-developers/zarr-python/issues/3081)) - Use `numpy.zeros` instead of `np.full` for a performance speedup when creating a `zarr.core.buffer.NDBuffer` with `fill_value=0`. ([#3082](https://github.com/zarr-developers/zarr-python/issues/3082)) - Port more stateful testing actions from [Icechunk](https://icechunk.io). ([#3130](https://github.com/zarr-developers/zarr-python/issues/3130)) - Adds a `with_read_only` convenience method to the `Store` abstract base class (raises `NotImplementedError`) and implementations to the `MemoryStore`, `ObjectStore`, `LocalStore`, and `FsspecStore` classes. ([#3138](https://github.com/zarr-developers/zarr-python/issues/3138)) ### Bugfixes - Ignore stale child metadata when reconsolidating metadata. ([#2921](https://github.com/zarr-developers/zarr-python/issues/2921)) - For Zarr format 2, allow fixed-length string arrays to be created without automatically inserting a `Vlen-UT8` codec in the array of filters. Fixed-length string arrays do not need this codec. This change fixes a regression where fixed-length string arrays created with Zarr Python 3 could not be read with Zarr Python 2.18. ([#3100](https://github.com/zarr-developers/zarr-python/issues/3100)) - When creating arrays without explicitly specifying a chunk size using `zarr.create` and other array creation routines, the chunk size will now set automatically instead of defaulting to the data shape. For large arrays this will result in smaller default chunk sizes. To retain previous behaviour, explicitly set the chunk shape to the data shape. This fix matches the existing chunking behaviour of `zarr.save_array` and `zarr.api.asynchronous.AsyncArray.create`. ([#3103](https://github.com/zarr-developers/zarr-python/issues/3103)) - When `zarr.save` has an argument `path=some/path/` and multiple arrays in `args`, the path resulted in `some/path/some/path` due to using the `path` argument twice while building the array path. This is now fixed. ([#3127](https://github.com/zarr-developers/zarr-python/issues/3127)) - Fix `zarr.open` default for argument `mode` when `store` is `read_only` ([#3128](https://github.com/zarr-developers/zarr-python/issues/3128)) - Suppress `FileNotFoundError` when deleting non-existent keys in the `obstore` adapter. When writing empty chunks (i.e. 
chunks where all values are equal to the array's fill value) to a zarr array, zarr will delete those chunks from the underlying store. For zarr arrays backed by the `obstore` adapter, this will potentially raise a `FileNotFoundError` if the chunk doesn't already exist. Since whether or not a delete of a non-existing object raises an error depends on the behavior of the underlying store, suppressing the error in all cases results in consistent behavior across stores, and is also what `zarr` seems to expect from the store. ([#3140](https://github.com/zarr-developers/zarr-python/issues/3140)) - Trying to open a StorePath/Array with `mode='r'` when the store is not read-only creates a read-only copy of the store. ([#3156](https://github.com/zarr-developers/zarr-python/issues/3156)) ## 3.0.8 (2025-05-19) !!! warning In versions 3.0.0 to 3.0.7 opening arrays or groups with `mode='a'` (the default for many builtin functions) would cause any existing paths in the store to be deleted. This is fixed in 3.0.8, and we recommend all users upgrade to avoid this bug that could cause unintentional data loss. ### Features - Added a `print_debug_info` function for bug reports. ([#2913](https://github.com/zarr-developers/zarr-python/issues/2913)) ### Bugfixes - Fix a bug that prevented the number of initialized chunks being counted properly. ([#2862](https://github.com/zarr-developers/zarr-python/issues/2862)) - Fixed sharding with GPU buffers. ([#2978](https://github.com/zarr-developers/zarr-python/issues/2978)) - Fix structured `dtype` fill value serialization for consolidated metadata ([#2998](https://github.com/zarr-developers/zarr-python/issues/2998)) - It is now possible to specify no compressor when creating a zarr format 2 array. This can be done by passing `compressor=None` to the various array creation routines. The default behaviour of automatically choosing a suitable default compressor remains if the compressor argument is not given. To reproduce the behaviour in previous zarr-python versions when `compressor=None` was passed, pass `compressor='auto'` instead. ([#3039](https://github.com/zarr-developers/zarr-python/issues/3039)) - Fixed the typing of `dimension_names` arguments throughout so that it now accepts iterables that contain `None` alongside `str`. ([#3045](https://github.com/zarr-developers/zarr-python/issues/3045)) - Using various functions to open data with `mode='a'` no longer deletes existing data in the store. ([#3062](https://github.com/zarr-developers/zarr-python/issues/3062)) - Internally use `typesize` constructor parameter for `numcodecs.blosc.Blosc` to improve compression ratios back to the v2-package levels. ([#2962](https://github.com/zarr-developers/zarr-python/issues/2962)) - Specifying the memory order of Zarr format 2 arrays using the `order` keyword argument has been fixed. ([#2950](https://github.com/zarr-developers/zarr-python/issues/2950)) ### Misc - [#2972](https://github.com/zarr-developers/zarr-python/issues/2972), [#3027](https://github.com/zarr-developers/zarr-python/issues/3027), [#3049](https://github.com/zarr-developers/zarr-python/issues/3049) ## 3.0.7 (2025-04-22) ### Features - Add experimental ObjectStore storage class based on obstore. ([#1661](https://github.com/zarr-developers/zarr-python/issues/1661)) - Add `zarr.from_array` using concurrent streaming of source data ([#2622](https://github.com/zarr-developers/zarr-python/issues/2622)) ### Bugfixes - 0-dimensional arrays are now returning a scalar. 
Therefore, the return type of `__getitem__` changed to NDArrayLikeOrScalar. This change is to make the behavior of 0-dimensional arrays consistent with `numpy` scalars. ([#2718](https://github.com/zarr-developers/zarr-python/issues/2718)) - Fix `fill_value` serialization for `NaN` in `ArrayV2Metadata` and add property-based testing of round-trip serialization ([#2802](https://github.com/zarr-developers/zarr-python/issues/2802)) - Fixes `ConsolidatedMetadata` serialization of `nan`, `inf`, and `-inf` to be consistent with the behavior of `ArrayMetadata`. ([#2996](https://github.com/zarr-developers/zarr-python/issues/2996)) ### Improved Documentation - Updated the 3.0 migration guide to include the removal of "." syntax for getting group members. ([#2991](https://github.com/zarr-developers/zarr-python/issues/2991), [#2997](https://github.com/zarr-developers/zarr-python/issues/2997)) ### Misc - Define a new versioning policy based on Effective Effort Versioning. This replaces the old Semantic Versioning-based policy. ([#2924](https://github.com/zarr-developers/zarr-python/issues/2924), [#2910](https://github.com/zarr-developers/zarr-python/issues/2910)) - Make warning filters in the tests more specific, so warnings emitted by tests added in the future are more likely to be caught instead of ignored. ([#2714](https://github.com/zarr-developers/zarr-python/issues/2714)) - Avoid an unnecessary memory copy when writing Zarr to a local file ([#2944](https://github.com/zarr-developers/zarr-python/issues/2944)) ## 3.0.6 (2025-03-20) ### Bugfixes - Restore functionality of `del z.attrs['key']` to actually delete the key. ([#2908](https://github.com/zarr-developers/zarr-python/issues/2908)) ## 3.0.5 (2025-03-07) ### Bugfixes - Fixed a bug where `StorePath` creation would not apply standard path normalization to the `path` parameter, which led to the creation of arrays and groups with invalid keys. ([#2850](https://github.com/zarr-developers/zarr-python/issues/2850)) - Prevent update_attributes calls from deleting old attributes ([#2870](https://github.com/zarr-developers/zarr-python/issues/2870)) ### Misc - [#2796](https://github.com/zarr-developers/zarr-python/issues/2796) ## 3.0.4 (2025-02-23) ### Features - Adds functions for concurrently creating multiple arrays and groups. ([#2665](https://github.com/zarr-developers/zarr-python/issues/2665)) ### Bugfixes - Fixed a bug where `ArrayV2Metadata` could save `filters` as an empty array. ([#2847](https://github.com/zarr-developers/zarr-python/issues/2847)) - Fix a bug when setting values of a smaller last chunk. ([#2851](https://github.com/zarr-developers/zarr-python/issues/2851)) ### Misc - [#2828](https://github.com/zarr-developers/zarr-python/issues/2828) ## 3.0.3 (2025-02-14) ### Features - Improves performance of FsspecStore.delete_dir for remote filesystems supporting concurrent/batched deletes, e.g., s3fs. ([#2661](https://github.com/zarr-developers/zarr-python/issues/2661)) - Added `zarr.config.enable_gpu` to update Zarr's configuration to use GPUs. ([#2751](https://github.com/zarr-developers/zarr-python/issues/2751)) - Avoid reading chunks during writes where possible. [#757](https://github.com/zarr-developers/zarr-python/issues/757) ([#2784](https://github.com/zarr-developers/zarr-python/issues/2784)) - `LocalStore` learned to `delete_dir`. This makes array and group deletes more efficient. 
([#2804](https://github.com/zarr-developers/zarr-python/issues/2804)) - Add `zarr.testing.strategies.array_metadata` to generate ArrayV2Metadata and ArrayV3Metadata instances. ([#2813](https://github.com/zarr-developers/zarr-python/issues/2813)) - Add arbitrary `shards` to Hypothesis strategy for generating arrays. ([#2822](https://github.com/zarr-developers/zarr-python/issues/2822)) ### Bugfixes - Fixed a bug with Zarr using device memory, instead of host memory, for storing metadata when using GPUs. ([#2751](https://github.com/zarr-developers/zarr-python/issues/2751)) - The array returned by `zarr.empty` and an empty `zarr.core.buffer.cpu.NDBuffer` will now be filled with the specified fill value, or with zeros if no fill value is provided. This fixes a bug where Zarr format 2 data with no fill value was written with unpredictable chunk sizes. ([#2755](https://github.com/zarr-developers/zarr-python/issues/2755)) - Fix zip-store path checking for stores with directories listed as files. ([#2758](https://github.com/zarr-developers/zarr-python/issues/2758)) - Use removeprefix rather than replace when removing filename prefixes in `FsspecStore.list` ([#2778](https://github.com/zarr-developers/zarr-python/issues/2778)) - Enable automatic removal of `needs release notes` with labeler action ([#2781](https://github.com/zarr-developers/zarr-python/issues/2781)) - Use the proper label config ([#2785](https://github.com/zarr-developers/zarr-python/issues/2785)) - Alters the behavior of `create_array` to ensure that any groups implied by the array's name are created if they do not already exist. Also simplifies the type signature for any function that takes an ArrayConfig-like object. ([#2795](https://github.com/zarr-developers/zarr-python/issues/2795)) - Initialise empty chunks to the default fill value during writing and add default fill values for datetime, timedelta, structured, and other (void* fixed size) data types ([#2799](https://github.com/zarr-developers/zarr-python/issues/2799)) - Ensure UTF-8 compliant strings are used to construct numpy arrays in property-based tests ([#2801](https://github.com/zarr-developers/zarr-python/issues/2801)) - Fix pickling for ZipStore ([#2807](https://github.com/zarr-developers/zarr-python/issues/2807)) - Update numcodecs to never overwrite codec configuration. Closes [#2800](https://github.com/zarr-developers/zarr-python/issues/2800). ([#2811](https://github.com/zarr-developers/zarr-python/issues/2811)) - Fix fancy indexing (e.g. arr[5, [0, 1]]) with the sharding codec ([#2817](https://github.com/zarr-developers/zarr-python/issues/2817)) ### Improved Documentation - Added new user guide on GPU. ([#2751](https://github.com/zarr-developers/zarr-python/issues/2751)) ## 3.0.2 (2025-01-31) ### Features - Test `getsize()` and `getsize_prefix()` in `StoreTests`. ([#2693](https://github.com/zarr-developers/zarr-python/issues/2693)) - Test that a `ValueError` is raised for invalid byte range syntax in `StoreTests`. ([#2693](https://github.com/zarr-developers/zarr-python/issues/2693)) - Separate instantiating and opening a store in `StoreTests`. ([#2693](https://github.com/zarr-developers/zarr-python/issues/2693)) - Add a test for using Stores as context managers in `StoreTests`. ([#2693](https://github.com/zarr-developers/zarr-python/issues/2693)) - Implemented `LoggingStore.open()`. ([#2693](https://github.com/zarr-developers/zarr-python/issues/2693)) - `LoggingStore` is now a generic class.
([#2693](https://github.com/zarr-developers/zarr-python/issues/2693)) - Change StoreTests' `test_store_repr`, `test_store_supports_writes`, `test_store_supports_partial_writes`, and `test_store_supports_listing` to be implemented using `@abstractmethod`, rather than raising `NotImplementedError`. ([#2693](https://github.com/zarr-developers/zarr-python/issues/2693)) - Test the error raised for invalid buffer arguments in `StoreTests`. ([#2693](https://github.com/zarr-developers/zarr-python/issues/2693)) - Test that data can be written to a store that's not yet open using the store.set method in `StoreTests`. ([#2693](https://github.com/zarr-developers/zarr-python/issues/2693)) - Adds a new function `init_array` for initializing an array in storage, and refactors `create_array` to use `init_array`. `create_array` takes two new parameters: `data`, an optional array-like object, and `write_data`, a bool which defaults to `True`. If `data` is given to `create_array`, then the `dtype` and `shape` attributes of `data` are used to define the corresponding attributes of the resulting Zarr array. Additionally, if `data` is given and `write_data` is `True`, then the values in `data` will be written to the newly created array. ([#2761](https://github.com/zarr-developers/zarr-python/issues/2761)) ### Bugfixes - Wrap sync fsspec filesystems with `AsyncFileSystemWrapper`. ([#2533](https://github.com/zarr-developers/zarr-python/issues/2533)) - Added backwards compatibility for Zarr format 2 structured arrays. ([#2681](https://github.com/zarr-developers/zarr-python/issues/2681)) - Update equality for `LoggingStore` and `WrapperStore` such that 'other' must also be a `LoggingStore` or `WrapperStore` respectively, rather than only checking the types of the stores they wrap. ([#2693](https://github.com/zarr-developers/zarr-python/issues/2693)) - Ensure that `ZipStore` is open before getting or setting any values. ([#2693](https://github.com/zarr-developers/zarr-python/issues/2693)) - Use stdout rather than stderr as the default stream for `LoggingStore`. ([#2693](https://github.com/zarr-developers/zarr-python/issues/2693)) - Match the errors raised by read-only stores in `StoreTests`. ([#2693](https://github.com/zarr-developers/zarr-python/issues/2693)) - Fixed `ZipStore` to make sure the correct attributes are saved when instances are pickled. This fixes a previous bug that prevented using `ZipStore` with a `ProcessPoolExecutor`. ([#2762](https://github.com/zarr-developers/zarr-python/issues/2762)) - Updated the optional test dependencies to include `botocore` and `fsspec`. ([#2768](https://github.com/zarr-developers/zarr-python/issues/2768)) - Fixed the fsspec tests to skip if `botocore` is not installed. Previously they would have failed with an import error. ([#2768](https://github.com/zarr-developers/zarr-python/issues/2768)) - Optimize full chunk writes. ([#2782](https://github.com/zarr-developers/zarr-python/issues/2782)) ### Improved Documentation - Changed the machinery for creating changelog entries. Now individual entries should be added as files to the `changes` directory in the `zarr-python` repository, instead of directly to the changelog file. ([#2736](https://github.com/zarr-developers/zarr-python/issues/2736)) ### Other - Created a type alias `ChunkKeyEncodingLike` to model the union of `ChunkKeyEncoding` instances and the dict form of the parameters of those instances. `ChunkKeyEncodingLike` should be used by high-level functions to provide a convenient way for creating `ChunkKeyEncoding` objects.
([#2763](https://github.com/zarr-developers/zarr-python/issues/2763)) ## 3.0.1 (Jan. 17, 2025) * Implement `zarr.from_array` using concurrent streaming ([#2622](https://github.com/zarr-developers/zarr-python/issues/2622)). ### Bug fixes * Fixes `order` argument for Zarr format 2 arrays ([#2679](https://github.com/zarr-developers/zarr-python/issues/2679)). * Fixes a bug that prevented reading Zarr format 2 data with consolidated metadata written using `zarr-python` version 2 ([#2694](https://github.com/zarr-developers/zarr-python/issues/2694)). * Ensure that compressor=None results in no compression when writing Zarr format 2 data ([#2708](https://github.com/zarr-developers/zarr-python/issues/2708)). * Fix for empty consolidated metadata dataset: backwards compatibility with Zarr-Python 2 ([#2695](https://github.com/zarr-developers/zarr-python/issues/2695)). ### Documentation * Add v3.0.0 release announcement banner ([#2677](https://github.com/zarr-developers/zarr-python/issues/2677)). * Quickstart guide alignment with V3 API ([#2697](https://github.com/zarr-developers/zarr-python/issues/2697)). * Fix doctest failures related to numcodecs 0.15 ([#2727](https://github.com/zarr-developers/zarr-python/issues/2727)). ### Other * Removed some unnecessary files from the source distribution to reduce its size. ([#2686](https://github.com/zarr-developers/zarr-python/issues/2686)). * Enable codecov in GitHub actions ([#2682](https://github.com/zarr-developers/zarr-python/issues/2682)). * Speed up hypothesis tests ([#2650](https://github.com/zarr-developers/zarr-python/issues/2650)). * Remove multiple imports for an import name ([#2723](https://github.com/zarr-developers/zarr-python/issues/2723)). ## 3.0.0 (Jan. 9, 2025) 3.0.0 is a new major release of Zarr-Python, with many breaking changes. See the [v3 migration guide](user-guide/v3_migration.md) for a listing of what's changed. Normal release note service will resume with further releases in the 3.0.0 series. Release notes for the zarr-python 2.x and 1.x releases can be found here: https://zarr.readthedocs.io/en/support-v2/release.html zarr-python-3.1.5/docs/user-guide/000077500000000000000000000000001511007055700170175ustar00rootroot00000000000000zarr-python-3.1.5/docs/user-guide/arrays.md000066400000000000000000000477351511007055700206620ustar00rootroot00000000000000# Working with arrays ## Creating an array Zarr has several functions for creating arrays. For example: ```python exec="true" session="arrays" import shutil shutil.rmtree('data', ignore_errors=True) import numpy as np np.random.seed(0) ``` ```python exec="true" session="arrays" source="above" result="ansi" import zarr store = zarr.storage.MemoryStore() z = zarr.create_array(store=store, shape=(10000, 10000), chunks=(1000, 1000), dtype='int32') print(z) ``` The code above creates a 2-dimensional array of 32-bit integers with 10000 rows and 10000 columns, divided into chunks where each chunk has 1000 rows and 1000 columns (and so there will be 100 chunks in total). The data is written to a [`zarr.storage.MemoryStore`][] (e.g. an in-memory dict). See [Persistent arrays](#persistent-arrays) for details on storing arrays in other stores, and see [Data types](data_types.md) for an in-depth look at the data types supported by Zarr. See the [creation API documentation](../api/zarr/create.md) for more detailed information about creating arrays. ## Reading and writing data Zarr arrays support a similar interface to [NumPy](https://numpy.org/doc/stable/) arrays for reading and writing data. 
For example, the entire array can be filled with a scalar value: ```python exec="true" session="arrays" source="above" z[:] = 42 ``` Regions of the array can also be written to, e.g.: ```python exec="true" session="arrays" source="above" import numpy as np z[0, :] = np.arange(10000) z[:, 0] = np.arange(10000) ``` The contents of the array can be retrieved by slicing, which will load the requested region into memory as a NumPy array, e.g.: ```python exec="true" session="arrays" source="above" result="ansi" print(z[0, 0]) ``` ```python exec="true" session="arrays" source="above" result="ansi" print(z[-1, -1]) ``` ```python exec="true" session="arrays" source="above" result="ansi" print(z[0, :]) ``` ```python exec="true" session="arrays" source="above" result="ansi" print(z[:, 0]) ``` ```python exec="true" session="arrays" source="above" result="ansi" print(z[:]) ``` Read more about NumPy-style indexing can be found in the [NumPy documentation](https://numpy.org/doc/stable/user/basics.indexing.html). ## Persistent arrays In the examples above, compressed data for each chunk of the array was stored in main memory. Zarr arrays can also be stored on a file system, enabling persistence of data between sessions. To do this, we can change the store argument to point to a filesystem path: ```python exec="true" session="arrays" source="above" z1 = zarr.create_array(store='data/example-1.zarr', shape=(10000, 10000), chunks=(1000, 1000), dtype='int32') ``` The array above will store its configuration metadata and all compressed chunk data in a directory called `'data/example-1.zarr'` relative to the current working directory. The [`zarr.create_array`][] function provides a convenient way to create a new persistent array or continue working with an existing array. Note, there is no need to close an array: data are automatically flushed to disk, and files are automatically closed whenever an array is modified. Persistent arrays support the same interface for reading and writing data, e.g.: ```python exec="true" session="arrays" source="above" z1[:] = 42 z1[0, :] = np.arange(10000) z1[:, 0] = np.arange(10000) ``` Check that the data have been written and can be read again: ```python exec="true" session="arrays" source="above" result="ansi" z2 = zarr.open_array('data/example-1.zarr', mode='r') print(np.all(z1[:] == z2[:])) ``` If you are just looking for a fast and convenient way to save NumPy arrays to disk then load back into memory later, the functions [`zarr.save`][] and [`zarr.load`][] may be useful. E.g.: ```python exec="true" session="arrays" source="above" result="ansi" a = np.arange(10) zarr.save('data/example-2.zarr', a) print(zarr.load('data/example-2.zarr')) ``` Please note that there are a number of other options for persistent array storage, see the [Storage Guide](storage.md) for more details. ## Resizing and appending A Zarr array can be resized, which means that any of its dimensions can be increased or decreased in length. For example: ```python exec="true" session="arrays" source="above" result="ansi" z = zarr.create_array(store='data/example-3.zarr', shape=(10000, 10000), dtype='int32',chunks=(1000, 1000)) z[:] = 42 print(f"Original shape: {z.shape}") z.resize((20000, 10000)) print(f"New shape: {z.shape}") ``` Note that when an array is resized, the underlying data are not rearranged in any way. If one or more dimensions are shrunk, any chunks falling outside the new array shape will be deleted from the underlying store. 
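To see this in practice, here is a small illustrative sketch (not one of the numbered examples above; the `data/example-resize-demo.zarr` path is hypothetical, and it assumes the `nchunks_initialized` property, which reports how many chunks are present in the store):

```python
import zarr

# Write to every chunk of a 4x4 grid of (1000, 1000) chunks.
z = zarr.create_array(store='data/example-resize-demo.zarr', shape=(4000, 4000),
                      chunks=(1000, 1000), dtype='int32')
z[:] = 1
print(z.nchunks_initialized)  # expected: 16

# Shrinking the array removes the chunks that now fall outside its shape.
z.resize((2000, 2000))
print(z.nchunks_initialized)  # expected: 4
```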
[`zarr.Array.append`][] is provided as a convenience function, which can be used to append data to any axis. E.g.: ```python exec="true" session="arrays" source="above" result="ansi" a = np.arange(10000000, dtype='int32').reshape(10000, 1000) z = zarr.create_array(store='data/example-4.zarr', shape=a.shape, dtype=a.dtype, chunks=(1000, 100)) z[:] = a print(f"Original shape: {z.shape}") z.append(a) print(f"Shape after first append: {z.shape}") z.append(np.vstack([a, a]), axis=1) print(f"Shape after second append: {z.shape}") ``` ## Compressors A number of different compressors can be used with Zarr. Zarr includes Blosc, Zstandard and Gzip compressors. Additional compressors are available through a separate package called [NumCodecs](https://numcodecs.readthedocs.io/) which provides various compressor libraries including LZ4, Zlib, BZ2 and LZMA. Different compressors can be provided via the `compressors` keyword argument accepted by all array creation functions. For example: ```python exec="true" session="arrays" source="above" result="ansi" compressors = zarr.codecs.BloscCodec(cname='zstd', clevel=3, shuffle=zarr.codecs.BloscShuffle.bitshuffle) data = np.arange(100000000, dtype='int32').reshape(10000, 10000) z = zarr.create_array(store='data/example-5.zarr', shape=data.shape, dtype=data.dtype, chunks=(1000, 1000), compressors=compressors) z[:] = data print(z.compressors) ``` This array above will use Blosc as the primary compressor, using the Zstandard algorithm (compression level 3) internally within Blosc, and with the bit-shuffle filter applied. When using a compressor, it can be useful to get some diagnostics on the compression ratio. Zarr arrays provide the [`zarr.Array.info`][] property which can be used to print useful diagnostics, e.g.: ```python exec="true" session="arrays" source="above" result="ansi" print(z.info) ``` The [`zarr.Array.info_complete`][] method inspects the underlying store and prints additional diagnostics, e.g.: ```python exec="true" session="arrays" source="above" result="ansi" print(z.info_complete()) ``` !!! note [`zarr.Array.info_complete`][] will inspect the underlying store and may be slow for large arrays. Use [`zarr.Array.info`][] if detailed storage statistics are not needed. If you don't specify a compressor, by default Zarr uses the Zstandard compressor. In addition to Blosc and Zstandard, other compression libraries can also be used. 
For example, here is an array using Gzip compression, level 1: ```python exec="true" session="arrays" source="above" result="ansi" data = np.arange(100000000, dtype='int32').reshape(10000, 10000) z = zarr.create_array(store='data/example-6.zarr', shape=data.shape, dtype=data.dtype, chunks=(1000, 1000), compressors=zarr.codecs.GzipCodec(level=1)) z[:] = data print(f"Compressors: {z.compressors}") ``` Here is an example using LZMA from [NumCodecs](https://numcodecs.readthedocs.io/) with a custom filter pipeline including LZMA's built-in delta filter: ```python exec="true" session="arrays" source="above" result="ansi" import lzma from zarr.codecs.numcodecs import LZMA lzma_filters = [dict(id=lzma.FILTER_DELTA, dist=4), dict(id=lzma.FILTER_LZMA2, preset=1)] compressors = LZMA(filters=lzma_filters) data = np.arange(100000000, dtype='int32').reshape(10000, 10000) z = zarr.create_array(store='data/example-7.zarr', shape=data.shape, dtype=data.dtype, chunks=(1000, 1000), compressors=compressors) print(f"Compressors: {z.compressors}") ``` To disable compression, set `compressors=None` when creating an array, e.g.: ```python exec="true" session="arrays" source="above" result="ansi" z = zarr.create_array( store='data/example-8.zarr', shape=(100000000,), chunks=(1000000,), dtype='int32', compressors=None ) print(f"Compressors: {z.compressors}") ``` ## Filters In some cases, compression can be improved by transforming the data in some way. For example, if nearby values tend to be correlated, then shuffling the bytes within each numerical value or storing the difference between adjacent values may increase compression ratio. Some compressors provide built-in filters that apply transformations to the data prior to compression. For example, the Blosc compressor has built-in implementations of byte- and bit-shuffle filters, and the LZMA compressor has a built-in implementation of a delta filter. However, to provide additional flexibility for implementing and using filters in combination with different compressors, Zarr also provides a mechanism for configuring filters outside of the primary compressor. Here is an example using a delta filter with the Blosc compressor: ```python exec="true" session="arrays" source="above" result="ansi" from zarr.codecs.numcodecs import Delta filters = [Delta(dtype='int32')] compressors = zarr.codecs.BloscCodec(cname='zstd', clevel=1, shuffle=zarr.codecs.BloscShuffle.shuffle) data = np.arange(100000000, dtype='int32').reshape(10000, 10000) z = zarr.create_array(store='data/example-9.zarr', shape=data.shape, dtype=data.dtype, chunks=(1000, 1000), filters=filters, compressors=compressors) print(z.info_complete()) ``` For more information about available filter codecs, see the [Numcodecs](https://numcodecs.readthedocs.io/) documentation. ## Advanced indexing Zarr arrays support several methods for advanced or "fancy" indexing, which enable a subset of data items to be extracted or updated in an array without loading the entire array into memory. Note that although this functionality is similar to some of the advanced indexing capabilities available on NumPy arrays and on h5py datasets, **the Zarr API for advanced indexing is different from both NumPy and h5py**, so please read this section carefully. For a complete description of the indexing API, see the documentation for the [`zarr.Array`][] class. ### Indexing with coordinate arrays Items from a Zarr array can be extracted by providing an integer array of coordinates. 
E.g.: ```python exec="true" session="arrays" source="above" result="ansi" data = np.arange(10) ** 2 z = zarr.create_array(store='data/example-10.zarr', shape=data.shape, dtype=data.dtype) z[:] = data print(z[:]) print(z.get_coordinate_selection([2, 5])) ``` Coordinate arrays can also be used to update data, e.g.: ```python exec="true" session="arrays" source="above" result="ansi" z.set_coordinate_selection([2, 5], [-1, -2]) print(z[:]) ``` For multidimensional arrays, coordinates must be provided for each dimension, e.g.: ```python exec="true" session="arrays" source="above" result="ansi" data = np.arange(15).reshape(3, 5) z = zarr.create_array(store='data/example-11.zarr', shape=data.shape, dtype=data.dtype) z[:] = data print(z[:]) ``` ```python exec="true" session="arrays" source="above" result="ansi" print(z.get_coordinate_selection(([0, 2], [1, 3]))) ``` ```python exec="true" session="arrays" source="above" result="ansi" z.set_coordinate_selection(([0, 2], [1, 3]), [-1, -2]) print(z[:]) ``` For convenience, coordinate indexing is also available via the `vindex` property, as well as the square bracket operator, e.g.: ```python exec="true" session="arrays" source="above" result="ansi" print(z.vindex[[0, 2], [1, 3]]) z.vindex[[0, 2], [1, 3]] = [-3, -4] ``` ```python exec="true" session="arrays" source="above" result="ansi" print(z[:]) ``` ```python exec="true" session="arrays" source="above" result="ansi" print(z[[0, 2], [1, 3]]) ``` When the indexing arrays have different shapes, they are broadcast together. That is, the following two calls are equivalent: ```python exec="true" session="arrays" source="above" result="ansi" print(z[1, [1, 3]]) print(z[[1, 1], [1, 3]]) ``` ### Indexing with a mask array Items can also be extracted by providing a Boolean mask. E.g.: ```python exec="true" session="arrays" source="above" result="ansi" data = np.arange(10) ** 2 z = zarr.create_array(store='data/example-12.zarr', shape=data.shape, dtype=data.dtype) z[:] = data print(z[:]) ``` ```python exec="true" session="arrays" source="above" result="ansi" sel = np.zeros_like(z, dtype=bool) sel[2] = True sel[5] = True print(z.get_mask_selection(sel)) ``` ```python exec="true" session="arrays" source="above" result="ansi" z.set_mask_selection(sel, [-1, -2]) print(z[:]) ``` Here's a multidimensional example: ```python exec="true" session="arrays" source="above" result="ansi" data = np.arange(15).reshape(3, 5) z = zarr.create_array(store='data/example-13.zarr', shape=data.shape, dtype=data.dtype) z[:] = data print(z[:]) ``` ```python exec="true" session="arrays" source="above" result="ansi" sel = np.zeros_like(z, dtype=bool) sel[0, 1] = True sel[2, 3] = True print(z.get_mask_selection(sel)) ``` ```python exec="true" session="arrays" source="above" result="ansi" z.set_mask_selection(sel, [-1, -2]) print(z[:]) ``` For convenience, mask indexing is also available via the `vindex` property, e.g.: ```python exec="true" session="arrays" source="above" result="ansi" print(z.vindex[sel]) ``` ```python exec="true" session="arrays" source="above" result="ansi" z.vindex[sel] = [-3, -4] print(z[:]) ``` Mask indexing is conceptually the same as coordinate indexing, and is implemented internally via the same machinery. Both styles of indexing allow selecting arbitrary items from an array, also known as point selection. ### Orthogonal indexing Zarr arrays also support methods for orthogonal indexing, which allows selections to be made along each dimension of an array independently. 
For example, this allows selecting a subset of rows and/or columns from a 2-dimensional array. E.g.: ```python exec="true" session="arrays" source="above" result="ansi" data = np.arange(15).reshape(3, 5) z = zarr.create_array(store='data/example-14.zarr', shape=data.shape, dtype=data.dtype) z[:] = data print(z[:]) ``` ```python exec="true" session="arrays" source="above" result="ansi" print(z.get_orthogonal_selection(([0, 2], slice(None)))) # select first and third rows ``` ```python exec="true" session="arrays" source="above" result="ansi" print(z.get_orthogonal_selection((slice(None), [1, 3]))) # select second and fourth columns) ``` ```python exec="true" session="arrays" source="above" result="ansi" print(z.get_orthogonal_selection(([0, 2], [1, 3]))) # select rows [0, 2] and columns [1, 4] ``` Data can also be modified, e.g.: ```python exec="true" session="arrays" source="above" z.set_orthogonal_selection(([0, 2], [1, 3]), [[-1, -2], [-3, -4]]) ``` For convenience, the orthogonal indexing functionality is also available via the `oindex` property, e.g.: ```python exec="true" session="arrays" source="above" result="ansi" data = np.arange(15).reshape(3, 5) z = zarr.create_array(store='data/example-15.zarr', shape=data.shape, dtype=data.dtype) z[:] = data print(z.oindex[[0, 2], :]) # select first and third rows ``` ```python exec="true" session="arrays" source="above" result="ansi" print(z.oindex[:, [1, 3]]) # select second and fourth columns ``` ```python exec="true" session="arrays" source="above" result="ansi" print(z.oindex[[0, 2], [1, 3]]) # select rows [0, 2] and columns [1, 4] ``` ```python exec="true" session="arrays" source="above" result="ansi" z.oindex[[0, 2], [1, 3]] = [[-1, -2], [-3, -4]] print(z[:]) ``` Any combination of integer, slice, 1D integer array and/or 1D Boolean array can be used for orthogonal indexing. If the index contains at most one iterable, and otherwise contains only slices and integers, orthogonal indexing is also available directly on the array: ```python exec="true" session="arrays" source="above" result="ansi" data = np.arange(15).reshape(3, 5) z = zarr.create_array(store='data/example-16.zarr', shape=data.shape, dtype=data.dtype) z[:] = data print(np.all(z.oindex[[0, 2], :] == z[[0, 2], :])) ``` ### Block Indexing Zarr also support block indexing, which allows selections of whole chunks based on their logical indices along each dimension of an array. For example, this allows selecting a subset of chunk aligned rows and/or columns from a 2-dimensional array. E.g.: ```python exec="true" session="arrays" source="above" data = np.arange(100).reshape(10, 10) z = zarr.create_array(store='data/example-17.zarr', shape=data.shape, dtype=data.dtype, chunks=(3, 3)) z[:] = data ``` Retrieve items by specifying their block coordinates: ```python exec="true" session="arrays" source="above" result="ansi" print(z.get_block_selection(1)) ``` Equivalent slicing: ```python exec="true" session="arrays" source="above" result="ansi" print(z[3:6]) ``` For convenience, the block selection functionality is also available via the `blocks` property, e.g.: ```python exec="true" session="arrays" source="above" result="ansi" print(z.blocks[1]) ``` Block index arrays may be multidimensional to index multidimensional arrays. For example: ```python exec="true" session="arrays" source="above" result="ansi" print(z.blocks[0, 1:3]) ``` Data can also be modified. 
Let's start by a simple 2D array: ```python exec="true" session="arrays" source="above" z = zarr.create_array(store='data/example-18.zarr', shape=(6, 6), dtype=int, chunks=(2, 2)) ``` Set data for a selection of items: ```python exec="true" session="arrays" source="above" result="ansi" z.set_block_selection((1, 0), 1) print(z[...]) ``` For convenience, this functionality is also available via the `blocks` property. E.g.: ```python exec="true" session="arrays" source="above" result="ansi" z.blocks[:, 2] = 7 print(z[...]) ``` Any combination of integer and slice can be used for block indexing: ```python exec="true" session="arrays" source="above" result="ansi" print(z.blocks[2, 1:3]) ``` ```python exec="true" session="arrays" source="above" result="ansi" root = zarr.create_group('data/example-19.zarr') foo = root.create_array(name='foo', shape=(1000, 100), chunks=(10, 10), dtype='float32') bar = root.create_array(name='bar', shape=(100,), dtype='int32') foo[:, :] = np.random.random((1000, 100)) bar[:] = np.arange(100) print(root.tree()) ``` ## Sharding Using small chunk shapes in very large arrays can lead to a very large number of chunks. This can become a performance issue for file systems and object storage. With Zarr format 3, a new sharding feature has been added to address this issue. With sharding, multiple chunks can be stored in a single storage object (e.g. a file). Within a shard, chunks are compressed and serialized separately. This allows individual chunks to be read independently. However, when writing data, a full shard must be written in one go for optimal performance and to avoid concurrency issues. That means that shards are the units of writing and chunks are the units of reading. Users need to configure the chunk and shard shapes accordingly. Sharded arrays can be created by providing the `shards` parameter to [`zarr.create_array`][]. ```python exec="true" session="arrays" source="above" result="ansi" a = zarr.create_array('data/example-20.zarr', shape=(10000, 10000), shards=(1000, 1000), chunks=(100, 100), dtype='uint8') a[:] = (np.arange(10000 * 10000) % 256).astype('uint8').reshape(10000, 10000) print(a.info_complete()) ``` In this example a shard shape of (1000, 1000) and a chunk shape of (100, 100) is used. This means that `10*10` chunks are stored in each shard, and there are `10*10` shards in total. Without the `shards` argument, there would be 10,000 chunks stored as individual files. ## Missing features in 3.0 The following features have not been ported to 3.0 yet. ### Copying and migrating data See the Zarr-Python 2 documentation on [Copying and migrating data](https://zarr.readthedocs.io/en/support-v2/tutorial.html#copying-migrating-data) for more details. zarr-python-3.1.5/docs/user-guide/attributes.md000066400000000000000000000020671511007055700215340ustar00rootroot00000000000000# Working with attributes Zarr arrays and groups support custom key/value attributes, which can be useful for storing application-specific metadata. 
For example: ```python exec="true" session="arrays" source="above" result="ansi" import zarr store = zarr.storage.MemoryStore() root = zarr.create_group(store=store) root.attrs['foo'] = 'bar' z = root.create_array(name='zzz', shape=(10000, 10000), dtype='int32') z.attrs['baz'] = 42 z.attrs['qux'] = [1, 4, 7, 12] print(sorted(root.attrs)) ``` ```python exec="true" session="arrays" source="above" result="ansi" print('foo' in root.attrs) ``` ```python exec="true" session="arrays" source="above" result="ansi" print(root.attrs['foo']) ``` ```python exec="true" session="arrays" source="above" result="ansi" print(sorted(z.attrs)) ``` ```python exec="true" session="arrays" source="above" result="ansi" print(z.attrs['baz']) ``` ```python exec="true" session="arrays" source="above" result="ansi" print(z.attrs['qux']) ``` Internally Zarr uses JSON to store array attributes, so attribute values must be JSON serializable. zarr-python-3.1.5/docs/user-guide/cli.md000066400000000000000000000050671511007055700201200ustar00rootroot00000000000000# Command-line interface Zarr-Python provides a command-line interface that enables: - migration of Zarr v2 metadata to v3 - removal of v2 or v3 metadata To see available commands run the following in a terminal: ```bash zarr --help ``` or to get help on individual commands: ```bash zarr migrate --help zarr remove-metadata --help ``` ## Migrate metadata from v2 to v3 ### Migrate to a separate location To migrate a Zarr array/group's metadata from v2 to v3 run: ```bash zarr migrate v3 path/to/input.zarr path/to/output.zarr ``` This will write new `zarr.json` files to `output.zarr`, leaving `input.zarr` un-touched. Note - this will migrate the entire Zarr hierarchy, so if `input.zarr` contains multiple groups/arrays, new `zarr.json` will be made for all of them. ### Migrate in-place If you'd prefer to migrate the metadata in-place run: ```bash zarr migrate v3 path/to/input.zarr ``` This will write new `zarr.json` files to `input.zarr`, leaving the existing v2 metadata un-touched. To open the array/group using the new metadata use: ```python import zarr zarr_with_v3_metadata = zarr.open('path/to/input.zarr', zarr_format=3) ``` Once you are happy with the conversion, you can run the following to remove the old v2 metadata: ```bash zarr remove-metadata v2 path/to/input.zarr ``` Note there is also a shortcut to migrate and remove v2 metadata in one step: ```bash zarr migrate v3 path/to/input.zarr --remove-v2-metadata ``` ## Remove metadata Remove v2 metadata using: ```bash zarr remove-metadata v2 path/to/input.zarr ``` or v3 with: ```bash zarr remove-metadata v3 path/to/input.zarr ``` By default, this will only allow removal of metadata if a valid alternative exists. For example, you can't remove v2 metadata unless v3 metadata exists at that location. To override this behaviour use `--force`: ```bash zarr remove-metadata v3 path/to/input.zarr --force ``` ## Dry run All commands provide a `--dry-run` option that will log changes that would be made on a real run, without creating or modifying any files. ```bash zarr migrate v3 path/to/input.zarr --dry-run Dry run enabled - no new files will be created or changed. 
Log of files that would be created on a real run: Saving metadata to path/to/input.zarr/zarr.json ``` ## Verbose You can also add `--verbose` **before** any command, to see a full log of its actions: ```bash zarr --verbose migrate v3 path/to/input.zarr zarr --verbose remove-metadata v2 path/to/input.zarr ``` ## Equivalent functions All features of the command-line interface are also available via functions under `zarr.metadata`.zarr-python-3.1.5/docs/user-guide/config.md000066400000000000000000000034471511007055700206160ustar00rootroot00000000000000# Runtime configuration [`zarr.config`][] is responsible for managing the configuration of zarr and is based on the [donfig](https://github.com/pytroll/donfig) Python library. Configuration values can be set using code like the following: ```python exec="true" session="config" source="above" result="ansi" import zarr print(zarr.config.get('array.order')) ``` ```python exec="true" session="config" source="above" result="ansi" zarr.config.set({'array.order': 'F'}) print(zarr.config.get('array.order')) ``` Alternatively, configuration values can be set using environment variables, e.g. `ZARR_ARRAY__ORDER=F`. The configuration can also be read from a YAML file in standard locations. For more information, see the [donfig documentation](https://donfig.readthedocs.io/en/latest/). Configuration options include the following: - Default Zarr format `default_zarr_version` - Default array order in memory `array.order` - Whether empty chunks are written to storage `array.write_empty_chunks` - Async and threading options, e.g. `async.concurrency` and `threading.max_workers` - Selections of implementations of codecs, codec pipelines and buffers - Enabling GPU support with `zarr.config.enable_gpu()`. See GPU support for more. For selecting custom implementations of codecs, pipelines, buffers and ndbuffers, first register the implementations in the registry and then select them in the config. For example, an implementation of the bytes codec in a class `'custompackage.NewBytesCodec'`, requires the value of `codecs.bytes.name` to be `'custompackage.NewBytesCodec'`. This is the current default configuration: ```python exec="true" session="config" source="above" result="ansi" from pprint import pprint import io output = io.StringIO() zarr.config.pprint(stream=output, width=60) print(output.getvalue()) ``` zarr-python-3.1.5/docs/user-guide/consolidated_metadata.md000066400000000000000000000120451511007055700236530ustar00rootroot00000000000000# Consolidated metadata !!! warning The Consolidated Metadata feature in Zarr-Python is considered experimental for v3 stores. [zarr-specs#309](https://github.com/zarr-developers/zarr-specs/pull/309) has proposed a formal extension to the v3 specification to support consolidated metadata. Zarr-Python implements the [Consolidated Metadata](https://github.com/zarr-developers/zarr-specs/pull/309) for v2 and v3 stores. Consolidated metadata can reduce the time needed to load the metadata for an entire hierarchy, especially when the metadata is being served over a network. Consolidated metadata essentially stores all the metadata for a hierarchy in the metadata of the root Group. ## Usage If consolidated metadata is present in a Zarr Group's metadata then it is used by default. The initial read to open the group will need to communicate with the store (reading from a file for a [`zarr.storage.LocalStore`][], making a network request for a [`zarr.storage.FsspecStore`][]). 
After that, any subsequent metadata reads to get child Group or Array nodes will *not* require reads from the store.

In Python, the consolidated metadata is available on the `.consolidated_metadata` attribute of the `GroupMetadata` object.

```python exec="true" session="consolidated_metadata" source="above" result="ansi"
import zarr
import warnings
warnings.filterwarnings("ignore", category=UserWarning)

store = zarr.storage.MemoryStore()
group = zarr.create_group(store=store)
print(group)
array = group.create_array(shape=(1,), name='a', dtype='float64')
print(array)
```

```python exec="true" session="consolidated_metadata" source="above" result="ansi"
array = group.create_array(shape=(2, 2), name='b', dtype='float64')
print(array)
```

```python exec="true" session="consolidated_metadata" source="above" result="ansi"
array = group.create_array(shape=(3, 3, 3), name='c', dtype='float64')
print(array)
```

```python exec="true" session="consolidated_metadata" source="above" result="ansi"
result = zarr.consolidate_metadata(store)
print(result)
```

If we open that group, the Group's metadata has a `zarr.core.group.ConsolidatedMetadata` object that can be used:

```python exec="true" session="consolidated_metadata" source="above" result="ansi"
from pprint import pprint
import io

consolidated = zarr.open_group(store=store)
consolidated_metadata = consolidated.metadata.consolidated_metadata.metadata
# Note: pprint can be used without capturing the output; it is captured here only for rendering in the docs
output = io.StringIO()
pprint(dict(sorted(consolidated_metadata.items())), stream=output, width=60)
print(output.getvalue())
```

Operations on the group to get children automatically use the consolidated metadata:

```python exec="true" session="consolidated_metadata" source="above" result="ansi"
print(consolidated['a'])  # no read / HTTP request to the Store is required
```

With nested groups, the consolidated metadata is available on the children, recursively:

```python exec="true" session="consolidated_metadata" source="above" result="ansi"
child = group.create_group('child', attributes={'kind': 'child'})
grandchild = child.create_group('child', attributes={'kind': 'grandchild'})
consolidated = zarr.consolidate_metadata(store)

output = io.StringIO()
pprint(consolidated['child'].metadata.consolidated_metadata, stream=output, width=60)
print(output.getvalue())
```

!!! info "Added in version 3.1.1"
    The keys in the consolidated metadata are sorted prior to writing. Keys are sorted in ascending order by path depth, where a path is defined as a sequence of strings joined by `"/"`. For keys with the same path length, lexicographic order is used to break the tie. This behaviour ensures deterministic metadata output for a given group.

## Synchronization and Concurrency

Consolidated metadata is intended for read-heavy use cases on slowly changing hierarchies. For hierarchies where new nodes are constantly being added, removed, or modified, consolidated metadata may not be desirable.

1. It will add some overhead to each update operation, since the metadata would need to be re-consolidated to keep it in sync with the store.
2. Readers using consolidated metadata will regularly see a "past" version of the metadata, at the time they read the root node with its consolidated metadata.

## Stores Without Support for Consolidated Metadata

Some stores may want to opt out of the consolidated metadata mechanism. This may be for several reasons, such as:

* They want to maintain read-write consistency, which is challenging with consolidated metadata.
* They have their own consolidated metadata mechanism. * They offer good enough performance without need for consolidation. This type of store can declare it doesn't want consolidation by implementing `Store.supports_consolidated_metadata` and returning `False`. For stores that don't support consolidation, Zarr will: * Raise an error on `consolidate_metadata` calls, maintaining the store in its unconsolidated state. * Raise an error in `AsyncGroup.open(..., use_consolidated=True)` * Not use consolidated metadata in `AsyncGroup.open(..., use_consolidated=None)` zarr-python-3.1.5/docs/user-guide/data_types.md000066400000000000000000000442771511007055700215140ustar00rootroot00000000000000# Array data types ## Zarr's Data Type Model Zarr is designed for interoperability with NumPy, so if you are familiar with NumPy or any other N-dimensional array library, Zarr's model for array data types should seem familiar. However, Zarr data types have some unique features that are described in this document. Zarr arrays operate under an essential design constraint: unlike NumPy arrays, Zarr arrays are designed to be stored and accessed by other Zarr implementations. This means that, among other things, Zarr data types must be serializable to metadata documents in accordance with the Zarr specifications, which adds some unique aspects to the Zarr data type model. The following sections explain Zarr's data type model in greater detail and demonstrate the Zarr Python APIs for working with Zarr data types. ### Array Data Types Every Zarr array has a data type, which defines the meaning of the array's elements. An array's data type is encoded in the JSON metadata for the array. This means that the data type of an array must be JSON-serializable. In Zarr V2, the data type of an array is stored in the `dtype` field in array metadata. Zarr V3 changed the name of this field to `data_type` and also defined new rules for the values that can be assigned to the `data_type` field. For example, in Zarr V2, the boolean array data type was represented in array metadata as the string `"|b1"`. In Zarr V3, the same type is represented as the string `"bool"`. ### Scalars Zarr also specifies how array elements, i.e., scalars, are encoded in array metadata. This is necessary because Zarr uses a field in array metadata to define a default value for chunks that are not stored. This field, called `fill_value` in both Zarr V2 and Zarr V3 metadata documents, contains a JSON value that can be decoded to a scalar value compatible with the array's data type. For the boolean data type, the scalar encoding is simple—booleans are natively supported by JSON, so Zarr saves booleans as JSON booleans. Other scalars, like floats or raw bytes, have more elaborate encoding schemes, and in some cases, this scheme depends on the Zarr format version. ## Data Types in Zarr Version 2 Version 2 of the Zarr format defined its data types relative to [NumPy's data types](https://numpy.org/doc/2.1/reference/arrays.dtypes.html#data-type-objects-dtype), and added a few non-NumPy data types as well. 
With one exception ([structured data types](#structured-data-type)), the Zarr V2 JSON identifier for a data type is just the NumPy `str` attribute of that data type: ```python exec="true" session="data_types" source="above" result="ansi" import zarr import numpy as np import json store = {} np_dtype = np.dtype('int64') print(np_dtype.str) ``` ```python exec="true" session="data_types" source="above" result="ansi" z = zarr.create_array(store=store, shape=(1,), dtype=np_dtype, zarr_format=2) dtype_meta = json.loads(store['.zarray'].to_bytes())["dtype"] print(dtype_meta) ``` !!! note The `<` character in the data type metadata encodes the [endianness](https://numpy.org/doc/2.2/reference/generated/numpy.dtype.byteorder.html), or "byte order," of the data type. As per the NumPy model, in Zarr version 2 each data type has an endianness where applicable. However, Zarr version 3 data types do not store endianness information. There are two special cases to consider: ["structured" data types](#structured-data-type), and ["object"](#object-data-type) data types. ### Structured Data Type NumPy allows the construction of a so-called "structured" data types comprised of ordered collections of named fields, where each field is itself a distinct NumPy data type. See the NumPy documentation [here](https://numpy.org/doc/stable/user/basics.rec.html). Crucially, NumPy does not use a special data type for structured data types—instead, NumPy implements structured data types as an optional feature of the so-called "Void" data type, which models arbitrary fixed-size byte strings. The `str` attribute of a regular NumPy void data type is the same as the `str` of a NumPy structured data type. This means that the `str` attribute does not convey information about the fields contained in a structured data type. For these reasons, Zarr V2 uses a special data type encoding for structured data types. They are stored in JSON as lists of pairs, where the first element is a string, and the second element is a Zarr V2 data type specification. This representation supports recursion. For example: ```python exec="true" session="data_types" source="above" result="ansi" store = {} np_dtype = np.dtype([('field_a', '>i2'), ('field_b', [('subfield_c', '>f4'), ('subfield_d', 'i2')])]) print(np_dtype.str) ``` ```python exec="true" session="data_types" source="above" result="ansi" z = zarr.create_array(store=store, shape=(1,), dtype=np_dtype, zarr_format=2) dtype_meta = json.loads(store['.zarray'].to_bytes())["dtype"] print(dtype_meta) ``` ### Object Data Type The NumPy "object" type is essentially an array of references to arbitrary Python objects. It can model arrays of variable-length UTF-8 strings, arrays of variable-length byte strings, or even arrays of variable-length arrays, each with a distinct data type. This makes the "object" data type expressive, but also complicated to store. Zarr Python cannot persistently store references to arbitrary Python objects. But if each of those Python objects has a consistent type, then we can use a special encoding procedure to store the array. This is how Zarr Python stores variable-length UTF-8 strings, or variable-length byte strings. Although these are separate data types in this library, they are both "object" arrays in NumPy, which means they have the *same* Zarr V2 string representation: `"|O"`. 
So for Zarr V2 we have to disambiguate different "object" data type arrays on the basis of their encoding procedure, i.e., the codecs declared in the `filters` and `compressor` attributes of array metadata. If an array with data type "object" used the `"vlen-utf8"` codec, then it was interpreted as an array of variable-length strings. If an array with data type "object" used the `"vlen-bytes"` codec, then it was interpreted as an array of variable-length byte strings. This all means that the `dtype` field alone does not fully specify a data type in Zarr V2. The name of the object codec used, if one was used, is also required. Although this fact can be ignored for many simple numeric data types, any comprehensive approach to Zarr V2 data types must either reject the "object" data types or include the "object codec" identifier in the JSON form of the basic data type model. ## Data Types in Zarr Version 3 The NumPy-based Zarr V2 data type representation was effective for simple data types but struggled with more complex data types, like "object" and "structured" data types. To address these limitations, Zarr V3 introduced several key changes to how data types are represented: - Instead of copying NumPy character codecs, Zarr V3 defines an identifier for each data type. The basic data types are identified by strings like `"int8"`, `"int16"`, etc., and data types that require a configuration can be identified by a JSON object. For example, this JSON object declares a datetime data type: ```json { "name": "numpy.datetime64", "configuration": { "unit": "s", "scale_factor": 10 } } ``` - Zarr V3 data types do not have endianness. This is a departure from Zarr V2, where multi-byte data types are defined with endianness information. Instead, Zarr V3 requires that the endianness of encoded array chunks is specified in the `codecs` attribute of array metadata. The Zarr V3 specification leaves the in-memory endianness of decoded array chunks as an implementation detail. For more about data types in Zarr V3, see the [V3 specification](https://zarr-specs.readthedocs.io/en/latest/v3/data-types/index.html). ## Data Types in Zarr Python The two Zarr formats that Zarr Python supports specify data types in different ways: data types in Zarr version 2 are encoded as NumPy-compatible strings (or lists, in the case of structured data types), while data types in Zarr V3 are encoded as either strings or JSON objects. Zarr V3 data types do not have any associated endianness information, unlike Zarr V2 data types. Zarr Python needs to support both Zarr V2 and V3, which means we need to abstract over these differences. We do this with an abstract Zarr data type class: [ZDType][zarr.dtype.ZDType] which provides Zarr V2 and Zarr V3 compatibility routines for "native" data types. In this context, a "native" data type is a Python class, typically defined in another library, that models an array's data type. For example, [`numpy.dtypes.UInt8DType`][] is a native data type defined in NumPy. Zarr Python wraps the NumPy `uint8` with a [ZDType][zarr.dtype.ZDType] instance called [UInt8][zarr.dtype.UInt8]. As of this writing, the only native data types Zarr Python supports are NumPy data types. We could avoid the "native data type" jargon and just say "NumPy data type," but we do not want to rule out the possibility of using non-NumPy array backends in the future. 
Each data type supported by Zarr Python is modeled by a [ZDType][zarr.dtype.ZDType] subclass, which provides an API for the following operations:

- Encoding and decoding a native data type
- Encoding and decoding a data type to and from Zarr V2 and Zarr V3 array metadata
- Encoding and decoding a scalar value to and from Zarr V2 and Zarr V3 array metadata
- Casting a Python object to a scalar value consistent with the data type

### List of data types

The following section lists the data types built in to Zarr Python. With a few exceptions, Zarr Python supports nearly all of the data types in NumPy. If you need a data type that is not listed here, it's possible to create it yourself: see [Adding New Data Types](#adding-new-data-types).

#### Boolean

- [Boolean][zarr.dtype.Bool]

#### Integral

- [Signed 8-bit integer][zarr.dtype.Int8]
- [Signed 16-bit integer][zarr.dtype.Int16]
- [Signed 32-bit integer][zarr.dtype.Int32]
- [Signed 64-bit integer][zarr.dtype.Int64]
- [Unsigned 8-bit integer][zarr.dtype.UInt8]
- [Unsigned 16-bit integer][zarr.dtype.UInt16]
- [Unsigned 32-bit integer][zarr.dtype.UInt32]
- [Unsigned 64-bit integer][zarr.dtype.UInt64]

#### Floating-point

- [16-bit floating-point][zarr.dtype.Float16]
- [32-bit floating-point][zarr.dtype.Float32]
- [64-bit floating-point][zarr.dtype.Float64]
- [64-bit complex floating-point][zarr.dtype.Complex64]
- [128-bit complex floating-point][zarr.dtype.Complex128]

#### String

- [Fixed-length UTF-32 string][zarr.dtype.FixedLengthUTF32]
- [Variable-length UTF-8 string][zarr.dtype.VariableLengthUTF8]

#### Bytes

- [Fixed-length null-terminated bytes][zarr.dtype.NullTerminatedBytes]
- [Fixed-length raw bytes][zarr.dtype.RawBytes]
- [Variable-length bytes][zarr.dtype.VariableLengthBytes]

#### Temporal

- [DateTime64][zarr.dtype.DateTime64]
- [TimeDelta64][zarr.dtype.TimeDelta64]

#### Struct-like

- [Structured][zarr.dtype.Structured]

### Example Usage

This section demonstrates the basic usage of Zarr data types.

Create a `ZDType` from a native data type:

```python exec="true" session="data_types" source="above"
from zarr.core.dtype import Int8
import numpy as np

int8 = Int8.from_native_dtype(np.dtype('int8'))
```

Convert back to a native data type:

```python exec="true" session="data_types" source="above"
native_dtype = int8.to_native_dtype()
assert native_dtype == np.dtype('int8')
```

Get the default scalar value for the data type:

```python exec="true" session="data_types" source="above"
default_value = int8.default_scalar()
assert default_value == np.int8(0)
```

Serialize to JSON for Zarr V2:

```python exec="true" session="data_types" source="above" result="ansi"
json_v2 = int8.to_json(zarr_format=2)
print(json_v2)
```

!!! note
    The representation returned by `to_json(zarr_format=2)` is more abstract than the literal contents of Zarr V2 array metadata, because the JSON representation used by the `ZDType` classes must be distinct across different data types. As noted [earlier](#object-data-type), Zarr V2 identifies multiple distinct data types with the "object" data type identifier `"|O"`. Extra information is needed to disambiguate these data types from one another. That's the reason for the `object_codec_id` field you see here.
And for V3:

```python exec="true" session="data_types" source="above" result="ansi"
json_v3 = int8.to_json(zarr_format=3)
print(json_v3)
```

Serialize a scalar value to JSON:

```python exec="true" session="data_types" source="above" result="ansi"
json_value = int8.to_json_scalar(42, zarr_format=3)
print(json_value)
```

Deserialize a scalar value from JSON:

```python exec="true" session="data_types" source="above"
scalar_value = int8.from_json_scalar(42, zarr_format=3)
assert scalar_value == np.int8(42)
```

### Adding New Data Types

Each Zarr data type is a separate Python class that inherits from [ZDType][zarr.dtype.ZDType]. You can define a custom data type by writing your own subclass of [ZDType][zarr.dtype.ZDType] and adding your data type to the data type registry. To see an executable demonstration of this process, see the [`custom_dtype` example](../user-guide/examples/custom_dtype.md).

### Data Type Resolution

Although Zarr Python uses a different data type model from NumPy, you can still define a Zarr array with a NumPy data type object:

```python exec="true" session="data_types" source="above" result="ansi"
from zarr import create_array
import numpy as np

a = create_array({}, shape=(10,), dtype=np.dtype('int'))
print(a)
```

Or a string representation of a NumPy data type:

```python exec="true" session="data_types" source="above" result="ansi"
a = create_array({}, shape=(10,), dtype='<i8')
print(a)
```

This example illustrates a general problem Zarr Python has to solve: how can we allow users to specify a data type as a string or a NumPy `dtype` object, and produce the right Zarr data type from that input? We call this process "data type resolution." Zarr Python also performs data type resolution when reading stored arrays, although in this case the input is a JSON value instead of a NumPy data type.

For simple data types like `int`, the solution could be extremely simple: just maintain a lookup table that maps a NumPy data type to the Zarr data type equivalent. But not all data types are so simple. Consider this case:

```python exec="true" session="data_types" source="above"
from zarr import create_array
import warnings
import numpy as np

warnings.simplefilter("ignore", category=FutureWarning)
a = create_array({}, shape=(10,), dtype=[('a', 'f8'), ('b', 'i8')])
print(a.dtype)  # this is the NumPy data type
```

```python exec="true" session="data_types" source="above"
print(a.metadata.data_type)  # this is the Zarr data type
```

In this example, we created a [NumPy structured data type](https://numpy.org/doc/stable/user/basics.rec.html#structured-datatypes). This data type is a container that can hold any NumPy data type, which makes it recursive. It is not possible to make a lookup table that relates all NumPy structured data types to their Zarr equivalents, as there is a nearly unbounded number of different structured data types.

So instead of a static lookup table, Zarr Python relies on a dynamic approach to data type resolution. Zarr Python defines a collection of Zarr data types. This collection, called a "data type registry," is essentially a dictionary where the keys are strings (a canonical name for each data type), and the values are the data type classes themselves. Dynamic data type resolution entails iterating over these data type classes, invoking that class' [from_native_dtype][zarr.dtype.ZDType.from_native_dtype] method, and returning a concrete data type instance if and only if exactly one of those constructor invocations is successful.
In plain language, we take some user input, like a NumPy data type, offer it to all the known data type classes, and return an instance of the one data type class that can accept that user input. We want to avoid a situation where the same native data type matches multiple Zarr data types; that is, a NumPy data type should *uniquely* specify a single Zarr data type. But data type resolution is dynamic, so it's not possible to statically guarantee this uniqueness constraint. Therefore, we attempt data type resolution against *every* data type class, and if, for some reason, a native data type matches multiple Zarr data types, we treat this as an error and raise an exception. If you have a NumPy data type and you want to get the corresponding `ZDType` instance, you can use the `parse_dtype` function, which will use the dynamic resolution described above. `parse_dtype` handles a range of input types: - NumPy data types: ```python exec="true" session="data_types" source="above" result="ansi" import numpy as np from zarr.dtype import parse_dtype my_dtype = np.dtype('>M8[10s]') print(parse_dtype(my_dtype, zarr_format=2)) ``` - NumPy data type-compatible strings: ```python exec="true" session="data_types" source="above" result="ansi" dtype_str = '>M8[10s]' print(parse_dtype(dtype_str, zarr_format=2)) ``` - `ZDType` instances: ```python exec="true" session="data_types" source="above" result="ansi" from zarr.dtype import DateTime64 zdt = DateTime64(endianness='big', scale_factor=10, unit='s') print(parse_dtype(zdt, zarr_format=2)) # Use a ZDType (this is a no-op) ``` - Python dictionaries (requires `zarr_format=3`). These dictionaries must be consistent with the `JSON` form of the data type: ```python exec="true" session="data_types" source="above" result="ansi" dt_dict = {"name": "numpy.datetime64", "configuration": {"unit": "s", "scale_factor": 10}} print(parse_dtype(dt_dict, zarr_format=3)) ``` ```python exec="true" session="data_types" source="above" result="ansi" print(parse_dtype(dt_dict, zarr_format=3).to_json(zarr_format=3)) ``` zarr-python-3.1.5/docs/user-guide/examples/000077500000000000000000000000001511007055700206355ustar00rootroot00000000000000zarr-python-3.1.5/docs/user-guide/examples/custom_dtype.md000066400000000000000000000001671511007055700237020ustar00rootroot00000000000000--8<-- "examples/custom_dtype/README.md" ## Source Code ```python --8<-- "examples/custom_dtype/custom_dtype.py" ``` zarr-python-3.1.5/docs/user-guide/experimental.md000066400000000000000000000220011511007055700220310ustar00rootroot00000000000000# Experimental features This section contains documentation for experimental Zarr Python features. The features described here are exciting and potentially useful, but also volatile -- we might change them at any time. Take this into account if you consider depending on these features. ## `CacheStore` Zarr Python 3.1.4 adds `zarr.experimental.cache_store.CacheStore` provides a dual-store caching implementation that can be wrapped around any Zarr store to improve performance for repeated data access. This is particularly useful when working with remote stores (e.g., S3, HTTP) where network latency can significantly impact data access speed. The CacheStore implements a cache that uses a separate Store instance as the cache backend, providing persistent caching capabilities with time-based expiration, size-based eviction, and flexible cache storage options. It automatically evicts the least recently used items when the cache reaches its maximum size. 
Because the `CacheStore` uses an ordinary Zarr `Store` object as the caching layer, you can reuse the data stored in the cache later. > **Note:** The CacheStore is a wrapper store that maintains compatibility with the full > `zarr.abc.store.Store` API while adding transparent caching functionality. ## Basic Usage Creating a CacheStore requires both a source store and a cache store. The cache store can be any Store implementation, providing flexibility in cache persistence: ```python exec="true" session="experimental" source="above" result="ansi" import zarr from zarr.storage import LocalStore import numpy as np from tempfile import mkdtemp from zarr.experimental.cache_store import CacheStore # Create a local store and a separate cache store local_store_path = mkdtemp(suffix='.zarr') source_store = LocalStore(local_store_path) cache_store = zarr.storage.MemoryStore() # In-memory cache cached_store = CacheStore( store=source_store, cache_store=cache_store, max_size=256*1024*1024 # 256MB cache ) # Create an array using the cached store zarr_array = zarr.zeros((100, 100), chunks=(10, 10), dtype='f8', store=cached_store, mode='w') # Write some data to force chunk creation zarr_array[:] = np.random.random((100, 100)) ``` The dual-store architecture allows you to use different store types for source and cache, such as a remote store for source data and a local store for persistent caching. ## Performance Benefits The CacheStore provides significant performance improvements for repeated data access: ```python exec="true" session="experimental" source="above" result="ansi" import time # Benchmark reading with cache start = time.time() for _ in range(100): _ = zarr_array[:] elapsed_cache = time.time() - start # Compare with direct store access (without cache) zarr_array_nocache = zarr.open(local_store_path, mode='r') start = time.time() for _ in range(100): _ = zarr_array_nocache[:] elapsed_nocache = time.time() - start # Cache provides speedup for repeated access speedup = elapsed_nocache / elapsed_cache ``` Cache effectiveness is particularly pronounced with repeated access to the same data chunks. 
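The benchmark above keeps its timings in the `elapsed_cache`, `elapsed_nocache`, and `speedup` variables, so a short report can be printed directly from them. This is only a sketch of how to inspect those measurements; the actual numbers depend on the machine, the store types involved, and the access pattern, so treat any particular speedup as illustrative rather than guaranteed.

```python exec="true" session="experimental" source="above" result="ansi"
# Summarize the timings collected by the benchmark above
print(f"with cache:    {elapsed_cache:.4f}s for 100 full reads")
print(f"without cache: {elapsed_nocache:.4f}s for 100 full reads")
print(f"speedup:       {speedup:.1f}x")
```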
## Cache Configuration The CacheStore can be configured with several parameters: **max_size**: Controls the maximum size of cached data in bytes ```python exec="true" session="experimental" source="above" result="ansi" # 256MB cache with size limit cache = CacheStore( store=source_store, cache_store=cache_store, max_size=256*1024*1024 ) # Unlimited cache size (use with caution) cache = CacheStore( store=source_store, cache_store=cache_store, max_size=None ) ``` **max_age_seconds**: Controls time-based cache expiration ```python exec="true" session="experimental" source="above" result="ansi" # Cache expires after 1 hour cache = CacheStore( store=source_store, cache_store=cache_store, max_age_seconds=3600 ) # Cache never expires cache = CacheStore( store=source_store, cache_store=cache_store, max_age_seconds="infinity" ) ``` **cache_set_data**: Controls whether written data is cached ```python exec="true" session="experimental" source="above" result="ansi" # Cache data when writing (default) cache = CacheStore( store=source_store, cache_store=cache_store, cache_set_data=True ) # Don't cache written data (read-only cache) cache = CacheStore( store=source_store, cache_store=cache_store, cache_set_data=False ) ``` ## Cache Statistics The CacheStore provides statistics to monitor cache performance and state: ```python exec="true" session="experimental" source="above" result="ansi" # Access some data to generate cache activity data = zarr_array[0:50, 0:50] # First access - cache miss data = zarr_array[0:50, 0:50] # Second access - cache hit # Get comprehensive cache information info = cached_store.cache_info() print(info['cache_store_type']) # e.g., 'MemoryStore' print(info['max_age_seconds']) print(info['max_size']) print(info['current_size']) print(info['tracked_keys']) print(info['cached_keys']) print(info['cache_set_data']) ``` The `cache_info()` method returns a dictionary with detailed information about the cache state. ## Cache Management The CacheStore provides methods for manual cache management: ```python exec="true" session="experimental" source="above" result="ansi" # Clear all cached data and tracking information import asyncio asyncio.run(cached_store.clear_cache()) # Check cache info after clearing info = cached_store.cache_info() assert info['tracked_keys'] == 0 assert info['current_size'] == 0 ``` The `clear_cache()` method is an async method that clears both the cache store (if it supports the `clear` method) and all internal tracking data. ## Best Practices 1. **Choose appropriate cache store**: Use MemoryStore for fast temporary caching or LocalStore for persistent caching 2. **Size the cache appropriately**: Set `max_size` based on available storage and expected data access patterns 3. **Use with remote stores**: The cache provides the most benefit when wrapping slow remote stores 4. **Monitor cache statistics**: Use `cache_info()` to tune cache size and access patterns 5. **Consider data locality**: Group related data accesses together to improve cache efficiency 6. 
**Set appropriate expiration**: Use `max_age_seconds` for time-sensitive data or "infinity" for static data ## Working with Different Store Types The CacheStore can wrap any store that implements the `zarr.abc.store.Store` interface and use any store type for the cache backend: ### Local Store with Memory Cache ```python exec="true" session="experimental-memory-cache" source="above" result="ansi" from zarr.storage import LocalStore, MemoryStore from zarr.experimental.cache_store import CacheStore from tempfile import mkdtemp local_store_path = mkdtemp(suffix='.zarr') source_store = LocalStore(local_store_path) cache_store = MemoryStore() cached_store = CacheStore( store=source_store, cache_store=cache_store, max_size=128*1024*1024 ) ``` ### Memory Store with Persistent Cache ```python exec="true" session="experimental-local-cache" source="above" result="ansi" from tempfile import mkdtemp from zarr.storage import MemoryStore, LocalStore from zarr.experimental.cache_store import CacheStore memory_store = MemoryStore() local_store_path = mkdtemp(suffix='.zarr') persistent_cache = LocalStore(local_store_path) cached_store = CacheStore( store=memory_store, cache_store=persistent_cache, max_size=256*1024*1024 ) ``` The dual-store architecture provides flexibility in choosing the best combination of source and cache stores for your specific use case. ## Examples from Real Usage Here's a complete example demonstrating cache effectiveness: ```python exec="true" session="experimental-final" source="above" result="ansi" import numpy as np import time from tempfile import mkdtemp import zarr import zarr.storage from zarr.experimental.cache_store import CacheStore # Create test data with dual-store cache local_store_path = mkdtemp(suffix='.zarr') source_store = zarr.storage.LocalStore(local_store_path) cache_store = zarr.storage.MemoryStore() cached_store = CacheStore( store=source_store, cache_store=cache_store, max_size=256*1024*1024 ) zarr_array = zarr.zeros((100, 100), chunks=(10, 10), dtype='f8', store=cached_store, mode='w') zarr_array[:] = np.random.random((100, 100)) # Demonstrate cache effectiveness with repeated access start = time.time() data = zarr_array[20:30, 20:30] # First access (cache miss) first_access = time.time() - start start = time.time() data = zarr_array[20:30, 20:30] # Second access (cache hit) second_access = time.time() - start # Check cache statistics info = cached_store.cache_info() assert info['cached_keys'] > 0 # Should have cached keys assert info['current_size'] > 0 # Should have cached data print(f"Cache contains {info['cached_keys']} keys with {info['current_size']} bytes") ``` This example shows how the CacheStore can significantly reduce access times for repeated data reads, particularly important when working with remote data sources. The dual-store architecture allows for flexible cache persistence and management. zarr-python-3.1.5/docs/user-guide/extending.md000066400000000000000000000103641511007055700213320ustar00rootroot00000000000000# Extending Zarr Zarr-Python 3 was designed to be extensible. This means that you can extend the library by writing custom classes and plugins. Currently, Zarr can be extended in the following ways: ## Custom codecs !!! note This section explains how custom codecs can be created for Zarr format 3 arrays. 
    For Zarr format 2, codecs should subclass the [numcodecs.abc.Codec](https://numcodecs.readthedocs.io/en/stable/abc.html#numcodecs.abc.Codec) base class and register through [numcodecs.registry.register_codec](https://numcodecs.readthedocs.io/en/stable/registry.html#numcodecs.registry.register_codec).

There are three types of codecs in Zarr:

- array-to-array
- array-to-bytes
- bytes-to-bytes

Array-to-array codecs are used to transform the array data before serializing to bytes. Examples include delta encoding or scaling codecs. Array-to-bytes codecs are used for serializing the array data to bytes. In Zarr, the main codec to use for numeric arrays is the [`zarr.codecs.BytesCodec`][]. Bytes-to-bytes codecs transform the serialized bytestreams of the array data. Examples include compression codecs, such as [`zarr.codecs.GzipCodec`][], [`zarr.codecs.BloscCodec`][] or [`zarr.codecs.ZstdCodec`][], and codecs that add a checksum to the bytestream, such as [`zarr.codecs.Crc32cCodec`][].

Custom codecs for Zarr are implemented by subclassing the relevant base class, see [`zarr.abc.codec.ArrayArrayCodec`][], [`zarr.abc.codec.ArrayBytesCodec`][] and [`zarr.abc.codec.BytesBytesCodec`][]. Most custom codecs should implement the `_encode_single` and `_decode_single` methods. These methods operate on single chunks of the array data. Alternatively, custom codecs can implement the `encode` and `decode` methods, which operate on batches of chunks, in case the codec is intended to implement its own batch processing.

Custom codecs should also implement the following methods:

- `compute_encoded_size`, which returns the byte size of the encoded data given the byte size of the original data. It should raise `NotImplementedError` for codecs with variable-sized outputs, such as compression codecs.
- `validate` (optional), which can be used to check that the codec metadata is compatible with the array metadata. It should raise errors if not.
- `resolve_metadata` (optional), which is important for codecs that change the shape, dtype or fill value of a chunk.
- `evolve_from_array_spec` (optional), which can be useful for automatically filling in codec configuration metadata from the array metadata.

To use custom codecs in Zarr, they need to be registered using the [entrypoint mechanism](https://packaging.python.org/en/latest/specifications/entry-points/). Commonly, entrypoints are declared in the `pyproject.toml` of your package under the `[project.entry-points."zarr.codecs"]` section. Zarr will automatically discover and load all codecs registered with the entrypoint mechanism from imported modules.

```toml
[project.entry-points."zarr.codecs"]
"custompackage.fancy_codec" = "custompackage:FancyCodec"
```

New codecs need to have their own unique identifier. To avoid naming collisions, it is strongly recommended to prefix the codec identifier with a unique name. For example, the codecs from `numcodecs` are prefixed with `numcodecs.`, e.g. `numcodecs.delta`.

!!! note
    Note that the extension mechanism for Zarr format 3 is still under development. Requirements for custom codecs, including the choice of codec identifiers, might change in the future.

It is also possible to register codecs as replacements for existing codecs. This might be useful for providing specialized implementations, such as GPU-based codecs. In case of multiple codecs, the [`zarr.config`][] mechanism can be used to select the preferred implementation.

## Custom stores

Coming soon.
## Custom array buffers

Zarr-python provides control over where and how arrays are stored in memory through [`zarr.abc.buffer.Buffer`][]. Currently both CPU (the default) and GPU implementations are provided (see [Using GPUs with Zarr](gpu.md) for more information). You can implement your own buffer classes by implementing the interface defined in [`zarr.abc.buffer.BufferPrototype`][].

## Other extensions

In the future, Zarr will support writing custom data types and chunk grids.

zarr-python-3.1.5/docs/user-guide/gpu.md000066400000000000000000000017431511007055700201410ustar00rootroot00000000000000
# Using GPUs with Zarr

Zarr can use GPUs to accelerate your workload by running `zarr.config.enable_gpu()`.

!!! note
    `zarr-python` currently supports reading the ndarray data into device (GPU) memory as the final stage of the codec pipeline. Data will still be read into or copied to host (CPU) memory for encoding and decoding. In the future, codecs will be available for compressing and decompressing data on the GPU, avoiding the need to move data between the host and device for compression and decompression.

## Reading data into device memory

[`zarr.config`][] configures Zarr to use GPU memory for the data buffers used internally by Zarr via `enable_gpu()`.

```python
import zarr
import cupy as cp

zarr.config.enable_gpu()
store = zarr.storage.MemoryStore()
z = zarr.create_array(
    store=store, shape=(100, 100), chunks=(10, 10), dtype="float32",
)
type(z[:10, :10])  # cupy.ndarray
```

Note that the output type is a `cupy.ndarray` rather than a NumPy array.

zarr-python-3.1.5/docs/user-guide/groups.md000066400000000000000000000112371511007055700206640ustar00rootroot00000000000000
# Working with groups

Zarr supports hierarchical organization of arrays via groups. As with arrays, groups can be stored in memory, on disk, or via other storage systems that support a similar interface.

To create a group, use the [`zarr.group`][] function:

```python exec="true" session="groups" source="above" result="ansi"
import zarr

store = zarr.storage.MemoryStore()
root = zarr.create_group(store=store)
print(root)
```

Groups have a similar API to the Group class from [h5py](https://www.h5py.org/).
For example, groups can contain other groups: ```python exec="true" session="groups" source="above" foo = root.create_group('foo') bar = foo.create_group('bar') ``` Groups can also contain arrays, e.g.: ```python exec="true" session="groups" source="above" result="ansi" z1 = bar.create_array(name='baz', shape=(10000, 10000), chunks=(1000, 1000), dtype='int32') print(z1) ``` Members of a group can be accessed via the suffix notation, e.g.: ```python exec="true" session="groups" source="above" result="ansi" print(root['foo']) ``` The '/' character can be used to access multiple levels of the hierarchy in one call, e.g.: ```python exec="true" session="groups" source="above" result="ansi" print(root['foo/bar']) ``` ```python exec="true" session="groups" source="above" result="ansi" print(root['foo/bar/baz']) ``` The [`zarr.Group.tree`][] method can be used to print a tree representation of the hierarchy, e.g.: ```python exec="true" session="groups" source="above" result="ansi" print(root.tree()) ``` The [`zarr.open_group`][] function provides a convenient way to create or re-open a group stored in a directory on the file-system, with sub-groups stored in sub-directories, e.g.: ```python exec="true" session="groups" source="above" result="ansi" root = zarr.open_group('data/group.zarr', mode='w') print(root) ``` ```python exec="true" session="groups" source="above" result="ansi" z = root.create_array(name='foo/bar/baz', shape=(10000, 10000), chunks=(1000, 1000), dtype='int32') print(z) ``` For more information on groups see the [`zarr.Group` API docs](../api/zarr/group.md). ## Batch Group Creation You can also create multiple groups concurrently with a single function call. [`zarr.create_hierarchy`][] takes a [`zarr Storage instance`](../api/zarr/storage.md) instance and a dict of `key : metadata` pairs, parses that dict, and writes metadata documents to storage: ```python exec="true" session="groups" source="above" result="ansi" from zarr import create_hierarchy from zarr.core.group import GroupMetadata from zarr.storage import LocalStore from pprint import pprint import io node_spec = {'a/b/c': GroupMetadata()} nodes_created = dict(create_hierarchy(store=LocalStore(root='data'), nodes=node_spec)) # Report nodes (pprint is used for cleaner rendering in the docs) output = io.StringIO() pprint(nodes_created, stream=output, width=60) print(output.getvalue()) ``` Note that we only specified a single group named `a/b/c`, but 4 groups were created. These additional groups were created to ensure that the desired node `a/b/c` is connected to the root group `''` by a sequence of intermediate groups. [`zarr.create_hierarchy`][] normalizes the `nodes` keyword argument to ensure that the resulting hierarchy is complete, i.e. all groups or arrays are connected to the root of the hierarchy via intermediate groups. Because [`zarr.create_hierarchy`][] concurrently creates metadata documents, it's more efficient than repeated calls to [`create_group`][zarr.create_group] or [`create_array`][zarr.create_array], provided you can statically define the metadata for the groups and arrays you want to create. ## Array and group diagnostics Diagnostic information about arrays and groups is available via the `info` property. 
E.g.: ```python exec="true" session="groups" source="above" result="ansi" store = zarr.storage.MemoryStore() root = zarr.group(store=store) foo = root.create_group('foo') bar = foo.create_array(name='bar', shape=1000000, chunks=100000, dtype='int64') bar[:] = 42 baz = foo.create_array(name='baz', shape=(1000, 1000), chunks=(100, 100), dtype='float32') baz[:] = 4.2 print(root.info) ``` ```python exec="true" session="groups" source="above" result="ansi" print(foo.info) ``` ```python exec="true" session="groups" source="above" result="ansi" print(bar.info_complete()) ``` ```python exec="true" session="groups" source="above" result="ansi" print(baz.info) ``` Groups also have the [`zarr.Group.tree`][] method, e.g.: ```python exec="true" session="groups" source="above" result="ansi" print(root.tree()) ``` !!! note [`zarr.Group.tree`][] requires the optional [rich](https://rich.readthedocs.io/en/stable/) dependency. It can be installed with the `[tree]` extra.zarr-python-3.1.5/docs/user-guide/index.md000066400000000000000000000027131511007055700204530ustar00rootroot00000000000000# User Guide Welcome to the user guide, where you can learn more about using Zarr-Python! ## Getting Started New to Zarr-Python? Start here: - **[Installation](installation.md)** - Install Zarr-Python - **[Quick-start](../quick-start.md)** - Quick overview of core functionality ## Core Concepts Learn the essential building blocks: - **[Arrays](arrays.md)** - Learn the fundamentals of working with arrays - **[Groups](groups.md)** - Organize your data with groups - **[Attributes](attributes.md)** - Configure metadata to your data structures - **[Storage](storage.md)** - Learn how data is stored and accessed ## Configuration & Setup Customize your experience: - **[Runtime Configuration](config.md)** - Configure Zarr-Python for your needs - **[V3 Migration](v3_migration.md)** - Upgrading from version 2 to version 3 ## Advanced Topics Take your skills to the next level: - **[Data Types](data_types.md)** - Learn about supported and extensible data types - **[Performance](performance.md)** - Optimize for speed and efficiency - **[GPU](gpu.md)** - Leverage GPU acceleration - **[Extending](extending.md)** - Extend functionality with custom code - **[Consolidated Metadata](consolidated_metadata.md)** - Advanced metadata management ## Need Help? - Browse the [API Reference](../api/zarr/index.md) for detailed function documentation - Report issues on [GitHub](https://github.com/zarr-developers/zarr-python/issues?q=sort%3Aupdated-desc+is%3Aissue+is%3Aopen) zarr-python-3.1.5/docs/user-guide/installation.md000066400000000000000000000040741511007055700220470ustar00rootroot00000000000000# Installation ## Required dependencies Required dependencies include: - [Python](https://docs.python.org/3/) (3.11 or later) - [packaging](https://packaging.pypa.io) (22.0 or later) - [numpy](https://numpy.org) (1.26 or later) - [numcodecs](https://numcodecs.readthedocs.io) (0.14 or later) - [google-crc32c](https://github.com/googleapis/python-crc32c) (1.5 or later) - [typing_extensions](https://typing-extensions.readthedocs.io) (4.9 or later) - [donfig](https://donfig.readthedocs.io) (0.8 or later) ## pip Zarr is available on [PyPI](https://pypi.org/project/zarr/). Install it using `pip`: ```console pip install zarr ``` There are a number of optional dependency groups you can install for extra functionality. These can be installed using `pip install "zarr[]"`, e.g. 
`pip install "zarr[gpu]"` - `gpu`: support for GPUs - `remote`: support for reading/writing to remote data stores Additional optional dependencies include `rich`, `universal_pathlib`. These must be installed separately. ## conda Zarr is also published to [conda-forge](https://conda-forge.org). Install it using `conda`: ```console conda install -c conda-forge zarr ``` Conda does not support optional dependencies, so you will have to manually install any packages needed to enable extra functionality. # Nightly wheels Development wheels are built nightly and published to the [scientific-python-nightly-wheels](https://anaconda.org/scientific-python-nightly-wheels) index. To install the latest nightly build: ```console pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple zarr ``` Note that nightly wheels may be unstable and are intended for testing purposes. ## Dependency support Zarr has endorsed [Scientific-Python SPEC 0](https://scientific-python.org/specs/spec-0000/) and now follows the version support window as outlined below: - Python: 36 months after initial release - Core package dependencies (e.g. NumPy): 24 months after initial release ## Development To install the latest development version of Zarr, see the contributing guide. zarr-python-3.1.5/docs/user-guide/performance.md000066400000000000000000000310001511007055700216340ustar00rootroot00000000000000# Optimizing performance ## Chunk optimizations ### Chunk size and shape In general, chunks of at least 1 megabyte (1M) uncompressed size seem to provide better performance, at least when using the Blosc compression library. The optimal chunk shape will depend on how you want to access the data. E.g., for a 2-dimensional array, if you only ever take slices along the first dimension, then chunk across the second dimension. If you know you want to chunk across an entire dimension you can use the full size of that dimension within the `chunks` argument, e.g.: ```python exec="true" session="performance" source="above" result="ansi" import zarr z1 = zarr.create_array(store={}, shape=(10000, 10000), chunks=(100, 10000), dtype='int32') print(z1.chunks) ``` Alternatively, if you only ever take slices along the second dimension, then chunk across the first dimension, e.g.: ```python exec="true" session="performance" source="above" result="ansi" z2 = zarr.create_array(store={}, shape=(10000, 10000), chunks=(10000, 100), dtype='int32') print(z2.chunks) ``` If you require reasonable performance for both access patterns then you need to find a compromise, e.g.: ```python exec="true" session="performance" source="above" result="ansi" z3 = zarr.create_array(store={}, shape=(10000, 10000), chunks=(1000, 1000), dtype='int32') print(z3.chunks) ``` If you are feeling lazy, you can let Zarr guess a chunk shape for your data by providing `chunks='auto'`, although please note that the algorithm for guessing a chunk shape is based on simple heuristics and may be far from optimal. 
E.g.: ```python exec="true" session="performance" source="above" result="ansi" z4 = zarr.create_array(store={}, shape=(10000, 10000), chunks='auto', dtype='int32') print(z4.chunks) ``` If you know you are always going to be loading the entire array into memory, you can turn off chunks by providing `chunks` equal to `shape`, in which case there will be one single chunk for the array: ```python exec="true" session="performance" source="above" result="ansi" z5 = zarr.create_array(store={}, shape=(10000, 10000), chunks=(10000, 10000), dtype='int32') print(z5.chunks) ``` ### Sharding If you have large arrays but need small chunks to efficiently access the data, you can use sharding. Sharding provides a mechanism to store multiple chunks in a single storage object or file. This can be useful because traditional file systems and object storage systems may have performance issues storing and accessing many files. Additionally, small files can be inefficient to store if they are smaller than the block size of the file system. Picking a good combination of chunk shape and shard shape is important for performance. The chunk shape determines what unit of your data can be read independently, while the shard shape determines what unit of your data can be written efficiently. For an example, consider you have a 100 GB array and need to read small chunks of 1 MB. Without sharding, each chunk would be one file resulting in 100,000 files. That can already cause performance issues on some file systems. With sharding, you could use a shard size of 1 GB. This would result in 1000 chunks per file and 100 files in total, which seems manageable for most storage systems. You would still be able to read each 1 MB chunk independently, but you would need to write your data in 1 GB increments. To use sharding, you need to specify the `shards` parameter when creating the array. ```python exec="true" session="performance" source="above" result="ansi" z6 = zarr.create_array(store={}, shape=(10000, 10000, 1000), shards=(1000, 1000, 1000), chunks=(100, 100, 100), dtype='uint8') print(z6.info) ``` `shards` can be `"auto"` as well, in which case the `array.target_shard_size_bytes` setting can be used to control the size of shards (i.e., the size of the shard will be as close to without being bigger than `target_shard_size_bytes`); otherwise, a default is used. ### Chunk memory layout The order of bytes **within each chunk** of an array can be changed via the `order` config option, to use either C or Fortran layout. For multi-dimensional arrays, these two layouts may provide different compression ratios, depending on the correlation structure within the data. E.g.: ```python exec="true" session="performance" source="above" result="ansi" import numpy as np a = np.arange(100000000, dtype='int32').reshape(10000, 10000).T c = zarr.create_array(store={}, shape=a.shape, chunks=(1000, 1000), dtype=a.dtype, config={'order': 'C'}) c[:] = a print(c.info_complete()) ``` ```python exec="true" session="performance" source="above" result="ansi" with zarr.config.set({'array.order': 'F'}): f = zarr.create_array(store={}, shape=a.shape, chunks=(1000, 1000), dtype=a.dtype) f[:] = a print(f.info_complete()) ``` In the above example, Fortran order gives a better compression ratio. 
This is an artificial example but illustrates the general point that changing the order of bytes within chunks of an array may improve the compression ratio, depending on the structure of the data, the compression algorithm used, and which compression filters (e.g., byte-shuffle) have been applied. ### Empty chunks It is possible to configure how Zarr handles the storage of chunks that are "empty" (i.e., every element in the chunk is equal to the array's fill value). When creating an array with `write_empty_chunks=False`, Zarr will check whether a chunk is empty before compression and storage. If a chunk is empty, then Zarr does not store it, and instead deletes the chunk from storage if the chunk had been previously stored. This optimization prevents storing redundant objects and can speed up reads, but the cost is added computation during array writes, since the contents of each chunk must be compared to the fill value, and these advantages are contingent on the content of the array. If you know that your data will form chunks that are almost always non-empty, then there is no advantage to the optimization described above. In this case, creating an array with `write_empty_chunks=True` (the default) will instruct Zarr to write every chunk without checking for emptiness. The following example illustrates the effect of the `write_empty_chunks` flag on the time required to write an array with different values.: ```python exec="true" session="performance" source="above" result="ansi" import zarr import numpy as np import time def timed_write(write_empty_chunks): """ Measure the time required and number of objects created when writing to a Zarr array with random ints or fill value. """ chunks = (8192,) shape = (chunks[0] * 1024,) data = np.random.randint(0, 255, shape) dtype = 'uint8' arr = zarr.create_array( f'data/example-{write_empty_chunks}.zarr', shape=shape, chunks=chunks, dtype=dtype, fill_value=0, config={'write_empty_chunks': write_empty_chunks} ) # initialize all chunks arr[:] = 100 result = [] for value in (data, arr.fill_value): start = time.time() arr[:] = value elapsed = time.time() - start result.append((elapsed, arr.nchunks_initialized)) return result # log results for write_empty_chunks in (True, False): full, empty = timed_write(write_empty_chunks) print(f'\nwrite_empty_chunks={write_empty_chunks}:\n\tRandom Data: {full[0]:.4f}s, {full[1]} objects stored\n\t Empty Data: {empty[0]:.4f}s, {empty[1]} objects stored\n') ``` In this example, writing random data is slightly slower with `write_empty_chunks=True`, but writing empty data is substantially faster and generates far fewer objects in storage. ### Changing chunk shapes (rechunking) Coming soon. ## Parallel computing and synchronization Zarr is designed to support parallel computing and enables concurrent reads and writes to arrays. This section covers how to optimize Zarr's concurrency settings for different parallel computing scenarios. ### Concurrent I/O operations Zarr uses asynchronous I/O internally to enable concurrent reads and writes across multiple chunks. The level of concurrency is controlled by the `async.concurrency` configuration setting, which determines the maximum number of concurrent I/O operations. The default value is 10, which is a conservative value. You may get improved performance by tuning the concurrency limit. 
You can adjust this value based on your specific needs: ```python import zarr # Set concurrency for the current session zarr.config.set({'async.concurrency': 128}) # Or use environment variable # export ZARR_ASYNC_CONCURRENCY=128 ``` Higher concurrency values can improve throughput when: - Working with remote storage (e.g., S3, GCS) where network latency is high - Reading/writing many small chunks in parallel - The storage backend can handle many concurrent requests Lower concurrency values may be beneficial when: - Working with local storage with limited I/O bandwidth - Memory is constrained (each concurrent operation requires buffer space) - Using Zarr within a parallel computing framework (see below) ### Using Zarr with Dask [Dask](https://www.dask.org/) is a popular parallel computing library that works well with Zarr for processing large arrays. When using Zarr with Dask, it's important to consider the interaction between Dask's thread pool and Zarr's concurrency settings. **Important**: When using many Dask threads, you may need to reduce both Zarr's `async.concurrency` and `threading.max_workers` settings to avoid creating too many concurrent operations. The total number of concurrent I/O operations can be roughly estimated as: ``` total_concurrency ≈ dask_threads × zarr_async_concurrency ``` For example, if you're running Dask with 10 threads and Zarr's default concurrency of 64, you could potentially have up to 640 concurrent operations, which may overwhelm your storage system or cause memory issues. **Recommendation**: When using Dask with many threads, configure Zarr's concurrency settings: ```python import zarr import dask.array as da # If using Dask with many threads (e.g., 8-16), reduce Zarr's concurrency settings zarr.config.set({ 'async.concurrency': 4, # Limit concurrent async operations 'threading.max_workers': 4, # Limit Zarr's internal thread pool }) # Open Zarr array z = zarr.open_array('data/large_array.zarr', mode='r') # Create Dask array from Zarr array arr = da.from_array(z, chunks=z.chunks) # Process with Dask result = arr.mean(axis=0).compute() ``` **Configuration guidelines for Dask workloads**: - `async.concurrency`: Controls the maximum number of concurrent async I/O operations. Start with a lower value (e.g., 4-8) when using many Dask threads. - `threading.max_workers`: Controls Zarr's internal thread pool size for blocking operations (defaults to CPU count). Reduce this to avoid thread contention with Dask's scheduler. You may need to experiment with different values to find the optimal balance for your workload. Monitor your system's resource usage and adjust these settings based on whether your storage system or CPU is the bottleneck. ### Thread safety and process safety Zarr arrays are designed to be thread-safe for concurrent reads and writes from multiple threads within the same process. However, proper synchronization is required when writing to overlapping regions from multiple threads. For multi-process parallelism, Zarr provides safe concurrent writes as long as: - Different processes write to different chunks - The storage backend supports atomic writes (most do) When writing to the same chunks from multiple processes, you should use external synchronization mechanisms or ensure that writes are coordinated to avoid race conditions. ## Pickle support Zarr arrays and groups can be pickled, as long as the underlying store object can be pickled. 
With the exception of the `zarr.storage.MemoryStore`, any of the storage classes provided in the `zarr.storage` module can be pickled. If an array or group is backed by a persistent store such as the a `zarr.storage.LocalStore`, `zarr.storage.ZipStore` or `zarr.storage.FsspecStore` then the store data **are not** pickled. The only thing that is pickled is the necessary parameters to allow the store to re-open any underlying files or databases upon being unpickled. E.g., pickle/unpickle a local store array: ```python exec="true" session="performance" source="above" result="ansi" import pickle data = np.arange(100000) z1 = zarr.create_array(store='data/perf-example-2.zarr', shape=data.shape, chunks=data.shape, dtype=data.dtype) z1[:] = data s = pickle.dumps(z1) z2 = pickle.loads(s) assert z1 == z2 print(np.all(z1[:] == z2[:])) ``` ## Configuring Blosc Coming soon. zarr-python-3.1.5/docs/user-guide/storage.md000066400000000000000000000167501511007055700210160ustar00rootroot00000000000000# Storage guide Zarr-Python supports multiple storage backends, including: local file systems, Zip files, remote stores via [fsspec](https://filesystem-spec.readthedocs.io) (S3, HTTP, etc.), and in-memory stores. In Zarr-Python 3, stores must implement the abstract store API from [`zarr.abc.store.Store`][]. !!! note Unlike Zarr-Python 2 where the store interface was built around a generic `MutableMapping` API, Zarr-Python 3 utilizes a custom store API that utilizes Python's AsyncIO library. ## Implicit Store Creation In most cases, it is not required to create a `Store` object explicitly. Passing a string (or other [StoreLike value](#storelike)) to Zarr's top level API will result in the store being created automatically: ```python exec="true" session="storage" source="above" result="ansi" import zarr # Implicitly create a writable LocalStore group = zarr.create_group(store='data/foo/bar') print(group) ``` ```python exec="true" session="storage" source="above" result="ansi" # Implicitly create a read-only FsspecStore # Note: requires s3fs to be installed group = zarr.open_group( store='s3://noaa-nwm-retro-v2-zarr-pds', mode='r', storage_options={'anon': True} ) print(group) ``` ```python exec="true" session="storage" source="above" result="ansi" # Implicitly creates a MemoryStore data = {} group = zarr.create_group(store=data) print(group) ``` [](){#user-guide-store-like} ### StoreLike `StoreLike` values can be: - a `Path` or string indicating a location on the local file system. 
This will create a [local store](#local-store): ```python exec="true" session="storage" source="above" result="ansi" group = zarr.open_group(store='data/foo/bar') print(group) ``` ```python exec="true" session="storage" source="above" result="ansi" from pathlib import Path group = zarr.open_group(store=Path('data/foo/bar')) print(group) ``` - an FSSpec URI string, indicating a [remote store](#remote-store) location: ```python exec="true" session="storage" source="above" result="ansi" # Note: requires s3fs to be installed group = zarr.open_group( store='s3://noaa-nwm-retro-v2-zarr-pds', mode='r', storage_options={'anon': True} ) print(group) ``` - an empty dictionary or None, which will create a new [memory store](#memory-store): ```python exec="true" session="storage" source="above" result="ansi" group = zarr.create_group(store={}) print(group) ``` ```python exec="true" session="storage" source="above" result="ansi" group = zarr.create_group(store=None) print(group) ``` - a dictionary of string to [`Buffer`][zarr.abc.buffer.Buffer] mappings. This will create a [memory store](#memory-store), using this dictionary as the [`store_dict` argument][zarr.storage.MemoryStore]. - an FSSpec [FSMap object](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.FSMap), which will create an [FsspecStore](#remote-store). - a [`Store`][zarr.abc.store.Store] or [`StorePath`][zarr.storage.StorePath] - see explicit store creation below. ## Explicit Store Creation In some cases, it may be helpful to create a store instance directly. Zarr-Python offers four built-in store: [`zarr.storage.LocalStore`][], [`zarr.storage.FsspecStore`][], [`zarr.storage.ZipStore`][], [`zarr.storage.MemoryStore`][], and [`zarr.storage.ObjectStore`][]. ### Local Store The [`zarr.storage.LocalStore`][] stores data in a nested set of directories on a local filesystem: ```python exec="true" session="storage" source="above" result="ansi" store = zarr.storage.LocalStore('data/foo/bar', read_only=True) group = zarr.open_group(store=store, mode='r') print(group) ``` ### Zip Store The [`zarr.storage.ZipStore`][] stores the contents of a Zarr hierarchy in a single Zip file. The [Zip Store specification](https://github.com/zarr-developers/zarr-specs/pull/311) is currently in draft form: ```python exec="true" session="storage" source="above" result="ansi" store = zarr.storage.ZipStore('data.zip', mode='w') array = zarr.create_array(store=store, shape=(2,), dtype='float64') print(array) ``` ### Remote Store The [`zarr.storage.FsspecStore`][] stores the contents of a Zarr hierarchy in following the same logical layout as the [`LocalStore`][zarr.storage.LocalStore], except the store is assumed to be on a remote storage system such as cloud object storage (e.g. AWS S3, Google Cloud Storage, Azure Blob Store). The [`zarr.storage.FsspecStore`][] is backed by [fsspec](https://filesystem-spec.readthedocs.io) and can support any backend that implements the [AbstractFileSystem](https://filesystem-spec.readthedocs.io/en/stable/api.html#fsspec.spec.AbstractFileSystem) API. `storage_options` can be used to configure the fsspec backend: ```python exec="true" session="storage" source="above" result="ansi" # Note: requires s3fs to be installed store = zarr.storage.FsspecStore.from_url( 's3://noaa-nwm-retro-v2-zarr-pds', read_only=True, storage_options={'anon': True} ) group = zarr.open_group(store=store, mode='r') print(group) ``` The type of filesystem (e.g. S3, https, etc..) is inferred from the scheme of the url (e.g. 
s3 for "**s3**://noaa-nwm-retro-v2-zarr-pds"). In case a specific filesystem is needed, one can explicitly create it. For example to create a S3 filesystem: ```python exec="true" session="storage" source="above" result="ansi" # Note: requires s3fs to be installed import fsspec fs = fsspec.filesystem( 's3', anon=True, asynchronous=True, client_kwargs={'endpoint_url': "https://noaa-nwm-retro-v2-zarr-pds.s3.amazonaws.com"} ) store = zarr.storage.FsspecStore(fs) print(store) ``` ### Memory Store The [`zarr.storage.MemoryStore`][] a in-memory store that allows for serialization of Zarr data (metadata and chunks) to a dictionary: ```python exec="true" session="storage" source="above" result="ansi" data = {} store = zarr.storage.MemoryStore(data) array = zarr.create_array(store=store, shape=(2,), dtype='float64') print(array) ``` ### Object Store [`zarr.storage.ObjectStore`][] stores the contents of the Zarr hierarchy using any ObjectStore [storage implementation](https://developmentseed.org/obstore/latest/api/store/), including AWS S3 ([`obstore.store.S3Store`][]), Google Cloud Storage ([`obstore.store.GCSStore`][]), and Azure Blob Storage ([`obstore.store.AzureStore`][]). This store is backed by [obstore](https://developmentseed.org/obstore/latest/), which builds on the production quality Rust library [object_store](https://docs.rs/object_store/latest/object_store/). ```python exec="true" session="storage" source="above" result="ansi" from zarr.storage import ObjectStore from obstore.store import MemoryStore store = ObjectStore(MemoryStore()) array = zarr.create_array(store=store, shape=(2,), dtype='float64') print(array) ``` Here's an example of using ObjectStore for accessing remote data: ```python exec="true" session="storage" source="above" result="ansi" from zarr.storage import ObjectStore from obstore.store import S3Store s3_store = S3Store('noaa-nwm-retro-v2-zarr-pds', skip_signature=True, region="us-west-2") store = zarr.storage.ObjectStore(store=s3_store, read_only=True) group = zarr.open_group(store=store, mode='r') print(group.info) ``` !!! warning The [`zarr.storage.ObjectStore`][] class is experimental. ## Developing custom stores Zarr-Python [`zarr.abc.store.Store`][] API is meant to be extended. The Store Abstract Base Class includes all of the methods needed to be a fully operational store in Zarr Python. Zarr also provides a test harness for custom stores: [`zarr.testing.store.StoreTests`][]. zarr-python-3.1.5/docs/user-guide/v3_migration.md000066400000000000000000000253161511007055700217510ustar00rootroot00000000000000# 3.0 Migration Guide Zarr-Python 3 represents a major refactor of the Zarr-Python codebase. Some of the goals motivating this refactor included: * adding support for the Zarr format 3 specification (along with the Zarr format 2 specification) * cleaning up internal and user facing APIs * improving performance (particularly in high latency storage environments like cloud object stores) To accommodate this, Zarr-Python 3 introduces a number of changes to the API, including a number of significant breaking changes and deprecations. This page provides a guide explaining breaking changes and deprecations to help you migrate your code from version 2 to version 3. If we have missed anything, please open a [GitHub issue](https://github.com/zarr-developers/zarr-python/issues/new) so we can improve this guide. 
## Compatibility target

The goals described above necessitated some breaking changes to the API (hence the major version update), but where possible we have maintained backwards compatibility in the most widely used parts of the API. This includes the [`zarr.Array`][] and [`zarr.Group`][] classes and the "top-level API" (e.g. [`zarr.open_array`][] and [`zarr.open_group`][]).

## Getting ready for 3.0

Before migrating to Zarr-Python 3, we suggest projects that depend on Zarr-Python take the following actions in order:

1. Pin the supported Zarr-Python version to `zarr>=2,<3`. This is a best practice and will protect your users from any incompatibilities that may arise during the release of Zarr-Python 3. This pin can be removed after migrating to Zarr-Python 3.
2. Limit your imports from the Zarr-Python package. Most of the primary API `zarr.*` will be compatible in Zarr-Python 3. However, the following breaking API changes are planned:

    - `numcodecs.*` will no longer be available in `zarr.*`. To migrate, import codecs directly from `numcodecs`:

      ```python
      from numcodecs import Blosc
      # instead of:
      # from zarr import Blosc
      ```

    - The `zarr.v3_api_available` feature flag is being removed. In Zarr-Python 3 the v3 API is always available, so you shouldn't need to use this flag.
    - The following internal modules are being removed or significantly changed. If your application relies on imports from any of the below modules, you will need to either a) modify your application to no longer rely on these imports or b) vendor the parts of the specific modules that you need.

        * `zarr.attrs` has gone, with no replacement
        * `zarr.codecs` has changed, see "Codecs" section below for more information
        * `zarr.context` has gone, with no replacement
        * `zarr.core` remains but should be considered private API
        * `zarr.hierarchy` has gone, with no replacement (use `zarr.Group` in place of `zarr.hierarchy.Group`)
        * `zarr.indexing` has gone, with no replacement
        * `zarr.meta` has gone, with no replacement
        * `zarr.meta_v1` has gone, with no replacement
        * `zarr.sync` has gone, with no replacement
        * `zarr.types` has gone, with no replacement
        * `zarr.util` has gone, with no replacement
        * `zarr.n5` has gone, see below for alternative N5 options

3. Test that your package works with version 3.
4. Update the pin to include `zarr>=3,<4`.

## Zarr-Python 2 support window

Zarr-Python 2.x is still available, though we recommend migrating to Zarr-Python 3 for its performance improvements and new features. Security and bug fixes will be made to the 2.x series for at least six months following the first Zarr-Python 3 release.

If you need to use the latest Zarr-Python 2 release, you can install it with:

```console
$ pip install "zarr==2.*"
```

!!! note
    Development and maintenance of the 2.x release series has moved to the [support/v2](https://github.com/zarr-developers/zarr-python/tree/support/v2) branch. Issues and pull requests related to this branch are tagged with the [V2](https://github.com/zarr-developers/zarr-python/labels/V2) label.

## Migrating to Zarr-Python 3

The following sections provide details on breaking changes in Zarr-Python 3.

### The Array class

1. Disallow direct construction - the signature for initializing the `Array` class has changed significantly. Please use [`zarr.create_array`][] or [`zarr.open_array`][] instead of directly constructing the [`zarr.Array`][] class.
2. Defaulting to `zarr_format=3` - newly created arrays will use version 3 of the Zarr specification.
To continue using version 2, set `zarr_format=2` when creating arrays or set `default_zarr_version=2` in Zarr's runtime configuration. 3. Function signature change to [`zarr.Array.resize`][] - the `resize` function now takes a `zarr.core.common.ShapeLike` input rather than separate arguments for each dimension. Use `resize((10,10))` in place of `resize(10,10)`. ### The Group class 1. Disallow direct construction - use [`zarr.open_group`][] or [`zarr.create_group`][] instead of directly constructing the `zarr.Group` class. 2. Most of the h5py compatibility methods are deprecated and will issue warnings if used. The following functions are drop in replacements that have the same signature and functionality: - Use [`zarr.Group.create_array`][] in place of `zarr.Group.create_dataset` - Use [`zarr.Group.require_array`][] in place of `zarr.Group.require_dataset` 3. Disallow "." syntax for getting group members. To get a member of a group named `foo`, use `group["foo"]` in place of `group.foo`. ### The Store class The Store API has changed significant in Zarr-Python 3. #### The base store class The `MutableMapping` base class has been replaced in favor of a custom abstract base class ([`zarr.abc.store.Store`][]). An asynchronous interface is used for all store methods that use I/O. This change ensures that these store methods are non-blocking and are as performant as possible. #### Store implementations Store implementations have moved from the top-level module to `zarr.storage`: ```diff title="Store import changes from v2 to v3" # Before (v2) - from zarr import MemoryStore + from zarr.storage import MemoryStore ``` The following stores have been renamed or changed: | v2 | v3 | |------------------------|------------------------------------| | `DirectoryStore` | [`zarr.storage.LocalStore`][] | | `FSStore` | [`zarr.storage.FsspecStore`][] | | `TempStore` | Use [`tempfile.TemporaryDirectory`][] with [`LocalStore`][zarr.storage.LocalStore] | | `zarr. A number of deprecated stores were also removed. See issue #1274 for more details on the removal of these stores. - `N5Store` - see https://github.com/zarr-developers/n5py for an alternative interface to N5 formatted data. - `ABSStore` - use the [`zarr.storage.FsspecStore`][] instead along with fsspec's [adlfs backend](https://github.com/fsspec/adlfs). - `DBMStore` - `LMDBStore` - `SQLiteStore` - `MongoDBStore` - `RedisStore` The latter five stores in this list do not have an equivalent in Zarr-Python 3. If you are interested in developing a custom store that targets these backends, see [developing custom stores](storage.md/#developing-custom-stores) or open an [issue](https://github.com/zarr-developers/zarr-python/issues) to discuss your use case. ### Codecs Codecs defined in ``numcodecs`` (and also imported into the ``zarr.codecs`` namespace in Zarr-Python 2) should still be used when creating Zarr format 2 arrays. Codecs for creating Zarr format 3 arrays are available in two locations: - `zarr.codecs` contains Zarr format 3 codecs that are defined in the [codecs section of the Zarr format 3 specification](https://zarr-specs.readthedocs.io/en/latest/v3/codecs/index.html). - `numcodecs.zarr3` contains codecs from `numcodecs` that can be used to create Zarr format 3 arrays, but are not necessarily part of the Zarr format 3 specification. ### Dependencies When installing using `pip`: - The new `remote` dependency group can be used to install a supported version of `fsspec`, required for remote data access. 
- The new `gpu` dependency group can be used to install a supported version of `cuda`, required for GPU functionality. - The `jupyter` optional dependency group has been removed, since v3 contains no jupyter specific functionality. ### Miscellaneous - The keyword argument `zarr_version` available in most creation functions in `zarr` (e.g. [`zarr.create`][], [`zarr.open`][], [`zarr.group`][], [`zarr.array`][]) has been deprecated in favor of `zarr_format`. ## 🚧 Work in Progress 🚧 Zarr-Python 3 is still under active development, and is not yet fully complete. The following list summarizes areas of the codebase that we expect to build out after the 3.0.0 release. If features listed below are important to your use case of Zarr-Python, please open (or comment on) a [GitHub issue](https://github.com/zarr-developers/zarr-python/issues/new). - The following functions / methods have not been ported to Zarr-Python 3 yet: * `zarr.copy` ([issue #2407](https://github.com/zarr-developers/zarr-python/issues/2407)) * `zarr.copy_all` ([issue #2407](https://github.com/zarr-developers/zarr-python/issues/2407)) * `zarr.copy_store` ([issue #2407](https://github.com/zarr-developers/zarr-python/issues/2407)) * `zarr.Group.move` ([issue #2108](https://github.com/zarr-developers/zarr-python/issues/2108)) - The following features (corresponding to function arguments to functions in `zarr`) have not been ported to Zarr-Python 3 yet. Using these features will raise a warning or a `NotImplementedError`: * `cache_attrs` * `cache_metadata` * `chunk_store` ([issue #2495](https://github.com/zarr-developers/zarr-python/issues/2495)) * `meta_array` * `object_codec` ([issue #2617](https://github.com/zarr-developers/zarr-python/issues/2617)) * `synchronizer` ([issue #1596](https://github.com/zarr-developers/zarr-python/issues/1596)) * `dimension_separator` - The following features that were supported by Zarr-Python 2 have not been ported to Zarr-Python 3 yet: * Structured arrays / dtypes ([issue #2134](https://github.com/zarr-developers/zarr-python/issues/2134)) * Fixed-length string dtypes ([issue #2347](https://github.com/zarr-developers/zarr-python/issues/2347)) * Datetime and timedelta dtypes ([issue #2616](https://github.com/zarr-developers/zarr-python/issues/2616)) * Object dtypes ([issue #2616](https://github.com/zarr-developers/zarr-python/issues/2616)) * Ragged arrays ([issue #2618](https://github.com/zarr-developers/zarr-python/issues/2618)) * Groups and Arrays do not implement `__enter__` and `__exit__` protocols ([issue #2619](https://github.com/zarr-developers/zarr-python/issues/2619)) * Default filters for object dtypes for Zarr format 2 arrays ([issue #2627](https://github.com/zarr-developers/zarr-python/issues/2627)) zarr-python-3.1.5/examples/000077500000000000000000000000001511007055700156345ustar00rootroot00000000000000zarr-python-3.1.5/examples/README.md000066400000000000000000000023411511007055700171130ustar00rootroot00000000000000# Zarr Python Examples This directory contains complete, runnable examples demonstrating various features and use cases of Zarr Python. ## Directory Structure Each example is organized in its own subdirectory with the following structure: ``` examples/ ├── example_name/ │ ├── README.md # Documentation for the example │ └── example_name.py # Python source code └── ... ``` ## Adding New Examples To add a new example: 1. Create a new subdirectory: `examples/my_example/` 2. Add your Python code: `examples/my_example/my_example.py` 3. 
Create documentation: `examples/my_example/README.md` 4. Create a documentation page at `docs/user-guide/examples/my_example.md`. The documentation page should simply link to the `README.md` and the source code, e.g.: ```` # docs/user-guide/examples/my_example.md --8<-- "examples/my_example/README.md" ## Source Code ```python --8<-- "examples/my_example/my_example.py" ``` ```` 5. Update `mkdocs.yml` to include the new example in the navigation. ### Example README.md Format Your README.md should include: - A title (`# Example Name`) - Description of what the example demonstrates - Instructions for running the example zarr-python-3.1.5/examples/custom_dtype/000077500000000000000000000000001511007055700203535ustar00rootroot00000000000000zarr-python-3.1.5/examples/custom_dtype/README.md000066400000000000000000000011451511007055700216330ustar00rootroot00000000000000# Custom Data Type Example This example demonstrates how to extend Zarr Python by defining a new data type. The example shows how to: - Define a custom `ZDType` class for the `int2` data type from [`ml_dtypes`](https://pypi.org/project/ml-dtypes/) - Implement all required methods for serialization and deserialization - Register the custom data type with Zarr's registry - Create and use arrays with the custom data type in both Zarr v2 and v3 formats ## Running the Example ```bash python examples/custom_dtype/custom_dtype.py ``` Or run with uv: ```bash uv run examples/custom_dtype/custom_dtype.py ``` zarr-python-3.1.5/examples/custom_dtype/custom_dtype.py000066400000000000000000000214601511007055700234470ustar00rootroot00000000000000# /// script # requires-python = ">=3.11" # dependencies = [ # "zarr @ git+https://github.com/zarr-developers/zarr-python.git@main", # "ml_dtypes==0.5.1", # "pytest==8.4.1" # ] # /// # """ Demonstrate how to extend Zarr Python by defining a new data type """ import json import sys from pathlib import Path from typing import ClassVar, Literal, Self, TypeGuard, overload import ml_dtypes # necessary to add extra dtypes to NumPy import numpy as np import pytest import zarr from zarr.core.common import JSON, ZarrFormat from zarr.core.dtype import ZDType, data_type_registry from zarr.core.dtype.common import ( DataTypeValidationError, DTypeConfig_V2, DTypeJSON, check_dtype_spec_v2, ) # This is the int2 array data type int2_dtype_cls = type(np.dtype("int2")) # This is the int2 scalar type int2_scalar_cls = ml_dtypes.int2 class Int2(ZDType[int2_dtype_cls, int2_scalar_cls]): """ This class provides a Zarr compatibility layer around the int2 data type (the ``dtype`` of a NumPy array of type int2) and the int2 scalar type (the ``dtype`` of the scalar value inside an int2 array). """ # This field is as the key for the data type in the internal data type registry, and also # as the identifier for the data type when serializaing the data type to disk for zarr v3 _zarr_v3_name: ClassVar[Literal["int2"]] = "int2" # this field will be used internally _zarr_v2_name: ClassVar[Literal["int2"]] = "int2" # we bind a class variable to the native data type class so we can create instances of it dtype_cls = int2_dtype_cls @classmethod def from_native_dtype(cls, dtype: np.dtype) -> Self: """Create an instance of this ZDType from a native dtype.""" if cls._check_native_dtype(dtype): return cls() raise DataTypeValidationError( f"Invalid data type: {dtype}. 
Expected an instance of {cls.dtype_cls}" ) def to_native_dtype(self: Self) -> int2_dtype_cls: """Create an int2 dtype instance from this ZDType""" return self.dtype_cls() @classmethod def _check_json_v2(cls, data: DTypeJSON) -> TypeGuard[DTypeConfig_V2[Literal["|b1"], None]]: """ Type check for Zarr v2-flavored JSON. This will check that the input is a dict like this: .. code-block:: json { "name": "int2", "object_codec_id": None } Note that this representation differs from the ``dtype`` field looks like in zarr v2 metadata. Specifically, whatever goes into the ``dtype`` field in metadata is assigned to the ``name`` field here. See the Zarr docs for more information about the JSON encoding for data types. """ return ( check_dtype_spec_v2(data) and data["name"] == "int2" and data["object_codec_id"] is None ) @classmethod def _check_json_v3(cls, data: DTypeJSON) -> TypeGuard[Literal["int2"]]: """ Type check for Zarr V3-flavored JSON. Checks that the input is the string "int2". """ return data == cls._zarr_v3_name @classmethod def _from_json_v2(cls, data: DTypeJSON) -> Self: """ Create an instance of this ZDType from Zarr V3-flavored JSON. """ if cls._check_json_v2(data): return cls() # This first does a type check on the input, and if that passes we create an instance of the ZDType. msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected the string {cls._zarr_v2_name!r}" raise DataTypeValidationError(msg) @classmethod def _from_json_v3(cls: type[Self], data: DTypeJSON) -> Self: """ Create an instance of this ZDType from Zarr V3-flavored JSON. This first does a type check on the input, and if that passes we create an instance of the ZDType. """ if cls._check_json_v3(data): return cls() msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected the string {cls._zarr_v3_name!r}" raise DataTypeValidationError(msg) @overload # type: ignore[override] def to_json(self, zarr_format: Literal[2]) -> DTypeConfig_V2[Literal["int2"], None]: ... @overload def to_json(self, zarr_format: Literal[3]) -> Literal["int2"]: ... def to_json( self, zarr_format: ZarrFormat ) -> DTypeConfig_V2[Literal["int2"], None] | Literal["int2"]: """ Serialize this ZDType to v2- or v3-flavored JSON If the zarr_format is 2, then return a dict like this: .. code-block:: json { "name": "int2", "object_codec_id": None } If the zarr_format is 3, then return the string "int2" """ if zarr_format == 2: return {"name": "int2", "object_codec_id": None} elif zarr_format == 3: return self._zarr_v3_name raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def _check_scalar(self, data: object) -> TypeGuard[int | ml_dtypes.int2]: """ Check if a python object is a valid int2-compatible scalar The strictness of this type check is an implementation degree of freedom. You could be strict here, and only accept int2 values, or be open and accept any integer or any object and rely on exceptions from the int2 constructor that will be called in cast_scalar. """ return isinstance(data, (int, int2_scalar_cls)) def cast_scalar(self, data: object) -> ml_dtypes.int2: """ Attempt to cast a python object to an int2. We first perform a type check to ensure that the input type is appropriate, and if that passes we call the int2 scalar constructor. """ if self._check_scalar(data): return ml_dtypes.int2(data) msg = ( f"Cannot convert object {data!r} with type {type(data)} to a scalar compatible with the " f"data type {self}." 
) raise TypeError(msg) def default_scalar(self) -> ml_dtypes.int2: """ Get the default scalar value. This will be used when automatically selecting a fill value. """ return ml_dtypes.int2(0) def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> int: """ Convert a python object to a JSON representation of an int2 scalar. This is necessary for taking user input for the ``fill_value`` attribute in array metadata. In this implementation, we optimistically convert the input to an int, and then check that it lies in the acceptable range for this data type. """ # We could add a type check here, but we don't need to for this example val: int = int(data) # type: ignore[call-overload] if val not in (-2, -1, 0, 1): raise ValueError("Invalid value. Expected -2, -1, 0, or 1.") return val def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> ml_dtypes.int2: """ Read a JSON-serializable value as an int2 scalar. We first perform a type check to ensure that the JSON value is well-formed, then call the int2 scalar constructor. The base definition of this method requires that it take a zarr_format parameter because other data types serialize scalars differently in zarr v2 and v3, but we don't use this here. """ if self._check_scalar(data): return ml_dtypes.int2(data) raise TypeError(f"Invalid type: {data}. Expected an int.") # after defining dtype class, it must be registered with the data type registry so zarr can use it data_type_registry.register(Int2._zarr_v3_name, Int2) # this parametrized function will create arrays in zarr v2 and v3 using our new data type @pytest.mark.parametrize("zarr_format", [2, 3]) def test_custom_dtype(tmp_path: Path, zarr_format: Literal[2, 3]) -> None: # create array and write values z_w = zarr.create_array( store=tmp_path, shape=(4,), dtype="int2", zarr_format=zarr_format, compressors=None ) z_w[:] = [-1, -2, 0, 1] # open the array z_r = zarr.open_array(tmp_path, mode="r") print(z_r.info_complete()) # look at the array metadata if zarr_format == 2: meta_file = tmp_path / ".zarray" else: meta_file = tmp_path / "zarr.json" print(json.dumps(json.loads(meta_file.read_text()), indent=2)) if __name__ == "__main__": # Run the example with printed output, and a dummy pytest configuration file specified. # Without the dummy configuration file, at test time pytest will attempt to use the # configuration file in the project root, which will error because Zarr is using some # plugins that are not installed in this example. sys.exit(pytest.main(["-s", __file__, f"-c {__file__}"])) zarr-python-3.1.5/mkdocs.yml000066400000000000000000000213321511007055700160220ustar00rootroot00000000000000# Based on https://github.com/developmentseed/obspec/blob/main/mkdocs.yml site_name: zarr-python repo_name: zarr-developers/zarr-python repo_url: https://github.com/zarr-developers/zarr-python site_description: An implementation of chunked, compressed, N-dimensional arrays for Python. 
site_author: Alistair Miles site_url: !ENV [READTHEDOCS_CANONICAL_URL, 'https://zarr.readthedocs.io/'] docs_dir: docs use_directory_urls: true nav: - "index.md" - "quick-start.md" - User Guide: - user-guide/index.md - user-guide/installation.md - user-guide/arrays.md - user-guide/groups.md - user-guide/attributes.md - user-guide/storage.md - user-guide/config.md - user-guide/cli.md - user-guide/v3_migration.md - user-guide/data_types.md - user-guide/performance.md - user-guide/extending.md - user-guide/gpu.md - user-guide/consolidated_metadata.md - user-guide/experimental.md - Examples: - user-guide/examples/custom_dtype.md - API Reference: - api/zarr/index.md - api/zarr/array.md - api/zarr/group.md - api/zarr/create.md - api/zarr/dtype.md - api/zarr/load.md - api/zarr/open.md - api/zarr/save.md - api/zarr/codecs.md - api/zarr/codecs/numcodecs.md - api/zarr/config.md - api/zarr/convenience.md - api/zarr/errors.md - api/zarr/metadata.md - api/zarr/registry.md - api/zarr/storage.md - ABC: - api/zarr/abc/index.md - api/zarr/abc/buffer.md - api/zarr/abc/codec.md - api/zarr/abc/numcodec.md - api/zarr/abc/metadata.md - api/zarr/abc/store.md - API: - api/zarr/api/index.md - api/zarr/api/asynchronous.md - api/zarr/api/synchronous.md - Buffer: - api/zarr/buffer/index.md - api/zarr/buffer/cpu.md - api/zarr/buffer/gpu.md - Testing: - api/zarr/testing/index.md - api/zarr/testing/buffer.md - api/zarr/testing/conftest.md - api/zarr/testing/stateful.md - api/zarr/testing/store.md - api/zarr/testing/strategies.md - api/zarr/testing/utils.md - deprecated: - Convenience sub-module: api/zarr/deprecated/convenience.md - Creation sub-module: api/zarr/deprecated/creation.md - release-notes.md - contributing.md watch: - src/zarr - docs theme: language: en name: material custom_dir: docs/overrides logo: _static/logo_bw.png palette: # Light mode - media: "(prefers-color-scheme: light)" scheme: default primary: custom accent: custom toggle: icon: material/brightness-7 name: Switch to dark mode # Dark mode - media: "(prefers-color-scheme: dark)" scheme: slate primary: custom accent: custom toggle: icon: material/brightness-4 name: Switch to light mode font: text: Roboto code: Roboto Mono features: - content.code.annotate - content.code.copy - navigation.indexes - navigation.instant - navigation.tracking - search.suggest - search.share extra: social: - icon: fontawesome/brands/mastodon link: https://fosstodon.org/@zarr - icon: fontawesome/brands/bluesky link: https://bsky.app/profile/zarr.dev extra_css: - overrides/stylesheets/extra.css plugins: - autorefs - search - markdown-exec - mkdocstrings: enable_inventory: true handlers: python: paths: [src/zarr] options: allow_inspection: true docstring_section_style: list docstring_style: numpy inherited_members: true line_length: 60 separate_signature: true show_root_heading: true show_signature_annotations: true show_source: true show_symbol_type_toc: true signature_crossrefs: true show_if_no_docstring: true extensions: - griffe_inherited_docstrings inventories: - https://docs.python.org/3/objects.inv - https://docs.xarray.dev/en/stable/objects.inv - https://numpy.org/doc/stable/objects.inv - https://numcodecs.readthedocs.io/en/stable/objects.inv - https://developmentseed.org/obstore/latest/objects.inv - https://filesystem-spec.readthedocs.io/en/latest/objects.inv - https://requests.readthedocs.io/en/latest/objects.inv - https://docs.aiohttp.org/en/stable/objects.inv - https://s3fs.readthedocs.io/en/latest/objects.inv - https://docs.h5py.org/en/stable/objects.inv - 
https://icechunk.io/en/stable/objects.inv - https://lithops-cloud.github.io/docs/objects.inv - https://docs.dask.org/en/stable/objects.inv - redirects: redirect_maps: 'spec/index.md': 'https://zarr-specs.readthedocs.io' 'spec/v1.md': 'https://zarr-specs.readthedocs.io/en/latest/v1/v1.0.html' 'spec/v2.md': 'https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html' 'spec/v3.md': 'https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html' 'license.md': 'https://github.com/zarr-developers/zarr-python/blob/main/LICENSE.txt' 'genindex.html.md': 'index.md' 'py-modindex.html.md': 'index.md' 'search.html.md': 'index.md' 'tutorial.md': 'user-guide/installation.md' 'getting-started.md': 'quick-start.md' 'roadmap.md': 'https://zarr.readthedocs.io/en/v3.0.8/developers/roadmap.html' 'installation.md': 'user-guide/installation.md' 'release.md': 'release-notes.md' 'about.html.md': 'index.md' 'arrays.html.md': 'user-guide/arrays.md' 'attributes.html.md': 'user-guide/attributes.md' 'cli.html.md': 'user-guide/cli.md' 'config.html.md': 'user-guide/config.md' 'consolidated_metadata.html.md': 'user-guide/consolidated_metadata.md' 'data_types.html.md': 'user-guide/data_types.md' 'extending.html.md': 'user-guide/extending.md' 'gpu.html.md': 'user-guide/gpu.md' 'groups.html.md': 'user-guide/groups.md' 'installation.html.md': 'user-guide/installation.md' 'performance.html.md': 'user-guide/performance.md' 'quickstart.html.md': 'quick-start.md' 'release-notes.html.md': 'release-notes.md' 'storage.html.md': 'user-guide/storage.md' 'v3_migration.html.md': 'user-guide/v3_migration.md' 'user-guide/arrays.html.md': 'user-guide/arrays.md' 'user-guide/attributes.html.md': 'user-guide/attributes.md' 'user-guide/cli.html.md': 'user-guide/cli.md' 'user-guide/config.html.md': 'user-guide/config.md' 'user-guide/consolidated_metadata.html.md': 'user-guide/consolidated_metadata.md' 'user-guide/data_types.html.md': 'user-guide/data_types.md' 'user-guide/extending.html.md': 'user-guide/extending.md' 'user-guide/gpu.html.md': 'user-guide/gpu.md' 'user-guide/groups.html.md': 'user-guide/groups.md' 'user-guide/installation.html.md': 'user-guide/installation.md' 'user-guide/performance.html.md': 'user-guide/performance.md' 'user-guide/storage.html.md': 'user-guide/storage.md' 'user-guide/v3_migration.html.md': 'user-guide/v3_migration.md' 'developers/contributing.html.md': 'contributing.md' 'developers/index.html.md': 'contributing.md' 'developers/roadmap.html.md': 'https://zarr.readthedocs.io/en/v3.0.8/developers/roadmap.html' 'api/zarr/creation.md': 'api/zarr/deprecated/creation.md' 'api/zarr/codecs/numcodecs.md': 'api/zarr/deprecated/creation.md' 'api.md': 'api/zarr/index.md' 'api/zarr/metadata/migrate_v3.md': 'api/zarr/metadata.md' # Based on https://github.com/developmentseed/titiler/blob/50934c929cca2fa8d3c408d239015f8da429c6a8/docs/mkdocs.yml#L115-L140 markdown_extensions: - admonition - attr_list - codehilite: guess_lang: false - def_list - footnotes - md_in_html - pymdownx.arithmatex - pymdownx.betterem - pymdownx.caret: insert: false - pymdownx.details - pymdownx.escapeall: hardbreak: true nbsp: true - pymdownx.magiclink: hide_protocol: true repo_url_shortener: true - pymdownx.smartsymbols - pymdownx.superfences - pymdownx.tasklist: custom_checkbox: true - pymdownx.tilde - pymdownx.emoji: emoji_index: !!python/name:material.extensions.emoji.twemoji emoji_generator: !!python/name:material.extensions.emoji.to_svg - toc: permalink: true - pymdownx.highlight: anchor_linenums: true line_spans: __span pygments_lang_class: true - 
pymdownx.inlinehilite - pymdownx.snippets zarr-python-3.1.5/pyproject.toml000066400000000000000000000276441511007055700167470ustar00rootroot00000000000000[build-system] requires = ["hatchling>=1.27.0", "hatch-vcs"] build-backend = "hatchling.build" [tool.hatch.build.targets.sdist] exclude = [ "/.github", "/bench", "/docs", ] [project] name = "zarr" description = "An implementation of chunked, compressed, N-dimensional arrays for Python" readme = { file = "README.md", content-type = "text/markdown" } authors = [ { name = "Alistair Miles", email = "alimanfoo@googlemail.com" }, ] maintainers = [ { name = "Davis Bennett", email = "davis.v.bennett@gmail.com" }, { name = "jakirkham" }, { name = "Josh Moore", email = "josh@openmicroscopy.org" }, { name = "Joe Hamman", email = "joe@earthmover.io" }, { name = "Juan Nunez-Iglesias", email = "juan.nunez-iglesias@monash.edu" }, { name = "Martin Durant", email = "mdurant@anaconda.com" }, { name = "Norman Rzepka" }, { name = "Ryan Abernathey" }, { name = "David Stansby" }, { name = "Tom Augspurger", email = "tom.w.augspurger@gmail.com" }, { name = "Deepak Cherian" } ] requires-python = ">=3.11" # If you add a new dependency here, please also add it to .pre-commit-config.yml dependencies = [ 'packaging>=22.0', 'numpy>=1.26', 'numcodecs>=0.14', 'google-crc32c>=1.5', 'typing_extensions>=4.9', 'donfig>=0.8', ] dynamic = [ "version", ] classifiers = [ 'Development Status :: 6 - Mature', 'Intended Audience :: Developers', 'Intended Audience :: Information Technology', 'Intended Audience :: Science/Research', 'Programming Language :: Python', 'Topic :: Software Development :: Libraries :: Python Modules', 'Operating System :: Unix', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.11', 'Programming Language :: Python :: 3.12', 'Programming Language :: Python :: 3.13', ] license = "MIT" license-files = ["LICENSE.txt"] keywords = ["Python", "compressed", "ndimensional-arrays", "zarr"] [project.optional-dependencies] # User extras remote = [ "fsspec>=2023.10.0", "obstore>=0.5.1", ] gpu = [ "cupy-cuda12x", ] cli = ["typer"] # Development extras test = [ "coverage>=7.10", # Pin possibly due to https://github.com/pytest-dev/pytest-cov/issues/693 "pytest<8.4", "pytest-asyncio", "pytest-cov", "pytest-accept", "rich", "mypy", 'numpydoc', "hypothesis", "pytest-xdist", "packaging", "tomlkit", "uv", ] remote_tests = [ 'zarr[remote]', "botocore", "s3fs>=2023.10.0", "moto[s3,server]", "requests", ] optional = ["rich", "universal-pathlib"] docs = [ # Doc building "mkdocs-material[imaging]>=9.6.14", "mkdocs>=1.6.1", "mkdocstrings>=0.29.1", "mkdocstrings-python>=1.16.10", "mike>=2.1.3", "mkdocs-redirects>=1.2.0", "markdown-exec[ansi]", "griffe-inherited-docstrings", "ruff", # Changelog generation 'towncrier', # Optional dependencies to run examples 'numcodecs[msgpack]', 'rich', 's3fs>=2023.10.0', 'astroid<4', 'pytest' ] [project.scripts] zarr = "zarr._cli.cli:app" [project.urls] issues = "https://github.com/zarr-developers/zarr-python/issues" changelog = "https://zarr.readthedocs.io/en/stable/release-notes.html" Discussions = "https://github.com/zarr-developers/zarr-python/discussions" documentation = "https://zarr.readthedocs.io/" homepage = "https://github.com/zarr-developers/zarr-python" [dependency-groups] dev = [ "ipykernel>=6.29.5", "pip>=25.0.1", ] [tool.coverage.report] exclude_also = [ 'if TYPE_CHECKING:', ] [tool.coverage.run] omit = [ "bench/compress_normal.py", "src/zarr/testing/conftest.py", # only for downstream projects ] [tool.hatch] 
version.source = "vcs" [tool.hatch.build] hooks.vcs.version-file = "src/zarr/_version.py" [tool.hatch.envs.test] dependencies = [ "numpy~={matrix:numpy}", ] features = ["test"] [tool.hatch.envs.test.env-vars] # Required to test with a pytest plugin; see https://pytest-cov.readthedocs.io/en/latest/plugins.html COV_CORE_SOURCE = "src" COV_CORE_CONFIG = ".coveragerc" COV_CORE_DATAFILE = ".coverage.eager" [[tool.hatch.envs.test.matrix]] python = ["3.11", "3.12", "3.13"] numpy = ["1.26", "2.2"] deps = ["minimal", "optional"] [tool.hatch.envs.test.overrides] matrix.deps.dependencies = [ {value = "zarr[remote, remote_tests, test, optional, cli]", if = ["optional"]} ] [tool.hatch.envs.test.scripts] run-coverage = "pytest --cov-config=pyproject.toml --cov=src --cov-append --cov-report xml --junitxml=junit.xml -o junit_family=legacy" run-coverage-html = "pytest --cov-config=pyproject.toml --cov=src --cov-append --cov-report html" run-coverage-gpu = "pip install cupy-cuda12x && pytest -m gpu --cov-config=pyproject.toml --cov=src --cov-append --cov-report xml --junitxml=junit.xml -o junit_family=legacy" run = "run-coverage --no-cov" run-pytest = "run" run-verbose = "run-coverage --verbose" run-mypy = "mypy src" run-hypothesis = "run-coverage -nauto --run-slow-hypothesis tests/test_properties.py tests/test_store/test_stateful*" list-env = "pip list" [tool.hatch.envs.gputest] template = "test" dependencies = [ "numpy~={matrix:numpy}", "universal_pathlib", ] features = ["test", "gpu"] [[tool.hatch.envs.gputest.matrix]] python = ["3.11", "3.12", "3.13"] numpy = ["1.26", "2.2"] version = ["minimal"] [tool.hatch.envs.gputest.scripts] run-coverage = "pytest -m gpu --cov-config=pyproject.toml --cov=pkg --cov-report xml --cov=src --junitxml=junit.xml -o junit_family=legacy" run = "run-coverage --no-cov" run-verbose = "run-coverage --verbose" run-mypy = "mypy src" run-hypothesis = "run-coverage --hypothesis-profile ci --run-slow-hypothesis tests/test_properties.py tests/test_store/test_stateful*" list-env = "pip list" [tool.hatch.envs.upstream] template = 'test' python = "3.13" dependencies = [ 'packaging @ git+https://github.com/pypa/packaging', 'numpy', # from scientific-python-nightly-wheels 'numcodecs @ git+https://github.com/zarr-developers/numcodecs', 's3fs @ git+https://github.com/fsspec/s3fs', 'universal_pathlib @ git+https://github.com/fsspec/universal_pathlib', 'typing_extensions @ git+https://github.com/python/typing_extensions', 'donfig @ git+https://github.com/pytroll/donfig', 'obstore @ git+https://github.com/developmentseed/obstore@main#subdirectory=obstore', # test deps 'zarr[test]', ] [tool.hatch.envs.upstream.env-vars] PIP_INDEX_URL = "https://pypi.anaconda.org/scientific-python-nightly-wheels/simple/" PIP_EXTRA_INDEX_URL = "https://pypi.org/simple/" PIP_PRE = "1" [tool.hatch.envs.min_deps] description = """Test environment for minimum supported dependencies See Spec 0000 for details and drop schedule: https://scientific-python.org/specs/spec-0000/ """ template = "test" python = "3.11" dependencies = [ 'zarr[remote]', 'packaging==22.*', 'numpy==1.26.*', 'numcodecs==0.14.*', # 0.14 needed for zarr3 codecs 'fsspec==2023.10.0', 's3fs==2023.10.0', 'universal_pathlib==0.0.22', 'typing_extensions==4.9.*', 'donfig==0.8.*', 'obstore==0.5.*', # test deps 'zarr[test]', 'zarr[remote_tests]', ] [tool.hatch.envs.docs] features = ['docs', 'remote'] [tool.hatch.envs.docs.scripts] serve = "mkdocs serve --watch src" build = "mkdocs build" check = "mkdocs build --strict" readthedocs = "rm -rf 
$READTHEDOCS_OUTPUT/html && cp -r site $READTHEDOCS_OUTPUT/html" [tool.hatch.envs.doctest] description = "Test environment for validating executable code blocks in documentation" features = ['test', 'remote'] # Include remote dependencies for s3fs dependencies = [ "s3fs>=2023.10.0", "pytest", "pytest-examples", ] [tool.hatch.envs.doctest.scripts] test = "pytest tests/test_docs.py -v" list-env = "pip list" [tool.ruff] line-length = 100 force-exclude = true extend-exclude = [ ".bzr", ".direnv", ".eggs", ".git", ".mypy_cache", ".nox", ".pants.d", ".ruff_cache", ".venv", "__pypackages__", "_build", "buck-out", "build", "dist", "venv", "docs", "tests/test_regression/scripts/", # these are scripts that use a different version of python "src/zarr/v2/", "tests/v2/", ] [tool.ruff.lint] extend-select = [ "ANN", # flake8-annotations "B", # flake8-bugbear "C4", # flake8-comprehensions "EXE", # flake8-executable "FA", # flake8-future-annotations "FLY", # flynt "FURB", # refurb "G", # flake8-logging-format "I", # isort "ISC", # flake8-implicit-str-concat "LOG", # flake8-logging "PERF", # Perflint "PIE", # flake8-pie "PGH", # pygrep-hooks "PT", # flake8-pytest-style "PYI", # flake8-pyi "RET", # flake8-return "RSE", # flake8-raise "RUF", "SIM", # flake8-simplify "SLOT", # flake8-slots "TC", # flake8-type-checking "TRY", # tryceratops "UP", # pyupgrade "W", # pycodestyle warnings ] ignore = [ "ANN401", "PT011", # TODO: apply this rule "RET505", "RET506", "RUF005", "RUF043", "SIM108", "TRY003", # https://docs.astral.sh/ruff/formatter/#conflicting-lint-rules "W191", "E111", "E114", "E117", "D206", "D300", "Q000", "Q001", "Q002", "Q003", "COM812", "COM819", "TC006", ] [tool.ruff.lint.extend-per-file-ignores] "tests/**" = ["ANN001", "ANN201", "RUF029", "SIM117", "SIM300"] [tool.mypy] python_version = "3.11" ignore_missing_imports = true namespace_packages = false strict = true warn_unreachable = true enable_error_code = ["ignore-without-code", "redundant-expr", "truthy-bool"] [[tool.mypy.overrides]] module = [ "tests.package_with_entrypoint.*", "zarr.testing.stateful", "tests.test_codecs.test_transpose", "tests.test_config", "tests.test_store.test_zip", "tests.test_store.test_local", "tests.test_store.test_fsspec", "tests.test_store.test_memory", "tests.test_codecs.test_codecs", "tests.test_metadata.*", "tests.test_store.test_core", "tests.test_store.test_logging", "tests.test_store.test_object", "tests.test_store.test_stateful", "tests.test_store.test_wrapper", ] strict = false # TODO: Move the next modules up to the strict = false section # and fix the errors [[tool.mypy.overrides]] module = [ "tests.test_group", "tests.test_indexing", "tests.test_properties", "tests.test_sync", "tests.test_regression.scripts.*" ] ignore_errors = true [tool.pytest.ini_options] minversion = "7" testpaths = ["tests", "docs/user-guide"] log_cli_level = "INFO" xfail_strict = true asyncio_mode = "auto" asyncio_default_fixture_loop_scope = "function" doctest_optionflags = [ "NORMALIZE_WHITESPACE", "ELLIPSIS", "IGNORE_EXCEPTION_DETAIL", ] addopts = [ "--durations=10", "-ra", "--strict-config", "--strict-markers", ] filterwarnings = [ "error", "ignore:Unclosed client session None: """ Print version info for use in bug reports. 
""" import platform from importlib.metadata import version def print_packages(packages: list[str]) -> None: not_installed = [] for package in packages: try: print(f"{package}: {version(package)}") except ModuleNotFoundError: not_installed.append(package) if not_installed: print("\n**Not Installed:**") for package in not_installed: print(package) required = [ "packaging", "numpy", "numcodecs", "typing_extensions", "donfig", ] optional = [ "botocore", "cupy-cuda12x", "fsspec", "numcodecs", "s3fs", "gcsfs", "universal-pathlib", "rich", "obstore", ] print(f"platform: {platform.platform()}") print(f"python: {platform.python_version()}") print(f"zarr: {__version__}\n") print("**Required dependencies:**") print_packages(required) print("\n**Optional dependencies:**") print_packages(optional) # The decorator ensures this always returns the same handler (and it is only # attached once). @functools.cache def _ensure_handler() -> logging.Handler: """ The first time this function is called, attach a `StreamHandler` using the same format as `logging.basicConfig` to the Zarr-Python root logger. Return this handler every time this function is called. """ handler = logging.StreamHandler() handler.setFormatter(logging.Formatter(logging.BASIC_FORMAT)) _logger.addHandler(handler) return handler def set_log_level( level: Literal["NOTSET", "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], ) -> None: """Set the logging level for Zarr-Python. Zarr-Python uses the standard library `logging` framework under the root logger 'zarr'. This is a helper function to: - set Zarr-Python's root logger level - set the root logger handler's level, creating the handler if it does not exist yet Parameters ---------- level : str The logging level to set. """ _logger.setLevel(level) _ensure_handler().setLevel(level) def set_format(log_format: str) -> None: """Set the format of logging messages from Zarr-Python. Zarr-Python uses the standard library `logging` framework under the root logger 'zarr'. This sets the format of log messages from the root logger's StreamHandler. 
Parameters ---------- log_format : str A string determining the log format (as defined in the standard library's `logging` module for logging.Formatter) """ _ensure_handler().setFormatter(logging.Formatter(fmt=log_format)) __all__ = [ "Array", "AsyncArray", "AsyncGroup", "Group", "__version__", "array", "config", "consolidate_metadata", "copy", "copy_all", "copy_store", "create", "create_array", "create_group", "create_hierarchy", "empty", "empty_like", "from_array", "full", "full_like", "group", "load", "ones", "ones_like", "open", "open_array", "open_consolidated", "open_group", "open_like", "print_debug_info", "save", "save_array", "save_group", "tree", "zeros", "zeros_like", ] zarr-python-3.1.5/src/zarr/_cli/000077500000000000000000000000001511007055700164715ustar00rootroot00000000000000zarr-python-3.1.5/src/zarr/_cli/__init__.py000066400000000000000000000000001511007055700205700ustar00rootroot00000000000000zarr-python-3.1.5/src/zarr/_cli/cli.py000066400000000000000000000132051511007055700176130ustar00rootroot00000000000000import logging from enum import Enum from typing import Annotated, Literal, cast import typer import zarr import zarr.metadata.migrate_v3 as migrate_metadata from zarr.core.sync import sync from zarr.storage._common import make_store app = typer.Typer() logger = logging.getLogger(__name__) def _set_logging_level(*, verbose: bool) -> None: if verbose: lvl = "INFO" else: lvl = "WARNING" zarr.set_log_level(cast(Literal["INFO", "WARNING"], lvl)) zarr.set_format("%(message)s") class ZarrFormat(str, Enum): v2 = "v2" v3 = "v3" class ZarrFormatV3(str, Enum): """Limit CLI choice to only v3""" v3 = "v3" @app.command() # type: ignore[misc] def migrate( zarr_format: Annotated[ ZarrFormatV3, typer.Argument( help="Zarr format to migrate to. Currently only 'v3' is supported.", ), ], input_store: Annotated[ str, typer.Argument( help=( "Input Zarr to migrate - should be a store, path to directory in file system or name of zip file " "e.g. 'data/example-1.zarr', 's3://example-bucket/example'..." ) ), ], output_store: Annotated[ str | None, typer.Argument( help=( "Output location to write generated metadata (no array data will be copied). If not provided, " "metadata will be written to input_store. Should be a store, path to directory in file system " "or name of zip file e.g. 'data/example-1.zarr', 's3://example-bucket/example'..." ) ), ] = None, dry_run: Annotated[ bool, typer.Option( help="Enable a dry-run: files that would be converted are logged, but no new files are created or changed." ), ] = False, overwrite: Annotated[ bool, typer.Option( help="Remove any existing v3 metadata at the output location, before migration starts." ), ] = False, force: Annotated[ bool, typer.Option( help=( "Only used when --overwrite is given. Allows v3 metadata to be removed when no valid " "v2 metadata exists at the output location." ) ), ] = False, remove_v2_metadata: Annotated[ bool, typer.Option( help="Remove v2 metadata (if any) from the output location, after migration is complete." ), ] = False, ) -> None: """Migrate all v2 metadata in a zarr hierarchy to v3. This will create a zarr.json file for each level (every group / array). v2 files (.zarray, .zattrs etc.) will be left as-is. """ if dry_run: _set_logging_level(verbose=True) logger.info( "Dry run enabled - no new files will be created or changed. 
Log of files that would be created on a real run:" ) input_zarr_store = sync(make_store(input_store, mode="r+")) if output_store is not None: output_zarr_store = sync(make_store(output_store, mode="w-")) write_store = output_zarr_store else: output_zarr_store = None write_store = input_zarr_store if overwrite: sync(migrate_metadata.remove_metadata(write_store, 3, force=force, dry_run=dry_run)) migrate_metadata.migrate_v2_to_v3( input_store=input_zarr_store, output_store=output_zarr_store, dry_run=dry_run ) if remove_v2_metadata: # There should always be valid v3 metadata at the output location after migration, so force=False sync(migrate_metadata.remove_metadata(write_store, 2, force=False, dry_run=dry_run)) @app.command() # type: ignore[misc] def remove_metadata( zarr_format: Annotated[ ZarrFormat, typer.Argument(help="Which format's metadata to remove - v2 or v3."), ], store: Annotated[ str, typer.Argument( help="Store or path to directory in file system or name of zip file e.g. 'data/example-1.zarr', 's3://example-bucket/example'..." ), ], force: Annotated[ bool, typer.Option( help=( "Allow metadata to be deleted when no valid alternative exists e.g. allow deletion of v2 metadata, " "when no v3 metadata is present." ) ), ] = False, dry_run: Annotated[ bool, typer.Option( help="Enable a dry-run: files that would be deleted are logged, but no files are removed or changed." ), ] = False, ) -> None: """Remove all v2 (.zarray, .zattrs, .zgroup, .zmetadata) or v3 (zarr.json) metadata files from the given Zarr. Note - this will remove metadata files at all levels of the hierarchy (every group and array). """ if dry_run: _set_logging_level(verbose=True) logger.info( "Dry run enabled - no files will be deleted or changed. Log of files that would be deleted on a real run:" ) input_zarr_store = sync(make_store(store, mode="r+")) sync( migrate_metadata.remove_metadata( store=input_zarr_store, zarr_format=cast(Literal[2, 3], int(zarr_format[1:])), force=force, dry_run=dry_run, ) ) @app.callback() # type: ignore[misc] def main( verbose: Annotated[ bool, typer.Option( help="enable verbose logging - will print info about metadata files being deleted / saved." ), ] = False, ) -> None: """ See available commands below - access help for individual commands with zarr COMMAND --help. """ _set_logging_level(verbose=verbose) if __name__ == "__main__": app() zarr-python-3.1.5/src/zarr/_compat.py000066400000000000000000000045231511007055700175630ustar00rootroot00000000000000import warnings from collections.abc import Callable from functools import wraps from inspect import Parameter, signature from typing import Any, TypeVar from zarr.errors import ZarrFutureWarning T = TypeVar("T") # Based off https://github.com/scikit-learn/scikit-learn/blob/e87b32a81c70abed8f2e97483758eb64df8255e9/sklearn/utils/validation.py#L63 def _deprecate_positional_args( func: Callable[..., T] | None = None, *, version: str = "3.1.0" ) -> Callable[..., T]: """Decorator for methods that issues warnings for positional arguments. Using the keyword-only argument syntax in pep 3102, arguments after the * will issue a warning when passed as a positional argument. Parameters ---------- func : callable, default=None Function to check arguments on. version : callable, default="3.1.0" The version when positional arguments will result in error. 
""" def _inner_deprecate_positional_args(f: Callable[..., T]) -> Callable[..., T]: sig = signature(f) kwonly_args = [] all_args = [] for name, param in sig.parameters.items(): if param.kind == Parameter.POSITIONAL_OR_KEYWORD: all_args.append(name) elif param.kind == Parameter.KEYWORD_ONLY: kwonly_args.append(name) @wraps(f) def inner_f(*args: Any, **kwargs: Any) -> T: extra_args = len(args) - len(all_args) if extra_args <= 0: return f(*args, **kwargs) # extra_args > 0 args_msg = [ f"{name}={arg}" for name, arg in zip(kwonly_args[:extra_args], args[-extra_args:], strict=False) ] formatted_args_msg = ", ".join(args_msg) warnings.warn( ( f"Pass {formatted_args_msg} as keyword args. From version " f"{version} passing these as positional arguments " "will result in an error" ), ZarrFutureWarning, stacklevel=2, ) kwargs.update(zip(sig.parameters, args, strict=False)) return f(**kwargs) return inner_f if func is not None: return _inner_deprecate_positional_args(func) return _inner_deprecate_positional_args # type: ignore[return-value] zarr-python-3.1.5/src/zarr/abc/000077500000000000000000000000001511007055700163105ustar00rootroot00000000000000zarr-python-3.1.5/src/zarr/abc/__init__.py000066400000000000000000000000001511007055700204070ustar00rootroot00000000000000zarr-python-3.1.5/src/zarr/abc/buffer.py000066400000000000000000000003041511007055700201300ustar00rootroot00000000000000from zarr.core.buffer.core import ArrayLike, Buffer, BufferPrototype, NDArrayLike, NDBuffer __all__ = [ "ArrayLike", "Buffer", "BufferPrototype", "NDArrayLike", "NDBuffer", ] zarr-python-3.1.5/src/zarr/abc/codec.py000066400000000000000000000365071511007055700177520ustar00rootroot00000000000000from __future__ import annotations from abc import abstractmethod from collections.abc import Mapping from typing import TYPE_CHECKING, Generic, TypeGuard, TypeVar from typing_extensions import ReadOnly, TypedDict from zarr.abc.metadata import Metadata from zarr.core.buffer import Buffer, NDBuffer from zarr.core.common import NamedConfig, concurrent_map from zarr.core.config import config if TYPE_CHECKING: from collections.abc import Awaitable, Callable, Iterable from typing import Self from zarr.abc.store import ByteGetter, ByteSetter, Store from zarr.core.array_spec import ArraySpec from zarr.core.chunk_grids import ChunkGrid from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType from zarr.core.indexing import SelectorTuple from zarr.core.metadata import ArrayMetadata __all__ = [ "ArrayArrayCodec", "ArrayBytesCodec", "ArrayBytesCodecPartialDecodeMixin", "ArrayBytesCodecPartialEncodeMixin", "BaseCodec", "BytesBytesCodec", "CodecInput", "CodecOutput", "CodecPipeline", ] CodecInput = TypeVar("CodecInput", bound=NDBuffer | Buffer) CodecOutput = TypeVar("CodecOutput", bound=NDBuffer | Buffer) TName = TypeVar("TName", bound=str, covariant=True) class CodecJSON_V2(TypedDict, Generic[TName]): """The JSON representation of a codec for Zarr V2""" id: ReadOnly[TName] def _check_codecjson_v2(data: object) -> TypeGuard[CodecJSON_V2[str]]: return isinstance(data, Mapping) and "id" in data and isinstance(data["id"], str) CodecJSON_V3 = str | NamedConfig[str, Mapping[str, object]] """The JSON representation of a codec for Zarr V3.""" # The widest type we will *accept* for a codec JSON # This covers v2 and v3 CodecJSON = str | Mapping[str, object] """The widest type of JSON-like input that could specify a codec.""" class BaseCodec(Metadata, Generic[CodecInput, CodecOutput]): """Generic base class for codecs. 
Codecs can be registered via zarr.codecs.registry. Warnings -------- This class is not intended to be directly, please use ArrayArrayCodec, ArrayBytesCodec or BytesBytesCodec for subclassing. """ is_fixed_size: bool @abstractmethod def compute_encoded_size(self, input_byte_length: int, chunk_spec: ArraySpec) -> int: """Given an input byte length, this method returns the output byte length. Raises a NotImplementedError for codecs with variable-sized outputs (e.g. compressors). Parameters ---------- input_byte_length : int chunk_spec : ArraySpec Returns ------- int """ ... def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec: """Computed the spec of the chunk after it has been encoded by the codec. This is important for codecs that change the shape, data type or fill value of a chunk. The spec will then be used for subsequent codecs in the pipeline. Parameters ---------- chunk_spec : ArraySpec Returns ------- ArraySpec """ return chunk_spec def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: """Fills in codec configuration parameters that can be automatically inferred from the array metadata. Parameters ---------- array_spec : ArraySpec Returns ------- Self """ return self def validate( self, *, shape: tuple[int, ...], dtype: ZDType[TBaseDType, TBaseScalar], chunk_grid: ChunkGrid, ) -> None: """Validates that the codec configuration is compatible with the array metadata. Raises errors when the codec configuration is not compatible. Parameters ---------- shape : tuple[int, ...] The array shape dtype : np.dtype[Any] The array data type chunk_grid : ChunkGrid The array chunk grid """ async def _decode_single(self, chunk_data: CodecOutput, chunk_spec: ArraySpec) -> CodecInput: raise NotImplementedError # pragma: no cover async def decode( self, chunks_and_specs: Iterable[tuple[CodecOutput | None, ArraySpec]], ) -> Iterable[CodecInput | None]: """Decodes a batch of chunks. Chunks can be None in which case they are ignored by the codec. Parameters ---------- chunks_and_specs : Iterable[tuple[CodecOutput | None, ArraySpec]] Ordered set of encoded chunks with their accompanying chunk spec. Returns ------- Iterable[CodecInput | None] """ return await _batching_helper(self._decode_single, chunks_and_specs) async def _encode_single( self, chunk_data: CodecInput, chunk_spec: ArraySpec ) -> CodecOutput | None: raise NotImplementedError # pragma: no cover async def encode( self, chunks_and_specs: Iterable[tuple[CodecInput | None, ArraySpec]], ) -> Iterable[CodecOutput | None]: """Encodes a batch of chunks. Chunks can be None in which case they are ignored by the codec. Parameters ---------- chunks_and_specs : Iterable[tuple[CodecInput | None, ArraySpec]] Ordered set of to-be-encoded chunks with their accompanying chunk spec. 
Returns ------- Iterable[CodecOutput | None] """ return await _batching_helper(self._encode_single, chunks_and_specs) class ArrayArrayCodec(BaseCodec[NDBuffer, NDBuffer]): """Base class for array-to-array codecs.""" class ArrayBytesCodec(BaseCodec[NDBuffer, Buffer]): """Base class for array-to-bytes codecs.""" class BytesBytesCodec(BaseCodec[Buffer, Buffer]): """Base class for bytes-to-bytes codecs.""" Codec = ArrayArrayCodec | ArrayBytesCodec | BytesBytesCodec class ArrayBytesCodecPartialDecodeMixin: """Mixin for array-to-bytes codecs that implement partial decoding.""" async def _decode_partial_single( self, byte_getter: ByteGetter, selection: SelectorTuple, chunk_spec: ArraySpec ) -> NDBuffer | None: raise NotImplementedError async def decode_partial( self, batch_info: Iterable[tuple[ByteGetter, SelectorTuple, ArraySpec]], ) -> Iterable[NDBuffer | None]: """Partially decodes a batch of chunks. This method determines parts of a chunk from the slice selection, fetches these parts from the store (via ByteGetter) and decodes them. Parameters ---------- batch_info : Iterable[tuple[ByteGetter, SelectorTuple, ArraySpec]] Ordered set of information about slices of encoded chunks. The slice selection determines which parts of the chunk will be fetched. The ByteGetter is used to fetch the necessary bytes. The chunk spec contains information about the construction of an array from the bytes. Returns ------- Iterable[NDBuffer | None] """ return await concurrent_map( list(batch_info), self._decode_partial_single, config.get("async.concurrency"), ) class ArrayBytesCodecPartialEncodeMixin: """Mixin for array-to-bytes codecs that implement partial encoding.""" async def _encode_partial_single( self, byte_setter: ByteSetter, chunk_array: NDBuffer, selection: SelectorTuple, chunk_spec: ArraySpec, ) -> None: raise NotImplementedError # pragma: no cover async def encode_partial( self, batch_info: Iterable[tuple[ByteSetter, NDBuffer, SelectorTuple, ArraySpec]], ) -> None: """Partially encodes a batch of chunks. This method determines parts of a chunk from the slice selection, encodes them and writes these parts to the store (via ByteSetter). If merging with existing chunk data in the store is necessary, this method will read from the store first and perform the merge. Parameters ---------- batch_info : Iterable[tuple[ByteSetter, NDBuffer, SelectorTuple, ArraySpec]] Ordered set of information about slices of to-be-encoded chunks. The slice selection determines which parts of the chunk will be encoded. The ByteSetter is used to write the necessary bytes and fetch bytes for existing chunk data. The chunk spec contains information about the chunk. """ await concurrent_map( list(batch_info), self._encode_partial_single, config.get("async.concurrency"), ) class CodecPipeline: """Base class for implementing CodecPipeline. A CodecPipeline implements the read and write paths for chunk data. On the read path, it is responsible for fetching chunks from a store (via ByteGetter), decoding them and assembling an output array. On the write path, it encodes the chunks and writes them to a store (via ByteSetter).""" @abstractmethod def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: """Fills in codec configuration parameters that can be automatically inferred from the array metadata. Parameters ---------- array_spec : ArraySpec Returns ------- Self """ ... @classmethod @abstractmethod def from_codecs(cls, codecs: Iterable[Codec]) -> Self: """Creates a codec pipeline from an iterable of codecs. 
Parameters ---------- codecs : Iterable[Codec] Returns ------- Self """ ... @classmethod def from_array_metadata_and_store(cls, array_metadata: ArrayMetadata, store: Store) -> Self: """Creates a codec pipeline from array metadata and a store path. Raises NotImplementedError by default, indicating the CodecPipeline must be created with from_codecs instead. Parameters ---------- array_metadata : ArrayMetadata store : Store Returns ------- Self """ raise NotImplementedError( f"'{type(cls).__name__}' does not implement CodecPipeline.from_array_metadata_and_store." ) @property @abstractmethod def supports_partial_decode(self) -> bool: ... @property @abstractmethod def supports_partial_encode(self) -> bool: ... @abstractmethod def validate( self, *, shape: tuple[int, ...], dtype: ZDType[TBaseDType, TBaseScalar], chunk_grid: ChunkGrid, ) -> None: """Validates that all codec configurations are compatible with the array metadata. Raises errors when a codec configuration is not compatible. Parameters ---------- shape : tuple[int, ...] The array shape dtype : np.dtype[Any] The array data type chunk_grid : ChunkGrid The array chunk grid """ ... @abstractmethod def compute_encoded_size(self, byte_length: int, array_spec: ArraySpec) -> int: """Given an input byte length, this method returns the output byte length. Raises a NotImplementedError for codecs with variable-sized outputs (e.g. compressors). Parameters ---------- byte_length : int array_spec : ArraySpec Returns ------- int """ ... @abstractmethod async def decode( self, chunk_bytes_and_specs: Iterable[tuple[Buffer | None, ArraySpec]], ) -> Iterable[NDBuffer | None]: """Decodes a batch of chunks. Chunks can be None in which case they are ignored by the codec. Parameters ---------- chunk_bytes_and_specs : Iterable[tuple[Buffer | None, ArraySpec]] Ordered set of encoded chunks with their accompanying chunk spec. Returns ------- Iterable[NDBuffer | None] """ ... @abstractmethod async def encode( self, chunk_arrays_and_specs: Iterable[tuple[NDBuffer | None, ArraySpec]], ) -> Iterable[Buffer | None]: """Encodes a batch of chunks. Chunks can be None in which case they are ignored by the codec. Parameters ---------- chunk_arrays_and_specs : Iterable[tuple[NDBuffer | None, ArraySpec]] Ordered set of to-be-encoded chunks with their accompanying chunk spec. Returns ------- Iterable[Buffer | None] """ ... @abstractmethod async def read( self, batch_info: Iterable[tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple, bool]], out: NDBuffer, drop_axes: tuple[int, ...] = (), ) -> None: """Reads chunk data from the store, decodes it and writes it into an output array. Partial decoding may be utilized if the codecs and stores support it. Parameters ---------- batch_info : Iterable[tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple]] Ordered set of information about the chunks. The first slice selection determines which parts of the chunk will be fetched. The second slice selection determines where in the output array the chunk data will be written. The ByteGetter is used to fetch the necessary bytes. The chunk spec contains information about the construction of an array from the bytes. If the Store returns ``None`` for a chunk, then the chunk was not written and the implementation must set the values of that chunk (or ``out``) to the fill value for the array. out : NDBuffer """ ... @abstractmethod async def write( self, batch_info: Iterable[tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple, bool]], value: NDBuffer, drop_axes: tuple[int, ...] 
= (), ) -> None: """Encodes chunk data and writes it to the store. Merges with existing chunk data by reading first, if necessary. Partial encoding may be utilized if the codecs and stores support it. Parameters ---------- batch_info : Iterable[tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple]] Ordered set of information about the chunks. The first slice selection determines which parts of the chunk will be encoded. The second slice selection determines where in the value array the chunk data is located. The ByteSetter is used to fetch and write the necessary bytes. The chunk spec contains information about the chunk. value : NDBuffer """ ... async def _batching_helper( func: Callable[[CodecInput, ArraySpec], Awaitable[CodecOutput | None]], batch_info: Iterable[tuple[CodecInput | None, ArraySpec]], ) -> list[CodecOutput | None]: return await concurrent_map( list(batch_info), _noop_for_none(func), config.get("async.concurrency"), ) def _noop_for_none( func: Callable[[CodecInput, ArraySpec], Awaitable[CodecOutput | None]], ) -> Callable[[CodecInput | None, ArraySpec], Awaitable[CodecOutput | None]]: async def wrap(chunk: CodecInput | None, chunk_spec: ArraySpec) -> CodecOutput | None: if chunk is None: return None return await func(chunk, chunk_spec) return wrap zarr-python-3.1.5/src/zarr/abc/metadata.py000066400000000000000000000025751511007055700204530ustar00rootroot00000000000000from __future__ import annotations from collections.abc import Sequence from typing import TYPE_CHECKING if TYPE_CHECKING: from typing import Self from zarr.core.common import JSON from dataclasses import dataclass, fields __all__ = ["Metadata"] @dataclass(frozen=True) class Metadata: def to_dict(self) -> dict[str, JSON]: """ Recursively serialize this model to a dictionary. This method inspects the fields of self and calls `x.to_dict()` for any fields that are instances of `Metadata`. Sequences of `Metadata` are similarly recursed into, and the output of that recursion is collected in a list. """ out_dict = {} for field in fields(self): key = field.name value = getattr(self, key) if isinstance(value, Metadata): out_dict[field.name] = getattr(self, field.name).to_dict() elif isinstance(value, str): out_dict[key] = value elif isinstance(value, Sequence): out_dict[key] = tuple(v.to_dict() if isinstance(v, Metadata) else v for v in value) else: out_dict[key] = value return out_dict @classmethod def from_dict(cls, data: dict[str, JSON]) -> Self: """ Create an instance of the model from a dictionary """ return cls(**data) zarr-python-3.1.5/src/zarr/abc/numcodec.py000066400000000000000000000053761511007055700204720ustar00rootroot00000000000000from typing import Any, Self, TypeGuard from typing_extensions import Protocol class Numcodec(Protocol): """ A protocol that models the ``numcodecs.abc.Codec`` interface. This protocol should be considered experimental. Expect the type annotations for ``buf`` and ``out`` to narrow in the future. """ codec_id: str def encode(self, buf: Any) -> Any: """Encode data from ``buf``. Parameters ---------- buf : Any Data to be encoded. Returns ------- enc: Any Encoded data. """ ... def decode(self, buf: Any, out: Any | None = None) -> Any: """ Decode data in ``buf``. Parameters ---------- buf : Any Encoded data. out : Any Writeable buffer to store decoded data. If provided, this buffer must be exactly the right size to store the decoded data. Returns ------- dec : Any Decoded data. """ ... 
def get_config(self) -> Any: """ Return a JSON-serializable configuration dictionary for this codec. Must include an ``'id'`` field with the codec identifier. """ ... @classmethod def from_config(cls, config: Any) -> Self: """ Instantiate a codec from a configuration dictionary. Parameters ---------- config : Any A configuration dictionary for this codec. """ ... def _is_numcodec_cls(obj: object) -> TypeGuard[type[Numcodec]]: """ Check if the given object is a class implements the Numcodec protocol. The @runtime_checkable decorator does not allow issubclass checks for protocols with non-method members (i.e., attributes), so we use this function to manually check for the presence of the required attributes and methods on a given object. """ return ( isinstance(obj, type) and hasattr(obj, "codec_id") and isinstance(obj.codec_id, str) and hasattr(obj, "encode") and callable(obj.encode) and hasattr(obj, "decode") and callable(obj.decode) and hasattr(obj, "get_config") and callable(obj.get_config) and hasattr(obj, "from_config") and callable(obj.from_config) ) def _is_numcodec(obj: object) -> TypeGuard[Numcodec]: """ Check if the given object implements the Numcodec protocol. The @runtime_checkable decorator does not allow issubclass checks for protocols with non-method members (i.e., attributes), so we use this function to manually check for the presence of the required attributes and methods on a given object. """ return _is_numcodec_cls(type(obj)) zarr-python-3.1.5/src/zarr/abc/store.py000066400000000000000000000360601511007055700200230ustar00rootroot00000000000000from __future__ import annotations from abc import ABC, abstractmethod from asyncio import gather from dataclasses import dataclass from itertools import starmap from typing import TYPE_CHECKING, Literal, Protocol, runtime_checkable if TYPE_CHECKING: from collections.abc import AsyncGenerator, AsyncIterator, Iterable from types import TracebackType from typing import Any, Self, TypeAlias from zarr.core.buffer import Buffer, BufferPrototype __all__ = ["ByteGetter", "ByteSetter", "Store", "set_or_delete"] @dataclass class RangeByteRequest: """Request a specific byte range""" start: int """The start of the byte range request (inclusive).""" end: int """The end of the byte range request (exclusive).""" @dataclass class OffsetByteRequest: """Request all bytes starting from a given byte offset""" offset: int """The byte offset for the offset range request.""" @dataclass class SuffixByteRequest: """Request up to the last `n` bytes""" suffix: int """The number of bytes from the suffix to request.""" ByteRequest: TypeAlias = RangeByteRequest | OffsetByteRequest | SuffixByteRequest class Store(ABC): """ Abstract base class for Zarr stores. """ _read_only: bool _is_open: bool def __init__(self, *, read_only: bool = False) -> None: self._is_open = False self._read_only = read_only @classmethod async def open(cls, *args: Any, **kwargs: Any) -> Self: """ Create and open the store. Parameters ---------- *args : Any Positional arguments to pass to the store constructor. **kwargs : Any Keyword arguments to pass to the store constructor. Returns ------- Store The opened store instance. """ store = cls(*args, **kwargs) await store._open() return store def with_read_only(self, read_only: bool = False) -> Store: """ Return a new store with a new read_only setting. The new store points to the same location with the specified new read_only state. The returned Store is not automatically opened, and this store is not automatically closed. 
Parameters ---------- read_only If True, the store will be created in read-only mode. Defaults to False. Returns ------- A new store of the same type with the new read only attribute. """ raise NotImplementedError( f"with_read_only is not implemented for the {type(self)} store type." ) def __enter__(self) -> Self: """Enter a context manager that will close the store upon exiting.""" return self def __exit__( self, exc_type: type[BaseException] | None, exc_value: BaseException | None, traceback: TracebackType | None, ) -> None: """Close the store.""" self.close() async def _open(self) -> None: """ Open the store. Raises ------ ValueError If the store is already open. """ if self._is_open: raise ValueError("store is already open") self._is_open = True async def _ensure_open(self) -> None: """Open the store if it is not already open.""" if not self._is_open: await self._open() async def is_empty(self, prefix: str) -> bool: """ Check if the directory is empty. Parameters ---------- prefix : str Prefix of keys to check. Returns ------- bool True if the store is empty, False otherwise. """ if not self.supports_listing: raise NotImplementedError if prefix != "" and not prefix.endswith("/"): prefix += "/" async for _ in self.list_prefix(prefix): return False return True async def clear(self) -> None: """ Clear the store. Remove all keys and values from the store. """ if not self.supports_deletes: raise NotImplementedError if not self.supports_listing: raise NotImplementedError self._check_writable() await self.delete_dir("") @property def read_only(self) -> bool: """Is the store read-only?""" return self._read_only def _check_writable(self) -> None: """Raise an exception if the store is not writable.""" if self.read_only: raise ValueError("store was opened in read-only mode and does not support writing") @abstractmethod def __eq__(self, value: object) -> bool: """Equality comparison.""" ... @abstractmethod async def get( self, key: str, prototype: BufferPrototype, byte_range: ByteRequest | None = None, ) -> Buffer | None: """Retrieve the value associated with a given key. Parameters ---------- key : str prototype : BufferPrototype The prototype of the output buffer. Stores may support a default buffer prototype. byte_range : ByteRequest, optional ByteRequest may be one of the following. If not provided, all data associated with the key is retrieved. - RangeByteRequest(int, int): Request a specific range of bytes in the form (start, end). The end is exclusive. If the given range is zero-length or starts after the end of the object, an error will be returned. Additionally, if the range ends after the end of the object, the entire remainder of the object will be returned. Otherwise, the exact requested range will be returned. - OffsetByteRequest(int): Request all bytes starting from a given byte offset. This is equivalent to bytes={int}- as an HTTP header. - SuffixByteRequest(int): Request the last int bytes. Note that here, int is the size of the request, not the byte offset. This is equivalent to bytes=-{int} as an HTTP header. Returns ------- Buffer """ ... @abstractmethod async def get_partial_values( self, prototype: BufferPrototype, key_ranges: Iterable[tuple[str, ByteRequest | None]], ) -> list[Buffer | None]: """Retrieve possibly partial values from given key_ranges. Parameters ---------- prototype : BufferPrototype The prototype of the output buffer. Stores may support a default buffer prototype. 
key_ranges : Iterable[tuple[str, tuple[int | None, int | None]]] Ordered set of key, range pairs, a key may occur multiple times with different ranges Returns ------- list of values, in the order of the key_ranges, may contain null/none for missing keys """ ... @abstractmethod async def exists(self, key: str) -> bool: """Check if a key exists in the store. Parameters ---------- key : str Returns ------- bool """ ... @property @abstractmethod def supports_writes(self) -> bool: """Does the store support writes?""" ... @abstractmethod async def set(self, key: str, value: Buffer) -> None: """Store a (key, value) pair. Parameters ---------- key : str value : Buffer """ ... async def set_if_not_exists(self, key: str, value: Buffer) -> None: """ Store a key to ``value`` if the key is not already present. Parameters ---------- key : str value : Buffer """ # Note for implementers: the default implementation provided here # is not safe for concurrent writers. There's a race condition between # the `exists` check and the `set` where another writer could set some # value at `key` or delete `key`. if not await self.exists(key): await self.set(key, value) async def _set_many(self, values: Iterable[tuple[str, Buffer]]) -> None: """ Insert multiple (key, value) pairs into storage. """ await gather(*starmap(self.set, values)) @property def supports_consolidated_metadata(self) -> bool: """ Does the store support consolidated metadata?. If it doesn't an error will be raised on requests to consolidate the metadata. Returning `False` can be useful for stores which implement their own consolidation mechanism outside of the zarr-python implementation. """ return True @property @abstractmethod def supports_deletes(self) -> bool: """Does the store support deletes?""" ... @abstractmethod async def delete(self, key: str) -> None: """Remove a key from the store Parameters ---------- key : str """ ... @property def supports_partial_writes(self) -> Literal[False]: """Does the store support partial writes? Partial writes are no longer used by Zarr, so this is always false. """ return False @property @abstractmethod def supports_listing(self) -> bool: """Does the store support listing?""" ... @abstractmethod def list(self) -> AsyncIterator[str]: """Retrieve all keys in the store. Returns ------- AsyncIterator[str] """ # This method should be async, like overridden methods in child classes. # However, that's not straightforward: # https://stackoverflow.com/questions/68905848 @abstractmethod def list_prefix(self, prefix: str) -> AsyncIterator[str]: """ Retrieve all keys in the store that begin with a given prefix. Keys are returned relative to the root of the store. Parameters ---------- prefix : str Returns ------- AsyncIterator[str] """ # This method should be async, like overridden methods in child classes. # However, that's not straightforward: # https://stackoverflow.com/questions/68905848 @abstractmethod def list_dir(self, prefix: str) -> AsyncIterator[str]: """ Retrieve all keys and prefixes with a given prefix and which do not contain the character “/” after the given prefix. Parameters ---------- prefix : str Returns ------- AsyncIterator[str] """ # This method should be async, like overridden methods in child classes. # However, that's not straightforward: # https://stackoverflow.com/questions/68905848 async def delete_dir(self, prefix: str) -> None: """ Remove all keys and prefixes in the store that begin with a given prefix. 
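        For example (illustrative key names)::

            await store.delete_dir("group/array")
            # removes every key under the "group/array/" prefix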
""" if not self.supports_deletes: raise NotImplementedError if not self.supports_listing: raise NotImplementedError self._check_writable() if prefix != "" and not prefix.endswith("/"): prefix += "/" async for key in self.list_prefix(prefix): await self.delete(key) def close(self) -> None: """Close the store.""" self._is_open = False async def _get_many( self, requests: Iterable[tuple[str, BufferPrototype, ByteRequest | None]] ) -> AsyncGenerator[tuple[str, Buffer | None], None]: """ Retrieve a collection of objects from storage. In general this method does not guarantee that objects will be retrieved in the order in which they were requested, so this method yields tuple[str, Buffer | None] instead of just Buffer | None """ for req in requests: yield (req[0], await self.get(*req)) async def getsize(self, key: str) -> int: """ Return the size, in bytes, of a value in a Store. Parameters ---------- key : str Returns ------- nbytes : int The size of the value (in bytes). Raises ------ FileNotFoundError When the given key does not exist in the store. """ # Note to implementers: this default implementation is very inefficient since # it requires reading the entire object. Many systems will have ways to get the # size of an object without reading it. # avoid circular import from zarr.core.buffer.core import default_buffer_prototype value = await self.get(key, prototype=default_buffer_prototype()) if value is None: raise FileNotFoundError(key) return len(value) async def getsize_prefix(self, prefix: str) -> int: """ Return the size, in bytes, of all values under a prefix. Parameters ---------- prefix : str The prefix of the directory to measure. Returns ------- nbytes : int The sum of the sizes of the values in the directory (in bytes). See Also -------- zarr.Array.nbytes_stored Store.getsize Notes ----- ``getsize_prefix`` is just provided as a potentially faster alternative to listing all the keys under a prefix calling [`Store.getsize`][zarr.abc.store.Store.getsize] on each. In general, ``prefix`` should be the path of an Array or Group in the Store. Implementations may differ on the behavior when some other ``prefix`` is provided. """ # TODO: Overlap listing keys with getsize calls. # Currently, we load the list of keys into memory and only then move # on to getting sizes. Ideally we would overlap those two, which should # improve tail latency and might reduce memory pressure (since not all keys # would be in memory at once). # avoid circular import from zarr.core.common import concurrent_map from zarr.core.config import config keys = [(x,) async for x in self.list_prefix(prefix)] limit = config.get("async.concurrency") sizes = await concurrent_map(keys, self.getsize, limit=limit) return sum(sizes) @runtime_checkable class ByteGetter(Protocol): async def get( self, prototype: BufferPrototype, byte_range: ByteRequest | None = None ) -> Buffer | None: ... @runtime_checkable class ByteSetter(Protocol): async def get( self, prototype: BufferPrototype, byte_range: ByteRequest | None = None ) -> Buffer | None: ... async def set(self, value: Buffer) -> None: ... async def delete(self) -> None: ... async def set_if_not_exists(self, default: Buffer) -> None: ... async def set_or_delete(byte_setter: ByteSetter, value: Buffer | None) -> None: """Set or delete a value in a byte setter Parameters ---------- byte_setter : ByteSetter value : Buffer | None Notes ----- If value is None, the key will be deleted. 
""" if value is None: await byte_setter.delete() else: await byte_setter.set(value) zarr-python-3.1.5/src/zarr/api/000077500000000000000000000000001511007055700163345ustar00rootroot00000000000000zarr-python-3.1.5/src/zarr/api/__init__.py000066400000000000000000000000001511007055700204330ustar00rootroot00000000000000zarr-python-3.1.5/src/zarr/api/asynchronous.py000066400000000000000000001400641511007055700214460ustar00rootroot00000000000000from __future__ import annotations import asyncio import dataclasses import warnings from typing import TYPE_CHECKING, Any, Literal, NotRequired, TypeAlias, TypedDict, cast import numpy as np import numpy.typing as npt from typing_extensions import deprecated from zarr.abc.store import Store from zarr.core.array import ( DEFAULT_FILL_VALUE, Array, AsyncArray, CompressorLike, create_array, from_array, get_array_metadata, ) from zarr.core.array_spec import ArrayConfigLike, parse_array_config from zarr.core.buffer import NDArrayLike from zarr.core.common import ( JSON, AccessModeLiteral, DimensionNames, MemoryOrder, ZarrFormat, _default_zarr_format, _warn_write_empty_chunks_kwarg, ) from zarr.core.dtype import ZDTypeLike, get_data_type_from_native_dtype from zarr.core.group import ( AsyncGroup, ConsolidatedMetadata, GroupMetadata, create_hierarchy, ) from zarr.core.metadata import ArrayMetadataDict, ArrayV2Metadata from zarr.errors import ( ArrayNotFoundError, GroupNotFoundError, NodeTypeValidationError, ZarrDeprecationWarning, ZarrRuntimeWarning, ZarrUserWarning, ) from zarr.storage import StorePath from zarr.storage._common import make_store_path if TYPE_CHECKING: from collections.abc import Iterable from zarr.abc.codec import Codec from zarr.abc.numcodec import Numcodec from zarr.core.buffer import NDArrayLikeOrScalar from zarr.core.chunk_key_encodings import ChunkKeyEncoding from zarr.core.metadata.v2 import CompressorLikev2 from zarr.storage import StoreLike from zarr.types import AnyArray, AnyAsyncArray # TODO: this type could use some more thought ArrayLike: TypeAlias = AnyAsyncArray | AnyArray | npt.NDArray[Any] PathLike = str __all__ = [ "array", "consolidate_metadata", "copy", "copy_all", "copy_store", "create", "create_array", "create_hierarchy", "empty", "empty_like", "from_array", "full", "full_like", "group", "load", "ones", "ones_like", "open", "open_array", "open_consolidated", "open_group", "open_like", "save", "save_array", "save_group", "tree", "zeros", "zeros_like", ] _READ_MODES: tuple[AccessModeLiteral, ...] = ("r", "r+", "a") _CREATE_MODES: tuple[AccessModeLiteral, ...] = ("a", "w", "w-") _OVERWRITE_MODES: tuple[AccessModeLiteral, ...] = ("w",) def _infer_overwrite(mode: AccessModeLiteral) -> bool: """ Check that an ``AccessModeLiteral`` is compatible with overwriting an existing Zarr node. """ return mode in _OVERWRITE_MODES def _get_shape_chunks(a: ArrayLike | Any) -> tuple[tuple[int, ...] | None, tuple[int, ...] | None]: """Helper function to get the shape and chunks from an array-like object""" shape = None chunks = None if hasattr(a, "shape") and isinstance(a.shape, tuple): shape = a.shape if hasattr(a, "chunks") and isinstance(a.chunks, tuple) and (len(a.chunks) == len(a.shape)): chunks = a.chunks elif hasattr(a, "chunklen"): # bcolz carray chunks = (a.chunklen,) + a.shape[1:] return shape, chunks class _LikeArgs(TypedDict): shape: NotRequired[tuple[int, ...]] chunks: NotRequired[tuple[int, ...]] dtype: NotRequired[np.dtype[np.generic]] order: NotRequired[Literal["C", "F"]] filters: NotRequired[tuple[Numcodec, ...] 
| None] compressor: NotRequired[CompressorLikev2] codecs: NotRequired[tuple[Codec, ...]] def _like_args(a: ArrayLike) -> _LikeArgs: """Set default values for shape and chunks if they are not present in the array-like object""" new: _LikeArgs = {} shape, chunks = _get_shape_chunks(a) if shape is not None: new["shape"] = shape if chunks is not None: new["chunks"] = chunks if hasattr(a, "dtype"): new["dtype"] = a.dtype if isinstance(a, AsyncArray | Array): if isinstance(a.metadata, ArrayV2Metadata): new["order"] = a.order new["compressor"] = a.metadata.compressor new["filters"] = a.metadata.filters else: # TODO: Remove type: ignore statement when type inference improves. # mypy cannot correctly infer the type of a.metadata here for some reason. new["codecs"] = a.metadata.codecs else: # TODO: set default values compressor/codecs # to do this, we may need to evaluate if this is a v2 or v3 array # new["compressor"] = "default" pass return new def _handle_zarr_version_or_format( *, zarr_version: ZarrFormat | None, zarr_format: ZarrFormat | None ) -> ZarrFormat | None: """Handle the deprecated zarr_version kwarg and return zarr_format""" if zarr_format is not None and zarr_version is not None and zarr_format != zarr_version: raise ValueError( f"zarr_format {zarr_format} does not match zarr_version {zarr_version}, please only set one" ) if zarr_version is not None: warnings.warn( "zarr_version is deprecated, use zarr_format", ZarrDeprecationWarning, stacklevel=2 ) return zarr_version return zarr_format async def consolidate_metadata( store: StoreLike, path: str | None = None, zarr_format: ZarrFormat | None = None, ) -> AsyncGroup: """ Consolidate the metadata of all nodes in a hierarchy. Upon completion, the metadata of the root node in the Zarr hierarchy will be updated to include all the metadata of child nodes. For Stores that do not support consolidated metadata, this operation raises a ``TypeError``. Parameters ---------- store : StoreLike The store-like object whose metadata you wish to consolidate. See the [storage documentation in the user guide][user-guide-store-like] for a description of all valid StoreLike values. path : str, optional A path to a group in the store to consolidate at. Only children below that group will be consolidated. By default, the root node is used so all the metadata in the store is consolidated. zarr_format : {2, 3, None}, optional The zarr format of the hierarchy. By default the zarr format is inferred. Returns ------- group: AsyncGroup The group, with the ``consolidated_metadata`` field set to include the metadata of each child node. If the Store doesn't support consolidated metadata, this function raises a `TypeError`. See ``Store.supports_consolidated_metadata``. 
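    Examples
    --------
    Illustrative sketch using an in-memory store::

        import zarr
        from zarr.api.asynchronous import consolidate_metadata, group

        store = zarr.storage.MemoryStore()
        root = await group(store=store)
        await root.create_group("child")
        root = await consolidate_metadata(store)
        # root.metadata.consolidated_metadata now records the "child" group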
""" store_path = await make_store_path(store, path=path) if not store_path.store.supports_consolidated_metadata: store_name = type(store_path.store).__name__ raise TypeError( f"The Zarr Store in use ({store_name}) doesn't support consolidated metadata", ) group = await AsyncGroup.open(store_path, zarr_format=zarr_format, use_consolidated=False) group.store_path.store._check_writable() members_metadata = { k: v.metadata async for k, v in group.members(max_depth=None, use_consolidated_for_children=False) } # While consolidating, we want to be explicit about when child groups # are empty by inserting an empty dict for consolidated_metadata.metadata for k, v in members_metadata.items(): if isinstance(v, GroupMetadata) and v.consolidated_metadata is None: v = dataclasses.replace(v, consolidated_metadata=ConsolidatedMetadata(metadata={})) members_metadata[k] = v if any(m.zarr_format == 3 for m in members_metadata.values()): warnings.warn( "Consolidated metadata is currently not part in the Zarr format 3 specification. It " "may not be supported by other zarr implementations and may change in the future.", category=ZarrUserWarning, stacklevel=1, ) ConsolidatedMetadata._flat_to_nested(members_metadata) consolidated_metadata = ConsolidatedMetadata(metadata=members_metadata) metadata = dataclasses.replace(group.metadata, consolidated_metadata=consolidated_metadata) group = dataclasses.replace( group, metadata=metadata, ) await group._save_metadata() return group async def copy(*args: Any, **kwargs: Any) -> tuple[int, int, int]: """ Not implemented. """ raise NotImplementedError async def copy_all(*args: Any, **kwargs: Any) -> tuple[int, int, int]: """ Not implemented. """ raise NotImplementedError async def copy_store(*args: Any, **kwargs: Any) -> tuple[int, int, int]: """ Not implemented. """ raise NotImplementedError async def load( *, store: StoreLike, path: str | None = None, zarr_format: ZarrFormat | None = None, zarr_version: ZarrFormat | None = None, ) -> NDArrayLikeOrScalar | dict[str, NDArrayLikeOrScalar]: """Load data from an array or group into memory. Parameters ---------- store : StoreLike StoreLike object to open. See the [storage documentation in the user guide][user-guide-store-like] for a description of all valid StoreLike values. path : str or None, optional The path within the store from which to load. Returns ------- out If the path contains an array, out will be a numpy array. If the path contains a group, out will be a dict-like object where keys are array names and values are numpy arrays. See Also -------- save Notes ----- If loading data from a group of arrays, data will not be immediately loaded into memory. Rather, arrays will be loaded into memory as they are requested. """ zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) obj = await open(store=store, path=path, zarr_format=zarr_format) if isinstance(obj, AsyncArray): return await obj.getitem(slice(None)) else: raise NotImplementedError("loading groups not yet supported") async def open( *, store: StoreLike | None = None, mode: AccessModeLiteral | None = None, zarr_version: ZarrFormat | None = None, # deprecated zarr_format: ZarrFormat | None = None, path: str | None = None, storage_options: dict[str, Any] | None = None, **kwargs: Any, # TODO: type kwargs as valid args to open_array ) -> AnyAsyncArray | AsyncGroup: """Convenience function to open a group or array using file-mode-like semantics. 
Parameters ---------- store : StoreLike or None, default=None StoreLike object to open. See the [storage documentation in the user guide][user-guide-store-like] for a description of all valid StoreLike values. mode : {'r', 'r+', 'a', 'w', 'w-'}, optional Persistence mode: 'r' means read only (must exist); 'r+' means read/write (must exist); 'a' means read/write (create if doesn't exist); 'w' means create (overwrite if exists); 'w-' means create (fail if exists). If the store is read-only, the default is 'r'; otherwise, it is 'a'. zarr_format : {2, 3, None}, optional The zarr format to use when saving. path : str or None, optional The path within the store to open. storage_options : dict If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. **kwargs Additional parameters are passed through to [`zarr.creation.open_array`][] or [`open_group`][zarr.api.asynchronous.open_group]. Returns ------- z : array or group Return type depends on what exists in the given store. """ zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) if mode is None: if isinstance(store, (Store, StorePath)) and store.read_only: mode = "r" else: mode = "a" store_path = await make_store_path(store, mode=mode, path=path, storage_options=storage_options) # TODO: the mode check below seems wrong! if "shape" not in kwargs and mode in {"a", "r", "r+", "w"}: try: metadata_dict = await get_array_metadata(store_path, zarr_format=zarr_format) # TODO: remove this cast when we fix typing for array metadata dicts _metadata_dict = cast("ArrayMetadataDict", metadata_dict) # for v2, the above would already have raised an exception if not an array zarr_format = _metadata_dict["zarr_format"] is_v3_array = zarr_format == 3 and _metadata_dict.get("node_type") == "array" if is_v3_array or zarr_format == 2: return AsyncArray( store_path=store_path, metadata=_metadata_dict, config=kwargs.get("config") ) except (AssertionError, FileNotFoundError, NodeTypeValidationError): pass return await open_group(store=store_path, zarr_format=zarr_format, mode=mode, **kwargs) try: return await open_array(store=store_path, zarr_format=zarr_format, mode=mode, **kwargs) except (KeyError, NodeTypeValidationError): # KeyError for a missing key # NodeTypeValidationError for failing to parse node metadata as an array when it's # actually a group return await open_group(store=store_path, zarr_format=zarr_format, mode=mode, **kwargs) async def open_consolidated( *args: Any, use_consolidated: Literal[True] = True, **kwargs: Any ) -> AsyncGroup: """ Alias for [`open_group`][zarr.api.asynchronous.open_group] with ``use_consolidated=True``. """ if use_consolidated is not True: raise TypeError( "'use_consolidated' must be 'True' in 'open_consolidated'. Use 'open' with " "'use_consolidated=False' to bypass consolidated metadata." ) return await open_group(*args, use_consolidated=use_consolidated, **kwargs) async def save( store: StoreLike, *args: NDArrayLike, zarr_version: ZarrFormat | None = None, # deprecated zarr_format: ZarrFormat | None = None, path: str | None = None, **kwargs: Any, # TODO: type kwargs as valid args to save ) -> None: """Convenience function to save an array or group of arrays to the local file system. Parameters ---------- store : StoreLike StoreLike object to open. See the [storage documentation in the user guide][user-guide-store-like] for a description of all valid StoreLike values. *args : ndarray NumPy arrays with data to save. 
zarr_format : {2, 3, None}, optional The zarr format to use when saving. path : str or None, optional The path within the group where the arrays will be saved. **kwargs NumPy arrays with data to save. """ zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) if len(args) == 0 and len(kwargs) == 0: raise ValueError("at least one array must be provided") if len(args) == 1 and len(kwargs) == 0: await save_array(store, args[0], zarr_format=zarr_format, path=path) else: await save_group(store, *args, zarr_format=zarr_format, path=path, **kwargs) async def save_array( store: StoreLike, arr: NDArrayLike, *, zarr_version: ZarrFormat | None = None, # deprecated zarr_format: ZarrFormat | None = None, path: str | None = None, storage_options: dict[str, Any] | None = None, **kwargs: Any, # TODO: type kwargs as valid args to create ) -> None: """Convenience function to save a NumPy array to the local file system, following a similar API to the NumPy save() function. Parameters ---------- store : StoreLike StoreLike object to open. See the [storage documentation in the user guide][user-guide-store-like] for a description of all valid StoreLike values. arr : ndarray NumPy array with data to save. zarr_format : {2, 3, None}, optional The zarr format to use when saving. The default is ``None``, which will use the default Zarr format defined in the global configuration object. path : str or None, optional The path within the store where the array will be saved. storage_options : dict If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. **kwargs Passed through to [`create`][zarr.api.asynchronous.create], e.g., compressor. """ zarr_format = ( _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) or _default_zarr_format() ) if not isinstance(arr, NDArrayLike): raise TypeError("arr argument must be numpy or other NDArrayLike array") mode = kwargs.pop("mode", "a") store_path = await make_store_path(store, path=path, mode=mode, storage_options=storage_options) if np.isscalar(arr): arr = np.array(arr) shape = arr.shape chunks = getattr(arr, "chunks", None) # for array-likes with chunks attribute overwrite = kwargs.pop("overwrite", None) or _infer_overwrite(mode) zarr_dtype = get_data_type_from_native_dtype(arr.dtype) new = await AsyncArray._create( store_path, zarr_format=zarr_format, shape=shape, dtype=zarr_dtype, chunks=chunks, overwrite=overwrite, **kwargs, ) await new.setitem(slice(None), arr) async def save_group( store: StoreLike, *args: NDArrayLike, zarr_version: ZarrFormat | None = None, # deprecated zarr_format: ZarrFormat | None = None, path: str | None = None, storage_options: dict[str, Any] | None = None, **kwargs: NDArrayLike, ) -> None: """Convenience function to save several NumPy arrays to the local file system, following a similar API to the NumPy savez()/savez_compressed() functions. Parameters ---------- store : StoreLike StoreLike object to open. See the [storage documentation in the user guide][user-guide-store-like] for a description of all valid StoreLike values. *args : ndarray NumPy arrays with data to save. zarr_format : {2, 3, None}, optional The zarr format to use when saving. path : str or None, optional Path within the store where the group will be saved. storage_options : dict If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. **kwargs NumPy arrays with data to save. 
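    Examples
    --------
    Illustrative sketch; positional arrays are stored under ``arr_0``, ``arr_1``,
    ... while keyword arrays are stored under their keyword name::

        import numpy as np
        from zarr.api.asynchronous import save_group

        await save_group(
            "data/example.zarr",
            np.arange(10),            # stored at path "arr_0"
            temperature=np.zeros(5),  # stored at path "temperature"
        )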
""" store_path = await make_store_path(store, path=path, mode="w", storage_options=storage_options) zarr_format = ( _handle_zarr_version_or_format( zarr_version=zarr_version, zarr_format=zarr_format, ) or _default_zarr_format() ) for arg in args: if not isinstance(arg, NDArrayLike): raise TypeError( "All arguments must be numpy or other NDArrayLike arrays (except store, path, storage_options, and zarr_format)" ) for k, v in kwargs.items(): if not isinstance(v, NDArrayLike): raise TypeError(f"Keyword argument '{k}' must be a numpy or other NDArrayLike array") if len(args) == 0 and len(kwargs) == 0: raise ValueError("at least one array must be provided") aws = [] for i, arr in enumerate(args): aws.append( save_array( store_path, arr, zarr_format=zarr_format, path=f"arr_{i}", storage_options=storage_options, ) ) for k, arr in kwargs.items(): aws.append(save_array(store_path, arr, zarr_format=zarr_format, path=k)) await asyncio.gather(*aws) @deprecated("Use AsyncGroup.tree instead.", category=ZarrDeprecationWarning) async def tree(grp: AsyncGroup, expand: bool | None = None, level: int | None = None) -> Any: """Provide a rich display of the hierarchy. !!! warning "Deprecated" `zarr.tree()` is deprecated since v3.0.0 and will be removed in a future release. Use `group.tree()` instead. Parameters ---------- grp : Group Zarr or h5py group. expand : bool, optional Only relevant for HTML representation. If True, tree will be fully expanded. level : int, optional Maximum depth to descend into hierarchy. Returns ------- TreeRepr A pretty-printable object displaying the hierarchy. """ return await grp.tree(expand=expand, level=level) async def array(data: npt.ArrayLike | AnyArray, **kwargs: Any) -> AnyAsyncArray: """Create an array filled with `data`. Parameters ---------- data : array_like The data to fill the array with. **kwargs Passed through to [`create`][zarr.api.asynchronous.create]. Returns ------- array : array The new array. """ if isinstance(data, Array): return await from_array(data=data, **kwargs) # ensure data is array-like if not hasattr(data, "shape") or not hasattr(data, "dtype"): data = np.asanyarray(data) # setup dtype kw_dtype = kwargs.get("dtype") if kw_dtype is None and hasattr(data, "dtype"): kwargs["dtype"] = data.dtype else: kwargs["dtype"] = kw_dtype # setup shape and chunks data_shape, data_chunks = _get_shape_chunks(data) kwargs["shape"] = data_shape kw_chunks = kwargs.get("chunks") if kw_chunks is None: kwargs["chunks"] = data_chunks else: kwargs["chunks"] = kw_chunks read_only = kwargs.pop("read_only", False) if read_only: raise ValueError("read_only=True is no longer supported when creating new arrays") # instantiate array z = await create(**kwargs) # fill with data await z.setitem(Ellipsis, data) return z async def group( *, # Note: this is a change from v2 store: StoreLike | None = None, overwrite: bool = False, chunk_store: StoreLike | None = None, # not used cache_attrs: bool | None = None, # not used, default changed synchronizer: Any | None = None, # not used path: str | None = None, zarr_version: ZarrFormat | None = None, # deprecated zarr_format: ZarrFormat | None = None, meta_array: Any | None = None, # not used attributes: dict[str, JSON] | None = None, storage_options: dict[str, Any] | None = None, ) -> AsyncGroup: """Create a group. Parameters ---------- store : StoreLike or None, default=None StoreLike object to open. See the [storage documentation in the user guide][user-guide-store-like] for a description of all valid StoreLike values. 
overwrite : bool, optional If True, delete any pre-existing data in `store` at `path` before creating the group. chunk_store : StoreLike or None, default=None Separate storage for chunks. Not implemented. cache_attrs : bool, optional If True (default), user attributes will be cached for attribute read operations. If False, user attributes are reloaded from the store prior to all attribute read operations. synchronizer : object, optional Array synchronizer. path : str, optional Group path within store. meta_array : array-like, optional An array instance to use for determining arrays to create and return to users. Use `numpy.empty(())` by default. zarr_format : {2, 3, None}, optional The zarr format to use when saving. storage_options : dict If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. Returns ------- g : group The new group. """ mode: AccessModeLiteral if overwrite: mode = "w" else: mode = "a" return await open_group( store=store, mode=mode, chunk_store=chunk_store, cache_attrs=cache_attrs, synchronizer=synchronizer, path=path, zarr_version=zarr_version, zarr_format=zarr_format, meta_array=meta_array, attributes=attributes, storage_options=storage_options, ) async def create_group( *, store: StoreLike, path: str | None = None, overwrite: bool = False, zarr_format: ZarrFormat | None = None, attributes: dict[str, Any] | None = None, storage_options: dict[str, Any] | None = None, ) -> AsyncGroup: """Create a group. Parameters ---------- store : StoreLike StoreLike object to open. See the [storage documentation in the user guide][user-guide-store-like] for a description of all valid StoreLike values. path : str, optional Group path within store. overwrite : bool, optional If True, pre-existing data at ``path`` will be deleted before creating the group. zarr_format : {2, 3, None}, optional The zarr format to use when saving. If no ``zarr_format`` is provided, the default format will be used. This default can be changed by modifying the value of ``default_zarr_format`` in [`zarr.config`][zarr.config]. storage_options : dict If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. Returns ------- AsyncGroup The new group. """ if zarr_format is None: zarr_format = _default_zarr_format() mode: Literal["a"] = "a" store_path = await make_store_path(store, path=path, mode=mode, storage_options=storage_options) return await AsyncGroup.from_store( store=store_path, zarr_format=zarr_format, overwrite=overwrite, attributes=attributes, ) async def open_group( store: StoreLike | None = None, *, # Note: this is a change from v2 mode: AccessModeLiteral = "a", cache_attrs: bool | None = None, # not used, default changed synchronizer: Any = None, # not used path: str | None = None, chunk_store: StoreLike | None = None, # not used storage_options: dict[str, Any] | None = None, zarr_version: ZarrFormat | None = None, # deprecated zarr_format: ZarrFormat | None = None, meta_array: Any | None = None, # not used attributes: dict[str, JSON] | None = None, use_consolidated: bool | str | None = None, ) -> AsyncGroup: """Open a group using file-mode-like semantics. Parameters ---------- store : StoreLike or None, default=None StoreLike object to open. See the [storage documentation in the user guide][user-guide-store-like] for a description of all valid StoreLike values. 
mode : {'r', 'r+', 'a', 'w', 'w-'}, optional Persistence mode: 'r' means read only (must exist); 'r+' means read/write (must exist); 'a' means read/write (create if doesn't exist); 'w' means create (overwrite if exists); 'w-' means create (fail if exists). cache_attrs : bool, optional If True (default), user attributes will be cached for attribute read operations. If False, user attributes are reloaded from the store prior to all attribute read operations. synchronizer : object, optional Array synchronizer. path : str, optional Group path within store. chunk_store : StoreLike or None, default=None Separate storage for chunks. See the [storage documentation in the user guide][user-guide-store-like] for a description of all valid StoreLike values. storage_options : dict If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. meta_array : array-like, optional An array instance to use for determining arrays to create and return to users. Use `numpy.empty(())` by default. attributes : dict A dictionary of JSON-serializable values with user-defined attributes. use_consolidated : bool or str, default None Whether to use consolidated metadata. By default, consolidated metadata is used if it's present in the store (in the ``zarr.json`` for Zarr format 3 and in the ``.zmetadata`` file for Zarr format 2). To explicitly require consolidated metadata, set ``use_consolidated=True``, which will raise an exception if consolidated metadata is not found. To explicitly *not* use consolidated metadata, set ``use_consolidated=False``, which will fall back to using the regular, non consolidated metadata. Zarr format 2 allowed configuring the key storing the consolidated metadata (``.zmetadata`` by default). Specify the custom key as ``use_consolidated`` to load consolidated metadata from a non-default key. Returns ------- g : group The new group. """ zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) if cache_attrs is not None: warnings.warn("cache_attrs is not yet implemented", ZarrRuntimeWarning, stacklevel=2) if synchronizer is not None: warnings.warn("synchronizer is not yet implemented", ZarrRuntimeWarning, stacklevel=2) if meta_array is not None: warnings.warn("meta_array is not yet implemented", ZarrRuntimeWarning, stacklevel=2) if chunk_store is not None: warnings.warn("chunk_store is not yet implemented", ZarrRuntimeWarning, stacklevel=2) store_path = await make_store_path(store, mode=mode, storage_options=storage_options, path=path) if attributes is None: attributes = {} try: if mode in _READ_MODES: return await AsyncGroup.open( store_path, zarr_format=zarr_format, use_consolidated=use_consolidated ) except (KeyError, FileNotFoundError): pass if mode in _CREATE_MODES: overwrite = _infer_overwrite(mode) _zarr_format = zarr_format or _default_zarr_format() return await AsyncGroup.from_store( store_path, zarr_format=_zarr_format, overwrite=overwrite, attributes=attributes, ) msg = f"No group found in store {store!r} at path {store_path.path!r}" raise GroupNotFoundError(msg) async def create( shape: tuple[int, ...] | int, *, # Note: this is a change from v2 chunks: tuple[int, ...] 
| int | bool | None = None, dtype: ZDTypeLike | None = None, compressor: CompressorLike = "auto", fill_value: Any | None = DEFAULT_FILL_VALUE, order: MemoryOrder | None = None, store: StoreLike | None = None, synchronizer: Any | None = None, overwrite: bool = False, path: PathLike | None = None, chunk_store: StoreLike | None = None, filters: Iterable[dict[str, JSON] | Numcodec] | None = None, cache_metadata: bool | None = None, cache_attrs: bool | None = None, read_only: bool | None = None, object_codec: Codec | None = None, # TODO: type has changed dimension_separator: Literal[".", "/"] | None = None, write_empty_chunks: bool | None = None, zarr_version: ZarrFormat | None = None, # deprecated zarr_format: ZarrFormat | None = None, meta_array: Any | None = None, # TODO: need type attributes: dict[str, JSON] | None = None, # v3 only chunk_shape: tuple[int, ...] | int | None = None, chunk_key_encoding: ( ChunkKeyEncoding | tuple[Literal["default"], Literal[".", "/"]] | tuple[Literal["v2"], Literal[".", "/"]] | None ) = None, codecs: Iterable[Codec | dict[str, JSON]] | None = None, dimension_names: DimensionNames = None, storage_options: dict[str, Any] | None = None, config: ArrayConfigLike | None = None, **kwargs: Any, ) -> AnyAsyncArray: """Create an array. Parameters ---------- shape : int or tuple of ints Array shape. chunks : int or tuple of ints, optional Chunk shape. If True, will be guessed from ``shape`` and ``dtype``. If False, will be set to ``shape``, i.e., single chunk for the whole array. If an int, the chunk size in each dimension will be given by the value of ``chunks``. Default is True. dtype : str or dtype, optional NumPy dtype. compressor : Codec, optional Primary compressor to compress chunk data. Zarr format 2 only. Zarr format 3 arrays should use ``codecs`` instead. If neither ``compressor`` nor ``filters`` are provided, the default compressor [`zarr.codecs.ZstdCodec`][] is used. If ``compressor`` is set to ``None``, no compression is used. fill_value : Any, optional Fill value for the array. order : {'C', 'F'}, optional Deprecated in favor of the ``config`` keyword argument. Pass ``{'order': }`` to ``create`` instead of using this parameter. Memory layout to be used within each chunk. If not specified, the ``array.order`` parameter in the global config will be used. store : StoreLike or None, default=None StoreLike object to open. See the [storage documentation in the user guide][user-guide-store-like] for a description of all valid StoreLike values. synchronizer : object, optional Array synchronizer. overwrite : bool, optional If True, delete all pre-existing data in ``store`` at ``path`` before creating the array. path : str, optional Path under which array is stored. chunk_store : StoreLike or None, default=None Separate storage for chunks. If not provided, ``store`` will be used for storage of both chunks and metadata. filters : Iterable[Codec] | Literal["auto"], optional Iterable of filters to apply to each chunk of the array, in order, before serializing that chunk to bytes. For Zarr format 3, a "filter" is a codec that takes an array and returns an array, and these values must be instances of [`zarr.abc.codec.ArrayArrayCodec`][], or a dict representations of [`zarr.abc.codec.ArrayArrayCodec`][]. For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the the order if your filters is consistent with the behavior of each filter. 
The default value of ``"auto"`` instructs Zarr to use a default based on the data type of the array and the Zarr format specified. For all data types in Zarr V3, and most data types in Zarr V2, the default filters are empty. The only case where the default filters are not empty is when the Zarr format is 2, and the data type is a variable-length data type like [`zarr.dtype.VariableLengthUTF8`][] or [`zarr.dtype.VariableLengthBytes`][]. In these cases, the default filters contain a single element, which is a codec specific to that particular data type. To create an array with no filters, provide an empty iterable or the value ``None``. cache_metadata : bool, optional If True, array configuration metadata will be cached for the lifetime of the object. If False, array metadata will be reloaded prior to all data access and modification operations (may incur overhead depending on storage and data access pattern). cache_attrs : bool, optional If True (default), user attributes will be cached for attribute read operations. If False, user attributes are reloaded from the store prior to all attribute read operations. read_only : bool, optional True if array should be protected against modification. object_codec : Codec, optional A codec to encode object arrays, only needed if dtype=object. dimension_separator : {'.', '/'}, optional Separator placed between the dimensions of a chunk. Zarr format 2 only. Zarr format 3 arrays should use ``chunk_key_encoding`` instead. write_empty_chunks : bool, optional Deprecated in favor of the ``config`` keyword argument. Pass ``{'write_empty_chunks': }`` to ``create`` instead of using this parameter. If True, all chunks will be stored regardless of their contents. If False, each chunk is compared to the array's fill value prior to storing. If a chunk is uniformly equal to the fill value, then that chunk is not stored, and the store entry for that chunk's key is deleted. zarr_format : {2, 3, None}, optional The Zarr format to use when creating an array. The default is ``None``, which instructs Zarr to choose the default Zarr format value defined in the runtime configuration. meta_array : array-like, optional Not implemented. attributes : dict[str, JSON], optional A dictionary of user attributes to store with the array. chunk_shape : int or tuple of ints, optional The shape of the Array's chunks (default is None). Zarr format 3 only. Zarr format 2 arrays should use `chunks` instead. chunk_key_encoding : ChunkKeyEncoding, optional A specification of how the chunk keys are represented in storage. Zarr format 3 only. Zarr format 2 arrays should use `dimension_separator` instead. Default is ``("default", "/")``. codecs : Sequence of Codecs or dicts, optional An iterable of Codec or dict serializations of Codecs. Zarr V3 only. The elements of ``codecs`` specify the transformation from array values to stored bytes. Zarr format 3 only. Zarr format 2 arrays should use ``filters`` and ``compressor`` instead. If no codecs are provided, default codecs will be used based on the data type of the array. For most data types, the default codecs are the tuple ``(BytesCodec(), ZstdCodec())``; data types that require a special [`zarr.abc.codec.ArrayBytesCodec`][], like variable-length strings or bytes, will use the [`zarr.abc.codec.ArrayBytesCodec`][] required for the data type instead of [`zarr.codecs.BytesCodec`][]. dimension_names : Iterable[str | None] | None = None An iterable of dimension names. Zarr format 3 only.
storage_options : dict If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. config : ArrayConfigLike, optional Runtime configuration of the array. If provided, will override the default values from `zarr.config.array`. Returns ------- z : array The array. """ zarr_format = ( _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) or _default_zarr_format() ) if synchronizer is not None: warnings.warn("synchronizer is not yet implemented", ZarrRuntimeWarning, stacklevel=2) if chunk_store is not None: warnings.warn("chunk_store is not yet implemented", ZarrRuntimeWarning, stacklevel=2) if cache_metadata is not None: warnings.warn("cache_metadata is not yet implemented", ZarrRuntimeWarning, stacklevel=2) if cache_attrs is not None: warnings.warn("cache_attrs is not yet implemented", ZarrRuntimeWarning, stacklevel=2) if object_codec is not None: warnings.warn("object_codec is not yet implemented", ZarrRuntimeWarning, stacklevel=2) if read_only is not None: warnings.warn("read_only is not yet implemented", ZarrRuntimeWarning, stacklevel=2) if meta_array is not None: warnings.warn("meta_array is not yet implemented", ZarrRuntimeWarning, stacklevel=2) if write_empty_chunks is not None: _warn_write_empty_chunks_kwarg() mode = kwargs.pop("mode", None) if mode is None: mode = "a" store_path = await make_store_path(store, path=path, mode=mode, storage_options=storage_options) config_parsed = parse_array_config(config) if write_empty_chunks is not None: if config is not None: msg = ( "Both write_empty_chunks and config keyword arguments are set. " "This is redundant. When both are set, write_empty_chunks will be used instead " "of the value in config." ) warnings.warn(ZarrUserWarning(msg), stacklevel=1) config_parsed = dataclasses.replace(config_parsed, write_empty_chunks=write_empty_chunks) return await AsyncArray._create( store_path, shape=shape, chunks=chunks, dtype=dtype, compressor=compressor, fill_value=fill_value, overwrite=overwrite, filters=filters, dimension_separator=dimension_separator, order=order, zarr_format=zarr_format, chunk_shape=chunk_shape, chunk_key_encoding=chunk_key_encoding, codecs=codecs, dimension_names=dimension_names, attributes=attributes, config=config_parsed, **kwargs, ) async def empty(shape: tuple[int, ...], **kwargs: Any) -> AnyAsyncArray: """Create an empty array with the specified shape. The contents will be filled with the specified fill value or zeros if no fill value is provided. Parameters ---------- shape : int or tuple of int Shape of the empty array. **kwargs Keyword arguments passed to [`create`][zarr.api.asynchronous.create]. Notes ----- The contents of an empty Zarr array are not defined. On attempting to retrieve data from an empty Zarr array, any values may be returned, and these are not guaranteed to be stable from one access to the next. """ return await create(shape=shape, **kwargs) async def empty_like(a: ArrayLike, **kwargs: Any) -> AnyAsyncArray: """Create an empty array like `a`. The contents will be filled with the array's fill value or zeros if no fill value is provided. Parameters ---------- a : array-like The array to create an empty array like. **kwargs Keyword arguments passed to [`create`][zarr.api.asynchronous.create]. Returns ------- Array The new array. Notes ----- The contents of an empty Zarr array are not defined. 
On attempting to retrieve data from an empty Zarr array, any values may be returned, and these are not guaranteed to be stable from one access to the next. """ like_kwargs = _like_args(a) | kwargs if isinstance(a, (AsyncArray | Array)): like_kwargs.setdefault("fill_value", a.metadata.fill_value) return await empty(**like_kwargs) # type: ignore[arg-type] # TODO: add type annotations for fill_value and kwargs async def full(shape: tuple[int, ...], fill_value: Any, **kwargs: Any) -> AnyAsyncArray: """Create an array, with `fill_value` being used as the default value for uninitialized portions of the array. Parameters ---------- shape : int or tuple of int Shape of the empty array. fill_value : scalar Fill value. **kwargs Keyword arguments passed to [`create`][zarr.api.asynchronous.create]. Returns ------- Array The new array. """ return await create(shape=shape, fill_value=fill_value, **kwargs) # TODO: add type annotations for kwargs async def full_like(a: ArrayLike, **kwargs: Any) -> AnyAsyncArray: """Create a filled array like `a`. Parameters ---------- a : array-like The array to create an empty array like. **kwargs Keyword arguments passed to [`zarr.api.asynchronous.create`][]. Returns ------- Array The new array. """ like_kwargs = _like_args(a) | kwargs if isinstance(a, (AsyncArray | Array)): like_kwargs.setdefault("fill_value", a.metadata.fill_value) return await full(**like_kwargs) # type: ignore[arg-type] async def ones(shape: tuple[int, ...], **kwargs: Any) -> AnyAsyncArray: """Create an array, with one being used as the default value for uninitialized portions of the array. Parameters ---------- shape : int or tuple of int Shape of the empty array. **kwargs Keyword arguments passed to [`zarr.api.asynchronous.create`][]. Returns ------- Array The new array. """ return await create(shape=shape, fill_value=1, **kwargs) async def ones_like(a: ArrayLike, **kwargs: Any) -> AnyAsyncArray: """Create an array of ones like `a`. Parameters ---------- a : array-like The array to create an empty array like. **kwargs Keyword arguments passed to [`zarr.api.asynchronous.create`][]. Returns ------- Array The new array. """ like_kwargs = _like_args(a) | kwargs return await ones(**like_kwargs) # type: ignore[arg-type] async def open_array( *, # note: this is a change from v2 store: StoreLike | None = None, zarr_version: ZarrFormat | None = None, # deprecated zarr_format: ZarrFormat | None = None, path: PathLike = "", storage_options: dict[str, Any] | None = None, **kwargs: Any, # TODO: type kwargs as valid args to save ) -> AnyAsyncArray: """Open an array using file-mode-like semantics. Parameters ---------- store : StoreLike StoreLike object to open. See the [storage documentation in the user guide][user-guide-store-like] for a description of all valid StoreLike values. zarr_version : {2, 3, None}, optional The zarr format to use when saving. Deprecated in favor of zarr_format. zarr_format : {2, 3, None}, optional The zarr format to use when saving. path : str, optional Path in store to array. storage_options : dict If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. **kwargs Any keyword arguments to pass to [`create`][zarr.api.asynchronous.create]. Returns ------- AsyncArray The opened array. 
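Examples
--------
A minimal, illustrative sketch of opening an array with the asynchronous API; the in-memory store and the array parameters shown here are assumptions made for the example, not defaults:

```python
import asyncio

import zarr.api.asynchronous as async_api
from zarr.storage import MemoryStore

async def main() -> None:
    store = MemoryStore()
    # With mode="a" the array is created if it does not already exist;
    # the extra keyword arguments are forwarded to `create`.
    arr = await async_api.open_array(
        store=store, mode="a", shape=(100,), chunks=(10,), dtype="i4"
    )
    print(arr.shape)

asyncio.run(main())
```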
""" mode = kwargs.pop("mode", None) store_path = await make_store_path(store, path=path, mode=mode, storage_options=storage_options) zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) if "write_empty_chunks" in kwargs: _warn_write_empty_chunks_kwarg() try: return await AsyncArray.open(store_path, zarr_format=zarr_format) except FileNotFoundError as err: if not store_path.read_only and mode in _CREATE_MODES: overwrite = _infer_overwrite(mode) _zarr_format = zarr_format or _default_zarr_format() return await create( store=store_path, zarr_format=_zarr_format, overwrite=overwrite, **kwargs, ) msg = f"No array found in store {store_path.store} at path {store_path.path}" raise ArrayNotFoundError(msg) from err async def open_like(a: ArrayLike, path: str, **kwargs: Any) -> AnyAsyncArray: """Open a persistent array like `a`. Parameters ---------- a : Array The shape and data-type of a define these same attributes of the returned array. path : str The path to the new array. **kwargs Any keyword arguments to pass to the array constructor. Returns ------- AsyncArray The opened array. """ like_kwargs = _like_args(a) | kwargs if isinstance(a, (AsyncArray | Array)): like_kwargs.setdefault("fill_value", a.metadata.fill_value) return await open_array(path=path, **like_kwargs) # type: ignore[arg-type] async def zeros(shape: tuple[int, ...], **kwargs: Any) -> AnyAsyncArray: """Create an array, with zero being used as the default value for uninitialized portions of the array. Parameters ---------- shape : int or tuple of int Shape of the empty array. **kwargs Keyword arguments passed to [`zarr.api.asynchronous.create`][]. Returns ------- Array The new array. """ return await create(shape=shape, fill_value=0, **kwargs) async def zeros_like(a: ArrayLike, **kwargs: Any) -> AnyAsyncArray: """Create an array of zeros like `a`. Parameters ---------- a : array-like The array to create an empty array like. **kwargs Keyword arguments passed to [`create`][zarr.api.asynchronous.create]. Returns ------- Array The new array. 
""" like_kwargs = _like_args(a) | kwargs return await zeros(**like_kwargs) # type: ignore[arg-type] zarr-python-3.1.5/src/zarr/api/synchronous.py000066400000000000000000001524351511007055700213120ustar00rootroot00000000000000from __future__ import annotations from typing import TYPE_CHECKING, Any, Literal from typing_extensions import deprecated import zarr.api.asynchronous as async_api import zarr.core.array from zarr.core.array import DEFAULT_FILL_VALUE, Array, AsyncArray, CompressorLike from zarr.core.group import Group from zarr.core.sync import sync from zarr.core.sync_group import create_hierarchy from zarr.errors import ZarrDeprecationWarning if TYPE_CHECKING: from collections.abc import Iterable import numpy as np import numpy.typing as npt from zarr.abc.codec import Codec from zarr.abc.numcodec import Numcodec from zarr.api.asynchronous import ArrayLike, PathLike from zarr.core.array import ( CompressorsLike, FiltersLike, SerializerLike, ShardsLike, ) from zarr.core.array_spec import ArrayConfigLike from zarr.core.buffer import NDArrayLike, NDArrayLikeOrScalar from zarr.core.chunk_key_encodings import ChunkKeyEncoding, ChunkKeyEncodingLike from zarr.core.common import ( JSON, AccessModeLiteral, DimensionNames, MemoryOrder, ShapeLike, ZarrFormat, ) from zarr.core.dtype import ZDTypeLike from zarr.storage import StoreLike from zarr.types import AnyArray __all__ = [ "array", "consolidate_metadata", "copy", "copy_all", "copy_store", "create", "create_array", "create_hierarchy", "empty", "empty_like", "from_array", "full", "full_like", "group", "load", "ones", "ones_like", "open", "open_array", "open_consolidated", "open_group", "open_like", "save", "save_array", "save_group", "tree", "zeros", "zeros_like", ] def consolidate_metadata( store: StoreLike, path: str | None = None, zarr_format: ZarrFormat | None = None, ) -> Group: """ Consolidate the metadata of all nodes in a hierarchy. Upon completion, the metadata of the root node in the Zarr hierarchy will be updated to include all the metadata of child nodes. For Stores that do not use consolidated metadata, this operation raises a `TypeError`. Parameters ---------- store : StoreLike The store-like object whose metadata you wish to consolidate. See the [storage documentation in the user guide][user-guide-store-like] for a description of all valid StoreLike values. path : str, optional A path to a group in the store to consolidate at. Only children below that group will be consolidated. By default, the root node is used so all the metadata in the store is consolidated. zarr_format : {2, 3, None}, optional The zarr format of the hierarchy. By default the zarr format is inferred. Returns ------- group: Group The group, with the ``consolidated_metadata`` field set to include the metadata of each child node. If the Store doesn't support consolidated metadata, this function raises a `TypeError`. See ``Store.supports_consolidated_metadata``. """ return Group(sync(async_api.consolidate_metadata(store, path=path, zarr_format=zarr_format))) def copy(*args: Any, **kwargs: Any) -> tuple[int, int, int]: """ Not implemented. """ return sync(async_api.copy(*args, **kwargs)) def copy_all(*args: Any, **kwargs: Any) -> tuple[int, int, int]: """ Not implemented. """ return sync(async_api.copy_all(*args, **kwargs)) def copy_store(*args: Any, **kwargs: Any) -> tuple[int, int, int]: """ Not implemented. 
""" return sync(async_api.copy_store(*args, **kwargs)) def load( store: StoreLike, path: str | None = None, zarr_format: ZarrFormat | None = None, zarr_version: ZarrFormat | None = None, ) -> NDArrayLikeOrScalar | dict[str, NDArrayLikeOrScalar]: """Load data from an array or group into memory. Parameters ---------- store : StoreLike StoreLike object to open. See the [storage documentation in the user guide][user-guide-store-like] for a description of all valid StoreLike values. path : str or None, optional The path within the store from which to load. Returns ------- out If the path contains an array, out will be a numpy array. If the path contains a group, out will be a dict-like object where keys are array names and values are numpy arrays. See Also -------- save, savez Notes ----- If loading data from a group of arrays, data will not be immediately loaded into memory. Rather, arrays will be loaded into memory as they are requested. """ return sync( async_api.load(store=store, zarr_version=zarr_version, zarr_format=zarr_format, path=path) ) def open( store: StoreLike | None = None, *, mode: AccessModeLiteral | None = None, zarr_version: ZarrFormat | None = None, # deprecated zarr_format: ZarrFormat | None = None, path: str | None = None, storage_options: dict[str, Any] | None = None, **kwargs: Any, # TODO: type kwargs as valid args to async_api.open ) -> AnyArray | Group: """Open a group or array using file-mode-like semantics. Parameters ---------- store : StoreLike or None, default=None StoreLike object to open. See the [storage documentation in the user guide][user-guide-store-like] for a description of all valid StoreLike values. mode : {'r', 'r+', 'a', 'w', 'w-'}, optional Persistence mode: 'r' means read only (must exist); 'r+' means read/write (must exist); 'a' means read/write (create if doesn't exist); 'w' means create (overwrite if exists); 'w-' means create (fail if exists). If the store is read-only, the default is 'r'; otherwise, it is 'a'. zarr_format : {2, 3, None}, optional The zarr format to use when saving. path : str or None, optional The path within the store to open. storage_options : dict If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. **kwargs Additional parameters are passed through to [`zarr.creation.open_array`][] or [`open_group`][zarr.api.asynchronous.open_group]. Returns ------- z : array or group Return type depends on what exists in the given store. """ obj = sync( async_api.open( store=store, mode=mode, zarr_version=zarr_version, zarr_format=zarr_format, path=path, storage_options=storage_options, **kwargs, ) ) if isinstance(obj, AsyncArray): return Array(obj) else: return Group(obj) def open_consolidated(*args: Any, use_consolidated: Literal[True] = True, **kwargs: Any) -> Group: """ Alias for [`open_group`][zarr.api.synchronous.open_group] with ``use_consolidated=True``. """ return Group( sync(async_api.open_consolidated(*args, use_consolidated=use_consolidated, **kwargs)) ) def save( store: StoreLike, *args: NDArrayLike, zarr_version: ZarrFormat | None = None, # deprecated zarr_format: ZarrFormat | None = None, path: str | None = None, **kwargs: Any, # TODO: type kwargs as valid args to async_api.save ) -> None: """Save an array or group of arrays to the local file system. Parameters ---------- store : StoreLike StoreLike object to open. See the [storage documentation in the user guide][user-guide-store-like] for a description of all valid StoreLike values. 
*args : ndarray NumPy arrays with data to save. zarr_format : {2, 3, None}, optional The zarr format to use when saving. path : str or None, optional The path within the group where the arrays will be saved. **kwargs NumPy arrays with data to save. """ return sync( async_api.save( store, *args, zarr_version=zarr_version, zarr_format=zarr_format, path=path, **kwargs ) ) def save_array( store: StoreLike, arr: NDArrayLike, *, zarr_version: ZarrFormat | None = None, # deprecated zarr_format: ZarrFormat | None = None, path: str | None = None, storage_options: dict[str, Any] | None = None, **kwargs: Any, # TODO: type kwargs as valid args to async_api.save_array ) -> None: """Save a NumPy array to the local file system. Follows a similar API to the NumPy save() function. Parameters ---------- store : StoreLike StoreLike object to open. See the [storage documentation in the user guide][user-guide-store-like] for a description of all valid StoreLike values. arr : ndarray NumPy array with data to save. zarr_format : {2, 3, None}, optional The zarr format to use when saving. The default is ``None``, which will use the default Zarr format defined in the global configuration object. path : str or None, optional The path within the store where the array will be saved. storage_options : dict If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. **kwargs Passed through to [`create`][zarr.api.asynchronous.create], e.g., compressor. """ return sync( async_api.save_array( store=store, arr=arr, zarr_version=zarr_version, zarr_format=zarr_format, path=path, storage_options=storage_options, **kwargs, ) ) def save_group( store: StoreLike, *args: NDArrayLike, zarr_version: ZarrFormat | None = None, # deprecated zarr_format: ZarrFormat | None = None, path: str | None = None, storage_options: dict[str, Any] | None = None, **kwargs: NDArrayLike, ) -> None: """Save several NumPy arrays to the local file system. Follows a similar API to the NumPy savez()/savez_compressed() functions. Parameters ---------- store : StoreLike StoreLike object to open. See the [storage documentation in the user guide][user-guide-store-like] for a description of all valid StoreLike values. *args : ndarray NumPy arrays with data to save. zarr_format : {2, 3, None}, optional The zarr format to use when saving. path : str or None, optional Path within the store where the group will be saved. storage_options : dict If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. **kwargs NumPy arrays with data to save. """ return sync( async_api.save_group( store, *args, zarr_version=zarr_version, zarr_format=zarr_format, path=path, storage_options=storage_options, **kwargs, ) ) @deprecated("Use Group.tree instead.", category=ZarrDeprecationWarning) def tree(grp: Group, expand: bool | None = None, level: int | None = None) -> Any: """Provide a rich display of the hierarchy. !!! warning "Deprecated" `zarr.tree()` is deprecated since v3.0.0 and will be removed in a future release. Use `group.tree()` instead. Parameters ---------- grp : Group Zarr or h5py group. expand : bool, optional Only relevant for HTML representation. If True, tree will be fully expanded. level : int, optional Maximum depth to descend into hierarchy. Returns ------- TreeRepr A pretty-printable object displaying the hierarchy. 
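Examples
--------
A minimal sketch of the recommended replacement, ``Group.tree``; the in-memory group and the node names are illustrative only:

```python
import zarr

root = zarr.group()  # in-memory group
root.create_group("foo")
root.create_array("bar", shape=(10,), dtype="i4")
print(root.tree())  # pretty-prints the hierarchy rooted at `root`
```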
""" return sync(async_api.tree(grp._async_group, expand=expand, level=level)) # TODO: add type annotations for kwargs def array(data: npt.ArrayLike | AnyArray, **kwargs: Any) -> AnyArray: """Create an array filled with `data`. Parameters ---------- data : array_like The data to fill the array with. **kwargs Passed through to [`create`][zarr.api.asynchronous.create]. Returns ------- array : Array The new array. """ return Array(sync(async_api.array(data=data, **kwargs))) def group( store: StoreLike | None = None, *, overwrite: bool = False, chunk_store: StoreLike | None = None, # not used cache_attrs: bool | None = None, # not used, default changed synchronizer: Any | None = None, # not used path: str | None = None, zarr_version: ZarrFormat | None = None, # deprecated zarr_format: ZarrFormat | None = None, meta_array: Any | None = None, # not used attributes: dict[str, JSON] | None = None, storage_options: dict[str, Any] | None = None, ) -> Group: """Create a group. Parameters ---------- store : StoreLike or None, default=None StoreLike object to open. See the [storage documentation in the user guide][user-guide-store-like] for a description of all valid StoreLike values. overwrite : bool, optional If True, delete any pre-existing data in `store` at `path` before creating the group. chunk_store : StoreLike or None, default=None Separate storage for chunks. Not implemented. cache_attrs : bool, optional If True (default), user attributes will be cached for attribute read operations. If False, user attributes are reloaded from the store prior to all attribute read operations. synchronizer : object, optional Array synchronizer. path : str, optional Group path within store. meta_array : array-like, optional An array instance to use for determining arrays to create and return to users. Use `numpy.empty(())` by default. zarr_format : {2, 3, None}, optional The zarr format to use when saving. storage_options : dict If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. Returns ------- g : Group The new group. """ return Group( sync( async_api.group( store=store, overwrite=overwrite, chunk_store=chunk_store, cache_attrs=cache_attrs, synchronizer=synchronizer, path=path, zarr_version=zarr_version, zarr_format=zarr_format, meta_array=meta_array, attributes=attributes, storage_options=storage_options, ) ) ) def open_group( store: StoreLike | None = None, *, mode: AccessModeLiteral = "a", cache_attrs: bool | None = None, # default changed, not used in async api synchronizer: Any = None, # not used in async api path: str | None = None, chunk_store: StoreLike | None = None, # not used in async api storage_options: dict[str, Any] | None = None, # not used in async api zarr_version: ZarrFormat | None = None, # deprecated zarr_format: ZarrFormat | None = None, meta_array: Any | None = None, # not used in async api attributes: dict[str, JSON] | None = None, use_consolidated: bool | str | None = None, ) -> Group: """Open a group using file-mode-like semantics. Parameters ---------- store : StoreLike or None, default=None StoreLike object to open. See the [storage documentation in the user guide][user-guide-store-like] for a description of all valid StoreLike values. mode : {'r', 'r+', 'a', 'w', 'w-'}, optional Persistence mode: 'r' means read only (must exist); 'r+' means read/write (must exist); 'a' means read/write (create if doesn't exist); 'w' means create (overwrite if exists); 'w-' means create (fail if exists). 
cache_attrs : bool, optional If True (default), user attributes will be cached for attribute read operations. If False, user attributes are reloaded from the store prior to all attribute read operations. synchronizer : object, optional Array synchronizer. path : str, optional Group path within store. chunk_store : StoreLike or None, default=None Separate storage for chunks. See the [storage documentation in the user guide][user-guide-store-like] for a description of all valid StoreLike values. storage_options : dict If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. meta_array : array-like, optional An array instance to use for determining arrays to create and return to users. Use `numpy.empty(())` by default. attributes : dict A dictionary of JSON-serializable values with user-defined attributes. use_consolidated : bool or str, default None Whether to use consolidated metadata. By default, consolidated metadata is used if it's present in the store (in the ``zarr.json`` for Zarr format 3 and in the ``.zmetadata`` file for Zarr format 2). To explicitly require consolidated metadata, set ``use_consolidated=True``, which will raise an exception if consolidated metadata is not found. To explicitly *not* use consolidated metadata, set ``use_consolidated=False``, which will fall back to using the regular, non consolidated metadata. Zarr format 2 allowed configuring the key storing the consolidated metadata (``.zmetadata`` by default). Specify the custom key as ``use_consolidated`` to load consolidated metadata from a non-default key. Returns ------- g : Group The new group. """ return Group( sync( async_api.open_group( store=store, mode=mode, cache_attrs=cache_attrs, synchronizer=synchronizer, path=path, chunk_store=chunk_store, storage_options=storage_options, zarr_version=zarr_version, zarr_format=zarr_format, meta_array=meta_array, attributes=attributes, use_consolidated=use_consolidated, ) ) ) def create_group( store: StoreLike, *, path: str | None = None, zarr_format: ZarrFormat | None = None, overwrite: bool = False, attributes: dict[str, Any] | None = None, storage_options: dict[str, Any] | None = None, ) -> Group: """Create a group. Parameters ---------- store : StoreLike StoreLike object to open. See the [storage documentation in the user guide][user-guide-store-like] for a description of all valid StoreLike values. path : str, optional Group path within store. overwrite : bool, optional If True, pre-existing data at ``path`` will be deleted before creating the group. zarr_format : {2, 3, None}, optional The zarr format to use when saving. If no ``zarr_format`` is provided, the default format will be used. This default can be changed by modifying the value of ``default_zarr_format`` in [`zarr.config`][zarr.config]. storage_options : dict If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. Returns ------- Group The new group. """ return Group( sync( async_api.create_group( store=store, path=path, overwrite=overwrite, storage_options=storage_options, zarr_format=zarr_format, attributes=attributes, ) ) ) # TODO: add type annotations for kwargs def create( shape: tuple[int, ...] | int, *, # Note: this is a change from v2 chunks: tuple[int, ...] 
| int | bool | None = None, dtype: ZDTypeLike | None = None, compressor: CompressorLike = "auto", fill_value: Any | None = DEFAULT_FILL_VALUE, # TODO: need type order: MemoryOrder | None = None, store: StoreLike | None = None, synchronizer: Any | None = None, overwrite: bool = False, path: PathLike | None = None, chunk_store: StoreLike | None = None, filters: Iterable[dict[str, JSON] | Numcodec] | None = None, cache_metadata: bool | None = None, cache_attrs: bool | None = None, read_only: bool | None = None, object_codec: Codec | None = None, # TODO: type has changed dimension_separator: Literal[".", "/"] | None = None, write_empty_chunks: bool | None = None, # TODO: default has changed zarr_version: ZarrFormat | None = None, # deprecated zarr_format: ZarrFormat | None = None, meta_array: Any | None = None, # TODO: need type attributes: dict[str, JSON] | None = None, # v3 only chunk_shape: tuple[int, ...] | int | None = None, chunk_key_encoding: ( ChunkKeyEncoding | tuple[Literal["default"], Literal[".", "/"]] | tuple[Literal["v2"], Literal[".", "/"]] | None ) = None, codecs: Iterable[Codec | dict[str, JSON]] | None = None, dimension_names: DimensionNames = None, storage_options: dict[str, Any] | None = None, config: ArrayConfigLike | None = None, **kwargs: Any, ) -> AnyArray: """Create an array. Parameters ---------- shape : int or tuple of ints Array shape. chunks : int or tuple of ints, optional Chunk shape. If True, will be guessed from ``shape`` and ``dtype``. If False, will be set to ``shape``, i.e., single chunk for the whole array. If an int, the chunk size in each dimension will be given by the value of ``chunks``. Default is True. dtype : str or dtype, optional NumPy dtype. compressor : Codec, optional Primary compressor to compress chunk data. Zarr format 2 only. Zarr format 3 arrays should use ``codecs`` instead. If neither ``compressor`` nor ``filters`` are provided, the default compressor [`zarr.codecs.ZstdCodec`][] is used. If ``compressor`` is set to ``None``, no compression is used. fill_value : Any, optional Fill value for the array. order : {'C', 'F'}, optional Deprecated in favor of the ``config`` keyword argument. Pass ``{'order': }`` to ``create`` instead of using this parameter. Memory layout to be used within each chunk. If not specified, the ``array.order`` parameter in the global config will be used. store : StoreLike or None, default=None StoreLike object to open. See the [storage documentation in the user guide][user-guide-store-like] for a description of all valid StoreLike values. synchronizer : object, optional Array synchronizer. overwrite : bool, optional If True, delete all pre-existing data in ``store`` at ``path`` before creating the array. path : str, optional Path under which array is stored. chunk_store : StoreLike or None, default=None Separate storage for chunks. If not provided, ``store`` will be used for storage of both chunks and metadata. filters : Iterable[Codec] | Literal["auto"], optional Iterable of filters to apply to each chunk of the array, in order, before serializing that chunk to bytes. For Zarr format 3, a "filter" is a codec that takes an array and returns an array, and these values must be instances of [`zarr.abc.codec.ArrayArrayCodec`][], or dict representations of [`zarr.abc.codec.ArrayArrayCodec`][]. For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the order of your filters is consistent with the behavior of each filter.
The default value of ``"auto"`` instructs Zarr to use a default based on the data type of the array and the Zarr format specified. For all data types in Zarr V3, and most data types in Zarr V2, the default filters are empty. The only case where the default filters are not empty is when the Zarr format is 2, and the data type is a variable-length data type like [`zarr.dtype.VariableLengthUTF8`][] or [`zarr.dtype.VariableLengthBytes`][]. In these cases, the default filters contain a single element, which is a codec specific to that particular data type. To create an array with no filters, provide an empty iterable or the value ``None``. cache_metadata : bool, optional If True, array configuration metadata will be cached for the lifetime of the object. If False, array metadata will be reloaded prior to all data access and modification operations (may incur overhead depending on storage and data access pattern). cache_attrs : bool, optional If True (default), user attributes will be cached for attribute read operations. If False, user attributes are reloaded from the store prior to all attribute read operations. read_only : bool, optional True if array should be protected against modification. object_codec : Codec, optional A codec to encode object arrays, only needed if dtype=object. dimension_separator : {'.', '/'}, optional Separator placed between the dimensions of a chunk. Zarr format 2 only. Zarr format 3 arrays should use ``chunk_key_encoding`` instead. write_empty_chunks : bool, optional Deprecated in favor of the ``config`` keyword argument. Pass ``{'write_empty_chunks': }`` to ``create`` instead of using this parameter. If True, all chunks will be stored regardless of their contents. If False, each chunk is compared to the array's fill value prior to storing. If a chunk is uniformly equal to the fill value, then that chunk is not stored, and the store entry for that chunk's key is deleted. zarr_format : {2, 3, None}, optional The Zarr format to use when creating an array. The default is ``None``, which instructs Zarr to choose the default Zarr format value defined in the runtime configuration. meta_array : array-like, optional Not implemented. attributes : dict[str, JSON], optional A dictionary of user attributes to store with the array. chunk_shape : int or tuple of ints, optional The shape of the Array's chunks (default is None). Zarr format 3 only. Zarr format 2 arrays should use `chunks` instead. chunk_key_encoding : ChunkKeyEncoding, optional A specification of how the chunk keys are represented in storage. Zarr format 3 only. Zarr format 2 arrays should use `dimension_separator` instead. Default is ``("default", "/")``. codecs : Sequence of Codecs or dicts, optional An iterable of Codec or dict serializations of Codecs. Zarr V3 only. The elements of ``codecs`` specify the transformation from array values to stored bytes. Zarr format 3 only. Zarr format 2 arrays should use ``filters`` and ``compressor`` instead. If no codecs are provided, default codecs will be used based on the data type of the array. For most data types, the default codecs are the tuple ``(BytesCodec(), ZstdCodec())``; data types that require a special [`zarr.abc.codec.ArrayBytesCodec`][], like variable-length strings or bytes, will use the [`zarr.abc.codec.ArrayBytesCodec`][] required for the data type instead of [`zarr.codecs.BytesCodec`][]. dimension_names : Iterable[str | None] | None = None An iterable of dimension names. Zarr format 3 only.
storage_options : dict If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. config : ArrayConfigLike, optional Runtime configuration of the array. If provided, will override the default values from `zarr.config.array`. Returns ------- z : Array The array. """ return Array( sync( async_api.create( shape=shape, chunks=chunks, dtype=dtype, compressor=compressor, fill_value=fill_value, order=order, store=store, synchronizer=synchronizer, overwrite=overwrite, path=path, chunk_store=chunk_store, filters=filters, cache_metadata=cache_metadata, cache_attrs=cache_attrs, read_only=read_only, object_codec=object_codec, dimension_separator=dimension_separator, write_empty_chunks=write_empty_chunks, zarr_version=zarr_version, zarr_format=zarr_format, meta_array=meta_array, attributes=attributes, chunk_shape=chunk_shape, chunk_key_encoding=chunk_key_encoding, codecs=codecs, dimension_names=dimension_names, storage_options=storage_options, config=config, **kwargs, ) ) ) def create_array( store: StoreLike, *, name: str | None = None, shape: ShapeLike | None = None, dtype: ZDTypeLike | None = None, data: np.ndarray[Any, np.dtype[Any]] | None = None, chunks: tuple[int, ...] | Literal["auto"] = "auto", shards: ShardsLike | None = None, filters: FiltersLike = "auto", compressors: CompressorsLike = "auto", serializer: SerializerLike = "auto", fill_value: Any | None = DEFAULT_FILL_VALUE, order: MemoryOrder | None = None, zarr_format: ZarrFormat | None = 3, attributes: dict[str, JSON] | None = None, chunk_key_encoding: ChunkKeyEncodingLike | None = None, dimension_names: DimensionNames = None, storage_options: dict[str, Any] | None = None, overwrite: bool = False, config: ArrayConfigLike | None = None, write_data: bool = True, ) -> AnyArray: """Create an array. This function wraps [zarr.core.array.create_array][]. Parameters ---------- store : StoreLike StoreLike object to open. See the [storage documentation in the user guide][user-guide-store-like] for a description of all valid StoreLike values. name : str or None, optional The name of the array within the store. If ``name`` is ``None``, the array will be located at the root of the store. shape : ShapeLike, optional Shape of the array. Must be ``None`` if ``data`` is provided. dtype : ZDTypeLike | None Data type of the array. Must be ``None`` if ``data`` is provided. data : np.ndarray, optional Array-like data to use for initializing the array. If this parameter is provided, the ``shape`` and ``dtype`` parameters must be ``None``. chunks : tuple[int, ...] | Literal["auto"], default="auto" Chunk shape of the array. If chunks is "auto", a chunk shape is guessed based on the shape of the array and the dtype. shards : tuple[int, ...], optional Shard shape of the array. The default value of ``None`` results in no sharding at all. filters : Iterable[Codec] | Literal["auto"], optional Iterable of filters to apply to each chunk of the array, in order, before serializing that chunk to bytes. For Zarr format 3, a "filter" is a codec that takes an array and returns an array, and these values must be instances of [`zarr.abc.codec.ArrayArrayCodec`][], or dict representations of [`zarr.abc.codec.ArrayArrayCodec`][]. For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the order of your filters is consistent with the behavior of each filter. The default value of ``"auto"`` instructs Zarr to use a default based on the data type of the array and the Zarr format specified.
For all data types in Zarr V3, and most data types in Zarr V2, the default filters are empty. The only case where the default filters are not empty is when the Zarr format is 2, and the data type is a variable-length data type like [`zarr.dtype.VariableLengthUTF8`][] or [`zarr.dtype.VariableLengthBytes`][]. In these cases, the default filters contain a single element, which is a codec specific to that particular data type. To create an array with no filters, provide an empty iterable or the value ``None``. compressors : Iterable[Codec], optional List of compressors to apply to the array. Compressors are applied in order, and after any filters are applied (if any are specified) and the data is serialized into bytes. For Zarr format 3, a "compressor" is a codec that takes a bytestream, and returns another bytestream. Multiple compressors may be provided for Zarr format 3. If no ``compressors`` are provided, a default set of compressors will be used. These defaults can be changed by modifying the value of ``array.v3_default_compressors`` in [`zarr.config`][zarr.config]. Use ``None`` to omit default compressors. For Zarr format 2, a "compressor" can be any numcodecs codec. Only a single compressor may be provided for Zarr format 2. If no ``compressor`` is provided, a default compressor will be used; this default can be changed in [`zarr.config`][zarr.config]. Use ``None`` to omit the default compressor. serializer : dict[str, JSON] | ArrayBytesCodec, optional Array-to-bytes codec to use for encoding the array data. Zarr format 3 only. Zarr format 2 arrays use implicit array-to-bytes conversion. If no ``serializer`` is provided, a default serializer will be used. These defaults can be changed by modifying the value of ``array.v3_default_serializer`` in [`zarr.config`][zarr.config]. fill_value : Any, optional Fill value for the array. order : {"C", "F"}, optional The memory order of the array (default is "C"). For Zarr format 2, this parameter sets the memory order of the array. For Zarr format 3, this parameter is deprecated, because memory order is a runtime parameter for Zarr format 3 arrays. The recommended way to specify the memory order for Zarr format 3 arrays is via the ``config`` parameter, e.g. ``{'config': 'C'}``. If no ``order`` is provided, a default order will be used. This default can be changed by modifying the value of ``array.order`` in [`zarr.config`][zarr.config]. zarr_format : {2, 3}, optional The zarr format to use when saving. attributes : dict, optional Attributes for the array. chunk_key_encoding : ChunkKeyEncodingLike, optional A specification of how the chunk keys are represented in storage. For Zarr format 3, the default is ``{"name": "default", "separator": "/"}``. For Zarr format 2, the default is ``{"name": "v2", "separator": "."}``. dimension_names : Iterable[str], optional The names of the dimensions (default is None). Zarr format 3 only. Zarr format 2 arrays should not use this parameter. storage_options : dict, optional If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. overwrite : bool, default False Whether to overwrite an array with the same name in the store, if one exists. If ``True``, all existing paths in the store will be deleted. config : ArrayConfigLike, optional Runtime configuration for the array.
write_data : bool If a pre-existing array-like object was provided to this function via the ``data`` parameter then ``write_data`` determines whether the values in that array-like object should be written to the Zarr array created by this function. If ``write_data`` is ``False``, then the array will be left empty. Returns ------- Array The array. Examples -------- ```python import zarr store = zarr.storage.MemoryStore() arr = zarr.create_array( store=store, shape=(100,100), chunks=(10,10), dtype='i4', fill_value=0) # ``` """ return Array( sync( zarr.core.array.create_array( store, name=name, shape=shape, dtype=dtype, data=data, chunks=chunks, shards=shards, filters=filters, compressors=compressors, serializer=serializer, fill_value=fill_value, order=order, zarr_format=zarr_format, attributes=attributes, chunk_key_encoding=chunk_key_encoding, dimension_names=dimension_names, storage_options=storage_options, overwrite=overwrite, config=config, write_data=write_data, ) ) ) def from_array( store: StoreLike, *, data: AnyArray | npt.ArrayLike, write_data: bool = True, name: str | None = None, chunks: Literal["auto", "keep"] | tuple[int, ...] = "keep", shards: ShardsLike | None | Literal["keep"] = "keep", filters: FiltersLike | Literal["keep"] = "keep", compressors: CompressorsLike | Literal["keep"] = "keep", serializer: SerializerLike | Literal["keep"] = "keep", fill_value: Any | None = DEFAULT_FILL_VALUE, order: MemoryOrder | None = None, zarr_format: ZarrFormat | None = None, attributes: dict[str, JSON] | None = None, chunk_key_encoding: ChunkKeyEncodingLike | None = None, dimension_names: DimensionNames = None, storage_options: dict[str, Any] | None = None, overwrite: bool = False, config: ArrayConfigLike | None = None, ) -> AnyArray: """Create an array from an existing array or array-like. Parameters ---------- store : StoreLike StoreLike object to open. See the [storage documentation in the user guide][user-guide-store-like] for a description of all valid StoreLike values. data : Array | array-like The array to copy. write_data : bool, default True Whether to copy the data from the input array to the new array. If ``write_data`` is ``False``, the new array will be created with the same metadata as the input array, but without any data. name : str or None, optional The name of the array within the store. If ``name`` is ``None``, the array will be located at the root of the store. chunks : tuple[int, ...] or "auto" or "keep", optional Chunk shape of the array. Following values are supported: - "auto": Automatically determine the chunk shape based on the array's shape and dtype. - "keep": Retain the chunk shape of the data array if it is a zarr Array. - tuple[int, ...]: A tuple of integers representing the chunk shape. If not specified, defaults to "keep" if data is a zarr Array, otherwise "auto". shards : tuple[int, ...], optional Shard shape of the array. Following values are supported: - "auto": Automatically determine the shard shape based on the array's shape and chunk shape. - "keep": Retain the shard shape of the data array if it is a zarr Array. - tuple[int, ...]: A tuple of integers representing the shard shape. - None: No sharding. If not specified, defaults to "keep" if data is a zarr Array, otherwise None. filters : Iterable[Codec] | Literal["auto", "keep"], optional Iterable of filters to apply to each chunk of the array, in order, before serializing that chunk to bytes. 
For Zarr format 3, a "filter" is a codec that takes an array and returns an array, and these values must be instances of [`zarr.abc.codec.ArrayArrayCodec`][], or dict representations of [`zarr.abc.codec.ArrayArrayCodec`][]. For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the order of your filters is consistent with the behavior of each filter. The default value of ``"keep"`` instructs Zarr to infer ``filters`` from ``data``. If that inference is not possible, Zarr will fall back to the behavior specified by ``"auto"``, which is to choose default filters based on the data type of the array and the Zarr format specified. For all data types in Zarr V3, and most data types in Zarr V2, the default filters are the empty tuple ``()``. The only case where the default filters are not empty is when the Zarr format is 2, and the data type is a variable-length data type like [`zarr.dtype.VariableLengthUTF8`][] or [`zarr.dtype.VariableLengthBytes`][]. In these cases, the default value of ``filters`` is a tuple with a single element, which is a codec specific to that particular data type. To create an array with no filters, provide an empty iterable or the value ``None``. compressors : Iterable[Codec] or "auto" or "keep", optional List of compressors to apply to the array. Compressors are applied in order, and after any filters are applied (if any are specified) and the data is serialized into bytes. For Zarr format 3, a "compressor" is a codec that takes a bytestream, and returns another bytestream. Multiple compressors may be provided for Zarr format 3. For Zarr format 2, a "compressor" can be any numcodecs codec. Only a single compressor may be provided for Zarr format 2. Following values are supported: - Iterable[Codec]: List of compressors to apply to the array. - "auto": Automatically determine the compressors based on the array's dtype. - "keep": Retain the compressors of the input array if it is a zarr Array. If no ``compressors`` are provided, defaults to "keep" if data is a zarr Array, otherwise "auto". serializer : dict[str, JSON] | ArrayBytesCodec or "auto" or "keep", optional Array-to-bytes codec to use for encoding the array data. Zarr format 3 only. Zarr format 2 arrays use implicit array-to-bytes conversion. Following values are supported: - dict[str, JSON]: A dict representation of an ``ArrayBytesCodec``. - ArrayBytesCodec: An instance of ``ArrayBytesCodec``. - "auto": a default serializer will be used. These defaults can be changed by modifying the value of ``array.v3_default_serializer`` in [`zarr.config`][zarr.config]. - "keep": Retain the serializer of the input array if it is a zarr Array. fill_value : Any, optional Fill value for the array. If not specified, defaults to the fill value of the data array. order : {"C", "F"}, optional The memory order of the array (default is "C"). For Zarr format 2, this parameter sets the memory order of the array. For Zarr format 3, this parameter is deprecated, because memory order is a runtime parameter for Zarr format 3 arrays. The recommended way to specify the memory order for Zarr format 3 arrays is via the ``config`` parameter, e.g. ``{'config': 'C'}``. If not specified, defaults to the memory order of the data array. zarr_format : {2, 3}, optional The zarr format to use when saving. If not specified, defaults to the zarr format of the data array. attributes : dict, optional Attributes for the array. If not specified, defaults to the attributes of the data array.
chunk_key_encoding : ChunkKeyEncoding, optional A specification of how the chunk keys are represented in storage. For Zarr format 3, the default is ``{"name": "default", "separator": "/"}}``. For Zarr format 2, the default is ``{"name": "v2", "separator": "."}}``. If not specified and the data array has the same zarr format as the target array, the chunk key encoding of the data array is used. dimension_names : Iterable[str | None] | None The names of the dimensions (default is None). Zarr format 3 only. Zarr format 2 arrays should not use this parameter. If not specified, defaults to the dimension names of the data array. storage_options : dict, optional If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. overwrite : bool, default False Whether to overwrite an array with the same name in the store, if one exists. config : ArrayConfig or ArrayConfigLike, optional Runtime configuration for the array. Returns ------- Array The array. Examples -------- Create an array from an existing Array: ```python import zarr store = zarr.storage.MemoryStore() store2 = zarr.storage.LocalStore('example_from_array.zarr') arr = zarr.create_array( store=store, shape=(100,100), chunks=(10,10), dtype='int32', fill_value=0) arr2 = zarr.from_array(store2, data=arr, overwrite=True) # ``` Create an array from an existing NumPy array: ```python import zarr import numpy as np arr3 = zarr.from_array( zarr.storage.MemoryStore(), data=np.arange(10000, dtype='i4').reshape(100, 100), ) # ``` Create an array from any array-like object: ```python import zarr arr4 = zarr.from_array( zarr.storage.MemoryStore(), data=[[1, 2], [3, 4]], ) # arr4[...] # array([[1, 2],[3, 4]]) ``` Create an array from an existing Array without copying the data: ```python import zarr arr4 = zarr.from_array( zarr.storage.MemoryStore(), data=[[1, 2], [3, 4]], ) arr5 = zarr.from_array( zarr.storage.MemoryStore(), data=arr4, write_data=False, ) # arr5[...] # array([[0, 0],[0, 0]]) ``` """ return Array( sync( zarr.core.array.from_array( store, data=data, write_data=write_data, name=name, chunks=chunks, shards=shards, filters=filters, compressors=compressors, serializer=serializer, fill_value=fill_value, order=order, zarr_format=zarr_format, attributes=attributes, chunk_key_encoding=chunk_key_encoding, dimension_names=dimension_names, storage_options=storage_options, overwrite=overwrite, config=config, ) ) ) # TODO: add type annotations for kwargs def empty(shape: tuple[int, ...], **kwargs: Any) -> AnyArray: """Create an empty array with the specified shape. The contents will be filled with the array's fill value or zeros if no fill value is provided. Parameters ---------- shape : int or tuple of int Shape of the empty array. **kwargs Keyword arguments passed to [`create`][zarr.api.asynchronous.create]. Returns ------- Array The new array. Notes ----- The contents of an empty Zarr array are not defined. On attempting to retrieve data from an empty Zarr array, any values may be returned, and these are not guaranteed to be stable from one access to the next. """ return Array(sync(async_api.empty(shape, **kwargs))) # TODO: move ArrayLike to common module # TODO: add type annotations for kwargs def empty_like(a: ArrayLike, **kwargs: Any) -> AnyArray: """Create an empty array like another array. The contents will be filled with the array's fill value or zeros if no fill value is provided. Parameters ---------- a : array-like The array to create an empty array like. 
**kwargs Keyword arguments passed to [`create`][zarr.api.asynchronous.create]. Returns ------- Array The new array. Notes ----- The contents of an empty Zarr array are not defined. On attempting to retrieve data from an empty Zarr array, any values may be returned, and these are not guaranteed to be stable from one access to the next. """ return Array(sync(async_api.empty_like(a, **kwargs))) # TODO: add type annotations for kwargs and fill_value def full(shape: tuple[int, ...], fill_value: Any, **kwargs: Any) -> AnyArray: """Create an array with a default fill value. Parameters ---------- shape : int or tuple of int Shape of the empty array. fill_value : scalar Fill value. **kwargs Keyword arguments passed to [`create`][zarr.api.asynchronous.create]. Returns ------- Array The new array. """ return Array(sync(async_api.full(shape=shape, fill_value=fill_value, **kwargs))) # TODO: move ArrayLike to common module # TODO: add type annotations for kwargs def full_like(a: ArrayLike, **kwargs: Any) -> AnyArray: """Create a filled array like another array. Parameters ---------- a : array-like The array to create an empty array like. **kwargs Keyword arguments passed to [`zarr.api.asynchronous.create`][]. Returns ------- Array The new array. """ return Array(sync(async_api.full_like(a, **kwargs))) # TODO: add type annotations for kwargs def ones(shape: tuple[int, ...], **kwargs: Any) -> AnyArray: """Create an array with a fill value of one. Parameters ---------- shape : int or tuple of int Shape of the empty array. **kwargs Keyword arguments passed to [`zarr.api.asynchronous.create`][]. Returns ------- Array The new array. """ return Array(sync(async_api.ones(shape, **kwargs))) # TODO: add type annotations for kwargs def ones_like(a: ArrayLike, **kwargs: Any) -> AnyArray: """Create an array of ones like another array. Parameters ---------- a : array-like The array to create an empty array like. **kwargs Keyword arguments passed to [`zarr.api.asynchronous.create`][]. Returns ------- Array The new array. """ return Array(sync(async_api.ones_like(a, **kwargs))) # TODO: update this once async_api.open_array is fully implemented def open_array( store: StoreLike | None = None, *, zarr_version: ZarrFormat | None = None, zarr_format: ZarrFormat | None = None, path: PathLike = "", storage_options: dict[str, Any] | None = None, **kwargs: Any, ) -> AnyArray: """Open an array using file-mode-like semantics. Parameters ---------- store : StoreLike StoreLike object to open. See the [storage documentation in the user guide][user-guide-store-like] for a description of all valid StoreLike values. zarr_version : {2, 3, None}, optional The zarr format to use when saving. Deprecated in favor of zarr_format. zarr_format : {2, 3, None}, optional The zarr format to use when saving. path : str, optional Path in store to array. storage_options : dict If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. **kwargs Any keyword arguments to pass to [`create`][zarr.api.asynchronous.create]. Returns ------- AsyncArray The opened array. """ return Array( sync( async_api.open_array( store=store, zarr_version=zarr_version, zarr_format=zarr_format, path=path, storage_options=storage_options, **kwargs, ) ) ) # TODO: add type annotations for kwargs def open_like(a: ArrayLike, path: str, **kwargs: Any) -> AnyArray: """Open a persistent array like another array. Parameters ---------- a : Array The shape and data-type of a define these same attributes of the returned array. 
path : str The path to the new array. **kwargs Any keyword arguments to pass to the array constructor. Returns ------- AsyncArray The opened array. """ return Array(sync(async_api.open_like(a, path=path, **kwargs))) # TODO: add type annotations for kwargs def zeros(shape: tuple[int, ...], **kwargs: Any) -> AnyArray: """Create an array with a fill value of zero. Parameters ---------- shape : int or tuple of int Shape of the empty array. **kwargs Keyword arguments passed to [`zarr.api.asynchronous.create`][]. Returns ------- Array The new array. """ return Array(sync(async_api.zeros(shape=shape, **kwargs))) # TODO: add type annotations for kwargs def zeros_like(a: ArrayLike, **kwargs: Any) -> AnyArray: """Create an array of zeros like another array. Parameters ---------- a : array-like The array to create an empty array like. **kwargs Keyword arguments passed to [`create`][zarr.api.asynchronous.create]. Returns ------- Array The new array. """ return Array(sync(async_api.zeros_like(a, **kwargs))) zarr-python-3.1.5/src/zarr/buffer/000077500000000000000000000000001511007055700170345ustar00rootroot00000000000000zarr-python-3.1.5/src/zarr/buffer/__init__.py000066400000000000000000000004331511007055700211450ustar00rootroot00000000000000""" Implementations of the Zarr Buffer interface. See Also ======== zarr.abc.buffer: Abstract base class for the Zarr Buffer interface. """ from zarr.buffer import cpu, gpu from zarr.core.buffer import default_buffer_prototype __all__ = ["cpu", "default_buffer_prototype", "gpu"] zarr-python-3.1.5/src/zarr/buffer/cpu.py000066400000000000000000000004161511007055700201760ustar00rootroot00000000000000from zarr.core.buffer.cpu import ( Buffer, NDBuffer, as_numpy_array_wrapper, buffer_prototype, numpy_buffer_prototype, ) __all__ = [ "Buffer", "NDBuffer", "as_numpy_array_wrapper", "buffer_prototype", "numpy_buffer_prototype", ] zarr-python-3.1.5/src/zarr/buffer/gpu.py000066400000000000000000000002111511007055700201730ustar00rootroot00000000000000from zarr.core.buffer.gpu import Buffer, NDBuffer, buffer_prototype __all__ = [ "Buffer", "NDBuffer", "buffer_prototype", ] zarr-python-3.1.5/src/zarr/codecs/000077500000000000000000000000001511007055700170235ustar00rootroot00000000000000zarr-python-3.1.5/src/zarr/codecs/__init__.py000066400000000000000000000065221511007055700211410ustar00rootroot00000000000000from __future__ import annotations from zarr.codecs.blosc import BloscCname, BloscCodec, BloscShuffle from zarr.codecs.bytes import BytesCodec, Endian from zarr.codecs.crc32c_ import Crc32cCodec from zarr.codecs.gzip import GzipCodec from zarr.codecs.numcodecs import ( BZ2, CRC32, CRC32C, LZ4, LZMA, ZFPY, Adler32, AsType, BitRound, Blosc, Delta, FixedScaleOffset, Fletcher32, GZip, JenkinsLookup3, PackBits, PCodec, Quantize, Shuffle, Zlib, Zstd, ) from zarr.codecs.sharding import ShardingCodec, ShardingCodecIndexLocation from zarr.codecs.transpose import TransposeCodec from zarr.codecs.vlen_utf8 import VLenBytesCodec, VLenUTF8Codec from zarr.codecs.zstd import ZstdCodec from zarr.registry import register_codec __all__ = [ "BloscCname", "BloscCodec", "BloscShuffle", "BytesCodec", "Crc32cCodec", "Endian", "GzipCodec", "ShardingCodec", "ShardingCodecIndexLocation", "TransposeCodec", "VLenBytesCodec", "VLenUTF8Codec", "ZstdCodec", ] register_codec("blosc", BloscCodec) register_codec("bytes", BytesCodec) # compatibility with earlier versions of ZEP1 register_codec("endian", BytesCodec) register_codec("crc32c", Crc32cCodec) register_codec("gzip", GzipCodec) 
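# These register_codec calls map a Zarr v3 codec name (the "name" field in array
# metadata) to the class that implements it, so codecs can be resolved from stored
# metadata. A downstream package could register its own codec the same way; a
# minimal sketch using a hypothetical codec class:
#
#     from zarr.registry import register_codec
#     register_codec("mypackage.awesome", AwesomeCodec)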
register_codec("sharding_indexed", ShardingCodec) register_codec("zstd", ZstdCodec) register_codec("vlen-utf8", VLenUTF8Codec) register_codec("vlen-bytes", VLenBytesCodec) register_codec("transpose", TransposeCodec) # Register all the codecs formerly contained in numcodecs.zarr3 register_codec("numcodecs.bz2", BZ2, qualname="zarr.codecs.numcodecs.BZ2") register_codec("numcodecs.crc32", CRC32, qualname="zarr.codecs.numcodecs.CRC32") register_codec("numcodecs.crc32c", CRC32C, qualname="zarr.codecs.numcodecs.CRC32C") register_codec("numcodecs.lz4", LZ4, qualname="zarr.codecs.numcodecs.LZ4") register_codec("numcodecs.lzma", LZMA, qualname="zarr.codecs.numcodecs.LZMA") register_codec("numcodecs.zfpy", ZFPY, qualname="zarr.codecs.numcodecs.ZFPY") register_codec("numcodecs.adler32", Adler32, qualname="zarr.codecs.numcodecs.Adler32") register_codec("numcodecs.astype", AsType, qualname="zarr.codecs.numcodecs.AsType") register_codec("numcodecs.bitround", BitRound, qualname="zarr.codecs.numcodecs.BitRound") register_codec("numcodecs.blosc", Blosc, qualname="zarr.codecs.numcodecs.Blosc") register_codec("numcodecs.delta", Delta, qualname="zarr.codecs.numcodecs.Delta") register_codec( "numcodecs.fixedscaleoffset", FixedScaleOffset, qualname="zarr.codecs.numcodecs.FixedScaleOffset", ) register_codec("numcodecs.fletcher32", Fletcher32, qualname="zarr.codecs.numcodecs.Fletcher32") register_codec("numcodecs.gzip", GZip, qualname="zarr.codecs.numcodecs.GZip") register_codec( "numcodecs.jenkins_lookup3", JenkinsLookup3, qualname="zarr.codecs.numcodecs.JenkinsLookup3" ) register_codec("numcodecs.pcodec", PCodec, qualname="zarr.codecs.numcodecs.PCodec") register_codec("numcodecs.packbits", PackBits, qualname="zarr.codecs.numcodecs.PackBits") register_codec("numcodecs.quantize", Quantize, qualname="zarr.codecs.numcodecs.Quantize") register_codec("numcodecs.shuffle", Shuffle, qualname="zarr.codecs.numcodecs.Shuffle") register_codec("numcodecs.zlib", Zlib, qualname="zarr.codecs.numcodecs.Zlib") register_codec("numcodecs.zstd", Zstd, qualname="zarr.codecs.numcodecs.Zstd") zarr-python-3.1.5/src/zarr/codecs/_v2.py000066400000000000000000000071011511007055700200620ustar00rootroot00000000000000from __future__ import annotations import asyncio from dataclasses import dataclass from typing import TYPE_CHECKING import numpy as np from numcodecs.compat import ensure_bytes, ensure_ndarray_like from zarr.abc.codec import ArrayBytesCodec from zarr.registry import get_ndbuffer_class if TYPE_CHECKING: from zarr.abc.numcodec import Numcodec from zarr.core.array_spec import ArraySpec from zarr.core.buffer import Buffer, NDBuffer @dataclass(frozen=True) class V2Codec(ArrayBytesCodec): filters: tuple[Numcodec, ...] 
| None compressor: Numcodec | None is_fixed_size = False async def _decode_single( self, chunk_bytes: Buffer, chunk_spec: ArraySpec, ) -> NDBuffer: cdata = chunk_bytes.as_array_like() # decompress if self.compressor: chunk = await asyncio.to_thread(self.compressor.decode, cdata) else: chunk = cdata # apply filters if self.filters: for f in reversed(self.filters): chunk = await asyncio.to_thread(f.decode, chunk) # view as numpy array with correct dtype chunk = ensure_ndarray_like(chunk) # special case object dtype, because incorrect handling can lead to # segfaults and other bad things happening if chunk_spec.dtype.dtype_cls is not np.dtypes.ObjectDType: try: chunk = chunk.view(chunk_spec.dtype.to_native_dtype()) except TypeError: # this will happen if the dtype of the chunk # does not match the dtype of the array spec i.g. if # the dtype of the chunk_spec is a string dtype, but the chunk # is an object array. In this case, we need to convert the object # array to the correct dtype. chunk = np.array(chunk).astype(chunk_spec.dtype.to_native_dtype()) elif chunk.dtype != object: # If we end up here, someone must have hacked around with the filters. # We cannot deal with object arrays unless there is an object # codec in the filter chain, i.e., a filter that converts from object # array to something else during encoding, and converts back to object # array during decoding. raise RuntimeError("cannot read object array without object codec") # ensure correct chunk shape chunk = chunk.reshape(-1, order="A") chunk = chunk.reshape(chunk_spec.shape, order=chunk_spec.order) return get_ndbuffer_class().from_ndarray_like(chunk) async def _encode_single( self, chunk_array: NDBuffer, chunk_spec: ArraySpec, ) -> Buffer | None: chunk = chunk_array.as_ndarray_like() # ensure contiguous and correct order chunk = chunk.astype(chunk_spec.dtype.to_native_dtype(), order=chunk_spec.order, copy=False) # apply filters if self.filters: for f in self.filters: chunk = await asyncio.to_thread(f.encode, chunk) # check object encoding if ensure_ndarray_like(chunk).dtype == object: raise RuntimeError("cannot write object array without object codec") # compress if self.compressor: cdata = await asyncio.to_thread(self.compressor.encode, chunk) else: cdata = chunk cdata = ensure_bytes(cdata) return chunk_spec.prototype.buffer.from_bytes(cdata) def compute_encoded_size(self, _input_byte_length: int, _chunk_spec: ArraySpec) -> int: raise NotImplementedError zarr-python-3.1.5/src/zarr/codecs/blosc.py000066400000000000000000000250711511007055700205040ustar00rootroot00000000000000from __future__ import annotations import asyncio from dataclasses import dataclass, field, replace from enum import Enum from functools import cached_property from typing import TYPE_CHECKING, Final, Literal, NotRequired, TypedDict import numcodecs from numcodecs.blosc import Blosc from packaging.version import Version from zarr.abc.codec import BytesBytesCodec from zarr.core.buffer.cpu import as_numpy_array_wrapper from zarr.core.common import JSON, NamedRequiredConfig, parse_enum, parse_named_configuration from zarr.core.dtype.common import HasItemSize if TYPE_CHECKING: from typing import Self from zarr.core.array_spec import ArraySpec from zarr.core.buffer import Buffer Shuffle = Literal["noshuffle", "shuffle", "bitshuffle"] """The shuffle values permitted for the blosc codec""" SHUFFLE: Final = ("noshuffle", "shuffle", "bitshuffle") CName = Literal["lz4", "lz4hc", "blosclz", "snappy", "zlib", "zstd"] """The codec identifiers used in the blosc codec 
""" class BloscConfigV2(TypedDict): """Configuration for the V2 Blosc codec""" cname: CName clevel: int shuffle: int blocksize: int typesize: NotRequired[int] class BloscConfigV3(TypedDict): """Configuration for the V3 Blosc codec""" cname: CName clevel: int shuffle: Shuffle blocksize: int typesize: int class BloscJSON_V3(NamedRequiredConfig[Literal["blosc"], BloscConfigV3]): """ The JSON form of the Blosc codec in Zarr V3. """ class BloscShuffle(Enum): """ Enum for shuffle filter used by blosc. """ noshuffle = "noshuffle" shuffle = "shuffle" bitshuffle = "bitshuffle" @classmethod def from_int(cls, num: int) -> BloscShuffle: blosc_shuffle_int_to_str = { 0: "noshuffle", 1: "shuffle", 2: "bitshuffle", } if num not in blosc_shuffle_int_to_str: raise ValueError(f"Value must be between 0 and 2. Got {num}.") return BloscShuffle[blosc_shuffle_int_to_str[num]] class BloscCname(Enum): """ Enum for compression library used by blosc. """ lz4 = "lz4" lz4hc = "lz4hc" blosclz = "blosclz" zstd = "zstd" snappy = "snappy" zlib = "zlib" # See https://zarr.readthedocs.io/en/stable/user-guide/performance.html#configuring-blosc numcodecs.blosc.use_threads = False def parse_typesize(data: JSON) -> int: if isinstance(data, int): if data > 0: return data else: raise ValueError( f"Value must be greater than 0. Got {data}, which is less or equal to 0." ) raise TypeError(f"Value must be an int. Got {type(data)} instead.") # todo: real validation def parse_clevel(data: JSON) -> int: if isinstance(data, int): return data raise TypeError(f"Value should be an int. Got {type(data)} instead.") def parse_blocksize(data: JSON) -> int: if isinstance(data, int): return data raise TypeError(f"Value should be an int. Got {type(data)} instead.") @dataclass(frozen=True) class BloscCodec(BytesBytesCodec): """ Blosc compression codec for zarr. Blosc is a high-performance compressor optimized for binary data. It uses a combination of blocking, shuffling, and fast compression algorithms to achieve excellent compression ratios and speed. Attributes ---------- is_fixed_size : bool Always False for Blosc codec, as compression produces variable-sized output. typesize : int The data type size in bytes used for shuffle filtering. cname : BloscCname The compression algorithm being used (lz4, lz4hc, blosclz, snappy, zlib, or zstd). clevel : int The compression level (0-9). shuffle : BloscShuffle The shuffle filter mode (noshuffle, shuffle, or bitshuffle). blocksize : int The size of compressed blocks in bytes (0 for automatic). Parameters ---------- typesize : int, optional The data type size in bytes. This affects how the shuffle filter processes the data. If None, defaults to 1 and the attribute is marked as tunable. Default: 1. cname : BloscCname or {'lz4', 'lz4hc', 'blosclz', 'snappy', 'zlib', 'zstd'}, optional The compression algorithm to use. Default: 'zstd'. clevel : int, optional The compression level, from 0 (no compression) to 9 (maximum compression). Higher values provide better compression at the cost of speed. Default: 5. shuffle : BloscShuffle or {'noshuffle', 'shuffle', 'bitshuffle'}, optional The shuffle filter to apply before compression: - 'noshuffle': No shuffling - 'shuffle': Byte shuffling (better for typesize > 1) - 'bitshuffle': Bit shuffling (better for typesize == 1) If None, defaults to 'bitshuffle' and the attribute is marked as tunable. Default: 'bitshuffle'. blocksize : int, optional The requested size of compressed blocks in bytes. A value of 0 means automatic block size selection. Default: 0. 
Notes ----- **Tunable attributes**: If `typesize` or `shuffle` are set to None during initialization, they are marked as tunable attributes. This means they can be adjusted later based on the data type of the array being compressed. **Thread Safety**: This codec sets `numcodecs.blosc.use_threads = False` at module import time to avoid threading issues in asyncio contexts. Examples -------- Create a Blosc codec with default settings: >>> codec = BloscCodec() >>> codec.typesize 1 >>> codec.shuffle Create a codec with specific compression settings: >>> codec = BloscCodec(cname='zstd', clevel=9, shuffle='shuffle') >>> codec.cname See Also -------- BloscShuffle : Enum for shuffle filter options BloscCname : Enum for compression algorithm options """ # This attribute tracks parameters were set to None at init time, and thus tunable _tunable_attrs: set[Literal["typesize", "shuffle"]] = field(init=False) is_fixed_size = False typesize: int cname: BloscCname clevel: int shuffle: BloscShuffle blocksize: int def __init__( self, *, typesize: int | None = None, cname: BloscCname | CName = BloscCname.zstd, clevel: int = 5, shuffle: BloscShuffle | Shuffle | None = None, blocksize: int = 0, ) -> None: object.__setattr__(self, "_tunable_attrs", set()) # If typesize was set to None, replace it with a valid typesize # and flag the typesize attribute as safe to replace later if typesize is None: typesize = 1 self._tunable_attrs.update({"typesize"}) # If shuffle was set to None, replace it with a valid shuffle # and flag the shuffle attribute as safe to replace later if shuffle is None: shuffle = BloscShuffle.bitshuffle self._tunable_attrs.update({"shuffle"}) typesize_parsed = parse_typesize(typesize) cname_parsed = parse_enum(cname, BloscCname) clevel_parsed = parse_clevel(clevel) shuffle_parsed = parse_enum(shuffle, BloscShuffle) blocksize_parsed = parse_blocksize(blocksize) object.__setattr__(self, "typesize", typesize_parsed) object.__setattr__(self, "cname", cname_parsed) object.__setattr__(self, "clevel", clevel_parsed) object.__setattr__(self, "shuffle", shuffle_parsed) object.__setattr__(self, "blocksize", blocksize_parsed) @classmethod def from_dict(cls, data: dict[str, JSON]) -> Self: _, configuration_parsed = parse_named_configuration(data, "blosc") return cls(**configuration_parsed) # type: ignore[arg-type] def to_dict(self) -> dict[str, JSON]: result: BloscJSON_V3 = { "name": "blosc", "configuration": { "typesize": self.typesize, "cname": self.cname.value, "clevel": self.clevel, "shuffle": self.shuffle.value, "blocksize": self.blocksize, }, } return result # type: ignore[return-value] def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: """ Create a new codec with typesize and shuffle parameters adjusted according to the size of each element in the data type associated with array_spec. Parameters are only updated if they were set to None when self.__init__ was called. 
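For example, assuming the codec was constructed with the defaults
`typesize=None` and `shuffle=None`, evolving it against an array spec whose
dtype has a 4-byte item size yields a codec with `typesize=4` and
`shuffle=BloscShuffle.shuffle`; a 1-byte item size yields
`BloscShuffle.bitshuffle` instead.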
""" item_size = 1 if isinstance(array_spec.dtype, HasItemSize): item_size = array_spec.dtype.item_size new_codec = self if "typesize" in self._tunable_attrs: new_codec = replace(new_codec, typesize=item_size) if "shuffle" in self._tunable_attrs: new_codec = replace( new_codec, shuffle=(BloscShuffle.bitshuffle if item_size == 1 else BloscShuffle.shuffle), ) return new_codec @cached_property def _blosc_codec(self) -> Blosc: map_shuffle_str_to_int = { BloscShuffle.noshuffle: 0, BloscShuffle.shuffle: 1, BloscShuffle.bitshuffle: 2, } config_dict: BloscConfigV2 = { "cname": self.cname.name, # type: ignore[typeddict-item] "clevel": self.clevel, "shuffle": map_shuffle_str_to_int[self.shuffle], "blocksize": self.blocksize, } # See https://github.com/zarr-developers/numcodecs/pull/713 if Version(numcodecs.__version__) >= Version("0.16.0"): config_dict["typesize"] = self.typesize return Blosc.from_config(config_dict) async def _decode_single( self, chunk_bytes: Buffer, chunk_spec: ArraySpec, ) -> Buffer: return await asyncio.to_thread( as_numpy_array_wrapper, self._blosc_codec.decode, chunk_bytes, chunk_spec.prototype ) async def _encode_single( self, chunk_bytes: Buffer, chunk_spec: ArraySpec, ) -> Buffer | None: # Since blosc only support host memory, we convert the input and output of the encoding # between numpy array and buffer return await asyncio.to_thread( lambda chunk: chunk_spec.prototype.buffer.from_bytes( self._blosc_codec.encode(chunk.as_numpy_array()) ), chunk_bytes, ) def compute_encoded_size(self, _input_byte_length: int, _chunk_spec: ArraySpec) -> int: raise NotImplementedError zarr-python-3.1.5/src/zarr/codecs/bytes.py000066400000000000000000000077231511007055700205340ustar00rootroot00000000000000from __future__ import annotations import sys from dataclasses import dataclass, replace from enum import Enum from typing import TYPE_CHECKING import numpy as np from zarr.abc.codec import ArrayBytesCodec from zarr.core.buffer import Buffer, NDArrayLike, NDBuffer from zarr.core.common import JSON, parse_enum, parse_named_configuration from zarr.core.dtype.common import HasEndianness if TYPE_CHECKING: from typing import Self from zarr.core.array_spec import ArraySpec class Endian(Enum): """ Enum for endian type used by bytes codec. """ big = "big" little = "little" default_system_endian = Endian(sys.byteorder) @dataclass(frozen=True) class BytesCodec(ArrayBytesCodec): """bytes codec""" is_fixed_size = True endian: Endian | None def __init__(self, *, endian: Endian | str | None = default_system_endian) -> None: endian_parsed = None if endian is None else parse_enum(endian, Endian) object.__setattr__(self, "endian", endian_parsed) @classmethod def from_dict(cls, data: dict[str, JSON]) -> Self: _, configuration_parsed = parse_named_configuration( data, "bytes", require_configuration=False ) configuration_parsed = configuration_parsed or {} return cls(**configuration_parsed) # type: ignore[arg-type] def to_dict(self) -> dict[str, JSON]: if self.endian is None: return {"name": "bytes"} else: return {"name": "bytes", "configuration": {"endian": self.endian.value}} def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: if not isinstance(array_spec.dtype, HasEndianness): if self.endian is not None: return replace(self, endian=None) elif self.endian is None: raise ValueError( "The `endian` configuration needs to be specified for multi-byte data types." 
) return self async def _decode_single( self, chunk_bytes: Buffer, chunk_spec: ArraySpec, ) -> NDBuffer: assert isinstance(chunk_bytes, Buffer) # TODO: remove endianness enum in favor of literal union endian_str = self.endian.value if self.endian is not None else None if isinstance(chunk_spec.dtype, HasEndianness): dtype = replace(chunk_spec.dtype, endianness=endian_str).to_native_dtype() # type: ignore[call-arg] else: dtype = chunk_spec.dtype.to_native_dtype() as_array_like = chunk_bytes.as_array_like() if isinstance(as_array_like, NDArrayLike): as_nd_array_like = as_array_like else: as_nd_array_like = np.asanyarray(as_array_like) chunk_array = chunk_spec.prototype.nd_buffer.from_ndarray_like( as_nd_array_like.view(dtype=dtype) ) # ensure correct chunk shape if chunk_array.shape != chunk_spec.shape: chunk_array = chunk_array.reshape( chunk_spec.shape, ) return chunk_array async def _encode_single( self, chunk_array: NDBuffer, chunk_spec: ArraySpec, ) -> Buffer | None: assert isinstance(chunk_array, NDBuffer) if ( chunk_array.dtype.itemsize > 1 and self.endian is not None and self.endian != chunk_array.byteorder ): # type-ignore is a numpy bug # see https://github.com/numpy/numpy/issues/26473 new_dtype = chunk_array.dtype.newbyteorder(self.endian.name) # type: ignore[arg-type] chunk_array = chunk_array.astype(new_dtype) nd_array = chunk_array.as_ndarray_like() # Flatten the nd-array (only copy if needed) and reinterpret as bytes nd_array = nd_array.ravel().view(dtype="B") return chunk_spec.prototype.buffer.from_array_like(nd_array) def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -> int: return input_byte_length zarr-python-3.1.5/src/zarr/codecs/crc32c_.py000066400000000000000000000042551511007055700206210ustar00rootroot00000000000000from __future__ import annotations from dataclasses import dataclass from typing import TYPE_CHECKING, cast import google_crc32c import numpy as np import typing_extensions from zarr.abc.codec import BytesBytesCodec from zarr.core.common import JSON, parse_named_configuration if TYPE_CHECKING: from typing import Self from zarr.core.array_spec import ArraySpec from zarr.core.buffer import Buffer @dataclass(frozen=True) class Crc32cCodec(BytesBytesCodec): """crc32c codec""" is_fixed_size = True @classmethod def from_dict(cls, data: dict[str, JSON]) -> Self: parse_named_configuration(data, "crc32c", require_configuration=False) return cls() def to_dict(self) -> dict[str, JSON]: return {"name": "crc32c"} async def _decode_single( self, chunk_bytes: Buffer, chunk_spec: ArraySpec, ) -> Buffer: data = chunk_bytes.as_numpy_array() crc32_bytes = data[-4:] inner_bytes = data[:-4] # Need to do a manual cast until https://github.com/numpy/numpy/issues/26783 is resolved computed_checksum = np.uint32( google_crc32c.value(cast("typing_extensions.Buffer", inner_bytes)) ).tobytes() stored_checksum = bytes(crc32_bytes) if computed_checksum != stored_checksum: raise ValueError( f"Stored and computed checksum do not match. Stored: {stored_checksum!r}. Computed: {computed_checksum!r}." 
) return chunk_spec.prototype.buffer.from_array_like(inner_bytes) async def _encode_single( self, chunk_bytes: Buffer, chunk_spec: ArraySpec, ) -> Buffer | None: data = chunk_bytes.as_numpy_array() # Calculate the checksum and "cast" it to a numpy array checksum = np.array( [google_crc32c.value(cast("typing_extensions.Buffer", data))], dtype=np.uint32 ) # Append the checksum (as bytes) to the data return chunk_spec.prototype.buffer.from_array_like(np.append(data, checksum.view("B"))) def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -> int: return input_byte_length + 4 zarr-python-3.1.5/src/zarr/codecs/gzip.py000066400000000000000000000040251511007055700203470ustar00rootroot00000000000000from __future__ import annotations import asyncio from dataclasses import dataclass from typing import TYPE_CHECKING from numcodecs.gzip import GZip from zarr.abc.codec import BytesBytesCodec from zarr.core.buffer.cpu import as_numpy_array_wrapper from zarr.core.common import JSON, parse_named_configuration if TYPE_CHECKING: from typing import Self from zarr.core.array_spec import ArraySpec from zarr.core.buffer import Buffer def parse_gzip_level(data: JSON) -> int: if not isinstance(data, (int)): raise TypeError(f"Expected int, got {type(data)}") if data not in range(10): raise ValueError( f"Expected an integer from the inclusive range (0, 9). Got {data} instead." ) return data @dataclass(frozen=True) class GzipCodec(BytesBytesCodec): """gzip codec""" is_fixed_size = False level: int = 5 def __init__(self, *, level: int = 5) -> None: level_parsed = parse_gzip_level(level) object.__setattr__(self, "level", level_parsed) @classmethod def from_dict(cls, data: dict[str, JSON]) -> Self: _, configuration_parsed = parse_named_configuration(data, "gzip") return cls(**configuration_parsed) # type: ignore[arg-type] def to_dict(self) -> dict[str, JSON]: return {"name": "gzip", "configuration": {"level": self.level}} async def _decode_single( self, chunk_bytes: Buffer, chunk_spec: ArraySpec, ) -> Buffer: return await asyncio.to_thread( as_numpy_array_wrapper, GZip(self.level).decode, chunk_bytes, chunk_spec.prototype ) async def _encode_single( self, chunk_bytes: Buffer, chunk_spec: ArraySpec, ) -> Buffer | None: return await asyncio.to_thread( as_numpy_array_wrapper, GZip(self.level).encode, chunk_bytes, chunk_spec.prototype ) def compute_encoded_size( self, _input_byte_length: int, _chunk_spec: ArraySpec, ) -> int: raise NotImplementedError zarr-python-3.1.5/src/zarr/codecs/numcodecs/000077500000000000000000000000001511007055700210035ustar00rootroot00000000000000zarr-python-3.1.5/src/zarr/codecs/numcodecs/__init__.py000066400000000000000000000016051511007055700231160ustar00rootroot00000000000000from __future__ import annotations from zarr.codecs.numcodecs._codecs import ( BZ2, CRC32, CRC32C, LZ4, LZMA, ZFPY, Adler32, AsType, BitRound, Blosc, Delta, FixedScaleOffset, Fletcher32, GZip, JenkinsLookup3, PackBits, PCodec, Quantize, Shuffle, Zlib, Zstd, _NumcodecsArrayArrayCodec, _NumcodecsArrayBytesCodec, _NumcodecsBytesBytesCodec, _NumcodecsCodec, ) __all__ = [ "BZ2", "CRC32", "CRC32C", "LZ4", "LZMA", "ZFPY", "Adler32", "AsType", "BitRound", "Blosc", "Delta", "FixedScaleOffset", "Fletcher32", "GZip", "JenkinsLookup3", "PCodec", "PackBits", "Quantize", "Shuffle", "Zlib", "Zstd", "_NumcodecsArrayArrayCodec", "_NumcodecsArrayBytesCodec", "_NumcodecsBytesBytesCodec", "_NumcodecsCodec", ] 
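# The classes re-exported above are thin Zarr v3 wrappers around the corresponding
# numcodecs codecs. Note that they are not part of the Zarr v3 specification; see
# zarr.codecs.numcodecs._codecs for the implementations and a usage example.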
zarr-python-3.1.5/src/zarr/codecs/numcodecs/_codecs.py000066400000000000000000000272611511007055700227640ustar00rootroot00000000000000""" This module provides compatibility for [numcodecs][] in Zarr version 3. These codecs were previously defined in [numcodecs][], and have now been moved to `zarr`. ```python import numpy as np import zarr import zarr.codecs.numcodecs as numcodecs array = zarr.create_array( store="data_numcodecs.zarr", shape=(1024, 1024), chunks=(64, 64), dtype="uint32", filters=[numcodecs.Delta(dtype="uint32")], compressors=[numcodecs.BZ2(level=5)], overwrite=True) array[:] = np.arange(np.prod(array.shape), dtype=array.dtype).reshape(*array.shape) ``` !!! note Please note that the codecs in [zarr.codecs.numcodecs][] are not part of the Zarr version 3 specification. Using these codecs might cause interoperability issues with other Zarr implementations. """ from __future__ import annotations import asyncio import math from dataclasses import dataclass, replace from functools import cached_property from typing import TYPE_CHECKING, Any, Self from warnings import warn import numpy as np from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec from zarr.abc.metadata import Metadata from zarr.core.buffer.cpu import as_numpy_array_wrapper from zarr.core.common import JSON, parse_named_configuration, product from zarr.dtype import UInt8, ZDType, parse_dtype from zarr.errors import ZarrUserWarning from zarr.registry import get_numcodec if TYPE_CHECKING: from zarr.abc.numcodec import Numcodec from zarr.core.array_spec import ArraySpec from zarr.core.buffer import Buffer, BufferPrototype, NDBuffer CODEC_PREFIX = "numcodecs." def _expect_name_prefix(codec_name: str) -> str: if not codec_name.startswith(CODEC_PREFIX): raise ValueError( f"Expected name to start with '{CODEC_PREFIX}'. Got {codec_name} instead." ) # pragma: no cover return codec_name.removeprefix(CODEC_PREFIX) def _parse_codec_configuration(data: dict[str, JSON]) -> dict[str, JSON]: parsed_name, parsed_configuration = parse_named_configuration(data) if not parsed_name.startswith(CODEC_PREFIX): raise ValueError( f"Expected name to start with '{CODEC_PREFIX}'. Got {parsed_name} instead." ) # pragma: no cover id = _expect_name_prefix(parsed_name) return {"id": id, **parsed_configuration} @dataclass(frozen=True) class _NumcodecsCodec(Metadata): codec_name: str codec_config: dict[str, JSON] def __init_subclass__(cls, *, codec_name: str | None = None, **kwargs: Any) -> None: """To be used only when creating the actual public-facing codec class.""" super().__init_subclass__(**kwargs) if codec_name is not None: namespace = codec_name cls_name = f"{CODEC_PREFIX}{namespace}.{cls.__name__}" cls.codec_name = f"{CODEC_PREFIX}{namespace}" cls.__doc__ = f""" See [{cls_name}][] for more details and parameters. """ def __init__(self, **codec_config: JSON) -> None: if not self.codec_name: raise ValueError( "The codec name needs to be supplied through the `codec_name` attribute." ) # pragma: no cover unprefixed_codec_name = _expect_name_prefix(self.codec_name) if "id" not in codec_config: codec_config = {"id": unprefixed_codec_name, **codec_config} elif codec_config["id"] != unprefixed_codec_name: raise ValueError( f"Codec id does not match {unprefixed_codec_name}. Got: {codec_config['id']}." 
) # pragma: no cover object.__setattr__(self, "codec_config", codec_config) warn( "Numcodecs codecs are not in the Zarr version 3 specification and " "may not be supported by other zarr implementations.", category=ZarrUserWarning, stacklevel=2, ) @cached_property def _codec(self) -> Numcodec: return get_numcodec(self.codec_config) # type: ignore[arg-type] @classmethod def from_dict(cls, data: dict[str, JSON]) -> Self: codec_config = _parse_codec_configuration(data) return cls(**codec_config) def to_dict(self) -> dict[str, JSON]: codec_config = self.codec_config.copy() codec_config.pop("id", None) return { "name": self.codec_name, "configuration": codec_config, } def compute_encoded_size(self, input_byte_length: int, chunk_spec: ArraySpec) -> int: raise NotImplementedError # pragma: no cover # Override __repr__ because dynamically constructed classes don't seem to work otherwise def __repr__(self) -> str: codec_config = self.codec_config.copy() codec_config.pop("id", None) return f"{self.__class__.__name__}(codec_name={self.codec_name!r}, codec_config={codec_config!r})" class _NumcodecsBytesBytesCodec(_NumcodecsCodec, BytesBytesCodec): def __init__(self, **codec_config: JSON) -> None: super().__init__(**codec_config) async def _decode_single(self, chunk_data: Buffer, chunk_spec: ArraySpec) -> Buffer: return await asyncio.to_thread( as_numpy_array_wrapper, self._codec.decode, chunk_data, chunk_spec.prototype, ) def _encode(self, chunk_data: Buffer, prototype: BufferPrototype) -> Buffer: encoded = self._codec.encode(chunk_data.as_array_like()) if isinstance(encoded, np.ndarray): # Required for checksum codecs return prototype.buffer.from_bytes(encoded.tobytes()) return prototype.buffer.from_bytes(encoded) async def _encode_single(self, chunk_data: Buffer, chunk_spec: ArraySpec) -> Buffer: return await asyncio.to_thread(self._encode, chunk_data, chunk_spec.prototype) class _NumcodecsArrayArrayCodec(_NumcodecsCodec, ArrayArrayCodec): def __init__(self, **codec_config: JSON) -> None: super().__init__(**codec_config) async def _decode_single(self, chunk_data: NDBuffer, chunk_spec: ArraySpec) -> NDBuffer: chunk_ndarray = chunk_data.as_ndarray_like() out = await asyncio.to_thread(self._codec.decode, chunk_ndarray) return chunk_spec.prototype.nd_buffer.from_ndarray_like(out.reshape(chunk_spec.shape)) async def _encode_single(self, chunk_data: NDBuffer, chunk_spec: ArraySpec) -> NDBuffer: chunk_ndarray = chunk_data.as_ndarray_like() out = await asyncio.to_thread(self._codec.encode, chunk_ndarray) return chunk_spec.prototype.nd_buffer.from_ndarray_like(out) class _NumcodecsArrayBytesCodec(_NumcodecsCodec, ArrayBytesCodec): def __init__(self, **codec_config: JSON) -> None: super().__init__(**codec_config) async def _decode_single(self, chunk_data: Buffer, chunk_spec: ArraySpec) -> NDBuffer: chunk_bytes = chunk_data.to_bytes() out = await asyncio.to_thread(self._codec.decode, chunk_bytes) return chunk_spec.prototype.nd_buffer.from_ndarray_like(out.reshape(chunk_spec.shape)) async def _encode_single(self, chunk_data: NDBuffer, chunk_spec: ArraySpec) -> Buffer: chunk_ndarray = chunk_data.as_ndarray_like() out = await asyncio.to_thread(self._codec.encode, chunk_ndarray) return chunk_spec.prototype.buffer.from_bytes(out) # bytes-to-bytes codecs class Blosc(_NumcodecsBytesBytesCodec, codec_name="blosc"): pass class LZ4(_NumcodecsBytesBytesCodec, codec_name="lz4"): pass class Zstd(_NumcodecsBytesBytesCodec, codec_name="zstd"): pass class Zlib(_NumcodecsBytesBytesCodec, codec_name="zlib"): pass class 
GZip(_NumcodecsBytesBytesCodec, codec_name="gzip"): pass class BZ2(_NumcodecsBytesBytesCodec, codec_name="bz2"): pass class LZMA(_NumcodecsBytesBytesCodec, codec_name="lzma"): pass class Shuffle(_NumcodecsBytesBytesCodec, codec_name="shuffle"): def evolve_from_array_spec(self, array_spec: ArraySpec) -> Shuffle: if self.codec_config.get("elementsize") is None: dtype = array_spec.dtype.to_native_dtype() return Shuffle(**{**self.codec_config, "elementsize": dtype.itemsize}) return self # pragma: no cover # array-to-array codecs ("filters") class Delta(_NumcodecsArrayArrayCodec, codec_name="delta"): def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec: if astype := self.codec_config.get("astype"): dtype = parse_dtype(np.dtype(astype), zarr_format=3) # type: ignore[call-overload] return replace(chunk_spec, dtype=dtype) return chunk_spec class BitRound(_NumcodecsArrayArrayCodec, codec_name="bitround"): pass class FixedScaleOffset(_NumcodecsArrayArrayCodec, codec_name="fixedscaleoffset"): def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec: if astype := self.codec_config.get("astype"): dtype = parse_dtype(np.dtype(astype), zarr_format=3) # type: ignore[call-overload] return replace(chunk_spec, dtype=dtype) return chunk_spec def evolve_from_array_spec(self, array_spec: ArraySpec) -> FixedScaleOffset: if self.codec_config.get("dtype") is None: dtype = array_spec.dtype.to_native_dtype() return FixedScaleOffset(**{**self.codec_config, "dtype": str(dtype)}) return self class Quantize(_NumcodecsArrayArrayCodec, codec_name="quantize"): def __init__(self, **codec_config: JSON) -> None: super().__init__(**codec_config) def evolve_from_array_spec(self, array_spec: ArraySpec) -> Quantize: if self.codec_config.get("dtype") is None: dtype = array_spec.dtype.to_native_dtype() return Quantize(**{**self.codec_config, "dtype": str(dtype)}) return self class PackBits(_NumcodecsArrayArrayCodec, codec_name="packbits"): def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec: return replace( chunk_spec, shape=(1 + math.ceil(product(chunk_spec.shape) / 8),), dtype=UInt8(), ) # todo: remove this type: ignore when this class can be defined w.r.t. # a single zarr dtype API def validate(self, *, dtype: ZDType[Any, Any], **_kwargs: Any) -> None: # this is bugged and will fail _dtype = dtype.to_native_dtype() if _dtype != np.dtype("bool"): raise ValueError(f"Packbits filter requires bool dtype. Got {dtype}.") class AsType(_NumcodecsArrayArrayCodec, codec_name="astype"): def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec: dtype = parse_dtype(np.dtype(self.codec_config["encode_dtype"]), zarr_format=3) # type: ignore[arg-type] return replace(chunk_spec, dtype=dtype) def evolve_from_array_spec(self, array_spec: ArraySpec) -> AsType: if self.codec_config.get("decode_dtype") is None: # TODO: remove these coverage exemptions the correct way, i.e. 
with tests dtype = array_spec.dtype.to_native_dtype() # pragma: no cover return AsType(**{**self.codec_config, "decode_dtype": str(dtype)}) # pragma: no cover return self # bytes-to-bytes checksum codecs class _NumcodecsChecksumCodec(_NumcodecsBytesBytesCodec): def compute_encoded_size(self, input_byte_length: int, chunk_spec: ArraySpec) -> int: return input_byte_length + 4 # pragma: no cover class CRC32(_NumcodecsChecksumCodec, codec_name="crc32"): pass class CRC32C(_NumcodecsChecksumCodec, codec_name="crc32c"): pass class Adler32(_NumcodecsChecksumCodec, codec_name="adler32"): pass class Fletcher32(_NumcodecsChecksumCodec, codec_name="fletcher32"): pass class JenkinsLookup3(_NumcodecsChecksumCodec, codec_name="jenkins_lookup3"): pass # array-to-bytes codecs class PCodec(_NumcodecsArrayBytesCodec, codec_name="pcodec"): pass class ZFPY(_NumcodecsArrayBytesCodec, codec_name="zfpy"): pass zarr-python-3.1.5/src/zarr/codecs/sharding.py000066400000000000000000000613061511007055700212020ustar00rootroot00000000000000from __future__ import annotations from collections.abc import Iterable, Mapping, MutableMapping from dataclasses import dataclass, replace from enum import Enum from functools import lru_cache from operator import itemgetter from typing import TYPE_CHECKING, Any, NamedTuple, cast import numpy as np import numpy.typing as npt from zarr.abc.codec import ( ArrayBytesCodec, ArrayBytesCodecPartialDecodeMixin, ArrayBytesCodecPartialEncodeMixin, Codec, CodecPipeline, ) from zarr.abc.store import ( ByteGetter, ByteRequest, ByteSetter, RangeByteRequest, SuffixByteRequest, ) from zarr.codecs.bytes import BytesCodec from zarr.codecs.crc32c_ import Crc32cCodec from zarr.core.array_spec import ArrayConfig, ArraySpec from zarr.core.buffer import ( Buffer, BufferPrototype, NDBuffer, default_buffer_prototype, numpy_buffer_prototype, ) from zarr.core.chunk_grids import ChunkGrid, RegularChunkGrid from zarr.core.common import ( ShapeLike, parse_enum, parse_named_configuration, parse_shapelike, product, ) from zarr.core.dtype.npy.int import UInt64 from zarr.core.indexing import ( BasicIndexer, SelectorTuple, c_order_iter, get_indexer, morton_order_iter, ) from zarr.core.metadata.v3 import parse_codecs from zarr.registry import get_ndbuffer_class, get_pipeline_class if TYPE_CHECKING: from collections.abc import Iterator from typing import Self from zarr.core.common import JSON from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType MAX_UINT_64 = 2**64 - 1 ShardMapping = Mapping[tuple[int, ...], Buffer | None] ShardMutableMapping = MutableMapping[tuple[int, ...], Buffer | None] class ShardingCodecIndexLocation(Enum): """ Enum for index location used by the sharding codec. """ start = "start" end = "end" def parse_index_location(data: object) -> ShardingCodecIndexLocation: return parse_enum(data, ShardingCodecIndexLocation) @dataclass(frozen=True) class _ShardingByteGetter(ByteGetter): shard_dict: ShardMapping chunk_coords: tuple[int, ...] async def get( self, prototype: BufferPrototype, byte_range: ByteRequest | None = None ) -> Buffer | None: assert byte_range is None, "byte_range is not supported within shards" assert prototype == default_buffer_prototype(), ( f"prototype is not supported within shards currently. 
diff: {prototype} != {default_buffer_prototype()}" ) return self.shard_dict.get(self.chunk_coords) @dataclass(frozen=True) class _ShardingByteSetter(_ShardingByteGetter, ByteSetter): shard_dict: ShardMutableMapping async def set(self, value: Buffer, byte_range: ByteRequest | None = None) -> None: assert byte_range is None, "byte_range is not supported within shards" self.shard_dict[self.chunk_coords] = value async def delete(self) -> None: del self.shard_dict[self.chunk_coords] async def set_if_not_exists(self, default: Buffer) -> None: self.shard_dict.setdefault(self.chunk_coords, default) class _ShardIndex(NamedTuple): # dtype uint64, shape (chunks_per_shard_0, chunks_per_shard_1, ..., 2) offsets_and_lengths: npt.NDArray[np.uint64] @property def chunks_per_shard(self) -> tuple[int, ...]: result = tuple(self.offsets_and_lengths.shape[0:-1]) # The cast is required until https://github.com/numpy/numpy/pull/27211 is merged return cast("tuple[int, ...]", result) def _localize_chunk(self, chunk_coords: tuple[int, ...]) -> tuple[int, ...]: return tuple( chunk_i % shard_i for chunk_i, shard_i in zip(chunk_coords, self.offsets_and_lengths.shape, strict=False) ) def is_all_empty(self) -> bool: return bool(np.array_equiv(self.offsets_and_lengths, MAX_UINT_64)) def get_full_chunk_map(self) -> npt.NDArray[np.bool_]: return np.not_equal(self.offsets_and_lengths[..., 0], MAX_UINT_64) def get_chunk_slice(self, chunk_coords: tuple[int, ...]) -> tuple[int, int] | None: localized_chunk = self._localize_chunk(chunk_coords) chunk_start, chunk_len = self.offsets_and_lengths[localized_chunk] if (chunk_start, chunk_len) == (MAX_UINT_64, MAX_UINT_64): return None else: return (int(chunk_start), int(chunk_start + chunk_len)) def set_chunk_slice(self, chunk_coords: tuple[int, ...], chunk_slice: slice | None) -> None: localized_chunk = self._localize_chunk(chunk_coords) if chunk_slice is None: self.offsets_and_lengths[localized_chunk] = (MAX_UINT_64, MAX_UINT_64) else: self.offsets_and_lengths[localized_chunk] = ( chunk_slice.start, chunk_slice.stop - chunk_slice.start, ) def is_dense(self, chunk_byte_length: int) -> bool: sorted_offsets_and_lengths = sorted( [ (offset, length) for offset, length in self.offsets_and_lengths if offset != MAX_UINT_64 ], key=itemgetter(0), ) # Are all non-empty offsets unique? 
if len( {offset for offset, _ in sorted_offsets_and_lengths if offset != MAX_UINT_64} ) != len(sorted_offsets_and_lengths): return False return all( offset % chunk_byte_length == 0 and length == chunk_byte_length for offset, length in sorted_offsets_and_lengths ) @classmethod def create_empty(cls, chunks_per_shard: tuple[int, ...]) -> _ShardIndex: offsets_and_lengths = np.zeros(chunks_per_shard + (2,), dtype=" _ShardReader: shard_index_size = codec._shard_index_size(chunks_per_shard) obj = cls() obj.buf = buf if codec.index_location == ShardingCodecIndexLocation.start: shard_index_bytes = obj.buf[:shard_index_size] else: shard_index_bytes = obj.buf[-shard_index_size:] obj.index = await codec._decode_shard_index(shard_index_bytes, chunks_per_shard) return obj @classmethod def create_empty( cls, chunks_per_shard: tuple[int, ...], buffer_prototype: BufferPrototype | None = None ) -> _ShardReader: if buffer_prototype is None: buffer_prototype = default_buffer_prototype() index = _ShardIndex.create_empty(chunks_per_shard) obj = cls() obj.buf = buffer_prototype.buffer.create_zero_length() obj.index = index return obj def __getitem__(self, chunk_coords: tuple[int, ...]) -> Buffer: chunk_byte_slice = self.index.get_chunk_slice(chunk_coords) if chunk_byte_slice: return self.buf[chunk_byte_slice[0] : chunk_byte_slice[1]] raise KeyError def __len__(self) -> int: return int(self.index.offsets_and_lengths.size / 2) def __iter__(self) -> Iterator[tuple[int, ...]]: return c_order_iter(self.index.offsets_and_lengths.shape[:-1]) @dataclass(frozen=True) class ShardingCodec( ArrayBytesCodec, ArrayBytesCodecPartialDecodeMixin, ArrayBytesCodecPartialEncodeMixin ): """Sharding codec""" chunk_shape: tuple[int, ...] codecs: tuple[Codec, ...] index_codecs: tuple[Codec, ...] index_location: ShardingCodecIndexLocation = ShardingCodecIndexLocation.end def __init__( self, *, chunk_shape: ShapeLike, codecs: Iterable[Codec | dict[str, JSON]] = (BytesCodec(),), index_codecs: Iterable[Codec | dict[str, JSON]] = (BytesCodec(), Crc32cCodec()), index_location: ShardingCodecIndexLocation | str = ShardingCodecIndexLocation.end, ) -> None: chunk_shape_parsed = parse_shapelike(chunk_shape) codecs_parsed = parse_codecs(codecs) index_codecs_parsed = parse_codecs(index_codecs) index_location_parsed = parse_index_location(index_location) object.__setattr__(self, "chunk_shape", chunk_shape_parsed) object.__setattr__(self, "codecs", codecs_parsed) object.__setattr__(self, "index_codecs", index_codecs_parsed) object.__setattr__(self, "index_location", index_location_parsed) # Use instance-local lru_cache to avoid memory leaks # numpy void scalars are not hashable, which means an array spec with a fill value that is # a numpy void scalar will break the lru_cache. This is commented for now but should be # fixed. 
See https://github.com/zarr-developers/zarr-python/issues/3054 # object.__setattr__(self, "_get_chunk_spec", lru_cache()(self._get_chunk_spec)) object.__setattr__(self, "_get_index_chunk_spec", lru_cache()(self._get_index_chunk_spec)) object.__setattr__(self, "_get_chunks_per_shard", lru_cache()(self._get_chunks_per_shard)) # todo: typedict return type def __getstate__(self) -> dict[str, Any]: return self.to_dict() def __setstate__(self, state: dict[str, Any]) -> None: config = state["configuration"] object.__setattr__(self, "chunk_shape", parse_shapelike(config["chunk_shape"])) object.__setattr__(self, "codecs", parse_codecs(config["codecs"])) object.__setattr__(self, "index_codecs", parse_codecs(config["index_codecs"])) object.__setattr__(self, "index_location", parse_index_location(config["index_location"])) # Use instance-local lru_cache to avoid memory leaks # object.__setattr__(self, "_get_chunk_spec", lru_cache()(self._get_chunk_spec)) object.__setattr__(self, "_get_index_chunk_spec", lru_cache()(self._get_index_chunk_spec)) object.__setattr__(self, "_get_chunks_per_shard", lru_cache()(self._get_chunks_per_shard)) @classmethod def from_dict(cls, data: dict[str, JSON]) -> Self: _, configuration_parsed = parse_named_configuration(data, "sharding_indexed") return cls(**configuration_parsed) # type: ignore[arg-type] @property def codec_pipeline(self) -> CodecPipeline: return get_pipeline_class().from_codecs(self.codecs) def to_dict(self) -> dict[str, JSON]: return { "name": "sharding_indexed", "configuration": { "chunk_shape": self.chunk_shape, "codecs": tuple(s.to_dict() for s in self.codecs), "index_codecs": tuple(s.to_dict() for s in self.index_codecs), "index_location": self.index_location.value, }, } def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: shard_spec = self._get_chunk_spec(array_spec) evolved_codecs = tuple(c.evolve_from_array_spec(array_spec=shard_spec) for c in self.codecs) if evolved_codecs != self.codecs: return replace(self, codecs=evolved_codecs) return self def validate( self, *, shape: tuple[int, ...], dtype: ZDType[TBaseDType, TBaseScalar], chunk_grid: ChunkGrid, ) -> None: if len(self.chunk_shape) != len(shape): raise ValueError( "The shard's `chunk_shape` and array's `shape` need to have the same number of dimensions." ) if not isinstance(chunk_grid, RegularChunkGrid): raise TypeError("Sharding is only compatible with regular chunk grids.") if not all( s % c == 0 for s, c in zip( chunk_grid.chunk_shape, self.chunk_shape, strict=False, ) ): raise ValueError( f"The array's `chunk_shape` (got {chunk_grid.chunk_shape}) " f"needs to be divisible by the shard's inner `chunk_shape` (got {self.chunk_shape})." 
) async def _decode_single( self, shard_bytes: Buffer, shard_spec: ArraySpec, ) -> NDBuffer: shard_shape = shard_spec.shape chunk_shape = self.chunk_shape chunks_per_shard = self._get_chunks_per_shard(shard_spec) chunk_spec = self._get_chunk_spec(shard_spec) indexer = BasicIndexer( tuple(slice(0, s) for s in shard_shape), shape=shard_shape, chunk_grid=RegularChunkGrid(chunk_shape=chunk_shape), ) # setup output array out = chunk_spec.prototype.nd_buffer.empty( shape=shard_shape, dtype=shard_spec.dtype.to_native_dtype(), order=shard_spec.order, ) shard_dict = await _ShardReader.from_bytes(shard_bytes, self, chunks_per_shard) if shard_dict.index.is_all_empty(): out.fill(shard_spec.fill_value) return out # decoding chunks and writing them into the output buffer await self.codec_pipeline.read( [ ( _ShardingByteGetter(shard_dict, chunk_coords), chunk_spec, chunk_selection, out_selection, is_complete_shard, ) for chunk_coords, chunk_selection, out_selection, is_complete_shard in indexer ], out, ) return out async def _decode_partial_single( self, byte_getter: ByteGetter, selection: SelectorTuple, shard_spec: ArraySpec, ) -> NDBuffer | None: shard_shape = shard_spec.shape chunk_shape = self.chunk_shape chunks_per_shard = self._get_chunks_per_shard(shard_spec) chunk_spec = self._get_chunk_spec(shard_spec) indexer = get_indexer( selection, shape=shard_shape, chunk_grid=RegularChunkGrid(chunk_shape=chunk_shape), ) # setup output array out = shard_spec.prototype.nd_buffer.empty( shape=indexer.shape, dtype=shard_spec.dtype.to_native_dtype(), order=shard_spec.order, ) indexed_chunks = list(indexer) all_chunk_coords = {chunk_coords for chunk_coords, *_ in indexed_chunks} # reading bytes of all requested chunks shard_dict: ShardMapping = {} if self._is_total_shard(all_chunk_coords, chunks_per_shard): # read entire shard shard_dict_maybe = await self._load_full_shard_maybe( byte_getter=byte_getter, prototype=chunk_spec.prototype, chunks_per_shard=chunks_per_shard, ) if shard_dict_maybe is None: return None shard_dict = shard_dict_maybe else: # read some chunks within the shard shard_index = await self._load_shard_index_maybe(byte_getter, chunks_per_shard) if shard_index is None: return None shard_dict = {} for chunk_coords in all_chunk_coords: chunk_byte_slice = shard_index.get_chunk_slice(chunk_coords) if chunk_byte_slice: chunk_bytes = await byte_getter.get( prototype=chunk_spec.prototype, byte_range=RangeByteRequest(chunk_byte_slice[0], chunk_byte_slice[1]), ) if chunk_bytes: shard_dict[chunk_coords] = chunk_bytes # decoding chunks and writing them into the output buffer await self.codec_pipeline.read( [ ( _ShardingByteGetter(shard_dict, chunk_coords), chunk_spec, chunk_selection, out_selection, is_complete_shard, ) for chunk_coords, chunk_selection, out_selection, is_complete_shard in indexer ], out, ) if hasattr(indexer, "sel_shape"): return out.reshape(indexer.sel_shape) else: return out async def _encode_single( self, shard_array: NDBuffer, shard_spec: ArraySpec, ) -> Buffer | None: shard_shape = shard_spec.shape chunk_shape = self.chunk_shape chunks_per_shard = self._get_chunks_per_shard(shard_spec) chunk_spec = self._get_chunk_spec(shard_spec) indexer = list( BasicIndexer( tuple(slice(0, s) for s in shard_shape), shape=shard_shape, chunk_grid=RegularChunkGrid(chunk_shape=chunk_shape), ) ) shard_builder = dict.fromkeys(morton_order_iter(chunks_per_shard)) await self.codec_pipeline.write( [ ( _ShardingByteSetter(shard_builder, chunk_coords), chunk_spec, chunk_selection, out_selection, 
is_complete_shard, ) for chunk_coords, chunk_selection, out_selection, is_complete_shard in indexer ], shard_array, ) return await self._encode_shard_dict( shard_builder, chunks_per_shard=chunks_per_shard, buffer_prototype=default_buffer_prototype(), ) async def _encode_partial_single( self, byte_setter: ByteSetter, shard_array: NDBuffer, selection: SelectorTuple, shard_spec: ArraySpec, ) -> None: shard_shape = shard_spec.shape chunk_shape = self.chunk_shape chunks_per_shard = self._get_chunks_per_shard(shard_spec) chunk_spec = self._get_chunk_spec(shard_spec) shard_reader = await self._load_full_shard_maybe( byte_getter=byte_setter, prototype=chunk_spec.prototype, chunks_per_shard=chunks_per_shard, ) shard_reader = shard_reader or _ShardReader.create_empty(chunks_per_shard) shard_dict = {k: shard_reader.get(k) for k in morton_order_iter(chunks_per_shard)} indexer = list( get_indexer( selection, shape=shard_shape, chunk_grid=RegularChunkGrid(chunk_shape=chunk_shape) ) ) await self.codec_pipeline.write( [ ( _ShardingByteSetter(shard_dict, chunk_coords), chunk_spec, chunk_selection, out_selection, is_complete_shard, ) for chunk_coords, chunk_selection, out_selection, is_complete_shard in indexer ], shard_array, ) buf = await self._encode_shard_dict( shard_dict, chunks_per_shard=chunks_per_shard, buffer_prototype=default_buffer_prototype(), ) if buf is None: await byte_setter.delete() else: await byte_setter.set(buf) async def _encode_shard_dict( self, map: ShardMapping, chunks_per_shard: tuple[int, ...], buffer_prototype: BufferPrototype, ) -> Buffer | None: index = _ShardIndex.create_empty(chunks_per_shard) buffers = [] template = buffer_prototype.buffer.create_zero_length() chunk_start = 0 for chunk_coords in morton_order_iter(chunks_per_shard): value = map.get(chunk_coords) if value is None: continue if len(value) == 0: continue chunk_length = len(value) buffers.append(value) index.set_chunk_slice(chunk_coords, slice(chunk_start, chunk_start + chunk_length)) chunk_start += chunk_length if len(buffers) == 0: return None index_bytes = await self._encode_shard_index(index) if self.index_location == ShardingCodecIndexLocation.start: empty_chunks_mask = index.offsets_and_lengths[..., 0] == MAX_UINT_64 index.offsets_and_lengths[~empty_chunks_mask, 0] += len(index_bytes) index_bytes = await self._encode_shard_index( index ) # encode again with corrected offsets buffers.insert(0, index_bytes) else: buffers.append(index_bytes) return template.combine(buffers) def _is_total_shard( self, all_chunk_coords: set[tuple[int, ...]], chunks_per_shard: tuple[int, ...] ) -> bool: return len(all_chunk_coords) == product(chunks_per_shard) and all( chunk_coords in all_chunk_coords for chunk_coords in c_order_iter(chunks_per_shard) ) async def _decode_shard_index( self, index_bytes: Buffer, chunks_per_shard: tuple[int, ...] 
) -> _ShardIndex: index_array = next( iter( await get_pipeline_class() .from_codecs(self.index_codecs) .decode( [(index_bytes, self._get_index_chunk_spec(chunks_per_shard))], ) ) ) assert index_array is not None return _ShardIndex(index_array.as_numpy_array()) async def _encode_shard_index(self, index: _ShardIndex) -> Buffer: index_bytes = next( iter( await get_pipeline_class() .from_codecs(self.index_codecs) .encode( [ ( get_ndbuffer_class().from_numpy_array(index.offsets_and_lengths), self._get_index_chunk_spec(index.chunks_per_shard), ) ], ) ) ) assert index_bytes is not None assert isinstance(index_bytes, Buffer) return index_bytes def _shard_index_size(self, chunks_per_shard: tuple[int, ...]) -> int: return ( get_pipeline_class() .from_codecs(self.index_codecs) .compute_encoded_size( 16 * product(chunks_per_shard), self._get_index_chunk_spec(chunks_per_shard) ) ) def _get_index_chunk_spec(self, chunks_per_shard: tuple[int, ...]) -> ArraySpec: return ArraySpec( shape=chunks_per_shard + (2,), dtype=UInt64(endianness="little"), fill_value=MAX_UINT_64, config=ArrayConfig( order="C", write_empty_chunks=False ), # Note: this is hard-coded for simplicity -- it is not surfaced into user code, prototype=default_buffer_prototype(), ) def _get_chunk_spec(self, shard_spec: ArraySpec) -> ArraySpec: return ArraySpec( shape=self.chunk_shape, dtype=shard_spec.dtype, fill_value=shard_spec.fill_value, config=shard_spec.config, prototype=shard_spec.prototype, ) def _get_chunks_per_shard(self, shard_spec: ArraySpec) -> tuple[int, ...]: return tuple( s // c for s, c in zip( shard_spec.shape, self.chunk_shape, strict=False, ) ) async def _load_shard_index_maybe( self, byte_getter: ByteGetter, chunks_per_shard: tuple[int, ...] ) -> _ShardIndex | None: shard_index_size = self._shard_index_size(chunks_per_shard) if self.index_location == ShardingCodecIndexLocation.start: index_bytes = await byte_getter.get( prototype=numpy_buffer_prototype(), byte_range=RangeByteRequest(0, shard_index_size), ) else: index_bytes = await byte_getter.get( prototype=numpy_buffer_prototype(), byte_range=SuffixByteRequest(shard_index_size) ) if index_bytes is not None: return await self._decode_shard_index(index_bytes, chunks_per_shard) return None async def _load_shard_index( self, byte_getter: ByteGetter, chunks_per_shard: tuple[int, ...] ) -> _ShardIndex: return ( await self._load_shard_index_maybe(byte_getter, chunks_per_shard) ) or _ShardIndex.create_empty(chunks_per_shard) async def _load_full_shard_maybe( self, byte_getter: ByteGetter, prototype: BufferPrototype, chunks_per_shard: tuple[int, ...] 
) -> _ShardReader | None: shard_bytes = await byte_getter.get(prototype=prototype) return ( await _ShardReader.from_bytes(shard_bytes, self, chunks_per_shard) if shard_bytes else None ) def compute_encoded_size(self, input_byte_length: int, shard_spec: ArraySpec) -> int: chunks_per_shard = self._get_chunks_per_shard(shard_spec) return input_byte_length + self._shard_index_size(chunks_per_shard) zarr-python-3.1.5/src/zarr/codecs/transpose.py000066400000000000000000000077571511007055700214330ustar00rootroot00000000000000from __future__ import annotations from collections.abc import Iterable from dataclasses import dataclass, replace from typing import TYPE_CHECKING, cast import numpy as np from zarr.abc.codec import ArrayArrayCodec from zarr.core.array_spec import ArraySpec from zarr.core.common import JSON, parse_named_configuration if TYPE_CHECKING: from typing import Self from zarr.core.buffer import NDBuffer from zarr.core.chunk_grids import ChunkGrid from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType def parse_transpose_order(data: JSON | Iterable[int]) -> tuple[int, ...]: if not isinstance(data, Iterable): raise TypeError(f"Expected an iterable. Got {data} instead.") if not all(isinstance(a, int) for a in data): raise TypeError(f"Expected an iterable of integers. Got {data} instead.") return tuple(cast("Iterable[int]", data)) @dataclass(frozen=True) class TransposeCodec(ArrayArrayCodec): """Transpose codec""" is_fixed_size = True order: tuple[int, ...] def __init__(self, *, order: Iterable[int]) -> None: order_parsed = parse_transpose_order(order) object.__setattr__(self, "order", order_parsed) @classmethod def from_dict(cls, data: dict[str, JSON]) -> Self: _, configuration_parsed = parse_named_configuration(data, "transpose") return cls(**configuration_parsed) # type: ignore[arg-type] def to_dict(self) -> dict[str, JSON]: return {"name": "transpose", "configuration": {"order": tuple(self.order)}} def validate( self, shape: tuple[int, ...], dtype: ZDType[TBaseDType, TBaseScalar], chunk_grid: ChunkGrid, ) -> None: if len(self.order) != len(shape): raise ValueError( f"The `order` tuple must have as many entries as there are dimensions in the array. Got {self.order}." ) if len(self.order) != len(set(self.order)): raise ValueError( f"There must not be duplicates in the `order` tuple. Got {self.order}." ) if not all(0 <= x < len(shape) for x in self.order): raise ValueError( f"All entries in the `order` tuple must be between 0 and the number of dimensions in the array. Got {self.order}." ) def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: ndim = array_spec.ndim if len(self.order) != ndim: raise ValueError( f"The `order` tuple must have as many entries as there are dimensions in the array. Got {self.order}." ) if len(self.order) != len(set(self.order)): raise ValueError( f"There must not be duplicates in the `order` tuple. Got {self.order}." ) if not all(0 <= x < ndim for x in self.order): raise ValueError( f"All entries in the `order` tuple must be between 0 and the number of dimensions in the array. Got {self.order}." 
) order = tuple(self.order) if order != self.order: return replace(self, order=order) return self def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec: return ArraySpec( shape=tuple(chunk_spec.shape[self.order[i]] for i in range(chunk_spec.ndim)), dtype=chunk_spec.dtype, fill_value=chunk_spec.fill_value, config=chunk_spec.config, prototype=chunk_spec.prototype, ) async def _decode_single( self, chunk_array: NDBuffer, chunk_spec: ArraySpec, ) -> NDBuffer: inverse_order = np.argsort(self.order) return chunk_array.transpose(inverse_order) async def _encode_single( self, chunk_array: NDBuffer, _chunk_spec: ArraySpec, ) -> NDBuffer | None: return chunk_array.transpose(self.order) def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -> int: return input_byte_length zarr-python-3.1.5/src/zarr/codecs/vlen_utf8.py000066400000000000000000000073261511007055700213170ustar00rootroot00000000000000from __future__ import annotations from dataclasses import dataclass from typing import TYPE_CHECKING import numpy as np from numcodecs.vlen import VLenBytes, VLenUTF8 from zarr.abc.codec import ArrayBytesCodec from zarr.core.buffer import Buffer, NDBuffer from zarr.core.common import JSON, parse_named_configuration if TYPE_CHECKING: from typing import Self from zarr.core.array_spec import ArraySpec # can use a global because there are no parameters _vlen_utf8_codec = VLenUTF8() _vlen_bytes_codec = VLenBytes() @dataclass(frozen=True) class VLenUTF8Codec(ArrayBytesCodec): """Variable-length UTF8 codec""" @classmethod def from_dict(cls, data: dict[str, JSON]) -> Self: _, configuration_parsed = parse_named_configuration( data, "vlen-utf8", require_configuration=False ) configuration_parsed = configuration_parsed or {} return cls(**configuration_parsed) def to_dict(self) -> dict[str, JSON]: return {"name": "vlen-utf8", "configuration": {}} def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: return self # TODO: expand the tests for this function async def _decode_single( self, chunk_bytes: Buffer, chunk_spec: ArraySpec, ) -> NDBuffer: assert isinstance(chunk_bytes, Buffer) raw_bytes = chunk_bytes.as_array_like() decoded = _vlen_utf8_codec.decode(raw_bytes) assert decoded.dtype == np.object_ decoded.shape = chunk_spec.shape as_string_dtype = decoded.astype(chunk_spec.dtype.to_native_dtype(), copy=False) return chunk_spec.prototype.nd_buffer.from_numpy_array(as_string_dtype) async def _encode_single( self, chunk_array: NDBuffer, chunk_spec: ArraySpec, ) -> Buffer | None: assert isinstance(chunk_array, NDBuffer) return chunk_spec.prototype.buffer.from_bytes( _vlen_utf8_codec.encode(chunk_array.as_numpy_array()) ) def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -> int: # what is input_byte_length for an object dtype? 
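        # The encoded size of variable-length data depends on the values themselves,
        # not just on the element count, so it cannot be derived from input_byte_length alone.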
raise NotImplementedError("compute_encoded_size is not implemented for VLen codecs") @dataclass(frozen=True) class VLenBytesCodec(ArrayBytesCodec): @classmethod def from_dict(cls, data: dict[str, JSON]) -> Self: _, configuration_parsed = parse_named_configuration( data, "vlen-bytes", require_configuration=False ) configuration_parsed = configuration_parsed or {} return cls(**configuration_parsed) def to_dict(self) -> dict[str, JSON]: return {"name": "vlen-bytes", "configuration": {}} def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: return self async def _decode_single( self, chunk_bytes: Buffer, chunk_spec: ArraySpec, ) -> NDBuffer: assert isinstance(chunk_bytes, Buffer) raw_bytes = chunk_bytes.as_array_like() decoded = _vlen_bytes_codec.decode(raw_bytes) assert decoded.dtype == np.object_ decoded.shape = chunk_spec.shape return chunk_spec.prototype.nd_buffer.from_numpy_array(decoded) async def _encode_single( self, chunk_array: NDBuffer, chunk_spec: ArraySpec, ) -> Buffer | None: assert isinstance(chunk_array, NDBuffer) return chunk_spec.prototype.buffer.from_bytes( _vlen_bytes_codec.encode(chunk_array.as_numpy_array()) ) def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -> int: # what is input_byte_length for an object dtype? raise NotImplementedError("compute_encoded_size is not implemented for VLen codecs") zarr-python-3.1.5/src/zarr/codecs/zstd.py000066400000000000000000000057101511007055700203640ustar00rootroot00000000000000from __future__ import annotations import asyncio from dataclasses import dataclass from functools import cached_property from typing import TYPE_CHECKING import numcodecs from numcodecs.zstd import Zstd from packaging.version import Version from zarr.abc.codec import BytesBytesCodec from zarr.core.buffer.cpu import as_numpy_array_wrapper from zarr.core.common import JSON, parse_named_configuration if TYPE_CHECKING: from typing import Self from zarr.core.array_spec import ArraySpec from zarr.core.buffer import Buffer def parse_zstd_level(data: JSON) -> int: if isinstance(data, int): if data >= 23: raise ValueError(f"Value must be less than or equal to 22. Got {data} instead.") return data raise TypeError(f"Got value with type {type(data)}, but expected an int.") def parse_checksum(data: JSON) -> bool: if isinstance(data, bool): return data raise TypeError(f"Expected bool. Got {type(data)}.") @dataclass(frozen=True) class ZstdCodec(BytesBytesCodec): """zstd codec""" is_fixed_size = True level: int = 0 checksum: bool = False def __init__(self, *, level: int = 0, checksum: bool = False) -> None: # numcodecs 0.13.0 introduces the checksum attribute for the zstd codec _numcodecs_version = Version(numcodecs.__version__) if _numcodecs_version < Version("0.13.0"): raise RuntimeError( "numcodecs version >= 0.13.0 is required to use the zstd codec. " f"Version {_numcodecs_version} is currently installed." 
) level_parsed = parse_zstd_level(level) checksum_parsed = parse_checksum(checksum) object.__setattr__(self, "level", level_parsed) object.__setattr__(self, "checksum", checksum_parsed) @classmethod def from_dict(cls, data: dict[str, JSON]) -> Self: _, configuration_parsed = parse_named_configuration(data, "zstd") return cls(**configuration_parsed) # type: ignore[arg-type] def to_dict(self) -> dict[str, JSON]: return {"name": "zstd", "configuration": {"level": self.level, "checksum": self.checksum}} @cached_property def _zstd_codec(self) -> Zstd: config_dict = {"level": self.level, "checksum": self.checksum} return Zstd.from_config(config_dict) async def _decode_single( self, chunk_bytes: Buffer, chunk_spec: ArraySpec, ) -> Buffer: return await asyncio.to_thread( as_numpy_array_wrapper, self._zstd_codec.decode, chunk_bytes, chunk_spec.prototype ) async def _encode_single( self, chunk_bytes: Buffer, chunk_spec: ArraySpec, ) -> Buffer | None: return await asyncio.to_thread( as_numpy_array_wrapper, self._zstd_codec.encode, chunk_bytes, chunk_spec.prototype ) def compute_encoded_size(self, _input_byte_length: int, _chunk_spec: ArraySpec) -> int: raise NotImplementedError zarr-python-3.1.5/src/zarr/convenience.py000066400000000000000000000014401511007055700204300ustar00rootroot00000000000000""" Convenience helpers. !!! warning "Deprecated" This sub-module is deprecated. All functions here are defined in the top level zarr namespace instead. """ import warnings from zarr.api.synchronous import ( consolidate_metadata, copy, copy_all, copy_store, load, open, open_consolidated, save, save_array, save_group, tree, ) from zarr.errors import ZarrDeprecationWarning __all__ = [ "consolidate_metadata", "copy", "copy_all", "copy_store", "load", "open", "open_consolidated", "save", "save_array", "save_group", "tree", ] warnings.warn( "zarr.convenience is deprecated. " "Import these functions from the top level zarr. namespace instead.", ZarrDeprecationWarning, stacklevel=2, ) zarr-python-3.1.5/src/zarr/core/000077500000000000000000000000001511007055700165135ustar00rootroot00000000000000zarr-python-3.1.5/src/zarr/core/__init__.py000066400000000000000000000004331511007055700206240ustar00rootroot00000000000000""" The ``zarr.core`` module is considered private API and should not be imported directly by 3rd-party code. """ from __future__ import annotations from zarr.core.buffer import Buffer, NDBuffer # noqa: F401 from zarr.core.codec_pipeline import BatchedCodecPipeline # noqa: F401 zarr-python-3.1.5/src/zarr/core/_info.py000066400000000000000000000116271511007055700201660ustar00rootroot00000000000000from __future__ import annotations import dataclasses import textwrap from typing import TYPE_CHECKING, Literal if TYPE_CHECKING: from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec from zarr.abc.numcodec import Numcodec from zarr.core.common import ZarrFormat from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType @dataclasses.dataclass(kw_only=True) class GroupInfo: """ Visual summary for a Group. Note that this method and its properties is not part of Zarr's public API. 
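    Instances are typically obtained via ``Group.info`` rather than constructed directly.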
""" _name: str _type: Literal["Group"] = "Group" _zarr_format: ZarrFormat _read_only: bool _store_type: str _count_members: int | None = None _count_arrays: int | None = None _count_groups: int | None = None def __repr__(self) -> str: template = textwrap.dedent("""\ Name : {_name} Type : {_type} Zarr format : {_zarr_format} Read-only : {_read_only} Store type : {_store_type}""") if self._count_members is not None: template += "\nNo. members : {_count_members}" if self._count_arrays is not None: template += "\nNo. arrays : {_count_arrays}" if self._count_groups is not None: template += "\nNo. groups : {_count_groups}" return template.format(**dataclasses.asdict(self)) def human_readable_size(size: int) -> str: if size < 2**10: return f"{size}" elif size < 2**20: return f"{size / float(2**10):.1f}K" elif size < 2**30: return f"{size / float(2**20):.1f}M" elif size < 2**40: return f"{size / float(2**30):.1f}G" elif size < 2**50: return f"{size / float(2**40):.1f}T" else: return f"{size / float(2**50):.1f}P" def byte_info(size: int) -> str: if size < 2**10: return str(size) else: return f"{size} ({human_readable_size(size)})" @dataclasses.dataclass(kw_only=True, frozen=True, slots=True) class ArrayInfo: """ Visual summary for an Array. Note that this method and its properties is not part of Zarr's public API. """ _type: Literal["Array"] = "Array" _zarr_format: ZarrFormat _data_type: ZDType[TBaseDType, TBaseScalar] _fill_value: object _shape: tuple[int, ...] _shard_shape: tuple[int, ...] | None = None _chunk_shape: tuple[int, ...] | None = None _order: Literal["C", "F"] _read_only: bool _store_type: str _filters: tuple[Numcodec, ...] | tuple[ArrayArrayCodec, ...] = () _serializer: ArrayBytesCodec | None = None _compressors: tuple[Numcodec, ...] | tuple[BytesBytesCodec, ...] = () _count_bytes: int | None = None _count_bytes_stored: int | None = None _count_chunks_initialized: int | None = None def __repr__(self) -> str: template = textwrap.dedent("""\ Type : {_type} Zarr format : {_zarr_format} Data type : {_data_type} Fill value : {_fill_value} Shape : {_shape}""") if self._shard_shape is not None: template += textwrap.dedent(""" Shard shape : {_shard_shape}""") template += textwrap.dedent(""" Chunk shape : {_chunk_shape} Order : {_order} Read-only : {_read_only} Store type : {_store_type}""") # We can't use dataclasses.asdict, because we only want a shallow dict kwargs = {field.name: getattr(self, field.name) for field in dataclasses.fields(self)} if self._chunk_shape is None: # for non-regular chunk grids kwargs["chunk_shape"] = "" template += "\nFilters : {_filters}" if self._serializer is not None: template += "\nSerializer : {_serializer}" template += "\nCompressors : {_compressors}" if self._count_bytes is not None: template += "\nNo. bytes : {_count_bytes}" kwargs["_count_bytes"] = byte_info(self._count_bytes) if self._count_bytes_stored is not None: template += "\nNo. 
bytes stored : {_count_bytes_stored}" kwargs["_count_bytes_stored"] = byte_info(self._count_bytes_stored) if ( self._count_bytes is not None and self._count_bytes_stored is not None and self._count_bytes_stored > 0 ): template += "\nStorage ratio : {_storage_ratio}" kwargs["_storage_ratio"] = f"{self._count_bytes / self._count_bytes_stored:.1f}" if self._count_chunks_initialized is not None: if self._shard_shape is not None: template += "\nShards Initialized : {_count_chunks_initialized}" else: template += "\nChunks Initialized : {_count_chunks_initialized}" return template.format(**kwargs) zarr-python-3.1.5/src/zarr/core/_tree.py000066400000000000000000000040521511007055700201640ustar00rootroot00000000000000import io import os from collections.abc import Sequence from typing import Any from zarr.core.group import AsyncGroup try: import rich import rich.console import rich.tree except ImportError as e: raise ImportError("'rich' is required for Group.tree") from e class TreeRepr: """ A simple object with a tree-like repr for the Zarr Group. Note that this object and it's implementation isn't considered part of Zarr's public API. """ def __init__(self, tree: rich.tree.Tree) -> None: self._tree = tree def __repr__(self) -> str: color_system = os.environ.get("OVERRIDE_COLOR_SYSTEM", rich.get_console().color_system) console = rich.console.Console(file=io.StringIO(), color_system=color_system) console.print(self._tree) return str(console.file.getvalue()) def _repr_mimebundle_( self, include: Sequence[str], exclude: Sequence[str], **kwargs: Any, ) -> dict[str, str]: # For jupyter support. # Unsure why mypy infers the return type to by Any return self._tree._repr_mimebundle_(include=include, exclude=exclude, **kwargs) # type: ignore[no-any-return] async def group_tree_async(group: AsyncGroup, max_depth: int | None = None) -> TreeRepr: tree = rich.tree.Tree(label=f"[bold]{group.name}[/bold]") nodes = {"": tree} members = sorted([x async for x in group.members(max_depth=max_depth)]) for key, node in members: if key.count("/") == 0: parent_key = "" else: parent_key = key.rsplit("/", 1)[0] parent = nodes[parent_key] # We want what the spec calls the node "name", the part excluding all leading # /'s and path segments. But node.name includes all that, so we build it here. 
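        # e.g. a member key of "a/b/c" yields the display name "c", attached to the tree
        # node that was registered under its parent key "a/b".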
name = key.rsplit("/")[-1] if isinstance(node, AsyncGroup): label = f"[bold]{name}[/bold]" else: label = f"[bold]{name}[/bold] {node.shape} {node.dtype}" nodes[key] = parent.add(label) return TreeRepr(tree) zarr-python-3.1.5/src/zarr/core/array.py000066400000000000000000006177151511007055700202240ustar00rootroot00000000000000from __future__ import annotations import json import warnings from asyncio import gather from collections.abc import Iterable, Mapping from dataclasses import dataclass, field, replace from itertools import starmap from logging import getLogger from typing import ( TYPE_CHECKING, Any, Generic, Literal, TypeAlias, TypedDict, cast, overload, ) from warnings import warn import numpy as np from typing_extensions import deprecated import zarr from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec, Codec from zarr.abc.numcodec import Numcodec, _is_numcodec from zarr.codecs._v2 import V2Codec from zarr.codecs.bytes import BytesCodec from zarr.codecs.vlen_utf8 import VLenBytesCodec, VLenUTF8Codec from zarr.codecs.zstd import ZstdCodec from zarr.core._info import ArrayInfo from zarr.core.array_spec import ArrayConfig, ArrayConfigLike, parse_array_config from zarr.core.attributes import Attributes from zarr.core.buffer import ( BufferPrototype, NDArrayLike, NDArrayLikeOrScalar, NDBuffer, default_buffer_prototype, ) from zarr.core.buffer.cpu import buffer_prototype as cpu_buffer_prototype from zarr.core.chunk_grids import RegularChunkGrid, _auto_partition, normalize_chunks from zarr.core.chunk_key_encodings import ( ChunkKeyEncoding, ChunkKeyEncodingLike, DefaultChunkKeyEncoding, V2ChunkKeyEncoding, parse_chunk_key_encoding, ) from zarr.core.common import ( JSON, ZARR_JSON, ZARRAY_JSON, ZATTRS_JSON, DimensionNames, MemoryOrder, ShapeLike, ZarrFormat, _default_zarr_format, _warn_order_kwarg, ceildiv, concurrent_map, parse_shapelike, product, ) from zarr.core.config import config as zarr_config from zarr.core.dtype import ( VariableLengthBytes, VariableLengthUTF8, ZDType, ZDTypeLike, parse_dtype, ) from zarr.core.dtype.common import HasEndianness, HasItemSize, HasObjectCodec from zarr.core.indexing import ( AsyncOIndex, AsyncVIndex, BasicIndexer, BasicSelection, BlockIndex, BlockIndexer, CoordinateIndexer, CoordinateSelection, Fields, Indexer, MaskIndexer, MaskSelection, OIndex, OrthogonalIndexer, OrthogonalSelection, Selection, VIndex, _iter_grid, _iter_regions, check_fields, check_no_multi_fields, is_pure_fancy_indexing, is_pure_orthogonal_indexing, is_scalar, pop_fields, ) from zarr.core.metadata import ( ArrayMetadata, ArrayMetadataDict, ArrayMetadataJSON_V3, ArrayV2Metadata, ArrayV2MetadataDict, ArrayV3Metadata, T_ArrayMetadata, ) from zarr.core.metadata.io import save_metadata from zarr.core.metadata.v2 import ( CompressorLikev2, get_object_codec_id, parse_compressor, parse_filters, ) from zarr.core.metadata.v3 import parse_node_type_array from zarr.core.sync import sync from zarr.errors import ( ArrayNotFoundError, MetadataValidationError, ZarrDeprecationWarning, ZarrUserWarning, ) from zarr.registry import ( _parse_array_array_codec, _parse_array_bytes_codec, _parse_bytes_bytes_codec, get_pipeline_class, ) from zarr.storage._common import StorePath, ensure_no_existing_node, make_store_path from zarr.storage._utils import _relativize_path if TYPE_CHECKING: from collections.abc import Iterator, Sequence from typing import Self import numpy.typing as npt from zarr.abc.codec import CodecPipeline from zarr.abc.store import Store from zarr.codecs.sharding 
import ShardingCodecIndexLocation from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar from zarr.storage import StoreLike from zarr.types import AnyArray, AnyAsyncArray, AsyncArrayV2, AsyncArrayV3 # Array and AsyncArray are defined in the base ``zarr`` namespace __all__ = [ "DEFAULT_FILL_VALUE", "DefaultFillValue", "create_codec_pipeline", "parse_array_metadata", ] logger = getLogger(__name__) class DefaultFillValue: """ Sentinel class to indicate that the default fill value should be used. This class exists because conventional values used to convey "defaultness" like ``None`` or ``"auto"` are ambiguous when specifying the fill value parameter of a Zarr array. The value ``None`` is ambiguous because it is a valid fill value for Zarr V2 (resulting in ``"fill_value": null`` in array metadata). A string like ``"auto"`` is ambiguous because such a string is a valid fill value for an array with a string data type. An instance of this class lies outside the space of valid fill values, which means it can umambiguously express that the default fill value should be used. """ DEFAULT_FILL_VALUE = DefaultFillValue() def parse_array_metadata(data: Any) -> ArrayMetadata: if isinstance(data, ArrayMetadata): return data elif isinstance(data, dict): zarr_format = data.get("zarr_format") if zarr_format == 3: meta_out = ArrayV3Metadata.from_dict(data) if len(meta_out.storage_transformers) > 0: msg = ( f"Array metadata contains storage transformers: {meta_out.storage_transformers}." "Arrays with storage transformers are not supported in zarr-python at this time." ) raise ValueError(msg) return meta_out elif zarr_format == 2: return ArrayV2Metadata.from_dict(data) else: raise ValueError(f"Invalid zarr_format: {zarr_format}. Expected 2 or 3") raise TypeError # pragma: no cover def create_codec_pipeline(metadata: ArrayMetadata, *, store: Store | None = None) -> CodecPipeline: if store is not None: try: return get_pipeline_class().from_array_metadata_and_store( array_metadata=metadata, store=store ) except NotImplementedError: pass if isinstance(metadata, ArrayV3Metadata): return get_pipeline_class().from_codecs(metadata.codecs) elif isinstance(metadata, ArrayV2Metadata): v2_codec = V2Codec(filters=metadata.filters, compressor=metadata.compressor) return get_pipeline_class().from_codecs([v2_codec]) raise TypeError # pragma: no cover async def get_array_metadata( store_path: StorePath, zarr_format: ZarrFormat | None = 3 ) -> dict[str, JSON]: if zarr_format == 2: zarray_bytes, zattrs_bytes = await gather( (store_path / ZARRAY_JSON).get(prototype=cpu_buffer_prototype), (store_path / ZATTRS_JSON).get(prototype=cpu_buffer_prototype), ) if zarray_bytes is None: msg = ( "A Zarr V2 array metadata document was not found in store " f"{store_path.store!r} at path {store_path.path!r}." ) raise ArrayNotFoundError(msg) elif zarr_format == 3: zarr_json_bytes = await (store_path / ZARR_JSON).get(prototype=cpu_buffer_prototype) if zarr_json_bytes is None: msg = ( "A Zarr V3 array metadata document was not found in store " f"{store_path.store!r} at path {store_path.path!r}." 
) raise ArrayNotFoundError(msg) elif zarr_format is None: zarr_json_bytes, zarray_bytes, zattrs_bytes = await gather( (store_path / ZARR_JSON).get(prototype=cpu_buffer_prototype), (store_path / ZARRAY_JSON).get(prototype=cpu_buffer_prototype), (store_path / ZATTRS_JSON).get(prototype=cpu_buffer_prototype), ) if zarr_json_bytes is not None and zarray_bytes is not None: # warn and favor v3 msg = f"Both zarr.json (Zarr format 3) and .zarray (Zarr format 2) metadata objects exist at {store_path}. Zarr v3 will be used." warnings.warn(msg, category=ZarrUserWarning, stacklevel=1) if zarr_json_bytes is None and zarray_bytes is None: msg = ( f"Neither Zarr V3 nor Zarr V2 array metadata documents " f"were found in store {store_path.store!r} at path {store_path.path!r}." ) raise ArrayNotFoundError(msg) # set zarr_format based on which keys were found if zarr_json_bytes is not None: zarr_format = 3 else: zarr_format = 2 else: msg = f"Invalid value for 'zarr_format'. Expected 2, 3, or None. Got '{zarr_format}'." # type: ignore[unreachable] raise MetadataValidationError(msg) metadata_dict: dict[str, JSON] if zarr_format == 2: # V2 arrays are comprised of a .zarray and .zattrs objects assert zarray_bytes is not None metadata_dict = json.loads(zarray_bytes.to_bytes()) zattrs_dict = json.loads(zattrs_bytes.to_bytes()) if zattrs_bytes is not None else {} metadata_dict["attributes"] = zattrs_dict else: # V3 arrays are comprised of a zarr.json object assert zarr_json_bytes is not None metadata_dict = json.loads(zarr_json_bytes.to_bytes()) parse_node_type_array(metadata_dict.get("node_type")) return metadata_dict @dataclass(frozen=True) class AsyncArray(Generic[T_ArrayMetadata]): """ An asynchronous array class representing a chunked array stored in a Zarr store. Parameters ---------- metadata : ArrayMetadata The metadata of the array. store_path : StorePath The path to the Zarr store. config : ArrayConfigLike, optional The runtime configuration of the array, by default None. Attributes ---------- metadata : ArrayMetadata The metadata of the array. store_path : StorePath The path to the Zarr store. codec_pipeline : CodecPipeline The codec pipeline used for encoding and decoding chunks. _config : ArrayConfig The runtime configuration of the array. """ metadata: T_ArrayMetadata store_path: StorePath codec_pipeline: CodecPipeline = field(init=False) _config: ArrayConfig @overload def __init__( self: AsyncArrayV2, metadata: ArrayV2Metadata | ArrayV2MetadataDict, store_path: StorePath, config: ArrayConfigLike | None = None, ) -> None: ... @overload def __init__( self: AsyncArrayV3, metadata: ArrayV3Metadata | ArrayMetadataJSON_V3, store_path: StorePath, config: ArrayConfigLike | None = None, ) -> None: ... 
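    # The overloads above tie the metadata type to the matching array alias
    # (AsyncArrayV2 for Zarr format 2 metadata, AsyncArrayV3 for Zarr format 3 metadata),
    # which lets static type checkers narrow the generic parameter from the metadata argument.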
def __init__( self, metadata: ArrayMetadata | ArrayMetadataDict, store_path: StorePath, config: ArrayConfigLike | None = None, ) -> None: metadata_parsed = parse_array_metadata(metadata) config_parsed = parse_array_config(config) object.__setattr__(self, "metadata", metadata_parsed) object.__setattr__(self, "store_path", store_path) object.__setattr__(self, "_config", config_parsed) object.__setattr__( self, "codec_pipeline", create_codec_pipeline(metadata=metadata_parsed, store=store_path.store), ) # this overload defines the function signature when zarr_format is 2 @overload @classmethod async def create( cls, store: StoreLike, *, # v2 and v3 shape: ShapeLike, dtype: ZDTypeLike, zarr_format: Literal[2], fill_value: Any | None = DEFAULT_FILL_VALUE, attributes: dict[str, JSON] | None = None, chunks: ShapeLike | None = None, dimension_separator: Literal[".", "/"] | None = None, order: MemoryOrder | None = None, filters: list[dict[str, JSON]] | None = None, compressor: CompressorLikev2 | Literal["auto"] = "auto", # runtime overwrite: bool = False, data: npt.ArrayLike | None = None, config: ArrayConfigLike | None = None, ) -> AsyncArrayV2: ... # this overload defines the function signature when zarr_format is 3 @overload @classmethod async def create( cls, store: StoreLike, *, # v2 and v3 shape: ShapeLike, dtype: ZDTypeLike, zarr_format: Literal[3], fill_value: Any | None = DEFAULT_FILL_VALUE, attributes: dict[str, JSON] | None = None, # v3 only chunk_shape: ShapeLike | None = None, chunk_key_encoding: ( ChunkKeyEncoding | tuple[Literal["default"], Literal[".", "/"]] | tuple[Literal["v2"], Literal[".", "/"]] | None ) = None, codecs: Iterable[Codec | dict[str, JSON]] | None = None, dimension_names: DimensionNames = None, # runtime overwrite: bool = False, data: npt.ArrayLike | None = None, config: ArrayConfigLike | None = None, ) -> AsyncArrayV3: ... @overload @classmethod async def create( cls, store: StoreLike, *, # v2 and v3 shape: ShapeLike, dtype: ZDTypeLike, zarr_format: Literal[3] = 3, fill_value: Any | None = DEFAULT_FILL_VALUE, attributes: dict[str, JSON] | None = None, # v3 only chunk_shape: ShapeLike | None = None, chunk_key_encoding: ( ChunkKeyEncoding | tuple[Literal["default"], Literal[".", "/"]] | tuple[Literal["v2"], Literal[".", "/"]] | None ) = None, codecs: Iterable[Codec | dict[str, JSON]] | None = None, dimension_names: DimensionNames = None, # runtime overwrite: bool = False, data: npt.ArrayLike | None = None, config: ArrayConfigLike | None = None, ) -> AsyncArrayV3: ... @overload @classmethod async def create( cls, store: StoreLike, *, # v2 and v3 shape: ShapeLike, dtype: ZDTypeLike, zarr_format: ZarrFormat, fill_value: Any | None = DEFAULT_FILL_VALUE, attributes: dict[str, JSON] | None = None, # v3 only chunk_shape: ShapeLike | None = None, chunk_key_encoding: ( ChunkKeyEncoding | tuple[Literal["default"], Literal[".", "/"]] | tuple[Literal["v2"], Literal[".", "/"]] | None ) = None, codecs: Iterable[Codec | dict[str, JSON]] | None = None, dimension_names: DimensionNames = None, # v2 only chunks: ShapeLike | None = None, dimension_separator: Literal[".", "/"] | None = None, order: MemoryOrder | None = None, filters: list[dict[str, JSON]] | None = None, compressor: CompressorLike = "auto", # runtime overwrite: bool = False, data: npt.ArrayLike | None = None, config: ArrayConfigLike | None = None, ) -> AnyAsyncArray: ... 
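    # A minimal sketch of the replacement recommended by the deprecation below (assuming an
    # in-memory store; see zarr.api.asynchronous.create_array for the full signature):
    #
    #     from zarr.api.asynchronous import create_array
    #     from zarr.storage import MemoryStore
    #
    #     arr = await create_array(store=MemoryStore(), shape=(100, 100), dtype="int32")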
@classmethod @deprecated("Use zarr.api.asynchronous.create_array instead.", category=ZarrDeprecationWarning) async def create( cls, store: StoreLike, *, # v2 and v3 shape: ShapeLike, dtype: ZDTypeLike, zarr_format: ZarrFormat = 3, fill_value: Any | None = DEFAULT_FILL_VALUE, attributes: dict[str, JSON] | None = None, # v3 only chunk_shape: ShapeLike | None = None, chunk_key_encoding: ( ChunkKeyEncodingLike | tuple[Literal["default"], Literal[".", "/"]] | tuple[Literal["v2"], Literal[".", "/"]] | None ) = None, codecs: Iterable[Codec | dict[str, JSON]] | None = None, dimension_names: DimensionNames = None, # v2 only chunks: ShapeLike | None = None, dimension_separator: Literal[".", "/"] | None = None, order: MemoryOrder | None = None, filters: list[dict[str, JSON]] | None = None, compressor: CompressorLike = "auto", # runtime overwrite: bool = False, data: npt.ArrayLike | None = None, config: ArrayConfigLike | None = None, ) -> AnyAsyncArray: """Method to create a new asynchronous array instance. !!! warning "Deprecated" `AsyncArray.create()` is deprecated since v3.0.0 and will be removed in a future release. Use [`zarr.api.asynchronous.create_array`][] instead. Parameters ---------- store : StoreLike The store where the array will be created. See the [storage documentation in the user guide][user-guide-store-like] for a description of all valid StoreLike values. shape : ShapeLike The shape of the array. dtype : ZDTypeLike The data type of the array. zarr_format : ZarrFormat, optional The Zarr format version (default is 3). fill_value : Any, optional The fill value of the array (default is None). attributes : dict[str, JSON], optional The attributes of the array (default is None). chunk_shape : tuple[int, ...], optional The shape of the array's chunks Zarr format 3 only. Zarr format 2 arrays should use `chunks` instead. If not specified, default are guessed based on the shape and dtype. chunk_key_encoding : ChunkKeyEncodingLike, optional A specification of how the chunk keys are represented in storage. Zarr format 3 only. Zarr format 2 arrays should use `dimension_separator` instead. Default is ``("default", "/")``. codecs : Sequence of Codecs or dicts, optional An iterable of Codec or dict serializations of Codecs. The elements of this collection specify the transformation from array values to stored bytes. Zarr format 3 only. Zarr format 2 arrays should use ``filters`` and ``compressor`` instead. If no codecs are provided, default codecs will be used: dimension_names : Iterable[str | None], optional The names of the dimensions (default is None). Zarr format 3 only. Zarr format 2 arrays should not use this parameter. chunks : ShapeLike, optional The shape of the array's chunks. Zarr format 2 only. Zarr format 3 arrays should use ``chunk_shape`` instead. If not specified, default are guessed based on the shape and dtype. dimension_separator : Literal[".", "/"], optional The dimension separator (default is "."). Zarr format 2 only. Zarr format 3 arrays should use ``chunk_key_encoding`` instead. order : Literal["C", "F"], optional The memory of the array (default is "C"). If ``zarr_format`` is 2, this parameter sets the memory order of the array. If ``zarr_format`` is 3, then this parameter is deprecated, because memory order is a runtime parameter for Zarr 3 arrays. The recommended way to specify the memory order for Zarr 3 arrays is via the ``config`` parameter, e.g. ``{'config': 'C'}``. 
filters : Iterable[Codec] | Literal["auto"], optional Iterable of filters to apply to each chunk of the array, in order, before serializing that chunk to bytes. For Zarr format 3, a "filter" is a codec that takes an array and returns an array, and these values must be instances of [`zarr.abc.codec.ArrayArrayCodec`][], or a dict representations of [`zarr.abc.codec.ArrayArrayCodec`][]. For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the the order if your filters is consistent with the behavior of each filter. The default value of ``"auto"`` instructs Zarr to use a default used based on the data type of the array and the Zarr format specified. For all data types in Zarr V3, and most data types in Zarr V2, the default filters are empty. The only cases where default filters are not empty is when the Zarr format is 2, and the data type is a variable-length data type like [`zarr.dtype.VariableLengthUTF8`][] or [`zarr.dtype.VariableLengthUTF8`][]. In these cases, the default filters contains a single element which is a codec specific to that particular data type. To create an array with no filters, provide an empty iterable or the value ``None``. compressor : dict[str, JSON], optional The compressor used to compress the data (default is None). Zarr format 2 only. Zarr format 3 arrays should use ``codecs`` instead. If no ``compressor`` is provided, a default compressor will be used: - For numeric arrays, the default is ``ZstdCodec``. - For Unicode strings, the default is ``VLenUTF8Codec``. - For bytes or objects, the default is ``VLenBytesCodec``. These defaults can be changed by modifying the value of ``array.v2_default_compressor`` in [`zarr.config`][zarr.config]. overwrite : bool, optional Whether to raise an error if the store already exists (default is False). data : npt.ArrayLike, optional The data to be inserted into the array (default is None). config : ArrayConfigLike, optional Runtime configuration for the array. Returns ------- AsyncArray The created asynchronous array instance. """ return await cls._create( store, # v2 and v3 shape=shape, dtype=dtype, zarr_format=zarr_format, fill_value=fill_value, attributes=attributes, # v3 only chunk_shape=chunk_shape, chunk_key_encoding=chunk_key_encoding, codecs=codecs, dimension_names=dimension_names, # v2 only chunks=chunks, dimension_separator=dimension_separator, order=order, filters=filters, compressor=compressor, # runtime overwrite=overwrite, data=data, config=config, ) @classmethod async def _create( cls, store: StoreLike, *, # v2 and v3 shape: ShapeLike, dtype: ZDTypeLike | ZDType[TBaseDType, TBaseScalar], zarr_format: ZarrFormat = 3, fill_value: Any | None = DEFAULT_FILL_VALUE, attributes: dict[str, JSON] | None = None, # v3 only chunk_shape: ShapeLike | None = None, chunk_key_encoding: ( ChunkKeyEncodingLike | tuple[Literal["default"], Literal[".", "/"]] | tuple[Literal["v2"], Literal[".", "/"]] | None ) = None, codecs: Iterable[Codec | dict[str, JSON]] | None = None, dimension_names: DimensionNames = None, # v2 only chunks: ShapeLike | None = None, dimension_separator: Literal[".", "/"] | None = None, order: MemoryOrder | None = None, filters: Iterable[dict[str, JSON] | Numcodec] | None = None, compressor: CompressorLike = "auto", # runtime overwrite: bool = False, data: npt.ArrayLike | None = None, config: ArrayConfigLike | None = None, ) -> AnyAsyncArray: """Method to create a new asynchronous array instance. Deprecated in favor of [`zarr.api.asynchronous.create_array`][]. 
""" dtype_parsed = parse_dtype(dtype, zarr_format=zarr_format) store_path = await make_store_path(store) shape = parse_shapelike(shape) if chunks is not None and chunk_shape is not None: raise ValueError("Only one of chunk_shape or chunks can be provided.") item_size = 1 if isinstance(dtype_parsed, HasItemSize): item_size = dtype_parsed.item_size if chunks: _chunks = normalize_chunks(chunks, shape, item_size) else: _chunks = normalize_chunks(chunk_shape, shape, item_size) config_parsed = parse_array_config(config) result: AnyAsyncArray if zarr_format == 3: if dimension_separator is not None: raise ValueError( "dimension_separator cannot be used for arrays with zarr_format 3. Use chunk_key_encoding instead." ) if filters is not None: raise ValueError( "filters cannot be used for arrays with zarr_format 3. Use array-to-array codecs instead." ) if compressor != "auto": raise ValueError( "compressor cannot be used for arrays with zarr_format 3. Use bytes-to-bytes codecs instead." ) if order is not None: _warn_order_kwarg() result = await cls._create_v3( store_path, shape=shape, dtype=dtype_parsed, chunk_shape=_chunks, fill_value=fill_value, chunk_key_encoding=chunk_key_encoding, codecs=codecs, dimension_names=dimension_names, attributes=attributes, overwrite=overwrite, config=config_parsed, ) elif zarr_format == 2: if codecs is not None: raise ValueError( "codecs cannot be used for arrays with zarr_format 2. Use filters and compressor instead." ) if chunk_key_encoding is not None: raise ValueError( "chunk_key_encoding cannot be used for arrays with zarr_format 2. Use dimension_separator instead." ) if dimension_names is not None: raise ValueError("dimension_names cannot be used for arrays with zarr_format 2.") if order is None: order_parsed = config_parsed.order else: order_parsed = order config_parsed = replace(config_parsed, order=order) result = await cls._create_v2( store_path, shape=shape, dtype=dtype_parsed, chunks=_chunks, dimension_separator=dimension_separator, fill_value=fill_value, order=order_parsed, config=config_parsed, filters=filters, compressor=compressor, attributes=attributes, overwrite=overwrite, ) else: raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover if data is not None: # insert user-provided data await result.setitem(..., data) return result @staticmethod def _create_metadata_v3( shape: ShapeLike, dtype: ZDType[TBaseDType, TBaseScalar], chunk_shape: tuple[int, ...], fill_value: Any | None = DEFAULT_FILL_VALUE, chunk_key_encoding: ChunkKeyEncodingLike | None = None, codecs: Iterable[Codec | dict[str, JSON]] | None = None, dimension_names: DimensionNames = None, attributes: dict[str, JSON] | None = None, ) -> ArrayV3Metadata: """ Create an instance of ArrayV3Metadata. """ filters: tuple[ArrayArrayCodec, ...] compressors: tuple[BytesBytesCodec, ...] 
shape = parse_shapelike(shape) if codecs is None: filters = default_filters_v3(dtype) serializer = default_serializer_v3(dtype) compressors = default_compressors_v3(dtype) codecs_parsed = (*filters, serializer, *compressors) else: codecs_parsed = tuple(codecs) chunk_key_encoding_parsed: ChunkKeyEncodingLike if chunk_key_encoding is None: chunk_key_encoding_parsed = {"name": "default", "separator": "/"} else: chunk_key_encoding_parsed = chunk_key_encoding if isinstance(fill_value, DefaultFillValue) or fill_value is None: # Use dtype's default scalar for DefaultFillValue sentinel # For v3, None is converted to DefaultFillValue behavior fill_value_parsed = dtype.default_scalar() else: fill_value_parsed = fill_value chunk_grid_parsed = RegularChunkGrid(chunk_shape=chunk_shape) return ArrayV3Metadata( shape=shape, data_type=dtype, chunk_grid=chunk_grid_parsed, chunk_key_encoding=chunk_key_encoding_parsed, fill_value=fill_value_parsed, codecs=codecs_parsed, # type: ignore[arg-type] dimension_names=tuple(dimension_names) if dimension_names else None, attributes=attributes or {}, ) @classmethod async def _create_v3( cls, store_path: StorePath, *, shape: ShapeLike, dtype: ZDType[TBaseDType, TBaseScalar], chunk_shape: tuple[int, ...], config: ArrayConfig, fill_value: Any | None = DEFAULT_FILL_VALUE, chunk_key_encoding: ( ChunkKeyEncodingLike | tuple[Literal["default"], Literal[".", "/"]] | tuple[Literal["v2"], Literal[".", "/"]] | None ) = None, codecs: Iterable[Codec | dict[str, JSON]] | None = None, dimension_names: DimensionNames = None, attributes: dict[str, JSON] | None = None, overwrite: bool = False, ) -> AsyncArrayV3: if overwrite: if store_path.store.supports_deletes: await store_path.delete_dir() else: await ensure_no_existing_node(store_path, zarr_format=3) else: await ensure_no_existing_node(store_path, zarr_format=3) if isinstance(chunk_key_encoding, tuple): chunk_key_encoding = ( V2ChunkKeyEncoding(separator=chunk_key_encoding[1]) if chunk_key_encoding[0] == "v2" else DefaultChunkKeyEncoding(separator=chunk_key_encoding[1]) ) metadata = cls._create_metadata_v3( shape=shape, dtype=dtype, chunk_shape=chunk_shape, fill_value=fill_value, chunk_key_encoding=chunk_key_encoding, codecs=codecs, dimension_names=dimension_names, attributes=attributes, ) array = cls(metadata=metadata, store_path=store_path, config=config) await array._save_metadata(metadata, ensure_parents=True) return array @staticmethod def _create_metadata_v2( shape: tuple[int, ...], dtype: ZDType[TBaseDType, TBaseScalar], chunks: tuple[int, ...], order: MemoryOrder, dimension_separator: Literal[".", "/"] | None = None, fill_value: Any | None = DEFAULT_FILL_VALUE, filters: Iterable[dict[str, JSON] | Numcodec] | None = None, compressor: CompressorLikev2 = None, attributes: dict[str, JSON] | None = None, ) -> ArrayV2Metadata: if dimension_separator is None: dimension_separator = "." 
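        # "." is the default dimension separator defined by the Zarr format 2 specification.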
# Handle DefaultFillValue sentinel if isinstance(fill_value, DefaultFillValue): fill_value_parsed: Any = dtype.default_scalar() else: # For v2, preserve None as-is (backward compatibility) fill_value_parsed = fill_value return ArrayV2Metadata( shape=shape, dtype=dtype, chunks=chunks, order=order, dimension_separator=dimension_separator, fill_value=fill_value_parsed, compressor=compressor, filters=filters, attributes=attributes, ) @classmethod async def _create_v2( cls, store_path: StorePath, *, shape: tuple[int, ...], dtype: ZDType[TBaseDType, TBaseScalar], chunks: tuple[int, ...], order: MemoryOrder, config: ArrayConfig, dimension_separator: Literal[".", "/"] | None = None, fill_value: Any | None = DEFAULT_FILL_VALUE, filters: Iterable[dict[str, JSON] | Numcodec] | None = None, compressor: CompressorLike = "auto", attributes: dict[str, JSON] | None = None, overwrite: bool = False, ) -> AsyncArrayV2: if overwrite: if store_path.store.supports_deletes: await store_path.delete_dir() else: await ensure_no_existing_node(store_path, zarr_format=2) else: await ensure_no_existing_node(store_path, zarr_format=2) compressor_parsed: CompressorLikev2 if compressor == "auto": compressor_parsed = default_compressor_v2(dtype) elif isinstance(compressor, BytesBytesCodec): raise ValueError( "Cannot use a BytesBytesCodec as a compressor for zarr v2 arrays. " "Use a numcodecs codec directly instead." ) else: compressor_parsed = compressor if filters is None: filters = default_filters_v2(dtype) metadata = cls._create_metadata_v2( shape=shape, dtype=dtype, chunks=chunks, order=order, dimension_separator=dimension_separator, fill_value=fill_value, filters=filters, compressor=compressor_parsed, attributes=attributes, ) array = cls(metadata=metadata, store_path=store_path, config=config) await array._save_metadata(metadata, ensure_parents=True) return array @classmethod def from_dict( cls, store_path: StorePath, data: dict[str, JSON], ) -> AnyAsyncArray: """ Create a Zarr array from a dictionary, with support for both Zarr format 2 and 3 metadata. Parameters ---------- store_path : StorePath The path within the store where the array should be created. data : dict A dictionary representing the array data. This dictionary should include necessary metadata for the array, such as shape, dtype, and other attributes. The format of the metadata will determine whether a Zarr format 2 or 3 array is created. Returns ------- AsyncArrayV3 or AsyncArrayV2 The created Zarr array, either using Zarr format 2 or 3 metadata based on the provided data. Raises ------ ValueError If the dictionary data is invalid or incompatible with either Zarr format 2 or 3 array creation. """ metadata = parse_array_metadata(data) return cls(metadata=metadata, store_path=store_path) @classmethod async def open( cls, store: StoreLike, zarr_format: ZarrFormat | None = 3, ) -> AnyAsyncArray: """ Async method to open an existing Zarr array from a given store. Parameters ---------- store : StoreLike The store containing the Zarr array. See the [storage documentation in the user guide][user-guide-store-like] for a description of all valid StoreLike values. zarr_format : ZarrFormat | None, optional The Zarr format version (default is 3). Returns ------- AsyncArray The opened Zarr array. 
Examples -------- ```python import asyncio import zarr from zarr.core.array import AsyncArray async def example(): store = zarr.storage.MemoryStore() # First create an array to open await zarr.api.asynchronous.create_array( store=store, shape=(100, 100), dtype="int32" ) # Now open it async_arr = await AsyncArray.open(store) return async_arr async_arr = asyncio.run(example()) # ``` """ store_path = await make_store_path(store) metadata_dict = await get_array_metadata(store_path, zarr_format=zarr_format) # TODO: remove this cast when we have better type hints _metadata_dict = cast("ArrayMetadataJSON_V3", metadata_dict) return cls(store_path=store_path, metadata=_metadata_dict) @property def store(self) -> Store: return self.store_path.store @property def ndim(self) -> int: """Returns the number of dimensions in the Array. Returns ------- int The number of dimensions in the Array. """ return len(self.metadata.shape) @property def shape(self) -> tuple[int, ...]: """Returns the shape of the Array. Returns ------- tuple The shape of the Array. """ return self.metadata.shape @property def chunks(self) -> tuple[int, ...]: """Returns the chunk shape of the Array. If sharding is used the inner chunk shape is returned. Only defined for arrays using using `RegularChunkGrid`. If array doesn't use `RegularChunkGrid`, `NotImplementedError` is raised. Returns ------- tuple[int, ...]: The chunk shape of the Array. """ return self.metadata.chunks @property def shards(self) -> tuple[int, ...] | None: """Returns the shard shape of the Array. Returns None if sharding is not used. Only defined for arrays using using `RegularChunkGrid`. If array doesn't use `RegularChunkGrid`, `NotImplementedError` is raised. Returns ------- tuple[int, ...]: The shard shape of the Array. """ return self.metadata.shards @property def size(self) -> int: """Returns the total number of elements in the array Returns ------- int Total number of elements in the array """ return np.prod(self.metadata.shape).item() @property def filters(self) -> tuple[Numcodec, ...] | tuple[ArrayArrayCodec, ...]: """ Filters that are applied to each chunk of the array, in order, before serializing that chunk to bytes. """ if self.metadata.zarr_format == 2: filters = self.metadata.filters if filters is None: return () return filters return tuple( codec for codec in self.metadata.inner_codecs if isinstance(codec, ArrayArrayCodec) ) @property def serializer(self) -> ArrayBytesCodec | None: """ Array-to-bytes codec to use for serializing the chunks into bytes. """ if self.metadata.zarr_format == 2: return None return next( codec for codec in self.metadata.inner_codecs if isinstance(codec, ArrayBytesCodec) ) @property @deprecated("Use AsyncArray.compressors instead.", category=ZarrDeprecationWarning) def compressor(self) -> Numcodec | None: """ Compressor that is applied to each chunk of the array. !!! warning "Deprecated" `Array.compressor` is deprecated since v3.0.0 and will be removed in a future release. Use [`Array.compressors`][zarr.AsyncArray.compressors] instead. """ if self.metadata.zarr_format == 2: return self.metadata.compressor raise TypeError("`compressor` is not available for Zarr format 3 arrays.") @property def compressors(self) -> tuple[Numcodec, ...] | tuple[BytesBytesCodec, ...]: """ Compressors that are applied to each chunk of the array. Compressors are applied in order, and after any filters are applied (if any are specified) and the data is serialized into bytes. 
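
        A minimal sketch of inspecting the compressors of a freshly created array (assuming
        an in-memory store; the exact defaults depend on the array's data type and
        ``zarr.config``):

        ```python
        import asyncio
        import zarr
        import zarr.api.asynchronous

        async def example():
            arr = await zarr.api.asynchronous.create_array(
                store=zarr.storage.MemoryStore(), shape=(4,), dtype="int32"
            )
            return arr.compressors

        compressors = asyncio.run(example())
        ```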
""" if self.metadata.zarr_format == 2: if self.metadata.compressor is not None: return (self.metadata.compressor,) return () return tuple( codec for codec in self.metadata.inner_codecs if isinstance(codec, BytesBytesCodec) ) @property def _zdtype(self) -> ZDType[TBaseDType, TBaseScalar]: """ The zarr-specific representation of the array data type """ if self.metadata.zarr_format == 2: return self.metadata.dtype else: return self.metadata.data_type @property def dtype(self) -> TBaseDType: """Returns the data type of the array. Returns ------- np.dtype Data type of the array """ return self._zdtype.to_native_dtype() @property def order(self) -> MemoryOrder: """Returns the memory order of the array. Returns ------- bool Memory order of the array """ if self.metadata.zarr_format == 2: return self.metadata.order else: return self._config.order @property def attrs(self) -> dict[str, JSON]: """Returns the attributes of the array. Returns ------- dict Attributes of the array """ return self.metadata.attributes @property def read_only(self) -> bool: """Returns True if the array is read-only. Returns ------- bool True if the array is read-only """ # Backwards compatibility for 2.x return self.store_path.read_only @property def path(self) -> str: """Storage path. Returns ------- str The path to the array in the Zarr store. """ return self.store_path.path @property def name(self) -> str: """Array name following h5py convention. Returns ------- str The name of the array. """ # follow h5py convention: add leading slash name = self.path if not name.startswith("/"): name = "/" + name return name @property def basename(self) -> str: """Final component of name. Returns ------- str The basename or final component of the array name. """ return self.name.split("/")[-1] @property def cdata_shape(self) -> tuple[int, ...]: """ The shape of the chunk grid for this array. Returns ------- tuple[int, ...] The shape of the chunk grid for this array. """ return self._chunk_grid_shape @property def _chunk_grid_shape(self) -> tuple[int, ...]: """ The shape of the chunk grid for this array. Returns ------- tuple[int, ...] The shape of the chunk grid for this array. """ return tuple(starmap(ceildiv, zip(self.shape, self.chunks, strict=True))) @property def _shard_grid_shape(self) -> tuple[int, ...]: """ The shape of the shard grid for this array. Returns ------- tuple[int, ...] The shape of the shard grid for this array. """ if self.shards is None: shard_shape = self.chunks else: shard_shape = self.shards return tuple(starmap(ceildiv, zip(self.shape, shard_shape, strict=True))) @property def nchunks(self) -> int: """ The number of chunks in this array. Note that if a sharding codec is used, then the number of chunks may exceed the number of stored objects supporting this array. Returns ------- int The total number of chunks in the array. """ return product(self._chunk_grid_shape) @property def _nshards(self) -> int: """ The number of shards in this array. Returns ------- int The total number of shards in the array. """ return product(self._shard_grid_shape) async def nchunks_initialized(self) -> int: """ Calculate the number of chunks that have been initialized in storage. This value is calculated as the product of the number of initialized shards and the number of chunks per shard. For arrays that do not use sharding, the number of chunks per shard is effectively 1, and in that case the number of chunks initialized is the same as the number of stored objects associated with an array. 
Returns ------- nchunks_initialized : int The number of chunks that have been initialized. Notes ----- On [`AsyncArray`][zarr.AsyncArray] this is an asynchronous method, unlike the (synchronous) property [`Array.nchunks_initialized`][zarr.Array.nchunks_initialized]. Examples -------- ```python import asyncio import zarr.api.asynchronous async def example(): arr = await zarr.api.asynchronous.create(shape=(10,), chunks=(1,)) count = await arr.nchunks_initialized() print(f"Initial: {count}") #> Initial: 0 await arr.setitem(slice(5), 1) count = await arr.nchunks_initialized() print(f"After write: {count}") #> After write: 5 return count result = asyncio.run(example()) ``` """ if self.shards is None: chunks_per_shard = 1 else: chunks_per_shard = product( tuple(a // b for a, b in zip(self.shards, self.chunks, strict=True)) ) return (await self._nshards_initialized()) * chunks_per_shard async def _nshards_initialized(self) -> int: """ Calculate the number of shards that have been initialized in storage. This is the number of shards that have been persisted to the storage backend. Returns ------- nshards_initialized : int The number of shards that have been initialized. Notes ----- On [`AsyncArray`][zarr.AsyncArray] this is an asynchronous method, unlike the (synchronous) property [`Array._nshards_initialized`][zarr.Array._nshards_initialized]. Examples -------- ```python import asyncio import zarr.api.asynchronous async def example(): arr = await zarr.api.asynchronous.create(shape=(10,), chunks=(2,)) count = await arr._nshards_initialized() print(f"Initial: {count}") #> Initial: 0 await arr.setitem(slice(5), 1) count = await arr._nshards_initialized() print(f"After write: {count}") #> After write: 3 return count result = asyncio.run(example()) ``` """ return len(await _shards_initialized(self)) async def nbytes_stored(self) -> int: return await self.store_path.store.getsize_prefix(self.store_path.path) def _iter_chunk_coords( self, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None ) -> Iterator[tuple[int, ...]]: """ Create an iterator over the coordinates of chunks in chunk grid space. If the `origin` keyword is used, iteration will start at the chunk index specified by `origin`. The default behavior is to start at the origin of the grid coordinate space. If the `selection_shape` keyword is used, iteration will be bounded over a contiguous region ranging from `[origin, origin selection_shape]`, where the upper bound is exclusive as per python indexing conventions. Parameters ---------- origin : Sequence[int] | None, default=None The origin of the selection relative to the array's chunk grid. selection_shape : Sequence[int] | None, default=None The shape of the selection in chunk grid coordinates. Yields ------ chunk_coords: tuple[int, ...] The coordinates of each chunk in the selection. """ return _iter_chunk_coords( array=self, origin=origin, selection_shape=selection_shape, ) def _iter_shard_coords( self, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None ) -> Iterator[tuple[int, ...]]: """ Create an iterator over the coordinates of shards in shard grid space. Note that If the `origin` keyword is used, iteration will start at the shard index specified by `origin`. The default behavior is to start at the origin of the grid coordinate space. 
If the `selection_shape` keyword is used, iteration will be bounded over a contiguous region ranging from `[origin, origin selection_shape]`, where the upper bound is exclusive as per python indexing conventions. Parameters ---------- origin : Sequence[int] | None, default=None The origin of the selection relative to the array's shard grid. selection_shape : Sequence[int] | None, default=None The shape of the selection in shard grid coordinates. Yields ------ chunk_coords: tuple[int, ...] The coordinates of each shard in the selection. """ return _iter_shard_coords( array=self, origin=origin, selection_shape=selection_shape, ) def _iter_shard_keys( self, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None ) -> Iterator[str]: """ Iterate over the keys of the stored objects supporting this array. Parameters ---------- origin : Sequence[int] | None, default=None The origin of the selection relative to the array's chunk grid. selection_shape : Sequence[int] | None, default=None The shape of the selection in shard grid coordinates. Yields ------ key: str The storage key of each chunk in the selection. """ # Iterate over the coordinates of chunks in chunk grid space. return _iter_shard_keys( array=self, origin=origin, selection_shape=selection_shape, ) def _iter_chunk_regions( self, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None ) -> Iterator[tuple[slice, ...]]: """ Iterate over the regions spanned by each chunk. Parameters ---------- origin : Sequence[int] | None, default=None The origin of the selection relative to the array's chunk grid. selection_shape : Sequence[int] | None, default=None The shape of the selection in chunk grid coordinates. Yields ------ region: tuple[slice, ...] A tuple of slice objects representing the region spanned by each chunk in the selection. """ return _iter_chunk_regions( array=self, origin=origin, selection_shape=selection_shape, ) def _iter_shard_regions( self, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None ) -> Iterator[tuple[slice, ...]]: """ Iterate over the regions spanned by each shard. Parameters ---------- origin : Sequence[int] | None, default=None The origin of the selection relative to the array's shard grid. selection_shape : Sequence[int] | None, default=None The shape of the selection in shard grid coordinates. Yields ------ region: tuple[slice, ...] A tuple of slice objects representing the region spanned by each shard in the selection. """ return _iter_shard_regions(array=self, origin=origin, selection_shape=selection_shape) @property def nbytes(self) -> int: """ The total number of bytes that can be stored in the chunks of this array. Notes ----- This value is calculated by multiplying the number of elements in the array and the size of each element, the latter of which is determined by the dtype of the array. For this reason, ``nbytes`` will likely be inaccurate for arrays with variable-length dtypes. It is not possible to determine the size of an array with variable-length elements from the shape and dtype alone. """ return self.size * self.dtype.itemsize async def _get_selection( self, indexer: Indexer, *, prototype: BufferPrototype, out: NDBuffer | None = None, fields: Fields | None = None, ) -> NDArrayLikeOrScalar: # check fields are sensible out_dtype = check_fields(fields, self.dtype) # setup output buffer if out is not None: if isinstance(out, NDBuffer): out_buffer = out else: raise TypeError(f"out argument needs to be an NDBuffer. 
Got {type(out)!r}") if out_buffer.shape != indexer.shape: raise ValueError( f"shape of out argument doesn't match. Expected {indexer.shape}, got {out.shape}" ) else: out_buffer = prototype.nd_buffer.empty( shape=indexer.shape, dtype=out_dtype, order=self.order, ) if product(indexer.shape) > 0: # need to use the order from the metadata for v2 _config = self._config if self.metadata.zarr_format == 2: _config = replace(_config, order=self.order) # reading chunks and decoding them await self.codec_pipeline.read( [ ( self.store_path / self.metadata.encode_chunk_key(chunk_coords), self.metadata.get_chunk_spec(chunk_coords, _config, prototype=prototype), chunk_selection, out_selection, is_complete_chunk, ) for chunk_coords, chunk_selection, out_selection, is_complete_chunk in indexer ], out_buffer, drop_axes=indexer.drop_axes, ) if isinstance(indexer, BasicIndexer) and indexer.shape == (): return out_buffer.as_scalar() return out_buffer.as_ndarray_like() async def getitem( self, selection: BasicSelection, *, prototype: BufferPrototype | None = None, ) -> NDArrayLikeOrScalar: """ Asynchronous function that retrieves a subset of the array's data based on the provided selection. Parameters ---------- selection : BasicSelection A selection object specifying the subset of data to retrieve. prototype : BufferPrototype, optional A buffer prototype to use for the retrieved data (default is None). Returns ------- NDArrayLikeOrScalar The retrieved subset of the array's data. Examples -------- ```python import asyncio import zarr.api.asynchronous async def example(): store = zarr.storage.MemoryStore() async_arr = await zarr.api.asynchronous.create_array( store=store, shape=(100,100), chunks=(10,10), dtype='i4', fill_value=0) result = await async_arr.getitem((0,1)) print(result) #> 0 return result value = asyncio.run(example()) ``` """ if prototype is None: prototype = default_buffer_prototype() indexer = BasicIndexer( selection, shape=self.metadata.shape, chunk_grid=self.metadata.chunk_grid, ) return await self._get_selection(indexer, prototype=prototype) async def get_orthogonal_selection( self, selection: OrthogonalSelection, *, out: NDBuffer | None = None, fields: Fields | None = None, prototype: BufferPrototype | None = None, ) -> NDArrayLikeOrScalar: if prototype is None: prototype = default_buffer_prototype() indexer = OrthogonalIndexer(selection, self.shape, self.metadata.chunk_grid) return await self._get_selection( indexer=indexer, out=out, fields=fields, prototype=prototype ) async def get_mask_selection( self, mask: MaskSelection, *, out: NDBuffer | None = None, fields: Fields | None = None, prototype: BufferPrototype | None = None, ) -> NDArrayLikeOrScalar: if prototype is None: prototype = default_buffer_prototype() indexer = MaskIndexer(mask, self.shape, self.metadata.chunk_grid) return await self._get_selection( indexer=indexer, out=out, fields=fields, prototype=prototype ) async def get_coordinate_selection( self, selection: CoordinateSelection, *, out: NDBuffer | None = None, fields: Fields | None = None, prototype: BufferPrototype | None = None, ) -> NDArrayLikeOrScalar: if prototype is None: prototype = default_buffer_prototype() indexer = CoordinateIndexer(selection, self.shape, self.metadata.chunk_grid) out_array = await self._get_selection( indexer=indexer, out=out, fields=fields, prototype=prototype ) if hasattr(out_array, "shape"): # restore shape out_array = np.array(out_array).reshape(indexer.sel_shape) return out_array async def _save_metadata(self, metadata: ArrayMetadata, 
ensure_parents: bool = False) -> None: """ Asynchronously save the array metadata. """ await save_metadata(self.store_path, metadata, ensure_parents=ensure_parents) async def _set_selection( self, indexer: Indexer, value: npt.ArrayLike, *, prototype: BufferPrototype, fields: Fields | None = None, ) -> None: # check fields are sensible check_fields(fields, self.dtype) fields = check_no_multi_fields(fields) # check value shape if np.isscalar(value): array_like = prototype.buffer.create_zero_length().as_array_like() if isinstance(array_like, np._typing._SupportsArrayFunc): # TODO: need to handle array types that don't support __array_function__ # like PyTorch and JAX array_like_ = cast("np._typing._SupportsArrayFunc", array_like) value = np.asanyarray(value, dtype=self.dtype, like=array_like_) else: if not hasattr(value, "shape"): value = np.asarray(value, self.dtype) # assert ( # value.shape == indexer.shape # ), f"shape of value doesn't match indexer shape. Expected {indexer.shape}, got {value.shape}" if not hasattr(value, "dtype") or value.dtype.name != self.dtype.name: if hasattr(value, "astype"): # Handle things that are already NDArrayLike more efficiently value = value.astype(dtype=self.dtype, order="A") else: value = np.array(value, dtype=self.dtype, order="A") value = cast("NDArrayLike", value) # We accept any ndarray like object from the user and convert it # to a NDBuffer (or subclass). From this point onwards, we only pass # Buffer and NDBuffer between components. value_buffer = prototype.nd_buffer.from_ndarray_like(value) # need to use the order from the metadata for v2 _config = self._config if self.metadata.zarr_format == 2: _config = replace(_config, order=self.metadata.order) # merging with existing data and encoding chunks await self.codec_pipeline.write( [ ( self.store_path / self.metadata.encode_chunk_key(chunk_coords), self.metadata.get_chunk_spec(chunk_coords, _config, prototype), chunk_selection, out_selection, is_complete_chunk, ) for chunk_coords, chunk_selection, out_selection, is_complete_chunk in indexer ], value_buffer, drop_axes=indexer.drop_axes, ) async def setitem( self, selection: BasicSelection, value: npt.ArrayLike, prototype: BufferPrototype | None = None, ) -> None: """ Asynchronously set values in the array using basic indexing. Parameters ---------- selection : BasicSelection The selection defining the region of the array to set. value : numpy.typing.ArrayLike The values to be written into the selected region of the array. prototype : BufferPrototype or None, optional A prototype buffer that defines the structure and properties of the array chunks being modified. If None, the default buffer prototype is used. Default is None. Returns ------- None This method does not return any value. Raises ------ IndexError If the selection is out of bounds for the array. ValueError If the values are not compatible with the array's dtype or shape. Notes ----- - This method is asynchronous and should be awaited. - Supports basic indexing, where the selection is contiguous and does not involve advanced indexing. 
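Examples
--------
A minimal sketch of writing and then reading back a value; the store, shapes, and dtype below are illustrative and mirror the ``getitem`` example above:

```python
import asyncio
import zarr.api.asynchronous

async def example():
    store = zarr.storage.MemoryStore()
    async_arr = await zarr.api.asynchronous.create_array(
        store=store, shape=(100, 100), chunks=(10, 10), dtype='i4', fill_value=0)
    # write a scalar into a 2x2 region, then read one element back
    await async_arr.setitem((slice(0, 2), slice(0, 2)), 1)
    result = await async_arr.getitem((0, 0))
    print(result)
    #> 1
    return result

value = asyncio.run(example())
```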
""" if prototype is None: prototype = default_buffer_prototype() indexer = BasicIndexer( selection, shape=self.metadata.shape, chunk_grid=self.metadata.chunk_grid, ) return await self._set_selection(indexer, value, prototype=prototype) @property def oindex(self) -> AsyncOIndex[T_ArrayMetadata]: """Shortcut for orthogonal (outer) indexing, see [get_orthogonal_selection][zarr.Array.get_orthogonal_selection] and [set_orthogonal_selection][zarr.Array.set_orthogonal_selection] for documentation and examples.""" return AsyncOIndex(self) @property def vindex(self) -> AsyncVIndex[T_ArrayMetadata]: """Shortcut for vectorized (inner) indexing, see [get_coordinate_selection][zarr.Array.get_coordinate_selection], [set_coordinate_selection][zarr.Array.set_coordinate_selection], [get_mask_selection][zarr.Array.get_mask_selection] and [set_mask_selection][zarr.Array.set_mask_selection] for documentation and examples.""" return AsyncVIndex(self) async def resize(self, new_shape: ShapeLike, delete_outside_chunks: bool = True) -> None: """ Asynchronously resize the array to a new shape. Parameters ---------- new_shape : tuple[int, ...] The desired new shape of the array. delete_outside_chunks : bool, optional If True (default), chunks that fall outside the new shape will be deleted. If False, the data in those chunks will be preserved. Returns ------- AsyncArray The resized array. Raises ------ ValueError If the new shape is incompatible with the current array's chunking configuration. Notes ----- - This method is asynchronous and should be awaited. """ new_shape = parse_shapelike(new_shape) assert len(new_shape) == len(self.metadata.shape) new_metadata = self.metadata.update_shape(new_shape) if delete_outside_chunks: # Remove all chunks outside of the new shape old_chunk_coords = set(self.metadata.chunk_grid.all_chunk_coords(self.metadata.shape)) new_chunk_coords = set(self.metadata.chunk_grid.all_chunk_coords(new_shape)) async def _delete_key(key: str) -> None: await (self.store_path / key).delete() await concurrent_map( [ (self.metadata.encode_chunk_key(chunk_coords),) for chunk_coords in old_chunk_coords.difference(new_chunk_coords) ], _delete_key, zarr_config.get("async.concurrency"), ) # Write new metadata await self._save_metadata(new_metadata) # Update metadata (in place) object.__setattr__(self, "metadata", new_metadata) async def append(self, data: npt.ArrayLike, axis: int = 0) -> tuple[int, ...]: """Append `data` to `axis`. Parameters ---------- data : array-like Data to be appended. axis : int Axis along which to append. Returns ------- new_shape : tuple Notes ----- The size of all dimensions other than `axis` must match between this array and `data`. """ # ensure data is array-like if not hasattr(data, "shape"): data = np.asanyarray(data) self_shape_preserved = tuple(s for i, s in enumerate(self.shape) if i != axis) data_shape_preserved = tuple(s for i, s in enumerate(data.shape) if i != axis) if self_shape_preserved != data_shape_preserved: raise ValueError( f"shape of data to append is not compatible with the array. " f"The shape of the data is ({data_shape_preserved})" f"and the shape of the array is ({self_shape_preserved})." "All dimensions must match except for the dimension being " "appended." 
) # remember old shape old_shape = self.shape # determine new shape new_shape = tuple( self.shape[i] if i != axis else self.shape[i] + data.shape[i] for i in range(len(self.shape)) ) # resize await self.resize(new_shape) # store data append_selection = tuple( slice(None) if i != axis else slice(old_shape[i], new_shape[i]) for i in range(len(self.shape)) ) await self.setitem(append_selection, data) return new_shape async def update_attributes(self, new_attributes: dict[str, JSON]) -> Self: """ Asynchronously update the array's attributes. Parameters ---------- new_attributes : dict of str to JSON A dictionary of new attributes to update or add to the array. The keys represent attribute names, and the values must be JSON-compatible. Returns ------- AsyncArray The array with the updated attributes. Raises ------ ValueError If the attributes are invalid or incompatible with the array's metadata. Notes ----- - This method is asynchronous and should be awaited. - The updated attributes will be merged with existing attributes, and any conflicts will be overwritten by the new values. """ self.metadata.attributes.update(new_attributes) # Write new metadata await self._save_metadata(self.metadata) return self def __repr__(self) -> str: return f"<AsyncArray {self.store_path} shape={self.shape} dtype={self.dtype}>" @property def info(self) -> Any: """ Return the statically known information for an array. Returns ------- ArrayInfo Related ------- [zarr.AsyncArray.info_complete][] - All information about an array, including dynamic information like the number of bytes and chunks written. Examples -------- >>> arr = await zarr.api.asynchronous.create( ... path="array", shape=(3, 4, 5), chunks=(2, 2, 2) ... ) >>> arr.info Type : Array Zarr format : 3 Data type : DataType.float64 Shape : (3, 4, 5) Chunk shape : (2, 2, 2) Order : C Read-only : False Store type : MemoryStore Codecs : [{'endian': <Endian.little: 'little'>}] No. bytes : 480 """ return self._info() async def info_complete(self) -> Any: """ Return all the information for an array, including dynamic information like its storage size. In addition to the static information, this provides - The count of chunks initialized - The sum of the bytes written Returns ------- ArrayInfo Related ------- [zarr.AsyncArray.info][] - A property giving just the statically known information about an array. """ return self._info( await self._nshards_initialized(), await self.store_path.store.getsize_prefix(self.store_path.path), ) def _info( self, count_chunks_initialized: int | None = None, count_bytes_stored: int | None = None ) -> Any: return ArrayInfo( _zarr_format=self.metadata.zarr_format, _data_type=self._zdtype, _fill_value=self.metadata.fill_value, _shape=self.shape, _order=self.order, _shard_shape=self.shards, _chunk_shape=self.chunks, _read_only=self.read_only, _compressors=self.compressors, _filters=self.filters, _serializer=self.serializer, _store_type=type(self.store_path.store).__name__, _count_bytes=self.nbytes, _count_bytes_stored=count_bytes_stored, _count_chunks_initialized=count_chunks_initialized, ) # TODO: Array can be a frozen data class again once property setters (e.g. shape) are removed @dataclass(frozen=False) class Array(Generic[T_ArrayMetadata]): """ A Zarr array. """ _async_array: AsyncArray[T_ArrayMetadata] @property def async_array(self) -> AsyncArray[T_ArrayMetadata]: """An asynchronous version of the current array. Useful for batching requests. Returns ------- An asynchronous array whose metadata and store match those of this synchronous array. 
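Examples
--------
A minimal sketch (the store, shape, and dtype are illustrative) of using the asynchronous interface to batch two reads with ``asyncio.gather``:

```python
import asyncio
import zarr

arr = zarr.create_array(store={}, shape=(4, 4), chunks=(2, 2), dtype='i4', fill_value=0)
arr[:] = 1

async def batched_reads():
    a = arr.async_array
    # issue two element reads concurrently against the same store
    return await asyncio.gather(a.getitem((0, 0)), a.getitem((3, 3)))

values = asyncio.run(batched_reads())
# each element of ``values`` is the scalar 1
```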
""" return self._async_array @classmethod @deprecated("Use zarr.create_array instead.", category=ZarrDeprecationWarning) def create( cls, store: StoreLike, *, # v2 and v3 shape: tuple[int, ...], dtype: ZDTypeLike, zarr_format: ZarrFormat = 3, fill_value: Any | None = DEFAULT_FILL_VALUE, attributes: dict[str, JSON] | None = None, # v3 only chunk_shape: tuple[int, ...] | None = None, chunk_key_encoding: ( ChunkKeyEncoding | tuple[Literal["default"], Literal[".", "/"]] | tuple[Literal["v2"], Literal[".", "/"]] | None ) = None, codecs: Iterable[Codec | dict[str, JSON]] | None = None, dimension_names: DimensionNames = None, # v2 only chunks: tuple[int, ...] | None = None, dimension_separator: Literal[".", "/"] | None = None, order: MemoryOrder | None = None, filters: list[dict[str, JSON]] | None = None, compressor: CompressorLike = "auto", # runtime overwrite: bool = False, config: ArrayConfigLike | None = None, ) -> AnyArray: """Creates a new Array instance from an initialized store. !!! warning "Deprecated" `Array.create()` is deprecated since v3.0.0 and will be removed in a future release. Use [`zarr.create_array`][] instead. Parameters ---------- store : StoreLike The array store that has already been initialized. See the [storage documentation in the user guide][user-guide-store-like] for a description of all valid StoreLike values. shape : tuple[int, ...] The shape of the array. dtype : ZDTypeLike The data type of the array. chunk_shape : tuple[int, ...], optional The shape of the Array's chunks. Zarr format 3 only. Zarr format 2 arrays should use `chunks` instead. If not specified, default are guessed based on the shape and dtype. chunk_key_encoding : ChunkKeyEncodingLike, optional A specification of how the chunk keys are represented in storage. Zarr format 3 only. Zarr format 2 arrays should use `dimension_separator` instead. Default is ``("default", "/")``. codecs : Sequence of Codecs or dicts, optional An iterable of Codec or dict serializations of Codecs. The elements of this collection specify the transformation from array values to stored bytes. Zarr format 3 only. Zarr format 2 arrays should use ``filters`` and ``compressor`` instead. If no codecs are provided, default codecs will be used: - For numeric arrays, the default is ``BytesCodec`` and ``ZstdCodec``. - For Unicode strings, the default is ``VLenUTF8Codec`` and ``ZstdCodec``. - For bytes or objects, the default is ``VLenBytesCodec`` and ``ZstdCodec``. dimension_names : Iterable[str | None], optional The names of the dimensions (default is None). Zarr format 3 only. Zarr format 2 arrays should not use this parameter. chunks : tuple[int, ...], optional The shape of the array's chunks. Zarr format 2 only. Zarr format 3 arrays should use ``chunk_shape`` instead. If not specified, default are guessed based on the shape and dtype. dimension_separator : Literal[".", "/"], optional The dimension separator (default is "."). Zarr format 2 only. Zarr format 3 arrays should use ``chunk_key_encoding`` instead. order : Literal["C", "F"], optional The memory of the array (default is "C"). If ``zarr_format`` is 2, this parameter sets the memory order of the array. If ``zarr_format`` is 3, then this parameter is deprecated, because memory order is a runtime parameter for Zarr 3 arrays. The recommended way to specify the memory order for Zarr 3 arrays is via the ``config`` parameter, e.g. ``{'order': 'C'}``. 
filters : Iterable[Codec] | Literal["auto"], optional Iterable of filters to apply to each chunk of the array, in order, before serializing that chunk to bytes. For Zarr format 3, a "filter" is a codec that takes an array and returns an array, and these values must be instances of [`zarr.abc.codec.ArrayArrayCodec`][], or dict representations of [`zarr.abc.codec.ArrayArrayCodec`][]. For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the order of your filters is consistent with the behavior of each filter. The default value of ``"auto"`` instructs Zarr to use a default based on the data type of the array and the Zarr format specified. For all data types in Zarr V3, and most data types in Zarr V2, the default filters are empty. The only cases where default filters are not empty are when the Zarr format is 2, and the data type is a variable-length data type like [`zarr.dtype.VariableLengthUTF8`][] or [`zarr.dtype.VariableLengthBytes`][]. In these cases, the default filters contain a single element which is a codec specific to that particular data type. To create an array with no filters, provide an empty iterable or the value ``None``. compressor : dict[str, JSON], optional Primary compressor to compress chunk data. Zarr format 2 only. Zarr format 3 arrays should use ``codecs`` instead. If no ``compressor`` is provided, a default compressor will be used: - For numeric arrays, the default is ``ZstdCodec``. - For Unicode strings, the default is ``VLenUTF8Codec``. - For bytes or objects, the default is ``VLenBytesCodec``. These defaults can be changed by modifying the value of ``array.v2_default_compressor`` in [`zarr.config`][zarr.config]. overwrite : bool, optional If True, overwrite any existing data for this array in the store; if False (default), an error is raised if data already exists. Returns ------- Array Array created from the store. """ return cls._create( store, # v2 and v3 shape=shape, dtype=dtype, zarr_format=zarr_format, attributes=attributes, fill_value=fill_value, # v3 only chunk_shape=chunk_shape, chunk_key_encoding=chunk_key_encoding, codecs=codecs, dimension_names=dimension_names, # v2 only chunks=chunks, dimension_separator=dimension_separator, order=order, filters=filters, compressor=compressor, # runtime overwrite=overwrite, config=config, ) @classmethod def _create( cls, store: StoreLike, *, # v2 and v3 shape: tuple[int, ...], dtype: ZDTypeLike, zarr_format: ZarrFormat = 3, fill_value: Any | None = DEFAULT_FILL_VALUE, attributes: dict[str, JSON] | None = None, # v3 only chunk_shape: tuple[int, ...] | None = None, chunk_key_encoding: ( ChunkKeyEncoding | tuple[Literal["default"], Literal[".", "/"]] | tuple[Literal["v2"], Literal[".", "/"]] | None ) = None, codecs: Iterable[Codec | dict[str, JSON]] | None = None, dimension_names: DimensionNames = None, # v2 only chunks: tuple[int, ...] | None = None, dimension_separator: Literal[".", "/"] | None = None, order: MemoryOrder | None = None, filters: list[dict[str, JSON]] | None = None, compressor: CompressorLike = "auto", # runtime overwrite: bool = False, config: ArrayConfigLike | None = None, ) -> Self: """Creates a new Array instance from an initialized store. Deprecated in favor of [`zarr.create_array`][]. 
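A minimal sketch of the recommended replacement (the store and parameter values below are illustrative):

```python
import zarr

# zarr.create_array is the supported entry point; Array.create/_create are deprecated
arr = zarr.create_array(
    store={},
    shape=(100, 100),
    chunks=(10, 10),
    dtype="i4",
    fill_value=0,
    overwrite=True,
)
arr[:10, :10] = 1
```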
""" async_array = sync( AsyncArray._create( store=store, shape=shape, dtype=dtype, zarr_format=zarr_format, attributes=attributes, fill_value=fill_value, chunk_shape=chunk_shape, chunk_key_encoding=chunk_key_encoding, codecs=codecs, dimension_names=dimension_names, chunks=chunks, dimension_separator=dimension_separator, order=order, filters=filters, compressor=compressor, overwrite=overwrite, config=config, ), ) return cls(async_array) @classmethod def from_dict( cls, store_path: StorePath, data: dict[str, JSON], ) -> Self: """ Create a Zarr array from a dictionary. Parameters ---------- store_path : StorePath The path within the store where the array should be created. data : dict A dictionary representing the array data. This dictionary should include necessary metadata for the array, such as shape, dtype, fill value, and attributes. Returns ------- Array The created Zarr array. Raises ------ ValueError If the dictionary data is invalid or missing required fields for array creation. """ async_array = AsyncArray.from_dict(store_path=store_path, data=data) return cls(async_array) @classmethod def open( cls, store: StoreLike, ) -> Self: """Opens an existing Array from a store. Parameters ---------- store : StoreLike Store containing the Array. See the [storage documentation in the user guide][user-guide-store-like] for a description of all valid StoreLike values. Returns ------- Array Array opened from the store. """ async_array = sync(AsyncArray.open(store)) return cls(async_array) @property def store(self) -> Store: return self.async_array.store @property def ndim(self) -> int: """Returns the number of dimensions in the array. Returns ------- int The number of dimensions in the array. """ return self.async_array.ndim @property def shape(self) -> tuple[int, ...]: """Returns the shape of the array. Returns ------- tuple[int, ...] The shape of the array. """ return self.async_array.shape @shape.setter def shape(self, value: tuple[int, ...]) -> None: """Sets the shape of the array by calling resize.""" self.resize(value) @property def chunks(self) -> tuple[int, ...]: """Returns a tuple of integers describing the length of each dimension of a chunk of the array. If sharding is used the inner chunk shape is returned. Only defined for arrays using using `RegularChunkGrid`. If array doesn't use `RegularChunkGrid`, `NotImplementedError` is raised. Returns ------- tuple A tuple of integers representing the length of each dimension of a chunk. """ return self.async_array.chunks @property def shards(self) -> tuple[int, ...] | None: """Returns a tuple of integers describing the length of each dimension of a shard of the array. Returns None if sharding is not used. Only defined for arrays using using `RegularChunkGrid`. If array doesn't use `RegularChunkGrid`, `NotImplementedError` is raised. Returns ------- tuple | None A tuple of integers representing the length of each dimension of a shard or None if sharding is not used. """ return self.async_array.shards @property def size(self) -> int: """Returns the total number of elements in the array. Returns ------- int Total number of elements in the array. """ return self.async_array.size @property def dtype(self) -> np.dtype[Any]: """Returns the NumPy data type. Returns ------- np.dtype The NumPy data type. """ return self.async_array.dtype @property def attrs(self) -> Attributes: """Returns a [MutableMapping][collections.abc.MutableMapping] containing user-defined attributes. 
Returns ------- attrs A [MutableMapping][collections.abc.MutableMapping] object containing user-defined attributes. Notes ----- Note that attribute values must be JSON serializable. """ return Attributes(self) @property def path(self) -> str: """Storage path.""" return self.async_array.path @property def name(self) -> str: """Array name following h5py convention.""" return self.async_array.name @property def basename(self) -> str: """Final component of name.""" return self.async_array.basename @property def metadata(self) -> ArrayMetadata: return self.async_array.metadata @property def store_path(self) -> StorePath: return self.async_array.store_path @property def order(self) -> MemoryOrder: return self.async_array.order @property def read_only(self) -> bool: return self.async_array.read_only @property def fill_value(self) -> Any: return self.metadata.fill_value @property def filters(self) -> tuple[Numcodec, ...] | tuple[ArrayArrayCodec, ...]: """ Filters that are applied to each chunk of the array, in order, before serializing that chunk to bytes. """ return self.async_array.filters @property def serializer(self) -> None | ArrayBytesCodec: """ Array-to-bytes codec to use for serializing the chunks into bytes. """ return self.async_array.serializer @property @deprecated("Use Array.compressors instead.", category=ZarrDeprecationWarning) def compressor(self) -> Numcodec | None: """ Compressor that is applied to each chunk of the array. !!! warning "Deprecated" `array.compressor` is deprecated since v3.0.0 and will be removed in a future release. Use [`array.compressors`][zarr.Array.compressors] instead. """ return self.async_array.compressor @property def compressors(self) -> tuple[Numcodec, ...] | tuple[BytesBytesCodec, ...]: """ Compressors that are applied to each chunk of the array. Compressors are applied in order, and after any filters are applied (if any are specified) and the data is serialized into bytes. """ return self.async_array.compressors @property def cdata_shape(self) -> tuple[int, ...]: """ The shape of the chunk grid for this array. """ return self.async_array._chunk_grid_shape @property def _chunk_grid_shape(self) -> tuple[int, ...]: """ The shape of the chunk grid for this array. """ return self.async_array._chunk_grid_shape @property def _shard_grid_shape(self) -> tuple[int, ...]: """ The shape of the shard grid for this array. """ return self.async_array._shard_grid_shape @property def nchunks(self) -> int: """ The number of chunks in this array. Note that if a sharding codec is used, then the number of chunks may exceed the number of stored objects supporting this array. """ return self.async_array.nchunks @property def _nshards(self) -> int: """ The number of shards in the stored representation of this array. """ return self.async_array._nshards @property def nbytes(self) -> int: """ The total number of bytes that can be stored in the chunks of this array. Notes ----- This value is calculated by multiplying the number of elements in the array and the size of each element, the latter of which is determined by the dtype of the array. For this reason, ``nbytes`` will likely be inaccurate for arrays with variable-length dtypes. It is not possible to determine the size of an array with variable-length elements from the shape and dtype alone. """ return self.async_array.nbytes @property def nchunks_initialized(self) -> int: """ Calculate the number of chunks that have been initialized in storage. 
This value is calculated as the product of the number of initialized shards and the number of chunks per shard. For arrays that do not use sharding, the number of chunks per shard is effectively 1, and in that case the number of chunks initialized is the same as the number of stored objects associated with an array. For a direct count of the number of initialized stored objects, see ``_nshards_initialized``. Returns ------- nchunks_initialized : int The number of chunks that have been initialized. Examples -------- >>> arr = zarr.create_array(store={}, shape=(10,), chunks=(1,), shards=(2,)) >>> arr.nchunks_initialized 0 >>> arr[:5] = 1 >>> arr.nchunks_initialized 6 """ return sync(self.async_array.nchunks_initialized()) @property def _nshards_initialized(self) -> int: """ Calculate the number of shards that have been initialized, i.e. the number of shards that have been persisted to the storage backend. Returns ------- nshards_initialized : int The number of shards that have been initialized. Examples -------- >>> arr = zarr.create(shape=(10,), chunks=(2,)) >>> arr._nshards_initialized 0 >>> arr[:5] = 1 >>> arr._nshards_initialized 3 """ return sync(self.async_array._nshards_initialized()) def nbytes_stored(self) -> int: """ Determine the size, in bytes, of the array actually written to the store. Returns ------- size : int """ return sync(self.async_array.nbytes_stored()) def _iter_shard_keys( self, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None ) -> Iterator[str]: """ Iterate over the storage keys of each shard, relative to an optional origin, and optionally limited to a contiguous region in shard grid coordinates. Parameters ---------- origin : Sequence[int] | None, default=None The origin of the selection relative to the array's shard grid. selection_shape : Sequence[int] | None, default=None The shape of the selection in shard grid coordinates. Yields ------ str The storage key of each shard in the selection. """ return self.async_array._iter_shard_keys(origin=origin, selection_shape=selection_shape) def _iter_chunk_coords( self, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None ) -> Iterator[tuple[int, ...]]: """ Create an iterator over the coordinates of chunks in chunk grid space. If the `origin` keyword is used, iteration will start at the chunk index specified by `origin`. The default behavior is to start at the origin of the grid coordinate space. If the `selection_shape` keyword is used, iteration will be bounded over a contiguous region ranging from `[origin, origin + selection_shape]`, where the upper bound is exclusive as per python indexing conventions. Parameters ---------- origin : Sequence[int] | None, default=None The origin of the selection relative to the array's chunk grid. selection_shape : Sequence[int] | None, default=None The shape of the selection in chunk grid coordinates. Yields ------ tuple[int, ...] The coordinates of each chunk in the selection. """ return self.async_array._iter_chunk_coords(origin=origin, selection_shape=selection_shape) def _iter_shard_coords( self, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None ) -> Iterator[tuple[int, ...]]: """ Create an iterator over the coordinates of shards in shard grid space. If the `origin` keyword is used, iteration will start at the shard index specified by `origin`. The default behavior is to start at the origin of the grid coordinate space. 
If the `selection_shape` keyword is used, iteration will be bounded over a contiguous region ranging from `[origin, origin + selection_shape]`, where the upper bound is exclusive as per python indexing conventions. Parameters ---------- origin : Sequence[int] | None, default=None The origin of the selection relative to the array's shard grid. selection_shape : Sequence[int] | None, default=None The shape of the selection in shard grid coordinates. Yields ------ tuple[int, ...] The coordinates of each shard in the selection. """ return self.async_array._iter_shard_coords(origin=origin, selection_shape=selection_shape) def _iter_chunk_regions( self, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None ) -> Iterator[tuple[slice, ...]]: """ Iterate over the regions spanned by each chunk. Parameters ---------- origin : Sequence[int] | None, default=None The origin of the selection relative to the array's chunk grid. selection_shape : Sequence[int] | None, default=None The shape of the selection in chunk grid coordinates. Yields ------ tuple[slice, ...] A tuple of slice objects representing the region spanned by each chunk in the selection. """ return self.async_array._iter_chunk_regions(origin=origin, selection_shape=selection_shape) def _iter_shard_regions( self, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None ) -> Iterator[tuple[slice, ...]]: """ Iterate over the regions spanned by each shard. Parameters ---------- origin : Sequence[int] | None, default=None The origin of the selection relative to the array's shard grid. selection_shape : Sequence[int] | None, default=None The shape of the selection in shard grid coordinates. Yields ------ tuple[slice, ...] A tuple of slice objects representing the region spanned by each shard in the selection. """ return self.async_array._iter_shard_regions(origin=origin, selection_shape=selection_shape) def __array__( self, dtype: npt.DTypeLike | None = None, copy: bool | None = None ) -> NDArrayLike: """ This method is used by numpy when converting zarr.Array into a numpy array. For more information, see https://numpy.org/devdocs/user/basics.interoperability.html#the-array-method """ if copy is False: msg = "`copy=False` is not supported. This method always creates a copy." raise ValueError(msg) arr = self[...] arr_np: NDArrayLike = np.array(arr, dtype=dtype) if dtype is not None: arr_np = arr_np.astype(dtype) return arr_np def __getitem__(self, selection: Selection) -> NDArrayLikeOrScalar: """Retrieve data for an item or region of the array. Parameters ---------- selection : tuple An integer index or slice or tuple of int/slice objects specifying the requested item or region for each dimension of the array. Returns ------- NDArrayLikeOrScalar An array-like or scalar containing the data for the requested region. Examples -------- Setup a 1-dimensional array:: >>> import zarr >>> import numpy as np >>> data = np.arange(100, dtype="uint16") >>> z = zarr.create_array( >>> StorePath(MemoryStore(mode="w")), >>> shape=data.shape, >>> chunks=(10,), >>> dtype=data.dtype, >>> ) >>> z[:] = data Retrieve a single item:: >>> z[5] 5 Retrieve a region via slicing:: >>> z[:5] array([0, 1, 2, 3, 4]) >>> z[-5:] array([95, 96, 97, 98, 99]) >>> z[5:10] array([5, 6, 7, 8, 9]) >>> z[5:10:2] array([5, 7, 9]) >>> z[::2] array([ 0, 2, 4, ..., 94, 96, 98]) Load the entire array into memory:: >>> z[...] 
array([ 0, 1, 2, ..., 97, 98, 99]) Setup a 2-dimensional array:: >>> data = np.arange(100, dtype="uint16").reshape(10, 10) >>> z = zarr.create_array( >>> StorePath(MemoryStore(mode="w")), >>> shape=data.shape, >>> chunks=(10, 10), >>> dtype=data.dtype, >>> ) >>> z[:] = data Retrieve an item:: >>> z[2, 2] 22 Retrieve a region via slicing:: >>> z[1:3, 1:3] array([[11, 12], [21, 22]]) >>> z[1:3, :] array([[10, 11, 12, 13, 14, 15, 16, 17, 18, 19], [20, 21, 22, 23, 24, 25, 26, 27, 28, 29]]) >>> z[:, 1:3] array([[ 1, 2], [11, 12], [21, 22], [31, 32], [41, 42], [51, 52], [61, 62], [71, 72], [81, 82], [91, 92]]) >>> z[0:5:2, 0:5:2] array([[ 0, 2, 4], [20, 22, 24], [40, 42, 44]]) >>> z[::2, ::2] array([[ 0, 2, 4, 6, 8], [20, 22, 24, 26, 28], [40, 42, 44, 46, 48], [60, 62, 64, 66, 68], [80, 82, 84, 86, 88]]) Load the entire array into memory:: >>> z[...] array([[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [10, 11, 12, 13, 14, 15, 16, 17, 18, 19], [20, 21, 22, 23, 24, 25, 26, 27, 28, 29], [30, 31, 32, 33, 34, 35, 36, 37, 38, 39], [40, 41, 42, 43, 44, 45, 46, 47, 48, 49], [50, 51, 52, 53, 54, 55, 56, 57, 58, 59], [60, 61, 62, 63, 64, 65, 66, 67, 68, 69], [70, 71, 72, 73, 74, 75, 76, 77, 78, 79], [80, 81, 82, 83, 84, 85, 86, 87, 88, 89], [90, 91, 92, 93, 94, 95, 96, 97, 98, 99]]) Notes ----- Slices with step > 1 are supported, but slices with negative step are not. For arrays with a structured dtype, see Zarr format 2 for examples of how to use fields Currently the implementation for __getitem__ is provided by [`vindex`][zarr.Array.vindex] if the indexing is pure fancy indexing (ie a broadcast-compatible tuple of integer array indices), or by [`set_basic_selection`][zarr.Array.set_basic_selection] otherwise. Effectively, this means that the following indexing modes are supported: - integer indexing - slice indexing - mixed slice and integer indexing - boolean indexing - fancy indexing (vectorized list of integers) For specific indexing options including outer indexing, see the methods listed under Related. Related ------- [get_basic_selection][zarr.Array.get_basic_selection], [set_basic_selection][zarr.Array.set_basic_selection] [get_mask_selection][zarr.Array.get_mask_selection], [set_mask_selection][zarr.Array.set_mask_selection], [get_coordinate_selection][zarr.Array.get_coordinate_selection], [set_coordinate_selection][zarr.Array.set_coordinate_selection], [get_orthogonal_selection][zarr.Array.get_orthogonal_selection], [set_orthogonal_selection][zarr.Array.set_orthogonal_selection], [get_block_selection][zarr.Array.get_block_selection], [set_block_selection][zarr.Array.set_block_selection], [vindex][zarr.Array.vindex], [oindex][zarr.Array.oindex], [blocks][zarr.Array.blocks], [__setitem__][zarr.Array.__setitem__] """ fields, pure_selection = pop_fields(selection) if is_pure_fancy_indexing(pure_selection, self.ndim): return self.vindex[cast("CoordinateSelection | MaskSelection", selection)] elif is_pure_orthogonal_indexing(pure_selection, self.ndim): return self.get_orthogonal_selection(pure_selection, fields=fields) else: return self.get_basic_selection(cast("BasicSelection", pure_selection), fields=fields) def __setitem__(self, selection: Selection, value: npt.ArrayLike) -> None: """Modify data for an item or region of the array. Parameters ---------- selection : tuple An integer index or slice or tuple of int/slice specifying the requested region for each dimension of the array. value : npt.ArrayLike An array-like containing the data to be stored in the selection. 
Examples -------- Setup a 1-dimensional array:: >>> import zarr >>> z = zarr.zeros( >>> shape=(100,), >>> store=StorePath(MemoryStore(mode="w")), >>> chunk_shape=(5,), >>> dtype="i4", >>> ) Set all array elements to the same scalar value:: >>> z[...] = 42 >>> z[...] array([42, 42, 42, ..., 42, 42, 42]) Set a portion of the array:: >>> z[:10] = np.arange(10) >>> z[-10:] = np.arange(10)[::-1] >>> z[...] array([ 0, 1, 2, ..., 2, 1, 0]) Setup a 2-dimensional array:: >>> z = zarr.zeros( >>> shape=(5, 5), >>> store=StorePath(MemoryStore(mode="w")), >>> chunk_shape=(5, 5), >>> dtype="i4", >>> ) Set all array elements to the same scalar value:: >>> z[...] = 42 Set a portion of the array:: >>> z[0, :] = np.arange(z.shape[1]) >>> z[:, 0] = np.arange(z.shape[0]) >>> z[...] array([[ 0, 1, 2, 3, 4], [ 1, 42, 42, 42, 42], [ 2, 42, 42, 42, 42], [ 3, 42, 42, 42, 42], [ 4, 42, 42, 42, 42]]) Notes ----- Slices with step > 1 are supported, but slices with negative step are not. For arrays with a structured dtype, see Zarr format 2 for examples of how to use fields Currently the implementation for __setitem__ is provided by [`vindex`][zarr.Array.vindex] if the indexing is pure fancy indexing (ie a broadcast-compatible tuple of integer array indices), or by [`set_basic_selection`][zarr.Array.set_basic_selection] otherwise. Effectively, this means that the following indexing modes are supported: - integer indexing - slice indexing - mixed slice and integer indexing - boolean indexing - fancy indexing (vectorized list of integers) For specific indexing options including outer indexing, see the methods listed under Related. Related ------- [get_basic_selection][zarr.Array.get_basic_selection], [set_basic_selection][zarr.Array.set_basic_selection], [get_mask_selection][zarr.Array.get_mask_selection], [set_mask_selection][zarr.Array.set_mask_selection], [get_coordinate_selection][zarr.Array.get_coordinate_selection], [set_coordinate_selection][zarr.Array.set_coordinate_selection], [get_orthogonal_selection][zarr.Array.get_orthogonal_selection], [set_orthogonal_selection][zarr.Array.set_orthogonal_selection], [get_block_selection][zarr.Array.get_block_selection], [set_block_selection][zarr.Array.set_block_selection], [vindex][zarr.Array.vindex], [oindex][zarr.Array.oindex], [blocks][zarr.Array.blocks], [__getitem__][zarr.Array.__getitem__] """ fields, pure_selection = pop_fields(selection) if is_pure_fancy_indexing(pure_selection, self.ndim): self.vindex[cast("CoordinateSelection | MaskSelection", selection)] = value elif is_pure_orthogonal_indexing(pure_selection, self.ndim): self.set_orthogonal_selection(pure_selection, value, fields=fields) else: self.set_basic_selection(cast("BasicSelection", pure_selection), value, fields=fields) def get_basic_selection( self, selection: BasicSelection = Ellipsis, *, out: NDBuffer | None = None, prototype: BufferPrototype | None = None, fields: Fields | None = None, ) -> NDArrayLikeOrScalar: """Retrieve data for an item or region of the array. Parameters ---------- selection : tuple A tuple specifying the requested item or region for each dimension of the array. May be any combination of int and/or slice or ellipsis for multidimensional arrays. out : NDBuffer, optional If given, load the selected data directly into this buffer. prototype : BufferPrototype, optional The prototype of the buffer to use for the output data. If not provided, the default buffer prototype is used. 
fields : str or sequence of str, optional For arrays with a structured dtype, one or more fields can be specified to extract data for. Returns ------- NDArrayLikeOrScalar An array-like or scalar containing the data for the requested region. Examples -------- Setup a 1-dimensional array:: >>> import zarr >>> import numpy as np >>> data = np.arange(100, dtype="uint16") >>> z = zarr.create_array( >>> StorePath(MemoryStore(mode="w")), >>> shape=data.shape, >>> chunks=(3,), >>> dtype=data.dtype, >>> ) >>> z[:] = data Retrieve a single item:: >>> z.get_basic_selection(5) 5 Retrieve a region via slicing:: >>> z.get_basic_selection(slice(5)) array([0, 1, 2, 3, 4]) >>> z.get_basic_selection(slice(-5, None)) array([95, 96, 97, 98, 99]) >>> z.get_basic_selection(slice(5, 10)) array([5, 6, 7, 8, 9]) >>> z.get_basic_selection(slice(5, 10, 2)) array([5, 7, 9]) >>> z.get_basic_selection(slice(None, None, 2)) array([ 0, 2, 4, ..., 94, 96, 98]) Setup a 3-dimensional array:: >>> data = np.arange(1000).reshape(10, 10, 10) >>> z = zarr.create_array( >>> StorePath(MemoryStore(mode="w")), >>> shape=data.shape, >>> chunks=(5, 5, 5), >>> dtype=data.dtype, >>> ) >>> z[:] = data Retrieve an item:: >>> z.get_basic_selection((1, 2, 3)) 123 Retrieve a region via slicing and Ellipsis:: >>> z.get_basic_selection((slice(1, 3), slice(1, 3), 0)) array([[110, 120], [210, 220]]) >>> z.get_basic_selection(0, (slice(1, 3), slice(None))) array([[10, 11, 12, 13, 14, 15, 16, 17, 18, 19], [20, 21, 22, 23, 24, 25, 26, 27, 28, 29]]) >>> z.get_basic_selection((..., 5)) array([[ 2 12 22 32 42 52 62 72 82 92] [102 112 122 132 142 152 162 172 182 192] ... [802 812 822 832 842 852 862 872 882 892] [902 912 922 932 942 952 962 972 982 992]] Notes ----- Slices with step > 1 are supported, but slices with negative step are not. For arrays with a structured dtype, see Zarr format 2 for examples of how to use the `fields` parameter. This method provides the implementation for accessing data via the square bracket notation (__getitem__). See [`__getitem__`][zarr.Array.__getitem__] for examples using the alternative notation. Related ------- [set_basic_selection][zarr.Array.set_basic_selection], [get_mask_selection][zarr.Array.get_mask_selection], [set_mask_selection][zarr.Array.set_mask_selection], [get_coordinate_selection][zarr.Array.get_coordinate_selection], [set_coordinate_selection][zarr.Array.set_coordinate_selection], [get_orthogonal_selection][zarr.Array.get_orthogonal_selection], [set_orthogonal_selection][zarr.Array.set_orthogonal_selection], [get_block_selection][zarr.Array.get_block_selection], [set_block_selection][zarr.Array.set_block_selection], [vindex][zarr.Array.vindex], [oindex][zarr.Array.oindex], [blocks][zarr.Array.blocks], [__getitem__][zarr.Array.__getitem__], [__setitem__][zarr.Array.__setitem__] """ if prototype is None: prototype = default_buffer_prototype() return sync( self.async_array._get_selection( BasicIndexer(selection, self.shape, self.metadata.chunk_grid), out=out, fields=fields, prototype=prototype, ) ) def set_basic_selection( self, selection: BasicSelection, value: npt.ArrayLike, *, fields: Fields | None = None, prototype: BufferPrototype | None = None, ) -> None: """Modify data for an item or region of the array. Parameters ---------- selection : tuple A tuple specifying the requested item or region for each dimension of the array. May be any combination of int and/or slice or ellipsis for multidimensional arrays. value : npt.ArrayLike An array-like containing values to be stored into the array. 
fields : str or sequence of str, optional For arrays with a structured dtype, one or more fields can be specified to set data for. prototype : BufferPrototype, optional The prototype of the buffer used for setting the data. If not provided, the default buffer prototype is used. Examples -------- Setup a 1-dimensional array:: >>> import zarr >>> z = zarr.zeros( >>> shape=(100,), >>> store=StorePath(MemoryStore(mode="w")), >>> chunk_shape=(100,), >>> dtype="i4", >>> ) Set all array elements to the same scalar value:: >>> z.set_basic_selection(..., 42) >>> z[...] array([42, 42, 42, ..., 42, 42, 42]) Set a portion of the array:: >>> z.set_basic_selection(slice(10), np.arange(10)) >>> z.set_basic_selection(slice(-10, None), np.arange(10)[::-1]) >>> z[...] array([ 0, 1, 2, ..., 2, 1, 0]) Setup a 2-dimensional array:: >>> z = zarr.zeros( >>> shape=(5, 5), >>> store=StorePath(MemoryStore(mode="w")), >>> chunk_shape=(5, 5), >>> dtype="i4", >>> ) Set all array elements to the same scalar value:: >>> z.set_basic_selection(..., 42) Set a portion of the array:: >>> z.set_basic_selection((0, slice(None)), np.arange(z.shape[1])) >>> z.set_basic_selection((slice(None), 0), np.arange(z.shape[0])) >>> z[...] array([[ 0, 1, 2, 3, 4], [ 1, 42, 42, 42, 42], [ 2, 42, 42, 42, 42], [ 3, 42, 42, 42, 42], [ 4, 42, 42, 42, 42]]) Notes ----- For arrays with a structured dtype, see Zarr format 2 for examples of how to use the `fields` parameter. This method provides the underlying implementation for modifying data via square bracket notation, see [`__setitem__`][zarr.Array.__setitem__] for equivalent examples using the alternative notation. Related ------- [get_basic_selection][zarr.Array.get_basic_selection], [get_mask_selection][zarr.Array.get_mask_selection], [set_mask_selection][zarr.Array.set_mask_selection], [get_coordinate_selection][zarr.Array.get_coordinate_selection], [set_coordinate_selection][zarr.Array.set_coordinate_selection], [get_orthogonal_selection][zarr.Array.get_orthogonal_selection], [set_orthogonal_selection][zarr.Array.set_orthogonal_selection], [get_block_selection][zarr.Array.get_block_selection], [set_block_selection][zarr.Array.set_block_selection], [vindex][zarr.Array.vindex], [oindex][zarr.Array.oindex], [blocks][zarr.Array.blocks], [__getitem__][zarr.Array.__getitem__], [__setitem__][zarr.Array.__setitem__] """ if prototype is None: prototype = default_buffer_prototype() indexer = BasicIndexer(selection, self.shape, self.metadata.chunk_grid) sync(self.async_array._set_selection(indexer, value, fields=fields, prototype=prototype)) def get_orthogonal_selection( self, selection: OrthogonalSelection, *, out: NDBuffer | None = None, fields: Fields | None = None, prototype: BufferPrototype | None = None, ) -> NDArrayLikeOrScalar: """Retrieve data by making a selection for each dimension of the array. For example, if an array has 2 dimensions, allows selecting specific rows and/or columns. The selection for each dimension can be either an integer (indexing a single item), a slice, an array of integers, or a Boolean array where True values indicate a selection. Parameters ---------- selection : tuple A selection for each dimension of the array. May be any combination of int, slice, integer array or Boolean array. out : NDBuffer, optional If given, load the selected data directly into this buffer. fields : str or sequence of str, optional For arrays with a structured dtype, one or more fields can be specified to extract data for. 
prototype : BufferPrototype, optional The prototype of the buffer to use for the output data. If not provided, the default buffer prototype is used. Returns ------- NDArrayLikeOrScalar An array-like or scalar containing the data for the requested selection. Examples -------- Setup a 2-dimensional array:: >>> import zarr >>> import numpy as np >>> data = np.arange(100).reshape(10, 10) >>> z = zarr.create_array( >>> StorePath(MemoryStore(mode="w")), >>> shape=data.shape, >>> chunks=data.shape, >>> dtype=data.dtype, >>> ) >>> z[:] = data Retrieve rows and columns via any combination of int, slice, integer array and/or Boolean array:: >>> z.get_orthogonal_selection(([1, 4], slice(None))) array([[10, 11, 12, 13, 14, 15, 16, 17, 18, 19], [40, 41, 42, 43, 44, 45, 46, 47, 48, 49]]) >>> z.get_orthogonal_selection((slice(None), [1, 4])) array([[ 1, 4], [11, 14], [21, 24], [31, 34], [41, 44], [51, 54], [61, 64], [71, 74], [81, 84], [91, 94]]) >>> z.get_orthogonal_selection(([1, 4], [1, 4])) array([[11, 14], [41, 44]]) >>> sel = np.zeros(z.shape[0], dtype=bool) >>> sel[1] = True >>> sel[4] = True >>> z.get_orthogonal_selection((sel, sel)) array([[11, 14], [41, 44]]) For convenience, the orthogonal selection functionality is also available via the `oindex` property, e.g.:: >>> z.oindex[[1, 4], :] array([[10, 11, 12, 13, 14, 15, 16, 17, 18, 19], [40, 41, 42, 43, 44, 45, 46, 47, 48, 49]]) >>> z.oindex[:, [1, 4]] array([[ 1, 4], [11, 14], [21, 24], [31, 34], [41, 44], [51, 54], [61, 64], [71, 74], [81, 84], [91, 94]]) >>> z.oindex[[1, 4], [1, 4]] array([[11, 14], [41, 44]]) >>> sel = np.zeros(z.shape[0], dtype=bool) >>> sel[1] = True >>> sel[4] = True >>> z.oindex[sel, sel] array([[11, 14], [41, 44]]) Notes ----- Orthogonal indexing is also known as outer indexing. Slices with step > 1 are supported, but slices with negative step are not. Related ------- [get_basic_selection][zarr.Array.get_basic_selection], [set_basic_selection][zarr.Array.set_basic_selection], [get_mask_selection][zarr.Array.get_mask_selection], [set_mask_selection][zarr.Array.set_mask_selection], [get_coordinate_selection][zarr.Array.get_coordinate_selection], [set_coordinate_selection][zarr.Array.set_coordinate_selection], [set_orthogonal_selection][zarr.Array.set_orthogonal_selection], [get_block_selection][zarr.Array.get_block_selection], [set_block_selection][zarr.Array.set_block_selection], [vindex][zarr.Array.vindex], [oindex][zarr.Array.oindex], [blocks][zarr.Array.blocks], [__getitem__][zarr.Array.__getitem__], [__setitem__][zarr.Array.__setitem__] """ if prototype is None: prototype = default_buffer_prototype() indexer = OrthogonalIndexer(selection, self.shape, self.metadata.chunk_grid) return sync( self.async_array._get_selection( indexer=indexer, out=out, fields=fields, prototype=prototype ) ) def set_orthogonal_selection( self, selection: OrthogonalSelection, value: npt.ArrayLike, *, fields: Fields | None = None, prototype: BufferPrototype | None = None, ) -> None: """Modify data via a selection for each dimension of the array. Parameters ---------- selection : tuple A selection for each dimension of the array. May be any combination of int, slice, integer array or Boolean array. value : npt.ArrayLike An array-like array containing the data to be stored in the array. fields : str or sequence of str, optional For arrays with a structured dtype, one or more fields can be specified to set data for. prototype : BufferPrototype, optional The prototype of the buffer used for setting the data. 
If not provided, the default buffer prototype is used. Examples -------- Setup a 2-dimensional array:: >>> import zarr >>> z = zarr.zeros( >>> shape=(5, 5), >>> store=StorePath(MemoryStore(mode="w")), >>> chunk_shape=(5, 5), >>> dtype="i4", >>> ) Set data for a selection of rows:: >>> z.set_orthogonal_selection(([1, 4], slice(None)), 1) >>> z[...] array([[0, 0, 0, 0, 0], [1, 1, 1, 1, 1], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [1, 1, 1, 1, 1]]) Set data for a selection of columns:: >>> z.set_orthogonal_selection((slice(None), [1, 4]), 2) >>> z[...] array([[0, 2, 0, 0, 2], [1, 2, 1, 1, 2], [0, 2, 0, 0, 2], [0, 2, 0, 0, 2], [1, 2, 1, 1, 2]]) Set data for a selection of rows and columns:: >>> z.set_orthogonal_selection(([1, 4], [1, 4]), 3) >>> z[...] array([[0, 2, 0, 0, 2], [1, 3, 1, 1, 3], [0, 2, 0, 0, 2], [0, 2, 0, 0, 2], [1, 3, 1, 1, 3]]) Set data from a 2D array:: >>> values = np.arange(10).reshape(2, 5) >>> z.set_orthogonal_selection(([0, 3], ...), values) >>> z[...] array([[0, 1, 2, 3, 4], [1, 3, 1, 1, 3], [0, 2, 0, 0, 2], [5, 6, 7, 8, 9], [1, 3, 1, 1, 3]]) For convenience, this functionality is also available via the `oindex` property. E.g.:: >>> z.oindex[[1, 4], [1, 4]] = 4 >>> z[...] array([[0, 1, 2, 3, 4], [1, 4, 1, 1, 4], [0, 2, 0, 0, 2], [5, 6, 7, 8, 9], [1, 4, 1, 1, 4]]) Notes ----- Orthogonal indexing is also known as outer indexing. Slices with step > 1 are supported, but slices with negative step are not. Related ------- [get_basic_selection][zarr.Array.get_basic_selection], [set_basic_selection][zarr.Array.set_basic_selection], [get_mask_selection][zarr.Array.get_mask_selection], [set_mask_selection][zarr.Array.set_mask_selection], [get_coordinate_selection][zarr.Array.get_coordinate_selection], [set_coordinate_selection][zarr.Array.set_coordinate_selection], [get_orthogonal_selection][zarr.Array.get_orthogonal_selection], [get_block_selection][zarr.Array.get_block_selection], [set_block_selection][zarr.Array.set_block_selection], [vindex][zarr.Array.vindex], [oindex][zarr.Array.oindex], [blocks][zarr.Array.blocks], [__getitem__][zarr.Array.__getitem__], [__setitem__][zarr.Array.__setitem__] """ if prototype is None: prototype = default_buffer_prototype() indexer = OrthogonalIndexer(selection, self.shape, self.metadata.chunk_grid) return sync( self.async_array._set_selection(indexer, value, fields=fields, prototype=prototype) ) def get_mask_selection( self, mask: MaskSelection, *, out: NDBuffer | None = None, fields: Fields | None = None, prototype: BufferPrototype | None = None, ) -> NDArrayLikeOrScalar: """Retrieve a selection of individual items, by providing a Boolean array of the same shape as the array against which the selection is being made, where True values indicate a selected item. Parameters ---------- mask : ndarray, bool A Boolean array of the same shape as the array against which the selection is being made. out : NDBuffer, optional If given, load the selected data directly into this buffer. fields : str or sequence of str, optional For arrays with a structured dtype, one or more fields can be specified to extract data for. prototype : BufferPrototype, optional The prototype of the buffer to use for the output data. If not provided, the default buffer prototype is used. Returns ------- NDArrayLikeOrScalar An array-like or scalar containing the data for the requested selection. 
Examples -------- Setup a 2-dimensional array:: >>> import zarr >>> import numpy as np >>> data = np.arange(100).reshape(10, 10) >>> z = zarr.create_array( >>> StorePath(MemoryStore(mode="w")), >>> shape=data.shape, >>> chunks=data.shape, >>> dtype=data.dtype, >>> ) >>> z[:] = data Retrieve items by specifying a mask:: >>> sel = np.zeros_like(z, dtype=bool) >>> sel[1, 1] = True >>> sel[4, 4] = True >>> z.get_mask_selection(sel) array([11, 44]) For convenience, the mask selection functionality is also available via the `vindex` property, e.g.:: >>> z.vindex[sel] array([11, 44]) Notes ----- Mask indexing is a form of vectorized or inner indexing, and is equivalent to coordinate indexing. Internally the mask array is converted to coordinate arrays by calling `np.nonzero`. Related ------- [get_basic_selection][zarr.Array.get_basic_selection], [set_basic_selection][zarr.Array.set_basic_selection], [set_mask_selection][zarr.Array.set_mask_selection], [get_orthogonal_selection][zarr.Array.get_orthogonal_selection], [set_orthogonal_selection][zarr.Array.set_orthogonal_selection], [get_coordinate_selection][zarr.Array.get_coordinate_selection], [set_coordinate_selection][zarr.Array.set_coordinate_selection], [get_block_selection][zarr.Array.get_block_selection], [set_block_selection][zarr.Array.set_block_selection], [vindex][zarr.Array.vindex], [oindex][zarr.Array.oindex], [blocks][zarr.Array.blocks], [__getitem__][zarr.Array.__getitem__], [__setitem__][zarr.Array.__setitem__] """ if prototype is None: prototype = default_buffer_prototype() indexer = MaskIndexer(mask, self.shape, self.metadata.chunk_grid) return sync( self.async_array._get_selection( indexer=indexer, out=out, fields=fields, prototype=prototype ) ) def set_mask_selection( self, mask: MaskSelection, value: npt.ArrayLike, *, fields: Fields | None = None, prototype: BufferPrototype | None = None, ) -> None: """Modify a selection of individual items, by providing a Boolean array of the same shape as the array against which the selection is being made, where True values indicate a selected item. Parameters ---------- mask : ndarray, bool A Boolean array of the same shape as the array against which the selection is being made. value : npt.ArrayLike An array-like containing values to be stored into the array. fields : str or sequence of str, optional For arrays with a structured dtype, one or more fields can be specified to set data for. Examples -------- Setup a 2-dimensional array:: >>> import zarr >>> z = zarr.zeros( >>> shape=(5, 5), >>> store=StorePath(MemoryStore(mode="w")), >>> chunk_shape=(5, 5), >>> dtype="i4", >>> ) Set data for a selection of items:: >>> sel = np.zeros_like(z, dtype=bool) >>> sel[1, 1] = True >>> sel[4, 4] = True >>> z.set_mask_selection(sel, 1) >>> z[...] array([[0, 0, 0, 0, 0], [0, 1, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 1]]) For convenience, this functionality is also available via the `vindex` property. E.g.:: >>> z.vindex[sel] = 2 >>> z[...] array([[0, 0, 0, 0, 0], [0, 2, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 2]]) Notes ----- Mask indexing is a form of vectorized or inner indexing, and is equivalent to coordinate indexing. Internally the mask array is converted to coordinate arrays by calling `np.nonzero`. 
Related ------- [get_basic_selection][zarr.Array.get_basic_selection], [set_basic_selection][zarr.Array.set_basic_selection], [get_mask_selection][zarr.Array.get_mask_selection], [get_orthogonal_selection][zarr.Array.get_orthogonal_selection], [set_orthogonal_selection][zarr.Array.set_orthogonal_selection], [get_coordinate_selection][zarr.Array.get_coordinate_selection], [set_coordinate_selection][zarr.Array.set_coordinate_selection], [get_block_selection][zarr.Array.get_block_selection], [set_block_selection][zarr.Array.set_block_selection], [vindex][zarr.Array.vindex], [oindex][zarr.Array.oindex], [blocks][zarr.Array.blocks], [__getitem__][zarr.Array.__getitem__], [__setitem__][zarr.Array.__setitem__] """ if prototype is None: prototype = default_buffer_prototype() indexer = MaskIndexer(mask, self.shape, self.metadata.chunk_grid) sync(self.async_array._set_selection(indexer, value, fields=fields, prototype=prototype)) def get_coordinate_selection( self, selection: CoordinateSelection, *, out: NDBuffer | None = None, fields: Fields | None = None, prototype: BufferPrototype | None = None, ) -> NDArrayLikeOrScalar: """Retrieve a selection of individual items, by providing the indices (coordinates) for each selected item. Parameters ---------- selection : tuple An integer (coordinate) array for each dimension of the array. out : NDBuffer, optional If given, load the selected data directly into this buffer. fields : str or sequence of str, optional For arrays with a structured dtype, one or more fields can be specified to extract data for. prototype : BufferPrototype, optional The prototype of the buffer to use for the output data. If not provided, the default buffer prototype is used. Returns ------- NDArrayLikeOrScalar An array-like or scalar containing the data for the requested coordinate selection. Examples -------- Setup a 2-dimensional array:: >>> import zarr >>> import numpy as np >>> data = np.arange(0, 100, dtype="uint16").reshape((10, 10)) >>> z = zarr.create_array( >>> StorePath(MemoryStore(mode="w")), >>> shape=data.shape, >>> chunks=(3, 3), >>> dtype=data.dtype, >>> ) >>> z[:] = data Retrieve items by specifying their coordinates:: >>> z.get_coordinate_selection(([1, 4], [1, 4])) array([11, 44]) For convenience, the coordinate selection functionality is also available via the `vindex` property, e.g.:: >>> z.vindex[[1, 4], [1, 4]] array([11, 44]) Notes ----- Coordinate indexing is also known as point selection, and is a form of vectorized or inner indexing. Slices are not supported. Coordinate arrays must be provided for all dimensions of the array. Coordinate arrays may be multidimensional, in which case the output array will also be multidimensional. Coordinate arrays are broadcast against each other before being applied. The shape of the output will be the same as the shape of each coordinate array after broadcasting. 
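For example, a sketch of this broadcasting behaviour, continuing the 10x10 array from the Examples section above: coordinate arrays with shapes ``(2, 1)`` and ``(2,)`` broadcast to a ``(2, 2)`` result::

    >>> z.get_coordinate_selection(([[1], [4]], [1, 4]))
    array([[11, 14],
           [41, 44]])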
Related ------- [get_basic_selection][zarr.Array.get_basic_selection], [set_basic_selection][zarr.Array.set_basic_selection], [get_mask_selection][zarr.Array.get_mask_selection], [set_mask_selection][zarr.Array.set_mask_selection], [get_orthogonal_selection][zarr.Array.get_orthogonal_selection], [set_orthogonal_selection][zarr.Array.set_orthogonal_selection], [set_coordinate_selection][zarr.Array.set_coordinate_selection], [get_block_selection][zarr.Array.get_block_selection], [set_block_selection][zarr.Array.set_block_selection], [vindex][zarr.Array.vindex], [oindex][zarr.Array.oindex], [blocks][zarr.Array.blocks], [__getitem__][zarr.Array.__getitem__], [__setitem__][zarr.Array.__setitem__] """ if prototype is None: prototype = default_buffer_prototype() indexer = CoordinateIndexer(selection, self.shape, self.metadata.chunk_grid) out_array = sync( self.async_array._get_selection( indexer=indexer, out=out, fields=fields, prototype=prototype ) ) if hasattr(out_array, "shape"): # restore shape out_array = np.array(out_array).reshape(indexer.sel_shape) return out_array def set_coordinate_selection( self, selection: CoordinateSelection, value: npt.ArrayLike, *, fields: Fields | None = None, prototype: BufferPrototype | None = None, ) -> None: """Modify a selection of individual items, by providing the indices (coordinates) for each item to be modified. Parameters ---------- selection : tuple An integer (coordinate) array for each dimension of the array. value : npt.ArrayLike An array-like containing values to be stored into the array. fields : str or sequence of str, optional For arrays with a structured dtype, one or more fields can be specified to set data for. Examples -------- Setup a 2-dimensional array:: >>> import zarr >>> z = zarr.zeros( >>> shape=(5, 5), >>> store=StorePath(MemoryStore(mode="w")), >>> chunk_shape=(5, 5), >>> dtype="i4", >>> ) Set data for a selection of items:: >>> z.set_coordinate_selection(([1, 4], [1, 4]), 1) >>> z[...] array([[0, 0, 0, 0, 0], [0, 1, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 1]]) For convenience, this functionality is also available via the `vindex` property. E.g.:: >>> z.vindex[[1, 4], [1, 4]] = 2 >>> z[...] array([[0, 0, 0, 0, 0], [0, 2, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 2]]) Notes ----- Coordinate indexing is also known as point selection, and is a form of vectorized or inner indexing. Slices are not supported. Coordinate arrays must be provided for all dimensions of the array. 
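For example, a distinct value can be written to each selected point by passing an array-like with one element per coordinate (a sketch reusing the 5 x 5 array ``z`` from the examples above; ``import numpy as np`` is assumed)::

    >>> z.set_coordinate_selection(([1, 4], [1, 4]), np.array([10, 20]))   # writes 10 to (1, 1) and 20 to (4, 4)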
Related ------- [get_basic_selection][zarr.Array.get_basic_selection], [set_basic_selection][zarr.Array.set_basic_selection], [get_mask_selection][zarr.Array.get_mask_selection], [set_mask_selection][zarr.Array.set_mask_selection], [get_orthogonal_selection][zarr.Array.get_orthogonal_selection], [set_orthogonal_selection][zarr.Array.set_orthogonal_selection], [get_coordinate_selection][zarr.Array.get_coordinate_selection], [get_block_selection][zarr.Array.get_block_selection], [set_block_selection][zarr.Array.set_block_selection], [vindex][zarr.Array.vindex], [oindex][zarr.Array.oindex], [blocks][zarr.Array.blocks], [__getitem__][zarr.Array.__getitem__], [__setitem__][zarr.Array.__setitem__] """ if prototype is None: prototype = default_buffer_prototype() # setup indexer indexer = CoordinateIndexer(selection, self.shape, self.metadata.chunk_grid) # handle value - need ndarray-like flatten value if not is_scalar(value, self.dtype): try: from numcodecs.compat import ensure_ndarray_like value = ensure_ndarray_like(value) # TODO replace with agnostic except TypeError: # Handle types like `list` or `tuple` value = np.array(value) # TODO replace with agnostic if hasattr(value, "shape") and len(value.shape) > 1: value = np.array(value).reshape(-1) if not is_scalar(value, self.dtype) and ( isinstance(value, NDArrayLike) and indexer.shape != value.shape ): raise ValueError( f"Attempting to set a selection of {indexer.sel_shape[0]} " f"elements with an array of {value.shape[0]} elements." ) sync(self.async_array._set_selection(indexer, value, fields=fields, prototype=prototype)) def get_block_selection( self, selection: BasicSelection, *, out: NDBuffer | None = None, fields: Fields | None = None, prototype: BufferPrototype | None = None, ) -> NDArrayLikeOrScalar: """Retrieve a selection of individual items, by providing the indices (coordinates) for each selected item. Parameters ---------- selection : int or slice or tuple of int or slice An integer (coordinate) or slice for each dimension of the array. out : NDBuffer, optional If given, load the selected data directly into this buffer. fields : str or sequence of str, optional For arrays with a structured dtype, one or more fields can be specified to extract data for. prototype : BufferPrototype, optional The prototype of the buffer to use for the output data. If not provided, the default buffer prototype is used. Returns ------- NDArrayLikeOrScalar An array-like or scalar containing the data for the requested block selection. 
Examples -------- Setup a 2-dimensional array:: >>> import zarr >>> import numpy as np >>> data = np.arange(0, 100, dtype="uint16").reshape((10, 10)) >>> z = zarr.create_array( >>> StorePath(MemoryStore(mode="w")), >>> shape=data.shape, >>> chunks=(3, 3), >>> dtype=data.dtype, >>> ) >>> z[:] = data Retrieve items by specifying their block coordinates:: >>> z.get_block_selection((1, slice(None))) array([[30, 31, 32, 33, 34, 35, 36, 37, 38, 39], [40, 41, 42, 43, 44, 45, 46, 47, 48, 49], [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]]) Which is equivalent to:: >>> z[3:6, :] array([[30, 31, 32, 33, 34, 35, 36, 37, 38, 39], [40, 41, 42, 43, 44, 45, 46, 47, 48, 49], [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]]) For convenience, the block selection functionality is also available via the `blocks` property, e.g.:: >>> z.blocks[1] array([[30, 31, 32, 33, 34, 35, 36, 37, 38, 39], [40, 41, 42, 43, 44, 45, 46, 47, 48, 49], [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]]) Notes ----- Block indexing is a convenience indexing method to work on individual chunks with chunk index slicing. It has the same concept as Dask's `Array.blocks` indexing. Slices are supported. However, only with a step size of one. Block index arrays may be multidimensional to index multidimensional arrays. For example:: >>> z.blocks[0, 1:3] array([[ 3, 4, 5, 6, 7, 8], [13, 14, 15, 16, 17, 18], [23, 24, 25, 26, 27, 28]]) Related ------- [get_basic_selection][zarr.Array.get_basic_selection], [set_basic_selection][zarr.Array.set_basic_selection], [get_mask_selection][zarr.Array.get_mask_selection], [set_mask_selection][zarr.Array.set_mask_selection], [get_orthogonal_selection][zarr.Array.get_orthogonal_selection], [set_orthogonal_selection][zarr.Array.set_orthogonal_selection], [get_coordinate_selection][zarr.Array.get_coordinate_selection], [set_coordinate_selection][zarr.Array.set_coordinate_selection], [set_block_selection][zarr.Array.set_block_selection], [vindex][zarr.Array.vindex], [oindex][zarr.Array.oindex], [blocks][zarr.Array.blocks], [__getitem__][zarr.Array.__getitem__], [__setitem__][zarr.Array.__setitem__] """ if prototype is None: prototype = default_buffer_prototype() indexer = BlockIndexer(selection, self.shape, self.metadata.chunk_grid) return sync( self.async_array._get_selection( indexer=indexer, out=out, fields=fields, prototype=prototype ) ) def set_block_selection( self, selection: BasicSelection, value: npt.ArrayLike, *, fields: Fields | None = None, prototype: BufferPrototype | None = None, ) -> None: """Modify a selection of individual blocks, by providing the chunk indices (coordinates) for each block to be modified. Parameters ---------- selection : tuple An integer (coordinate) or slice for each dimension of the array. value : npt.ArrayLike An array-like containing the data to be stored in the block selection. fields : str or sequence of str, optional For arrays with a structured dtype, one or more fields can be specified to set data for. prototype : BufferPrototype, optional The prototype of the buffer used for setting the data. If not provided, the default buffer prototype is used. Examples -------- Set up a 2-dimensional array:: >>> import zarr >>> z = zarr.zeros( >>> shape=(6, 6), >>> store=StorePath(MemoryStore(mode="w")), >>> chunk_shape=(2, 2), >>> dtype="i4", >>> ) Set data for a selection of items:: >>> z.set_block_selection((1, 0), 1) >>> z[...] 
array([[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [1, 1, 0, 0, 0, 0], [1, 1, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]]) For convenience, this functionality is also available via the `blocks` property. E.g.:: >>> z.blocks[2, 1] = 4 >>> z[...] array([[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [1, 1, 0, 0, 0, 0], [1, 1, 0, 0, 0, 0], [0, 0, 4, 4, 0, 0], [0, 0, 4, 4, 0, 0]]) >>> z.blocks[:, 2] = 7 >>> z[...] array([[0, 0, 0, 0, 7, 7], [0, 0, 0, 0, 7, 7], [1, 1, 0, 0, 7, 7], [1, 1, 0, 0, 7, 7], [0, 0, 4, 4, 7, 7], [0, 0, 4, 4, 7, 7]]) Notes ----- Block indexing is a convenience indexing method to work on individual chunks with chunk index slicing. It has the same concept as Dask's `Array.blocks` indexing. Slices are supported. However, only with a step size of one. Related ------- [get_basic_selection][zarr.Array.get_basic_selection], [set_basic_selection][zarr.Array.set_basic_selection], [get_mask_selection][zarr.Array.get_mask_selection], [set_mask_selection][zarr.Array.set_mask_selection], [get_orthogonal_selection][zarr.Array.get_orthogonal_selection], [set_orthogonal_selection][zarr.Array.set_orthogonal_selection], [get_coordinate_selection][zarr.Array.get_coordinate_selection], [get_block_selection][zarr.Array.get_block_selection], [set_block_selection][zarr.Array.set_block_selection], [vindex][zarr.Array.vindex], [oindex][zarr.Array.oindex], [blocks][zarr.Array.blocks], [__getitem__][zarr.Array.__getitem__], [__setitem__][zarr.Array.__setitem__] """ if prototype is None: prototype = default_buffer_prototype() indexer = BlockIndexer(selection, self.shape, self.metadata.chunk_grid) sync(self.async_array._set_selection(indexer, value, fields=fields, prototype=prototype)) @property def vindex(self) -> VIndex: """Shortcut for vectorized (inner) indexing, see [get_coordinate_selection][zarr.Array.get_coordinate_selection], [set_coordinate_selection][zarr.Array.set_coordinate_selection], [get_mask_selection][zarr.Array.get_mask_selection] and [set_mask_selection][zarr.Array.set_mask_selection] for documentation and examples.""" return VIndex(self) @property def oindex(self) -> OIndex: """Shortcut for orthogonal (outer) indexing, see [get_orthogonal_selection][zarr.Array.get_orthogonal_selection] and [set_orthogonal_selection][zarr.Array.set_orthogonal_selection] for documentation and examples.""" return OIndex(self) @property def blocks(self) -> BlockIndex: """Shortcut for blocked chunked indexing, see [get_block_selection][zarr.Array.get_block_selection] and [set_block_selection][zarr.Array.set_block_selection] for documentation and examples.""" return BlockIndex(self) def resize(self, new_shape: ShapeLike) -> None: """ Change the shape of the array by growing or shrinking one or more dimensions. This is an in-place operation that modifies the array. Parameters ---------- new_shape : tuple New shape of the array. Notes ----- If one or more dimensions are shrunk, any chunks falling outside the new array shape will be deleted from the underlying store. However, it is noteworthy that the chunks partially falling inside the new array (i.e. boundary chunks) will remain intact, and therefore, the data falling outside the new array but inside the boundary chunks would be restored by a subsequent resize operation that grows the array size. 
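For instance, in the following minimal sketch (assuming an in-memory array with default settings), the value written at index 4 survives a shrink because it lives in a boundary chunk, and reappears once the array is grown again:

```python
import numpy as np
import zarr

z = zarr.create_array(zarr.storage.MemoryStore(), shape=(5,), chunks=(3,), dtype="int32")
z[:] = np.arange(5)
z.resize((4,))   # index 4 is now out of bounds, but its (boundary) chunk is left intact
z.resize((5,))   # growing again re-exposes the retained value
int(z[4])
#> 4
```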
Examples -------- ```python import zarr z = zarr.zeros(shape=(10000, 10000), chunk_shape=(1000, 1000), dtype="int32",) z.shape #> (10000, 10000) z.resize((20000, 1000)) z.shape #> (20000, 1000) z.resize((50, 50)) z.shape #> (50, 50) ``` """ sync(self.async_array.resize(new_shape)) def append(self, data: npt.ArrayLike, axis: int = 0) -> tuple[int, ...]: """Append `data` to `axis`. Parameters ---------- data : array-like Data to be appended. axis : int Axis along which to append. Returns ------- new_shape : tuple Notes ----- The size of all dimensions other than `axis` must match between this array and `data`. Examples -------- >>> import numpy as np >>> import zarr >>> a = np.arange(10000000, dtype='i4').reshape(10000, 1000) >>> z = zarr.array(a, chunks=(1000, 100)) >>> z.shape (10000, 1000) >>> z.append(a) (20000, 1000) >>> z.append(np.vstack([a, a]), axis=1) (20000, 2000) >>> z.shape (20000, 2000) """ return sync(self.async_array.append(data, axis=axis)) def update_attributes(self, new_attributes: dict[str, JSON]) -> Self: """ Update the array's attributes. Parameters ---------- new_attributes : dict A dictionary of new attributes to update or add to the array. The keys represent attribute names, and the values must be JSON-compatible. Returns ------- Array The array with the updated attributes. Raises ------ ValueError If the attributes are invalid or incompatible with the array's metadata. Notes ----- - The updated attributes will be merged with existing attributes, and any conflicts will be overwritten by the new values. """ new_array = sync(self.async_array.update_attributes(new_attributes)) return type(self)(new_array) def __repr__(self) -> str: return f"<Array {self.store_path} shape={self.shape} dtype={self.dtype}>" @property def info(self) -> Any: """ Return the statically known information for an array. Returns ------- ArrayInfo Related ------- [zarr.Array.info_complete][] - All information about an array, including dynamic information like the number of bytes and chunks written. Examples -------- >>> arr = zarr.create(shape=(10,), chunks=(2,), dtype="float32") >>> arr.info Type : Array Zarr format : 3 Data type : DataType.float32 Shape : (10,) Chunk shape : (2,) Order : C Read-only : False Store type : MemoryStore Codecs : [BytesCodec(endian=<Endian.little: 'little'>)] No. bytes : 40 """ return self.async_array.info def info_complete(self) -> Any: """ Returns all the information about an array, including information from the Store. In addition to the statically known information like ``name`` and ``zarr_format``, this includes additional information like the size of the array in bytes and the number of chunks written. Note that this method will need to read metadata from the store. Returns ------- ArrayInfo Related ------- [zarr.Array.info][] - The statically known subset of metadata about an array. """ return sync(self.async_array.info_complete()) async def _shards_initialized( array: AnyAsyncArray, ) -> tuple[str, ...]: """ Return the keys of the chunks that have been persisted to the storage backend. Parameters ---------- array : AsyncArray The array to inspect. Returns ------- chunks_initialized : tuple[str, ...] The keys of the chunks that have been initialized.
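Notes
-----
For an unsharded array the shard keys coincide with the chunk keys. A minimal sketch of the observable effect, via the related public [nchunks_initialized][zarr.Array.nchunks_initialized] property (assuming an in-memory store and default settings)::

    >>> import zarr
    >>> z = zarr.create_array(zarr.storage.MemoryStore(), shape=(4, 4), chunks=(2, 2), dtype="i4")
    >>> z.nchunks_initialized
    0
    >>> z[:2, :2] = 1
    >>> z.nchunks_initialized
    1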
Related ------- [nchunks_initialized][zarr.Array.nchunks_initialized] """ store_contents = [ x async for x in array.store_path.store.list_prefix(prefix=array.store_path.path) ] store_contents_relative = [ _relativize_path(path=key, prefix=array.store_path.path) for key in store_contents ] return tuple( chunk_key for chunk_key in array._iter_shard_keys() if chunk_key in store_contents_relative ) FiltersLike: TypeAlias = ( Iterable[dict[str, JSON] | ArrayArrayCodec | Numcodec] | ArrayArrayCodec | Iterable[Numcodec] | Numcodec | Literal["auto"] | None ) # Union of acceptable types for users to pass in for both v2 and v3 compressors CompressorLike: TypeAlias = dict[str, JSON] | BytesBytesCodec | Numcodec | Literal["auto"] | None CompressorsLike: TypeAlias = ( Iterable[dict[str, JSON] | BytesBytesCodec | Numcodec] | Mapping[str, JSON] | BytesBytesCodec | Numcodec | Literal["auto"] | None ) SerializerLike: TypeAlias = dict[str, JSON] | ArrayBytesCodec | Literal["auto"] class ShardsConfigParam(TypedDict): shape: tuple[int, ...] index_location: ShardingCodecIndexLocation | None ShardsLike: TypeAlias = tuple[int, ...] | ShardsConfigParam | Literal["auto"] async def from_array( store: StoreLike, *, data: AnyArray | npt.ArrayLike, write_data: bool = True, name: str | None = None, chunks: Literal["auto", "keep"] | tuple[int, ...] = "keep", shards: ShardsLike | None | Literal["keep"] = "keep", filters: FiltersLike | Literal["keep"] = "keep", compressors: CompressorsLike | Literal["keep"] = "keep", serializer: SerializerLike | Literal["keep"] = "keep", fill_value: Any | None = DEFAULT_FILL_VALUE, order: MemoryOrder | None = None, zarr_format: ZarrFormat | None = None, attributes: dict[str, JSON] | None = None, chunk_key_encoding: ChunkKeyEncodingLike | None = None, dimension_names: DimensionNames = None, storage_options: dict[str, Any] | None = None, overwrite: bool = False, config: ArrayConfigLike | None = None, ) -> AnyAsyncArray: """Create an array from an existing array or array-like. Parameters ---------- store : StoreLike StoreLike object to open. See the [storage documentation in the user guide][user-guide-store-like] for a description of all valid StoreLike values. data : Array | array-like The array to copy. write_data : bool, default True Whether to copy the data from the input array to the new array. If ``write_data`` is ``False``, the new array will be created with the same metadata as the input array, but without any data. name : str or None, optional The name of the array within the store. If ``name`` is ``None``, the array will be located at the root of the store. chunks : tuple[int, ...] or "auto" or "keep", optional Chunk shape of the array. Following values are supported: - "auto": Automatically determine the chunk shape based on the array's shape and dtype. - "keep": Retain the chunk shape of the data array if it is a zarr Array. - tuple[int, ...]: A tuple of integers representing the chunk shape. If not specified, defaults to "keep" if data is a zarr Array, otherwise "auto". shards : tuple[int, ...], optional Shard shape of the array. Following values are supported: - "auto": Automatically determine the shard shape based on the array's shape and chunk shape. - "keep": Retain the shard shape of the data array if it is a zarr Array. - tuple[int, ...]: A tuple of integers representing the shard shape. - None: No sharding. If not specified, defaults to "keep" if data is a zarr Array, otherwise None. 
filters : Iterable[Codec] | Literal["auto", "keep"], optional Iterable of filters to apply to each chunk of the array, in order, before serializing that chunk to bytes. For Zarr format 3, a "filter" is a codec that takes an array and returns an array, and these values must be instances of [`zarr.abc.codec.ArrayArrayCodec`][], or a dict representations of [`zarr.abc.codec.ArrayArrayCodec`][]. For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the the order if your filters is consistent with the behavior of each filter. The default value of ``"keep"`` instructs Zarr to infer ``filters`` from ``data``. If that inference is not possible, Zarr will fall back to the behavior specified by ``"auto"``, which is to choose default filters based on the data type of the array and the Zarr format specified. For all data types in Zarr V3, and most data types in Zarr V2, the default filters are the empty tuple ``()``. The only cases where default filters are not empty is when the Zarr format is 2, and the data type is a variable-length data type like [`zarr.dtype.VariableLengthUTF8`][] or [`zarr.dtype.VariableLengthUTF8`][]. In these cases, the default filters is a tuple with a single element which is a codec specific to that particular data type. To create an array with no filters, provide an empty iterable or the value ``None``. compressors : Iterable[Codec] or "auto" or "keep", optional List of compressors to apply to the array. Compressors are applied in order, and after any filters are applied (if any are specified) and the data is serialized into bytes. For Zarr format 3, a "compressor" is a codec that takes a bytestream, and returns another bytestream. Multiple compressors my be provided for Zarr format 3. For Zarr format 2, a "compressor" can be any numcodecs codec. Only a single compressor may be provided for Zarr format 2. Following values are supported: - Iterable[Codec]: List of compressors to apply to the array. - "auto": Automatically determine the compressors based on the array's dtype. - "keep": Retain the compressors of the input array if it is a zarr Array. If no ``compressors`` are provided, defaults to "keep" if data is a zarr Array, otherwise "auto". serializer : dict[str, JSON] | ArrayBytesCodec or "auto" or "keep", optional Array-to-bytes codec to use for encoding the array data. Zarr format 3 only. Zarr format 2 arrays use implicit array-to-bytes conversion. Following values are supported: - dict[str, JSON]: A dict representation of an ``ArrayBytesCodec``. - ArrayBytesCodec: An instance of ``ArrayBytesCodec``. - "auto": a default serializer will be used. These defaults can be changed by modifying the value of ``array.v3_default_serializer`` in [`zarr.config`][zarr.config]. - "keep": Retain the serializer of the input array if it is a zarr Array. fill_value : Any, optional Fill value for the array. If not specified, defaults to the fill value of the data array. order : {"C", "F"}, optional The memory of the array (default is "C"). For Zarr format 2, this parameter sets the memory order of the array. For Zarr format 3, this parameter is deprecated, because memory order is a runtime parameter for Zarr format 3 arrays. The recommended way to specify the memory order for Zarr format 3 arrays is via the ``config`` parameter, e.g. ``{'config': 'C'}``. If not specified, defaults to the memory order of the data array. zarr_format : {2, 3}, optional The zarr format to use when saving. If not specified, defaults to the zarr format of the data array. 
attributes : dict, optional Attributes for the array. If not specified, defaults to the attributes of the data array. chunk_key_encoding : ChunkKeyEncoding, optional A specification of how the chunk keys are represented in storage. For Zarr format 3, the default is ``{"name": "default", "separator": "/"}}``. For Zarr format 2, the default is ``{"name": "v2", "separator": "."}}``. If not specified and the data array has the same zarr format as the target array, the chunk key encoding of the data array is used. dimension_names : Iterable[str | None] | None The names of the dimensions (default is None). Zarr format 3 only. Zarr format 2 arrays should not use this parameter. If not specified, defaults to the dimension names of the data array. storage_options : dict, optional If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. overwrite : bool, default False Whether to overwrite an array with the same name in the store, if one exists. config : ArrayConfig or ArrayConfigLike, optional Runtime configuration for the array. Returns ------- AsyncArray The array. Examples -------- Create an array from an existing Array:: >>> import zarr >>> store = zarr.storage.MemoryStore() >>> store2 = zarr.storage.LocalStore('example.zarr') >>> arr = zarr.create_array( >>> store=store, >>> shape=(100,100), >>> chunks=(10,10), >>> dtype='int32', >>> fill_value=0) >>> arr2 = await zarr.api.asynchronous.from_array(store2, data=arr) Create an array from an existing NumPy array:: >>> arr3 = await zarr.api.asynchronous.from_array( >>> zarr.storage.MemoryStore(), >>> data=np.arange(10000, dtype='i4').reshape(100, 100), >>> ) Create an array from any array-like object:: >>> arr4 = await zarr.api.asynchronous.from_array( >>> zarr.storage.MemoryStore(), >>> data=[[1, 2], [3, 4]], >>> ) >>> await arr4.getitem(...) array([[1, 2],[3, 4]]) Create an array from an existing Array without copying the data:: >>> arr5 = await zarr.api.asynchronous.from_array( >>> zarr.storage.MemoryStore(), >>> data=Array(arr4), >>> write_data=False, >>> ) >>> await arr5.getitem(...) array([[0, 0],[0, 0]]) """ mode: Literal["a"] = "a" config_parsed = parse_array_config(config) store_path = await make_store_path(store, path=name, mode=mode, storage_options=storage_options) ( chunks, shards, filters, compressors, serializer, fill_value, order, zarr_format, chunk_key_encoding, dimension_names, ) = _parse_keep_array_attr( data=data, chunks=chunks, shards=shards, filters=filters, compressors=compressors, serializer=serializer, fill_value=fill_value, order=order, zarr_format=zarr_format, chunk_key_encoding=chunk_key_encoding, dimension_names=dimension_names, ) if not hasattr(data, "dtype") or not hasattr(data, "shape"): data = np.array(data) result = await init_array( store_path=store_path, shape=data.shape, dtype=data.dtype, chunks=chunks, shards=shards, filters=filters, compressors=compressors, serializer=serializer, fill_value=fill_value, order=order, zarr_format=zarr_format, attributes=attributes, chunk_key_encoding=chunk_key_encoding, dimension_names=dimension_names, overwrite=overwrite, config=config_parsed, ) if write_data: if isinstance(data, Array): async def _copy_array_region( chunk_coords: tuple[int, ...] 
| slice, _data: AnyArray ) -> None: arr = await _data.async_array.getitem(chunk_coords) await result.setitem(chunk_coords, arr) # Stream data from the source array to the new array await concurrent_map( [(region, data) for region in result._iter_shard_regions()], _copy_array_region, zarr.core.config.config.get("async.concurrency"), ) else: async def _copy_arraylike_region(chunk_coords: slice, _data: NDArrayLike) -> None: await result.setitem(chunk_coords, _data[chunk_coords]) # Stream data from the source array to the new array await concurrent_map( [(region, data) for region in result._iter_shard_regions()], _copy_arraylike_region, zarr.core.config.config.get("async.concurrency"), ) return result async def init_array( *, store_path: StorePath, shape: ShapeLike, dtype: ZDTypeLike, chunks: tuple[int, ...] | Literal["auto"] = "auto", shards: ShardsLike | None = None, filters: FiltersLike = "auto", compressors: CompressorsLike = "auto", serializer: SerializerLike = "auto", fill_value: Any | None = DEFAULT_FILL_VALUE, order: MemoryOrder | None = None, zarr_format: ZarrFormat | None = 3, attributes: dict[str, JSON] | None = None, chunk_key_encoding: ChunkKeyEncodingLike | None = None, dimension_names: DimensionNames = None, overwrite: bool = False, config: ArrayConfigLike | None = None, ) -> AnyAsyncArray: """Create and persist an array metadata document. Parameters ---------- store_path : StorePath StorePath instance. The path attribute is the name of the array to initialize. shape : tuple[int, ...] Shape of the array. dtype : ZDTypeLike Data type of the array. chunks : tuple[int, ...], optional Chunk shape of the array. If not specified, default are guessed based on the shape and dtype. shards : tuple[int, ...], optional Shard shape of the array. The default value of ``None`` results in no sharding at all. filters : Iterable[Codec] | Literal["auto"], optional Iterable of filters to apply to each chunk of the array, in order, before serializing that chunk to bytes. For Zarr format 3, a "filter" is a codec that takes an array and returns an array, and these values must be instances of [`zarr.abc.codec.ArrayArrayCodec`][], or a dict representations of [`zarr.abc.codec.ArrayArrayCodec`][]. For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the the order if your filters is consistent with the behavior of each filter. The default value of ``"auto"`` instructs Zarr to use a default used based on the data type of the array and the Zarr format specified. For all data types in Zarr V3, and most data types in Zarr V2, the default filters are empty. The only cases where default filters are not empty is when the Zarr format is 2, and the data type is a variable-length data type like [`zarr.dtype.VariableLengthUTF8`][] or [`zarr.dtype.VariableLengthUTF8`][]. In these cases, the default filters contains a single element which is a codec specific to that particular data type. To create an array with no filters, provide an empty iterable or the value ``None``. compressors : Iterable[Codec] | Literal["auto"], optional List of compressors to apply to the array. Compressors are applied in order, and after any filters are applied (if any are specified) and the data is serialized into bytes. The default value of ``"auto"`` instructs Zarr to use a default of [`zarr.codecs.ZstdCodec`][]. To create an array with no compressors, provide an empty iterable or the value ``None``. 
serializer : dict[str, JSON] | ArrayBytesCodec | Literal["auto"], optional Array-to-bytes codec to use for encoding the array data. Zarr format 3 only. Zarr format 2 arrays use implicit array-to-bytes conversion. The default value of ``"auto"`` instructs Zarr to use a default codec based on the data type of the array. For most data types this default codec is [`zarr.codecs.BytesCodec`][]. For [`zarr.dtype.VariableLengthUTF8`][], the default codec is [`zarr.codecs.VlenUTF8Codec`][]. For [`zarr.dtype.VariableLengthBytes`][], the default codec is [`zarr.codecs.VlenBytesCodec`][]. fill_value : Any, optional Fill value for the array. order : {"C", "F"}, optional The memory of the array (default is "C"). For Zarr format 2, this parameter sets the memory order of the array. For Zarr format 3, this parameter is deprecated, because memory order is a runtime parameter for Zarr format 3 arrays. The recommended way to specify the memory order for Zarr format 3 arrays is via the ``config`` parameter, e.g. ``{'config': 'C'}``. If no ``order`` is provided, a default order will be used. This default can be changed by modifying the value of ``array.order`` in [`zarr.config`][zarr.config]. zarr_format : {2, 3}, optional The zarr format to use when saving. attributes : dict, optional Attributes for the array. chunk_key_encoding : ChunkKeyEncodingLike, optional A specification of how the chunk keys are represented in storage. For Zarr format 3, the default is ``{"name": "default", "separator": "/"}}``. For Zarr format 2, the default is ``{"name": "v2", "separator": "."}}``. dimension_names : Iterable[str], optional The names of the dimensions (default is None). Zarr format 3 only. Zarr format 2 arrays should not use this parameter. overwrite : bool, default False Whether to overwrite an array with the same name in the store, if one exists. config : ArrayConfigLike or None, default=None Configuration for this array. If ``None``, the default array runtime configuration will be used. This default is stored in the global configuration object. Returns ------- AsyncArray The AsyncArray. """ if zarr_format is None: zarr_format = _default_zarr_format() from zarr.codecs.sharding import ShardingCodec, ShardingCodecIndexLocation zdtype = parse_dtype(dtype, zarr_format=zarr_format) shape_parsed = parse_shapelike(shape) chunk_key_encoding_parsed = _parse_chunk_key_encoding( chunk_key_encoding, zarr_format=zarr_format ) if overwrite: if store_path.store.supports_deletes: await store_path.delete_dir() else: await ensure_no_existing_node(store_path, zarr_format=zarr_format) else: await ensure_no_existing_node(store_path, zarr_format=zarr_format) item_size = 1 if isinstance(zdtype, HasItemSize): item_size = zdtype.item_size shard_shape_parsed, chunk_shape_parsed = _auto_partition( array_shape=shape_parsed, shard_shape=shards, chunk_shape=chunks, item_size=item_size, ) chunks_out: tuple[int, ...] meta: ArrayV2Metadata | ArrayV3Metadata if zarr_format == 2: if shard_shape_parsed is not None: msg = ( "Zarr format 2 arrays can only be created with `shard_shape` set to `None`. " f"Got `shard_shape={shards}` instead." 
) raise ValueError(msg) if serializer != "auto": raise ValueError("Zarr format 2 arrays do not support `serializer`.") filters_parsed, compressor_parsed = _parse_chunk_encoding_v2( compressor=compressors, filters=filters, dtype=zdtype ) if dimension_names is not None: raise ValueError("Zarr format 2 arrays do not support dimension names.") if order is None: order_parsed = zarr_config.get("array.order") else: order_parsed = order chunk_key_encoding_parsed = cast("V2ChunkKeyEncoding", chunk_key_encoding_parsed) meta = AsyncArray._create_metadata_v2( shape=shape_parsed, dtype=zdtype, chunks=chunk_shape_parsed, dimension_separator=chunk_key_encoding_parsed.separator, fill_value=fill_value, order=order_parsed, filters=filters_parsed, compressor=compressor_parsed, attributes=attributes, ) else: array_array, array_bytes, bytes_bytes = _parse_chunk_encoding_v3( compressors=compressors, filters=filters, serializer=serializer, dtype=zdtype, ) sub_codecs = cast("tuple[Codec, ...]", (*array_array, array_bytes, *bytes_bytes)) codecs_out: tuple[Codec, ...] if shard_shape_parsed is not None: index_location = None if isinstance(shards, dict): index_location = ShardingCodecIndexLocation(shards.get("index_location", None)) if index_location is None: index_location = ShardingCodecIndexLocation.end sharding_codec = ShardingCodec( chunk_shape=chunk_shape_parsed, codecs=sub_codecs, index_location=index_location ) sharding_codec.validate( shape=chunk_shape_parsed, dtype=zdtype, chunk_grid=RegularChunkGrid(chunk_shape=shard_shape_parsed), ) codecs_out = (sharding_codec,) chunks_out = shard_shape_parsed else: chunks_out = chunk_shape_parsed codecs_out = sub_codecs if order is not None: _warn_order_kwarg() meta = AsyncArray._create_metadata_v3( shape=shape_parsed, dtype=zdtype, fill_value=fill_value, chunk_shape=chunks_out, chunk_key_encoding=chunk_key_encoding_parsed, codecs=codecs_out, dimension_names=dimension_names, attributes=attributes, ) arr = AsyncArray(metadata=meta, store_path=store_path, config=config) await arr._save_metadata(meta, ensure_parents=True) return arr async def create_array( store: StoreLike, *, name: str | None = None, shape: ShapeLike | None = None, dtype: ZDTypeLike | None = None, data: np.ndarray[Any, np.dtype[Any]] | None = None, chunks: tuple[int, ...] | Literal["auto"] = "auto", shards: ShardsLike | None = None, filters: FiltersLike = "auto", compressors: CompressorsLike = "auto", serializer: SerializerLike = "auto", fill_value: Any | None = DEFAULT_FILL_VALUE, order: MemoryOrder | None = None, zarr_format: ZarrFormat | None = 3, attributes: dict[str, JSON] | None = None, chunk_key_encoding: ChunkKeyEncodingLike | None = None, dimension_names: DimensionNames = None, storage_options: dict[str, Any] | None = None, overwrite: bool = False, config: ArrayConfigLike | None = None, write_data: bool = True, ) -> AnyAsyncArray: """Create an array. Parameters ---------- store : StoreLike StoreLike object to open. See the [storage documentation in the user guide][user-guide-store-like] for a description of all valid StoreLike values. name : str or None, optional The name of the array within the store. If ``name`` is ``None``, the array will be located at the root of the store. shape : ShapeLike, optional Shape of the array. Must be ``None`` if ``data`` is provided. dtype : ZDTypeLike | None Data type of the array. Must be ``None`` if ``data`` is provided. data : np.ndarray, optional Array-like data to use for initializing the array. 
If this parameter is provided, the ``shape`` and ``dtype`` parameters must be ``None``. chunks : tuple[int, ...] | Literal["auto"], default="auto" Chunk shape of the array. If chunks is "auto", a chunk shape is guessed based on the shape of the array and the dtype. shards : tuple[int, ...], optional Shard shape of the array. The default value of ``None`` results in no sharding at all. filters : Iterable[Codec] | Literal["auto"], optional Iterable of filters to apply to each chunk of the array, in order, before serializing that chunk to bytes. For Zarr format 3, a "filter" is a codec that takes an array and returns an array, and these values must be instances of [`zarr.abc.codec.ArrayArrayCodec`][], or a dict representations of [`zarr.abc.codec.ArrayArrayCodec`][]. For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the the order if your filters is consistent with the behavior of each filter. The default value of ``"auto"`` instructs Zarr to use a default used based on the data type of the array and the Zarr format specified. For all data types in Zarr V3, and most data types in Zarr V2, the default filters are empty. The only cases where default filters are not empty is when the Zarr format is 2, and the data type is a variable-length data type like [`zarr.dtype.VariableLengthUTF8`][] or [`zarr.dtype.VariableLengthUTF8`][]. In these cases, the default filters contains a single element which is a codec specific to that particular data type. To create an array with no filters, provide an empty iterable or the value ``None``. compressors : Iterable[Codec], optional List of compressors to apply to the array. Compressors are applied in order, and after any filters are applied (if any are specified) and the data is serialized into bytes. For Zarr format 3, a "compressor" is a codec that takes a bytestream, and returns another bytestream. Multiple compressors my be provided for Zarr format 3. If no ``compressors`` are provided, a default set of compressors will be used. These defaults can be changed by modifying the value of ``array.v3_default_compressors`` in [`zarr.config`][zarr.config]. Use ``None`` to omit default compressors. For Zarr format 2, a "compressor" can be any numcodecs codec. Only a single compressor may be provided for Zarr format 2. If no ``compressor`` is provided, a default compressor will be used. in [`zarr.config`][zarr.config]. Use ``None`` to omit the default compressor. serializer : dict[str, JSON] | ArrayBytesCodec, optional Array-to-bytes codec to use for encoding the array data. Zarr format 3 only. Zarr format 2 arrays use implicit array-to-bytes conversion. If no ``serializer`` is provided, a default serializer will be used. These defaults can be changed by modifying the value of ``array.v3_default_serializer`` in [`zarr.config`][zarr.config]. fill_value : Any, optional Fill value for the array. order : {"C", "F"}, optional The memory of the array (default is "C"). For Zarr format 2, this parameter sets the memory order of the array. For Zarr format 3, this parameter is deprecated, because memory order is a runtime parameter for Zarr format 3 arrays. The recommended way to specify the memory order for Zarr format 3 arrays is via the ``config`` parameter, e.g. ``{'config': 'C'}``. If no ``order`` is provided, a default order will be used. This default can be changed by modifying the value of ``array.order`` in [`zarr.config`][zarr.config]. zarr_format : {2, 3}, optional The zarr format to use when saving. 
attributes : dict, optional Attributes for the array. chunk_key_encoding : ChunkKeyEncodingLike, optional A specification of how the chunk keys are represented in storage. For Zarr format 3, the default is ``{"name": "default", "separator": "/"}}``. For Zarr format 2, the default is ``{"name": "v2", "separator": "."}}``. dimension_names : Iterable[str], optional The names of the dimensions (default is None). Zarr format 3 only. Zarr format 2 arrays should not use this parameter. storage_options : dict, optional If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. overwrite : bool, default False Whether to overwrite an array with the same name in the store, if one exists. If ``True``, all existing paths in the store will be deleted. config : ArrayConfigLike, optional Runtime configuration for the array. write_data : bool If a pre-existing array-like object was provided to this function via the ``data`` parameter then ``write_data`` determines whether the values in that array-like object should be written to the Zarr array created by this function. If ``write_data`` is ``False``, then the array will be left empty. Returns ------- AsyncArray The array. Examples -------- >>> import zarr >>> store = zarr.storage.MemoryStore(mode='w') >>> async_arr = await zarr.api.asynchronous.create_array( >>> store=store, >>> shape=(100,100), >>> chunks=(10,10), >>> dtype='i4', >>> fill_value=0) """ data_parsed, shape_parsed, dtype_parsed = _parse_data_params( data=data, shape=shape, dtype=dtype ) if data_parsed is not None: return await from_array( store, data=data_parsed, write_data=write_data, name=name, chunks=chunks, shards=shards, filters=filters, compressors=compressors, serializer=serializer, fill_value=fill_value, order=order, zarr_format=zarr_format, attributes=attributes, chunk_key_encoding=chunk_key_encoding, dimension_names=dimension_names, storage_options=storage_options, overwrite=overwrite, config=config, ) else: mode: Literal["a"] = "a" store_path = await make_store_path( store, path=name, mode=mode, storage_options=storage_options ) return await init_array( store_path=store_path, shape=shape_parsed, dtype=dtype_parsed, chunks=chunks, shards=shards, filters=filters, compressors=compressors, serializer=serializer, fill_value=fill_value, order=order, zarr_format=zarr_format, attributes=attributes, chunk_key_encoding=chunk_key_encoding, dimension_names=dimension_names, overwrite=overwrite, config=config, ) def _parse_keep_array_attr( data: AnyArray | npt.ArrayLike, chunks: Literal["auto", "keep"] | tuple[int, ...], shards: ShardsLike | None | Literal["keep"], filters: FiltersLike | Literal["keep"], compressors: CompressorsLike | Literal["keep"], serializer: SerializerLike | Literal["keep"], fill_value: Any | None, order: MemoryOrder | None, zarr_format: ZarrFormat | None, chunk_key_encoding: ChunkKeyEncodingLike | None, dimension_names: DimensionNames, ) -> tuple[ tuple[int, ...] 
| Literal["auto"], ShardsLike | None, FiltersLike, CompressorsLike, SerializerLike, Any | None, MemoryOrder | None, ZarrFormat, ChunkKeyEncodingLike | None, DimensionNames, ]: if isinstance(data, Array): if chunks == "keep": chunks = data.chunks if shards == "keep": shards = data.shards if zarr_format is None: zarr_format = data.metadata.zarr_format if filters == "keep": if zarr_format == data.metadata.zarr_format: filters = data.filters or None else: filters = "auto" if compressors == "keep": if zarr_format == data.metadata.zarr_format: compressors = data.compressors or None else: compressors = "auto" if serializer == "keep": if zarr_format == 3 and data.metadata.zarr_format == 3: serializer = cast("SerializerLike", data.serializer) else: serializer = "auto" if fill_value is None: fill_value = data.fill_value if data.metadata.zarr_format == 2 and zarr_format == 3 and data.order == "F": # Can't set order="F" for v3 arrays warnings.warn( "The 'order' attribute of a Zarr format 2 array does not have a direct analogue in Zarr format 3. " "The existing order='F' of the source Zarr format 2 array will be ignored.", ZarrUserWarning, stacklevel=2, ) elif order is None and zarr_format == 2: order = data.order if chunk_key_encoding is None and zarr_format == data.metadata.zarr_format: if isinstance(data.metadata, ArrayV2Metadata): chunk_key_encoding = {"name": "v2", "separator": data.metadata.dimension_separator} elif isinstance(data.metadata, ArrayV3Metadata): chunk_key_encoding = data.metadata.chunk_key_encoding if dimension_names is None and data.metadata.zarr_format == 3: dimension_names = data.metadata.dimension_names else: if chunks == "keep": chunks = "auto" if shards == "keep": shards = None if zarr_format is None: zarr_format = 3 if filters == "keep": filters = "auto" if compressors == "keep": compressors = "auto" if serializer == "keep": serializer = "auto" return ( chunks, shards, filters, compressors, serializer, fill_value, order, zarr_format, chunk_key_encoding, dimension_names, ) def _parse_chunk_key_encoding( data: ChunkKeyEncodingLike | None, zarr_format: ZarrFormat ) -> ChunkKeyEncoding: """ Take an implicit specification of a chunk key encoding and parse it into a ChunkKeyEncoding object. """ if data is None: if zarr_format == 2: data = {"name": "v2", "configuration": {"separator": "."}} else: data = {"name": "default", "configuration": {"separator": "/"}} result = parse_chunk_key_encoding(data) if zarr_format == 2 and result.name != "v2": msg = ( "Invalid chunk key encoding. For Zarr format 2 arrays, the `name` field of the " f"chunk key encoding must be 'v2'. Got `name` = {result.name} instead." ) raise ValueError(msg) return result def default_filters_v3(dtype: ZDType[Any, Any]) -> tuple[ArrayArrayCodec, ...]: """ Given a data type, return the default filters for that data type. This is an empty tuple. No data types have default filters. """ return () def default_compressors_v3(dtype: ZDType[Any, Any]) -> tuple[BytesBytesCodec, ...]: """ Given a data type, return the default compressors for that data type. This is just a tuple containing ``ZstdCodec`` """ return (ZstdCodec(),) def default_serializer_v3(dtype: ZDType[Any, Any]) -> ArrayBytesCodec: """ Given a data type, return the default serializer for that data type. The default serializer for most data types is the ``BytesCodec``, which may or may not be parameterized with an endianness, depending on whether the data type has endianness. 
Variable length strings and variable length bytes have hard-coded serializers -- ``VLenUTF8Codec`` and ``VLenBytesCodec``, respectively. """ serializer: ArrayBytesCodec = BytesCodec(endian=None) if isinstance(dtype, HasEndianness): serializer = BytesCodec(endian="little") elif isinstance(dtype, HasObjectCodec): if dtype.object_codec_id == "vlen-bytes": serializer = VLenBytesCodec() elif dtype.object_codec_id == "vlen-utf8": serializer = VLenUTF8Codec() else: msg = f"Data type {dtype} requires an unknown object codec: {dtype.object_codec_id!r}." raise ValueError(msg) return serializer def default_filters_v2(dtype: ZDType[Any, Any]) -> tuple[Numcodec] | None: """ Given a data type, return the default filters for that data type. For data types that require an object codec, namely variable length data types, this is a tuple containing the object codec. Otherwise it's ``None``. """ if isinstance(dtype, HasObjectCodec): if dtype.object_codec_id == "vlen-bytes": from numcodecs import VLenBytes return (VLenBytes(),) elif dtype.object_codec_id == "vlen-utf8": from numcodecs import VLenUTF8 return (VLenUTF8(),) else: msg = f"Data type {dtype} requires an unknown object codec: {dtype.object_codec_id!r}." raise ValueError(msg) return None def default_compressor_v2(dtype: ZDType[Any, Any]) -> Numcodec: """ Given a data type, return the default compressors for that data type. This is just the numcodecs ``Zstd`` codec. """ from numcodecs import Zstd return Zstd(level=0, checksum=False) # type: ignore[no-any-return] def _parse_chunk_encoding_v2( *, compressor: CompressorsLike, filters: FiltersLike, dtype: ZDType[TBaseDType, TBaseScalar], ) -> tuple[tuple[Numcodec, ...] | None, Numcodec | None]: """ Generate chunk encoding classes for Zarr format 2 arrays with optional defaults. """ _filters: tuple[Numcodec, ...] | None _compressor: Numcodec | None if compressor is None or compressor == (): _compressor = None elif compressor == "auto": _compressor = default_compressor_v2(dtype) elif isinstance(compressor, tuple | list) and len(compressor) == 1: _compressor = parse_compressor(compressor[0]) else: if isinstance(compressor, Iterable) and not isinstance(compressor, dict): msg = f"For Zarr format 2 arrays, the `compressor` must be a single codec. Got an iterable with type {type(compressor)} instead." raise TypeError(msg) _compressor = parse_compressor(compressor) if filters is None: _filters = None elif filters == "auto": _filters = default_filters_v2(dtype) else: if isinstance(filters, Iterable): for idx, f in enumerate(filters): if not _is_numcodec(f): msg = ( "For Zarr format 2 arrays, all elements of `filters` must be numcodecs codecs. " f"Element at index {idx} has type {type(f)}, which is not a numcodecs codec." 
) raise TypeError(msg) _filters = parse_filters(filters) if isinstance(dtype, HasObjectCodec): # check the filters and the compressor for the object codec required for this data type if _filters is None: if _compressor is None: object_codec_id = None else: object_codec_id = get_object_codec_id((_compressor.get_config(),)) else: object_codec_id = get_object_codec_id( ( *[f.get_config() for f in _filters], _compressor.get_config() if _compressor is not None else None, ) ) if object_codec_id is None: if isinstance(dtype, VariableLengthUTF8): # type: ignore[unreachable] codec_name = "the numcodecs.VLenUTF8 codec" # type: ignore[unreachable] elif isinstance(dtype, VariableLengthBytes): # type: ignore[unreachable] codec_name = "the numcodecs.VLenBytes codec" # type: ignore[unreachable] else: codec_name = f"an unknown object codec with id {dtype.object_codec_id!r}" msg = ( f"Data type {dtype} requires {codec_name}, " "but no such codec was specified in the filters or compressor parameters for " "this array. " ) raise ValueError(msg) return _filters, _compressor def _parse_chunk_encoding_v3( *, compressors: CompressorsLike, filters: FiltersLike, serializer: SerializerLike, dtype: ZDType[TBaseDType, TBaseScalar], ) -> tuple[tuple[ArrayArrayCodec, ...], ArrayBytesCodec, tuple[BytesBytesCodec, ...]]: """ Generate chunk encoding classes for v3 arrays with optional defaults. """ if filters is None: out_array_array: tuple[ArrayArrayCodec, ...] = () elif filters == "auto": out_array_array = default_filters_v3(dtype) else: maybe_array_array: Iterable[Codec | dict[str, JSON]] if isinstance(filters, dict | Codec): maybe_array_array = (filters,) else: maybe_array_array = cast("Iterable[Codec | dict[str, JSON]]", filters) out_array_array = tuple(_parse_array_array_codec(c) for c in maybe_array_array) if serializer == "auto": out_array_bytes = default_serializer_v3(dtype) else: # TODO: ensure that the serializer is compatible with the ndarray produced by the # array-array codecs. For example, if a sequence of array-array codecs produces an # array with a single-byte data type, then the serializer should not specify endiannesss. out_array_bytes = _parse_array_bytes_codec(serializer) if compressors is None: out_bytes_bytes: tuple[BytesBytesCodec, ...] = () elif compressors == "auto": out_bytes_bytes = default_compressors_v3(dtype) else: maybe_bytes_bytes: Iterable[Codec | dict[str, JSON]] if isinstance(compressors, dict | Codec): maybe_bytes_bytes = (compressors,) else: maybe_bytes_bytes = cast("Iterable[Codec | dict[str, JSON]]", compressors) out_bytes_bytes = tuple(_parse_bytes_bytes_codec(c) for c in maybe_bytes_bytes) # TODO: ensure that the serializer is compatible with the ndarray produced by the # array-array codecs. For example, if a sequence of array-array codecs produces an # array with a single-byte data type, then the serializer should not specify endiannesss. # TODO: add checks to ensure that the right serializer is used for vlen data types return out_array_array, out_array_bytes, out_bytes_bytes def _parse_deprecated_compressor( compressor: CompressorLike | None, compressors: CompressorsLike, zarr_format: int = 3 ) -> CompressorsLike | None: if compressor != "auto": if compressors != "auto": raise ValueError("Cannot specify both `compressor` and `compressors`.") if zarr_format == 3: warn( "The `compressor` argument is deprecated. 
Use `compressors` instead.", category=ZarrUserWarning, stacklevel=2, ) if compressor is None: # "no compression" compressors = () else: compressors = (compressor,) elif zarr_format == 2 and compressor == compressors == "auto": compressors = ({"id": "blosc"},) return compressors def _parse_data_params( *, data: np.ndarray[Any, np.dtype[Any]] | None, shape: ShapeLike | None, dtype: ZDTypeLike | None, ) -> tuple[np.ndarray[Any, np.dtype[Any]] | None, ShapeLike, ZDTypeLike]: """ Ensure an array-like ``data`` parameter is consistent with the ``dtype`` and ``shape`` parameters. """ if data is None: if shape is None: msg = ( "The data parameter was set to None, but shape was not specified. " "Either provide a value for data, or specify shape." ) raise ValueError(msg) shape_out = shape if dtype is None: msg = ( "The data parameter was set to None, but dtype was not specified." "Either provide an array-like value for data, or specify dtype." ) raise ValueError(msg) dtype_out = dtype else: if shape is not None: msg = ( "The data parameter was used, but the shape parameter was also " "used. This is an error. Either use the data parameter, or the shape parameter, " "but not both." ) raise ValueError(msg) shape_out = data.shape if dtype is not None: msg = ( "The data parameter was used, but the dtype parameter was also " "used. This is an error. Either use the data parameter, or the dtype parameter, " "but not both." ) raise ValueError(msg) dtype_out = data.dtype return data, shape_out, dtype_out def _iter_chunk_coords( array: AnyArray | AnyAsyncArray, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None, ) -> Iterator[tuple[int, ...]]: """ Create an iterator over the coordinates of chunks in chunk grid space. If the `origin` keyword is used, iteration will start at the chunk index specified by `origin`. The default behavior is to start at the origin of the grid coordinate space. If the `selection_shape` keyword is used, iteration will be bounded over a contiguous region ranging from `[origin, origin selection_shape]`, where the upper bound is exclusive as per python indexing conventions. Parameters ---------- array : Array | AsyncArray The array to iterate over. origin : Sequence[int] | None, default=None The origin of the selection in grid coordinates. selection_shape : Sequence[int] | None, default=None The shape of the selection in grid coordinates. Yields ------ chunk_coords: tuple[int, ...] The coordinates of each chunk in the selection. """ return _iter_grid(array._chunk_grid_shape, origin=origin, selection_shape=selection_shape) def _iter_shard_coords( array: AnyArray | AnyAsyncArray, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None, ) -> Iterator[tuple[int, ...]]: """ Create an iterator over the coordinates of shards in shard grid space. If the `origin` keyword is used, iteration will start at the shard index specified by `origin`. The default behavior is to start at the origin of the grid coordinate space. If the `selection_shape` keyword is used, iteration will be bounded over a contiguous region ranging from `[origin, origin selection_shape]`, where the upper bound is exclusive as per python indexing conventions. Parameters ---------- array : Array | AsyncArray The array to iterate over. origin : Sequence[int] | None, default=None The origin of the selection in grid coordinates. selection_shape : Sequence[int] | None, default=None The shape of the selection in grid coordinates. Yields ------ chunk_coords: tuple[int, ...] 
The coordinates of each shard in the selection. """ return _iter_grid(array._shard_grid_shape, origin=origin, selection_shape=selection_shape) def _iter_shard_keys( array: AnyArray | AnyAsyncArray, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None, ) -> Iterator[str]: """ Iterate over the storage keys of each shard, relative to an optional origin, and optionally limited to a contiguous region in shard grid coordinates. Parameters ---------- array : Array | AsyncArray The array to iterate over. origin : Sequence[int] | None, default=None The origin of the selection in grid coordinates. selection_shape : Sequence[int] | None, default=None The shape of the selection in grid coordinates. Yields ------ key: str The storage key of each chunk in the selection. """ # Iterate over the coordinates of chunks in chunk grid space. _iter = _iter_grid(array._shard_grid_shape, origin=origin, selection_shape=selection_shape) return (array.metadata.encode_chunk_key(k) for k in _iter) def _iter_shard_regions( array: AnyArray | AnyAsyncArray, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None, ) -> Iterator[tuple[slice, ...]]: """ Iterate over the regions spanned by each shard. These are the smallest regions of the array that are safe to write concurrently. Parameters ---------- array : Array | AsyncArray The array to iterate over. origin : Sequence[int] | None, default=None The origin of the selection relative to the array's shard grid. selection_shape : Sequence[int] | None, default=None The shape of the selection in shard grid coordinates. Yields ------ region: tuple[slice, ...] A tuple of slice objects representing the region spanned by each shard in the selection. """ if array.shards is None: shard_shape = array.chunks else: shard_shape = array.shards return _iter_regions( array.shape, shard_shape, origin=origin, selection_shape=selection_shape, trim_excess=True ) def _iter_chunk_regions( array: AnyArray | AnyAsyncArray, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None, ) -> Iterator[tuple[slice, ...]]: """ Iterate over the regions spanned by each shard. These are the smallest regions of the array that are efficient to read concurrently. Parameters ---------- array : Array | AsyncArray The array to iterate over. origin : Sequence[int] | None, default=None The origin of the selection in grid coordinates. selection_shape : Sequence[int] | None, default=None The shape of the selection in grid coordinates. Returns ------- region: tuple[slice, ...] A tuple of slice objects representing the region spanned by each shard in the selection. """ return _iter_regions( array.shape, array.chunks, origin=origin, selection_shape=selection_shape, trim_excess=True ) zarr-python-3.1.5/src/zarr/core/array_spec.py000066400000000000000000000070771511007055700212300ustar00rootroot00000000000000from __future__ import annotations from dataclasses import dataclass, fields from typing import TYPE_CHECKING, Any, Literal, Self, TypedDict, cast from zarr.core.common import ( MemoryOrder, parse_bool, parse_fill_value, parse_order, parse_shapelike, ) from zarr.core.config import config as zarr_config if TYPE_CHECKING: from typing import NotRequired from zarr.core.buffer import BufferPrototype from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType class ArrayConfigParams(TypedDict): """ A TypedDict model of the attributes of an ArrayConfig class, but with no required fields. 
This allows for partial construction of an ArrayConfig, with the assumption that the unset keys will be taken from a global configuration. """ order: NotRequired[MemoryOrder] write_empty_chunks: NotRequired[bool] @dataclass(frozen=True) class ArrayConfig: """ A model of the runtime configuration of an array. Parameters ---------- order : MemoryOrder The memory layout of the arrays returned when reading data from the store. write_empty_chunks : bool If True, empty chunks will be written to the store. """ order: MemoryOrder write_empty_chunks: bool def __init__(self, order: MemoryOrder, write_empty_chunks: bool) -> None: order_parsed = parse_order(order) write_empty_chunks_parsed = parse_bool(write_empty_chunks) object.__setattr__(self, "order", order_parsed) object.__setattr__(self, "write_empty_chunks", write_empty_chunks_parsed) @classmethod def from_dict(cls, data: ArrayConfigParams) -> Self: """ Create an ArrayConfig from a dict. The keys of that dict are a subset of the attributes of the ArrayConfig class. Any keys missing from that dict will be set to the the values in the ``array`` namespace of ``zarr.config``. """ kwargs_out: ArrayConfigParams = {} for f in fields(ArrayConfig): field_name = cast("Literal['order', 'write_empty_chunks']", f.name) if field_name not in data: kwargs_out[field_name] = zarr_config.get(f"array.{field_name}") else: kwargs_out[field_name] = data[field_name] return cls(**kwargs_out) ArrayConfigLike = ArrayConfig | ArrayConfigParams def parse_array_config(data: ArrayConfigLike | None) -> ArrayConfig: """ Convert various types of data to an ArrayConfig. """ if data is None: return ArrayConfig.from_dict({}) elif isinstance(data, ArrayConfig): return data else: return ArrayConfig.from_dict(data) @dataclass(frozen=True) class ArraySpec: shape: tuple[int, ...] 
dtype: ZDType[TBaseDType, TBaseScalar] fill_value: Any config: ArrayConfig prototype: BufferPrototype def __init__( self, shape: tuple[int, ...], dtype: ZDType[TBaseDType, TBaseScalar], fill_value: Any, config: ArrayConfig, prototype: BufferPrototype, ) -> None: shape_parsed = parse_shapelike(shape) fill_value_parsed = parse_fill_value(fill_value) object.__setattr__(self, "shape", shape_parsed) object.__setattr__(self, "dtype", dtype) object.__setattr__(self, "fill_value", fill_value_parsed) object.__setattr__(self, "config", config) object.__setattr__(self, "prototype", prototype) @property def ndim(self) -> int: return len(self.shape) @property def order(self) -> MemoryOrder: return self.config.order zarr-python-3.1.5/src/zarr/core/attributes.py000066400000000000000000000032271511007055700212570ustar00rootroot00000000000000from __future__ import annotations from collections.abc import MutableMapping from typing import TYPE_CHECKING from zarr.core.common import JSON if TYPE_CHECKING: from collections.abc import Iterator from zarr.core.group import Group from zarr.types import AnyArray class Attributes(MutableMapping[str, JSON]): def __init__(self, obj: AnyArray | Group) -> None: # key=".zattrs", read_only=False, cache=True, synchronizer=None self._obj = obj def __getitem__(self, key: str) -> JSON: return self._obj.metadata.attributes[key] def __setitem__(self, key: str, value: JSON) -> None: new_attrs = dict(self._obj.metadata.attributes) new_attrs[key] = value self._obj = self._obj.update_attributes(new_attrs) def __delitem__(self, key: str) -> None: new_attrs = dict(self._obj.metadata.attributes) del new_attrs[key] self.put(new_attrs) def __iter__(self) -> Iterator[str]: return iter(self._obj.metadata.attributes) def __len__(self) -> int: return len(self._obj.metadata.attributes) def put(self, d: dict[str, JSON]) -> None: """ Overwrite all attributes with the values from `d`. Equivalent to the following pseudo-code, but performed atomically. ```python attrs = {"a": 1, "b": 2} attrs.clear() attrs.update({"a": "3", "c": 4}) print(attrs) #> {'a': '3', 'c': 4} ``` """ self._obj.metadata.attributes.clear() self._obj = self._obj.update_attributes(d) def asdict(self) -> dict[str, JSON]: return dict(self._obj.metadata.attributes) zarr-python-3.1.5/src/zarr/core/buffer/000077500000000000000000000000001511007055700177645ustar00rootroot00000000000000zarr-python-3.1.5/src/zarr/core/buffer/__init__.py000066400000000000000000000006451511007055700221020ustar00rootroot00000000000000from zarr.core.buffer.core import ( ArrayLike, Buffer, BufferPrototype, NDArrayLike, NDArrayLikeOrScalar, NDBuffer, default_buffer_prototype, ) from zarr.core.buffer.cpu import numpy_buffer_prototype __all__ = [ "ArrayLike", "Buffer", "BufferPrototype", "NDArrayLike", "NDArrayLikeOrScalar", "NDBuffer", "default_buffer_prototype", "numpy_buffer_prototype", ] zarr-python-3.1.5/src/zarr/core/buffer/core.py000066400000000000000000000431521511007055700212730ustar00rootroot00000000000000from __future__ import annotations import sys from abc import ABC, abstractmethod from collections.abc import Iterable from typing import ( TYPE_CHECKING, Any, Literal, NamedTuple, Protocol, SupportsIndex, cast, runtime_checkable, ) import numpy as np import numpy.typing as npt if TYPE_CHECKING: from collections.abc import Iterable, Sequence from typing import Self from zarr.codecs.bytes import Endian from zarr.core.common import BytesLike # Everything here is imported into ``zarr.core.buffer`` namespace. 
__all__: list[str] = [] @runtime_checkable class ArrayLike(Protocol): """Protocol for the array-like type that underlie Buffer""" @property def dtype(self) -> np.dtype[Any]: ... @property def ndim(self) -> int: ... @property def size(self) -> int: ... def __getitem__(self, key: slice) -> Self: ... def __setitem__(self, key: slice, value: Any) -> None: ... @runtime_checkable class NDArrayLike(Protocol): """Protocol for the nd-array-like type that underlie NDBuffer""" @property def dtype(self) -> np.dtype[Any]: ... @property def ndim(self) -> int: ... @property def size(self) -> int: ... @property def shape(self) -> tuple[int, ...]: ... def __len__(self) -> int: ... def __getitem__(self, key: slice) -> Self: ... def __setitem__(self, key: slice, value: Any) -> None: ... def __array__(self) -> npt.NDArray[Any]: ... def reshape( self, shape: tuple[int, ...] | Literal[-1], *, order: Literal["A", "C", "F"] = ... ) -> Self: ... def view(self, dtype: npt.DTypeLike) -> Self: ... def astype( self, dtype: npt.DTypeLike, order: Literal["K", "A", "C", "F"] = ..., *, copy: bool = ..., ) -> Self: ... def fill(self, value: Any) -> None: ... def copy(self) -> Self: ... def transpose(self, axes: SupportsIndex | Sequence[SupportsIndex] | None) -> Self: ... def ravel(self, order: Literal["K", "A", "C", "F"] = ...) -> Self: ... def all(self) -> bool: ... def __eq__(self, other: object) -> Self: # type: ignore[override] """Element-wise equal Notes ----- Type checkers such as mypy complains because the return type isn't a bool like its supertype "object", which violates the Liskov substitution principle. This is true, but since NumPy's ndarray is defined as an element-wise equal, our hands are tied. """ ScalarType = int | float | complex | bytes | str | bool | np.generic NDArrayLikeOrScalar = ScalarType | NDArrayLike def check_item_key_is_1d_contiguous(key: Any) -> None: """Raises error if `key` isn't a 1d contiguous slice""" if not isinstance(key, slice): raise TypeError( f"Item key has incorrect type (expected slice, got {key.__class__.__name__})" ) if not (key.step is None or key.step == 1): raise ValueError("slice must be contiguous") class Buffer(ABC): """A flat contiguous memory block We use Buffer throughout Zarr to represent a contiguous block of memory. A Buffer is backed by an underlying array-like instance that represents the memory. The memory type is unspecified; can be regular host memory, CUDA device memory, or something else. The only requirement is that the array-like instance can be copied/converted to a regular Numpy array (host memory). Notes ----- This buffer is untyped, so all indexing and sizes are in bytes. Parameters ---------- array_like array-like object that must be 1-dim, contiguous, and byte dtype. 
""" def __init__(self, array_like: ArrayLike) -> None: if array_like.ndim != 1: raise ValueError("array_like: only 1-dim allowed") if array_like.dtype != np.dtype("B"): raise ValueError("array_like: only byte dtype allowed") self._data = array_like @classmethod @abstractmethod def create_zero_length(cls) -> Self: """Create an empty buffer with length zero Returns ------- New empty 0-length buffer """ if cls is Buffer: raise NotImplementedError("Cannot call abstract method on the abstract class 'Buffer'") return cls( cast("ArrayLike", None) ) # This line will never be reached, but it satisfies the type checker @classmethod def from_array_like(cls, array_like: ArrayLike) -> Self: """Create a new buffer of an array-like object Parameters ---------- array_like array-like object that must be 1-dim, contiguous, and byte dtype. Returns ------- New buffer representing `array_like` """ return cls(array_like) @classmethod @abstractmethod def from_buffer(cls, buffer: Buffer) -> Self: """Create a new buffer of an existing Buffer This is useful if you want to ensure that an existing buffer is of the correct subclass of Buffer. E.g., MemoryStore uses this to return a buffer instance of the subclass specified by its BufferPrototype argument. Typically, this only copies data if the data has to be moved between memory types, such as from host to device memory. Parameters ---------- buffer buffer object. Returns ------- A new buffer representing the content of the input buffer Notes ----- Subclasses of `Buffer` must override this method to implement more optimal conversions that avoid copies where possible """ if cls is Buffer: raise NotImplementedError("Cannot call abstract method on the abstract class 'Buffer'") return cls( cast("ArrayLike", None) ) # This line will never be reached, but it satisfies the type checker @classmethod @abstractmethod def from_bytes(cls, bytes_like: BytesLike) -> Self: """Create a new buffer of a bytes-like object (host memory) Parameters ---------- bytes_like bytes-like object Returns ------- New buffer representing `bytes_like` """ if cls is Buffer: raise NotImplementedError("Cannot call abstract method on the abstract class 'Buffer'") return cls( cast("ArrayLike", None) ) # This line will never be reached, but it satisfies the type checker def as_array_like(self) -> ArrayLike: """Returns the underlying array (host or device memory) of this buffer This will never copy data. Returns ------- The underlying 1d array such as a NumPy or CuPy array. """ return self._data @abstractmethod def as_numpy_array(self) -> npt.NDArray[Any]: """Returns the buffer as a NumPy array (host memory). Notes ----- Might have to copy data, consider using `.as_array_like()` instead. Returns ------- NumPy array of this buffer (might be a data copy) """ ... def as_buffer_like(self) -> BytesLike: """Returns the buffer as an object that implements the Python buffer protocol. Notes ----- Might have to copy data, since the implementation uses `.as_numpy_array()`. Returns ------- An object that implements the Python buffer protocol """ return memoryview(self.as_numpy_array()) # type: ignore[arg-type] def to_bytes(self) -> bytes: """Returns the buffer as `bytes` (host memory). Warnings -------- Will always copy data, only use this method for small buffers such as metadata buffers. If possible, use `.as_numpy_array()` or `.as_array_like()` instead. 
Returns ------- `bytes` of this buffer (data copy) """ return bytes(self.as_numpy_array()) def __getitem__(self, key: slice) -> Self: check_item_key_is_1d_contiguous(key) return self.__class__(self._data.__getitem__(key)) def __setitem__(self, key: slice, value: Any) -> None: check_item_key_is_1d_contiguous(key) self._data.__setitem__(key, value) def __len__(self) -> int: return self._data.size @abstractmethod def combine(self, others: Iterable[Buffer]) -> Self: """Concatenate many buffers""" ... def __add__(self, other: Buffer) -> Self: """Concatenate two buffers""" return self.combine([other]) def __eq__(self, other: object) -> bool: # Another Buffer class can override this to choose a more efficient path return isinstance(other, Buffer) and np.array_equal( self.as_numpy_array(), other.as_numpy_array() ) class NDBuffer: """An n-dimensional memory block We use NDBuffer throughout Zarr to represent a n-dimensional memory block. A NDBuffer is backed by an underlying ndarray-like instance that represents the memory. The memory type is unspecified; can be regular host memory, CUDA device memory, or something else. The only requirement is that the ndarray-like instance can be copied/converted to a regular Numpy array (host memory). Notes ----- The two buffer classes Buffer and NDBuffer are very similar. In fact, Buffer is a special case of NDBuffer where dim=1, stride=1, and dtype="B". However, in order to use Python's type system to differentiate between the contiguous Buffer and the n-dim (non-contiguous) NDBuffer, we keep the definition of the two classes separate. Parameters ---------- array : ndarray_like ndarray-like object that is convertible to a regular Numpy array. """ def __init__(self, array: NDArrayLike) -> None: self._data = array @classmethod @abstractmethod def create( cls, *, shape: Iterable[int], dtype: npt.DTypeLike, order: Literal["C", "F"] = "C", fill_value: Any | None = None, ) -> Self: """Create a new buffer and its underlying ndarray-like object Parameters ---------- shape The shape of the buffer and its underlying ndarray-like object dtype The datatype of the buffer and its underlying ndarray-like object order Whether to store multi-dimensional data in row-major (C-style) or column-major (Fortran-style) order in memory. fill_value If not None, fill the new buffer with a scalar value. Returns ------- New buffer representing a new ndarray_like object Notes ----- A subclass can overwrite this method to create a ndarray-like object other then the default Numpy array. """ if cls is NDBuffer: raise NotImplementedError( "Cannot call abstract method on the abstract class 'NDBuffer'" ) return cls( cast("NDArrayLike", None) ) # This line will never be reached, but it satisfies the type checker @classmethod def empty( cls, shape: tuple[int, ...], dtype: npt.DTypeLike, order: Literal["C", "F"] = "C" ) -> Self: """ Create an empty buffer with the given shape, dtype, and order. This method can be faster than ``NDBuffer.create`` because it doesn't have to initialize the memory used by the underlying ndarray-like object. Parameters ---------- shape The shape of the buffer and its underlying ndarray-like object dtype The datatype of the buffer and its underlying ndarray-like object order Whether to store multi-dimensional data in row-major (C-style) or column-major (Fortran-style) order in memory. Returns ------- buffer New buffer representing a new ndarray_like object with empty data. See Also -------- NDBuffer.create Create a new buffer with some initial fill value. 
""" # Implementations should override this method if they have a faster way # to allocate an empty buffer. return cls.create(shape=shape, dtype=dtype, order=order) @classmethod def from_ndarray_like(cls, ndarray_like: NDArrayLike) -> Self: """Create a new buffer of a ndarray-like object Parameters ---------- ndarray_like ndarray-like object Returns ------- New buffer representing `ndarray_like` """ return cls(ndarray_like) @classmethod @abstractmethod def from_numpy_array(cls, array_like: npt.ArrayLike) -> Self: """Create a new buffer of Numpy array-like object Parameters ---------- array_like Object that can be coerced into a Numpy array Returns ------- New buffer representing `array_like` """ if cls is NDBuffer: raise NotImplementedError( "Cannot call abstract method on the abstract class 'NDBuffer'" ) return cls( cast("NDArrayLike", None) ) # This line will never be reached, but it satisfies the type checker def as_ndarray_like(self) -> NDArrayLike: """Returns the underlying array (host or device memory) of this buffer This will never copy data. Returns ------- The underlying array such as a NumPy or CuPy array. """ return self._data @abstractmethod def as_numpy_array(self) -> npt.NDArray[Any]: """Returns the buffer as a NumPy array (host memory). Warnings -------- Might have to copy data, consider using `.as_ndarray_like()` instead. Returns ------- NumPy array of this buffer (might be a data copy) """ ... def as_scalar(self) -> ScalarType: """Returns the buffer as a scalar value""" if self._data.size != 1: raise ValueError("Buffer does not contain a single scalar value") return cast("ScalarType", self.as_numpy_array()[()]) @property def dtype(self) -> np.dtype[Any]: return self._data.dtype @property def shape(self) -> tuple[int, ...]: return self._data.shape @property def byteorder(self) -> Endian: from zarr.codecs.bytes import Endian if self.dtype.byteorder == "<": return Endian.little elif self.dtype.byteorder == ">": return Endian.big else: return Endian(sys.byteorder) def reshape(self, newshape: tuple[int, ...] | Literal[-1]) -> Self: return self.__class__(self._data.reshape(newshape)) def squeeze(self, axis: tuple[int, ...]) -> Self: newshape = tuple(a for i, a in enumerate(self.shape) if i not in axis) return self.__class__(self._data.reshape(newshape)) def astype(self, dtype: npt.DTypeLike, order: Literal["K", "A", "C", "F"] = "K") -> Self: return self.__class__(self._data.astype(dtype=dtype, order=order)) @abstractmethod def __getitem__(self, key: Any) -> Self: ... @abstractmethod def __setitem__(self, key: Any, value: Any) -> None: ... def __len__(self) -> int: return self._data.__len__() def __repr__(self) -> str: return f"" def all_equal(self, other: Any, equal_nan: bool = True) -> bool: """Compare to `other` using np.array_equal.""" if other is None: # Handle None fill_value for Zarr V2 return False # Handle positive and negative zero by comparing bit patterns: if ( np.asarray(other).dtype.kind == "f" and other == 0.0 and self._data.dtype.kind not in ("U", "S", "T", "O", "V") ): _data, other = np.broadcast_arrays(self._data, np.asarray(other, self._data.dtype)) void_dtype = "V" + str(_data.dtype.itemsize) return np.array_equal(_data.view(void_dtype), other.view(void_dtype)) # use array_equal to obtain equal_nan=True functionality # Since fill-value is a scalar, isn't there a faster path than allocating a new array for fill value # every single time we have to write data? 
_data, other = np.broadcast_arrays(self._data, other) return np.array_equal( self._data, other, equal_nan=equal_nan if self._data.dtype.kind not in ("U", "S", "T", "O", "V") else False, ) def fill(self, value: Any) -> None: self._data.fill(value) def copy(self) -> Self: return self.__class__(self._data.copy()) def transpose(self, axes: SupportsIndex | Sequence[SupportsIndex] | None) -> Self: return self.__class__(self._data.transpose(axes)) class BufferPrototype(NamedTuple): """Prototype of the Buffer and NDBuffer class The protocol must be pickable. Attributes ---------- buffer The Buffer class to use when Zarr needs to create new Buffer. nd_buffer The NDBuffer class to use when Zarr needs to create new NDBuffer. """ buffer: type[Buffer] nd_buffer: type[NDBuffer] # The default buffer prototype used throughout the Zarr codebase. def default_buffer_prototype() -> BufferPrototype: from zarr.registry import ( get_buffer_class, get_ndbuffer_class, ) return BufferPrototype(buffer=get_buffer_class(), nd_buffer=get_ndbuffer_class()) zarr-python-3.1.5/src/zarr/core/buffer/cpu.py000066400000000000000000000170071511007055700211320ustar00rootroot00000000000000from __future__ import annotations from typing import ( TYPE_CHECKING, Any, Literal, ) import numpy as np import numpy.typing as npt from zarr.core.buffer import core from zarr.registry import ( register_buffer, register_ndbuffer, ) if TYPE_CHECKING: from collections.abc import Callable, Iterable from typing import Self from zarr.core.buffer.core import ArrayLike, NDArrayLike from zarr.core.common import BytesLike class Buffer(core.Buffer): """A flat contiguous memory block We use Buffer throughout Zarr to represent a contiguous block of memory. A Buffer is backed by a underlying array-like instance that represents the memory. The memory type is unspecified; can be regular host memory, CUDA device memory, or something else. The only requirement is that the array-like instance can be copied/converted to a regular Numpy array (host memory). Notes ----- This buffer is untyped, so all indexing and sizes are in bytes. Parameters ---------- array_like array-like object that must be 1-dim, contiguous, and byte dtype. """ def __init__(self, array_like: ArrayLike) -> None: super().__init__(array_like) @classmethod def create_zero_length(cls) -> Self: return cls(np.array([], dtype="B")) @classmethod def from_buffer(cls, buffer: core.Buffer) -> Self: """Create a new buffer of an existing Buffer This is useful if you want to ensure that an existing buffer is of the correct subclass of Buffer. E.g., MemoryStore uses this to return a buffer instance of the subclass specified by its BufferPrototype argument. Typically, this only copies data if the data has to be moved between memory types, such as from host to device memory. Parameters ---------- buffer buffer object. Returns ------- A new buffer representing the content of the input buffer Notes ----- Subclasses of `Buffer` must override this method to implement more optimal conversions that avoid copies where possible """ return cls.from_array_like(buffer.as_numpy_array()) @classmethod def from_bytes(cls, bytes_like: BytesLike) -> Self: """Create a new buffer of a bytes-like object (host memory) Parameters ---------- bytes_like bytes-like object Returns ------- New buffer representing `bytes_like` """ return cls.from_array_like(np.frombuffer(bytes_like, dtype="B")) def as_numpy_array(self) -> npt.NDArray[Any]: """Returns the buffer as a NumPy array (host memory). 
Notes ----- Might have to copy data, consider using `.as_array_like()` instead. Returns ------- NumPy array of this buffer (might be a data copy) """ return np.asanyarray(self._data) def combine(self, others: Iterable[core.Buffer]) -> Self: data = [np.asanyarray(self._data)] for buf in others: other_array = buf.as_array_like() assert other_array.dtype == np.dtype("B") data.append(np.asanyarray(other_array)) return self.__class__(np.concatenate(data)) class NDBuffer(core.NDBuffer): """An n-dimensional memory block We use NDBuffer throughout Zarr to represent a n-dimensional memory block. A NDBuffer is backed by a underlying ndarray-like instance that represents the memory. The memory type is unspecified; can be regular host memory, CUDA device memory, or something else. The only requirement is that the ndarray-like instance can be copied/converted to a regular Numpy array (host memory). Notes ----- The two buffer classes Buffer and NDBuffer are very similar. In fact, Buffer is a special case of NDBuffer where dim=1, stride=1, and dtype="B". However, in order to use Python's type system to differentiate between the contiguous Buffer and the n-dim (non-contiguous) NDBuffer, we keep the definition of the two classes separate. Parameters ---------- array ndarray-like object that is convertible to a regular Numpy array. """ def __init__(self, array: NDArrayLike) -> None: super().__init__(array) @classmethod def create( cls, *, shape: Iterable[int], dtype: npt.DTypeLike, order: Literal["C", "F"] = "C", fill_value: Any | None = None, ) -> Self: # np.zeros is much faster than np.full, and therefore using it when possible is better. if fill_value is None or (isinstance(fill_value, int) and fill_value == 0): return cls(np.zeros(shape=tuple(shape), dtype=dtype, order=order)) else: return cls(np.full(shape=tuple(shape), fill_value=fill_value, dtype=dtype, order=order)) @classmethod def empty( cls, shape: tuple[int, ...], dtype: npt.DTypeLike, order: Literal["C", "F"] = "C" ) -> Self: return cls(np.empty(shape=shape, dtype=dtype, order=order)) @classmethod def from_numpy_array(cls, array_like: npt.ArrayLike) -> Self: return cls.from_ndarray_like(np.asanyarray(array_like)) def as_numpy_array(self) -> npt.NDArray[Any]: """Returns the buffer as a NumPy array (host memory). Warnings -------- Might have to copy data, consider using `.as_ndarray_like()` instead. Returns ------- NumPy array of this buffer (might be a data copy) """ return np.asanyarray(self._data) def __getitem__(self, key: Any) -> Self: return self.__class__(np.asanyarray(self._data.__getitem__(key))) def __setitem__(self, key: Any, value: Any) -> None: if isinstance(value, NDBuffer): value = value._data self._data.__setitem__(key, value) def as_numpy_array_wrapper( func: Callable[[npt.NDArray[Any]], bytes], buf: core.Buffer, prototype: core.BufferPrototype ) -> core.Buffer: """Converts the input of `func` to a numpy array and the output back to `Buffer`. This function is useful when calling a `func` that only support host memory such as `GZip.decode` and `Blosc.decode`. In this case, use this wrapper to convert the input `buf` to a Numpy array and convert the result back into a `Buffer`. Parameters ---------- func The callable that will be called with the converted `buf` as input. `func` must return bytes, which will be converted into a `Buffer` before returned. buf The buffer that will be converted to a Numpy array before given as input to `func`. prototype The prototype of the output buffer. 
Returns ------- The result of `func` converted to a `Buffer` """ return prototype.buffer.from_bytes(func(buf.as_numpy_array())) # CPU buffer prototype using numpy arrays buffer_prototype = core.BufferPrototype(buffer=Buffer, nd_buffer=NDBuffer) # default_buffer_prototype = buffer_prototype # The numpy prototype used for E.g. when reading the shard index def numpy_buffer_prototype() -> core.BufferPrototype: return core.BufferPrototype(buffer=Buffer, nd_buffer=NDBuffer) register_buffer(Buffer, qualname="zarr.buffer.cpu.Buffer") register_ndbuffer(NDBuffer, qualname="zarr.buffer.cpu.NDBuffer") # backwards compatibility register_buffer(Buffer, qualname="zarr.core.buffer.cpu.Buffer") register_ndbuffer(NDBuffer, qualname="zarr.core.buffer.cpu.NDBuffer") zarr-python-3.1.5/src/zarr/core/buffer/gpu.py000066400000000000000000000171401511007055700211340ustar00rootroot00000000000000from __future__ import annotations import warnings from typing import ( TYPE_CHECKING, Any, Literal, cast, ) import numpy as np import numpy.typing as npt from zarr.core.buffer import core from zarr.core.buffer.core import ArrayLike, BufferPrototype, NDArrayLike from zarr.errors import ZarrUserWarning from zarr.registry import ( register_buffer, register_ndbuffer, ) if TYPE_CHECKING: from collections.abc import Iterable from typing import Self from zarr.core.common import BytesLike try: import cupy as cp except ImportError: cp = None class Buffer(core.Buffer): """A flat contiguous memory block on the GPU We use Buffer throughout Zarr to represent a contiguous block of memory. A Buffer is backed by an underlying array-like instance that represents the memory. The memory type is unspecified; can be regular host memory, CUDA device memory, or something else. The only requirement is that the array-like instance can be copied/converted to a regular Numpy array (host memory). Notes ----- This buffer is untyped, so all indexing and sizes are in bytes. Parameters ---------- array_like array-like object that must be 1-dim, contiguous, and byte dtype. """ def __init__(self, array_like: ArrayLike) -> None: if cp is None: raise ImportError( "Cannot use zarr.buffer.gpu.Buffer without cupy. Please install cupy." ) if array_like.ndim != 1: raise ValueError("array_like: only 1-dim allowed") if array_like.dtype != np.dtype("B"): raise ValueError("array_like: only byte dtype allowed") if not hasattr(array_like, "__cuda_array_interface__"): # Slow copy based path for arrays that don't support the __cuda_array_interface__ # TODO: Add a fast zero-copy path for arrays that support the dlpack protocol msg = ( "Creating a zarr.buffer.gpu.Buffer with an array that does not support the " "__cuda_array_interface__ for zero-copy transfers, " "falling back to slow copy based path" ) warnings.warn( msg, category=ZarrUserWarning, stacklevel=2, ) self._data = cp.asarray(array_like) @classmethod def create_zero_length(cls) -> Self: """Create an empty buffer with length zero Returns ------- New empty 0-length buffer """ return cls(cp.array([], dtype="B")) @classmethod def from_buffer(cls, buffer: core.Buffer) -> Self: """Create a GPU Buffer given an arbitrary Buffer This will try to be zero-copy if `buffer` is already on the GPU and will trigger a copy if not. 
Returns ------- New GPU Buffer constructed from `buffer` """ return cls(buffer.as_array_like()) @classmethod def from_bytes(cls, bytes_like: BytesLike) -> Self: return cls.from_array_like(cp.frombuffer(bytes_like, dtype="B")) def as_numpy_array(self) -> npt.NDArray[Any]: return cast("npt.NDArray[Any]", cp.asnumpy(self._data)) def combine(self, others: Iterable[core.Buffer]) -> Self: data = [cp.asanyarray(self._data)] for other in others: other_array = other.as_array_like() assert other_array.dtype == np.dtype("B") gpu_other = Buffer(other_array) gpu_other_array = gpu_other.as_array_like() data.append(cp.asanyarray(gpu_other_array)) return self.__class__(cp.concatenate(data)) class NDBuffer(core.NDBuffer): """A n-dimensional memory block on the GPU We use NDBuffer throughout Zarr to represent a n-dimensional memory block. A NDBuffer is backed by an underlying ndarray-like instance that represents the memory. The memory type is unspecified; can be regular host memory, CUDA device memory, or something else. The only requirement is that the ndarray-like instance can be copied/converted to a regular Numpy array (host memory). Notes ----- The two buffer classes Buffer and NDBuffer are very similar. In fact, Buffer is a special case of NDBuffer where dim=1, stride=1, and dtype="B". However, in order to use Python's type system to differentiate between the contiguous Buffer and the n-dim (non-contiguous) NDBuffer, we keep the definition of the two classes separate. Parameters ---------- array ndarray-like object that is convertible to a regular Numpy array. """ def __init__(self, array: NDArrayLike) -> None: if cp is None: raise ImportError( "Cannot use zarr.buffer.gpu.NDBuffer without cupy. Please install cupy." ) # assert array.ndim > 0 assert array.dtype != object self._data = array if not hasattr(array, "__cuda_array_interface__"): # Slow copy based path for arrays that don't support the __cuda_array_interface__ # TODO: Add a fast zero-copy path for arrays that support the dlpack protocol msg = ( "Creating a zarr.buffer.gpu.NDBuffer with an array that does not support the " "__cuda_array_interface__ for zero-copy transfers, " "falling back to slow copy based path" ) warnings.warn( msg, stacklevel=2, ) self._data = cp.asarray(array) @classmethod def create( cls, *, shape: Iterable[int], dtype: npt.DTypeLike, order: Literal["C", "F"] = "C", fill_value: Any | None = None, ) -> Self: ret = cls(cp.empty(shape=tuple(shape), dtype=dtype, order=order)) if fill_value is not None: ret.fill(fill_value) return ret @classmethod def empty( cls, shape: tuple[int, ...], dtype: npt.DTypeLike, order: Literal["C", "F"] = "C" ) -> Self: return cls(cp.empty(shape=shape, dtype=dtype, order=order)) @classmethod def from_numpy_array(cls, array_like: npt.ArrayLike) -> Self: """Create a new buffer of Numpy array-like object Parameters ---------- array_like Object that can be coerced into a Numpy array Returns ------- New buffer representing `array_like` """ return cls(cp.asarray(array_like)) def as_numpy_array(self) -> npt.NDArray[Any]: """Returns the buffer as a NumPy array (host memory). Warnings -------- Might have to copy data, consider using `.as_ndarray_like()` instead. 
Returns ------- NumPy array of this buffer (might be a data copy) """ return cast("npt.NDArray[Any]", cp.asnumpy(self._data)) def __getitem__(self, key: Any) -> Self: return self.__class__(self._data.__getitem__(key)) def __setitem__(self, key: Any, value: Any) -> None: if isinstance(value, NDBuffer): value = value._data elif isinstance(value, core.NDBuffer): gpu_value = NDBuffer(value.as_ndarray_like()) value = gpu_value._data self._data.__setitem__(key, value) buffer_prototype = BufferPrototype(buffer=Buffer, nd_buffer=NDBuffer) register_buffer(Buffer, qualname="zarr.buffer.gpu.Buffer") register_ndbuffer(NDBuffer, qualname="zarr.buffer.gpu.NDBuffer") # backwards compatibility register_buffer(Buffer, qualname="zarr.core.buffer.gpu.Buffer") register_ndbuffer(NDBuffer, qualname="zarr.core.buffer.gpu.NDBuffer") zarr-python-3.1.5/src/zarr/core/chunk_grids.py000066400000000000000000000241021511007055700213640ustar00rootroot00000000000000from __future__ import annotations import itertools import math import numbers import operator import warnings from abc import abstractmethod from dataclasses import dataclass from functools import reduce from typing import TYPE_CHECKING, Any, Literal import numpy as np import zarr from zarr.abc.metadata import Metadata from zarr.core.common import ( JSON, NamedConfig, ShapeLike, ceildiv, parse_named_configuration, parse_shapelike, ) from zarr.errors import ZarrUserWarning if TYPE_CHECKING: from collections.abc import Iterator from typing import Self from zarr.core.array import ShardsLike def _guess_chunks( shape: tuple[int, ...] | int, typesize: int, *, increment_bytes: int = 256 * 1024, min_bytes: int = 128 * 1024, max_bytes: int = 64 * 1024 * 1024, ) -> tuple[int, ...]: """ Iteratively guess an appropriate chunk layout for an array, given its shape and the size of each element in bytes, and size constraints expressed in bytes. This logic is adapted from h5py. Parameters ---------- shape : tuple[int, ...] The chunk shape. typesize : int The size, in bytes, of each element of the chunk. increment_bytes : int = 256 * 1024 The number of bytes used to increment or decrement the target chunk size in bytes. min_bytes : int = 128 * 1024 The soft lower bound on the final chunk size in bytes. max_bytes : int = 64 * 1024 * 1024 The hard upper bound on the final chunk size in bytes. Returns ------- tuple[int, ...] """ if isinstance(shape, int): shape = (shape,) if typesize == 0: return shape ndims = len(shape) # require chunks to have non-zero length for all dimensions chunks = np.maximum(np.array(shape, dtype="=f8"), 1) # Determine the optimal chunk size in bytes using a PyTables expression. # This is kept as a float. dset_size = np.prod(chunks) * typesize target_size = increment_bytes * (2 ** np.log10(dset_size / (1024.0 * 1024))) if target_size > max_bytes: target_size = max_bytes elif target_size < min_bytes: target_size = min_bytes idx = 0 while True: # Repeatedly loop over the axes, dividing them by 2. Stop when: # 1a. We're smaller than the target chunk size, OR # 1b. We're within 50% of the target chunk size, AND # 2. 
The chunk is smaller than the maximum chunk size chunk_bytes = np.prod(chunks) * typesize if ( chunk_bytes < target_size or abs(chunk_bytes - target_size) / target_size < 0.5 ) and chunk_bytes < max_bytes: break if np.prod(chunks) == 1: break # Element size larger than max_bytes chunks[idx % ndims] = math.ceil(chunks[idx % ndims] / 2.0) idx += 1 return tuple(int(x) for x in chunks) def normalize_chunks(chunks: Any, shape: tuple[int, ...], typesize: int) -> tuple[int, ...]: """Convenience function to normalize the `chunks` argument for an array with the given `shape`.""" # N.B., expect shape already normalized # handle auto-chunking if chunks is None or chunks is True: return _guess_chunks(shape, typesize) # handle no chunking if chunks is False: return shape # handle 1D convenience form if isinstance(chunks, numbers.Integral): chunks = tuple(int(chunks) for _ in shape) # handle dask-style chunks (iterable of iterables) if all(isinstance(c, (tuple | list)) for c in chunks): # take first chunk size for each dimension chunks = tuple( c[0] for c in chunks ) # TODO: check/error/warn for irregular chunks (e.g. if c[0] != c[1:-1]) # handle bad dimensionality if len(chunks) > len(shape): raise ValueError("too many dimensions in chunks") # handle underspecified chunks if len(chunks) < len(shape): # assume chunks across remaining dimensions chunks += shape[len(chunks) :] # handle None or -1 in chunks if -1 in chunks or None in chunks: chunks = tuple( s if c == -1 or c is None else int(c) for s, c in zip(shape, chunks, strict=False) ) if not all(isinstance(c, numbers.Integral) for c in chunks): raise TypeError("non integer value in chunks") return tuple(int(c) for c in chunks) @dataclass(frozen=True) class ChunkGrid(Metadata): @classmethod def from_dict(cls, data: dict[str, JSON] | ChunkGrid | NamedConfig[str, Any]) -> ChunkGrid: if isinstance(data, ChunkGrid): return data name_parsed, _ = parse_named_configuration(data) if name_parsed == "regular": return RegularChunkGrid._from_dict(data) raise ValueError(f"Unknown chunk grid. Got {name_parsed}.") @abstractmethod def all_chunk_coords(self, array_shape: tuple[int, ...]) -> Iterator[tuple[int, ...]]: pass @abstractmethod def get_nchunks(self, array_shape: tuple[int, ...]) -> int: pass @dataclass(frozen=True) class RegularChunkGrid(ChunkGrid): chunk_shape: tuple[int, ...] def __init__(self, *, chunk_shape: ShapeLike) -> None: chunk_shape_parsed = parse_shapelike(chunk_shape) object.__setattr__(self, "chunk_shape", chunk_shape_parsed) @classmethod def _from_dict(cls, data: dict[str, JSON] | NamedConfig[str, Any]) -> Self: _, configuration_parsed = parse_named_configuration(data, "regular") return cls(**configuration_parsed) # type: ignore[arg-type] def to_dict(self) -> dict[str, JSON]: return {"name": "regular", "configuration": {"chunk_shape": tuple(self.chunk_shape)}} def all_chunk_coords(self, array_shape: tuple[int, ...]) -> Iterator[tuple[int, ...]]: return itertools.product( *(range(ceildiv(s, c)) for s, c in zip(array_shape, self.chunk_shape, strict=False)) ) def get_nchunks(self, array_shape: tuple[int, ...]) -> int: return reduce( operator.mul, itertools.starmap(ceildiv, zip(array_shape, self.chunk_shape, strict=True)), 1, ) def _guess_num_chunks_per_axis_shard( chunk_shape: tuple[int, ...], item_size: int, max_bytes: int, array_shape: tuple[int, ...] ) -> int: """Generate the number of chunks per axis to hit a target max byte size for a shard. For example, for a (2,2,2) chunk size and item size 4, maximum bytes of 256 would return 2. 
In other words the shard would be a (2,2,2) grid of (2,2,2) chunks i.e., prod(chunk_shape) * (returned_val * len(chunk_shape)) * item_size = 256 bytes. Parameters ---------- chunk_shape The shape of the (inner) chunks. item_size The item size of the data i.e., 2 for uint16. max_bytes The maximum number of bytes per shard to allow. array_shape The shape of the underlying array. Returns ------- The number of chunks per axis. """ bytes_per_chunk = np.prod(chunk_shape) * item_size if max_bytes < bytes_per_chunk: return 1 num_axes = len(chunk_shape) chunks_per_shard = 1 # First check for byte size, second check to make sure we don't go bigger than the array shape while (bytes_per_chunk * ((chunks_per_shard + 1) ** num_axes)) <= max_bytes and all( c * (chunks_per_shard + 1) <= a for c, a in zip(chunk_shape, array_shape, strict=True) ): chunks_per_shard += 1 return chunks_per_shard def _auto_partition( *, array_shape: tuple[int, ...], chunk_shape: tuple[int, ...] | Literal["auto"], shard_shape: ShardsLike | None, item_size: int, ) -> tuple[tuple[int, ...] | None, tuple[int, ...]]: """ Automatically determine the shard shape and chunk shape for an array, given the shape and dtype of the array. If `shard_shape` is `None` and the chunk_shape is "auto", the chunks will be set heuristically based on the dtype and shape of the array. If `shard_shape` is "auto", then the shard shape will be set heuristically from the dtype and shape of the array; if the `chunk_shape` is also "auto", then the chunks will be set heuristically as well, given the dtype and shard shape. Otherwise, the chunks will be returned as-is. """ if shard_shape is None: _shards_out: None | tuple[int, ...] = None if chunk_shape == "auto": _chunks_out = _guess_chunks(array_shape, item_size) else: _chunks_out = chunk_shape else: if chunk_shape == "auto": # aim for a 1MiB chunk _chunks_out = _guess_chunks(array_shape, item_size, max_bytes=1024) else: _chunks_out = chunk_shape if shard_shape == "auto": warnings.warn( "Automatic shard shape inference is experimental and may change without notice.", ZarrUserWarning, stacklevel=2, ) _shards_out = () target_shard_size_bytes = zarr.config.get("array.target_shard_size_bytes", None) num_chunks_per_shard_axis = ( _guess_num_chunks_per_axis_shard( chunk_shape=_chunks_out, item_size=item_size, max_bytes=target_shard_size_bytes, array_shape=array_shape, ) if (has_auto_shard := (target_shard_size_bytes is not None)) else 2 ) for a_shape, c_shape in zip(array_shape, _chunks_out, strict=True): # The previous heuristic was `a_shape // c_shape > 8` and now, with target_shard_size_bytes, we only check that the shard size is less than the array size. 
can_shard_axis = a_shape // c_shape > 8 if not has_auto_shard else True if can_shard_axis: _shards_out += (c_shape * num_chunks_per_shard_axis,) else: _shards_out += (c_shape,) elif isinstance(shard_shape, dict): _shards_out = tuple(shard_shape["shape"]) else: _shards_out = shard_shape return _shards_out, _chunks_out zarr-python-3.1.5/src/zarr/core/chunk_key_encodings.py000066400000000000000000000104561511007055700231040ustar00rootroot00000000000000from __future__ import annotations from abc import ABC, abstractmethod from dataclasses import dataclass from typing import TYPE_CHECKING, Any, ClassVar, Literal, TypeAlias, TypedDict, cast if TYPE_CHECKING: from typing import NotRequired, Self from zarr.abc.metadata import Metadata from zarr.core.common import ( JSON, NamedConfig, parse_named_configuration, ) from zarr.registry import get_chunk_key_encoding_class, register_chunk_key_encoding SeparatorLiteral = Literal[".", "/"] def parse_separator(data: JSON) -> SeparatorLiteral: if data not in (".", "/"): raise ValueError(f"Expected an '.' or '/' separator. Got {data} instead.") return cast("SeparatorLiteral", data) class ChunkKeyEncodingParams(TypedDict): name: Literal["v2", "default"] separator: NotRequired[SeparatorLiteral] @dataclass(frozen=True) class ChunkKeyEncoding(ABC, Metadata): """ Defines how chunk coordinates are mapped to store keys. Subclasses must define a class variable `name` and implement `encode_chunk_key`. """ name: ClassVar[str] @classmethod def from_dict(cls, data: dict[str, JSON]) -> Self: _, config_parsed = parse_named_configuration(data, require_configuration=False) return cls(**config_parsed if config_parsed else {}) def to_dict(self) -> dict[str, JSON]: return {"name": self.name, "configuration": super().to_dict()} def decode_chunk_key(self, chunk_key: str) -> tuple[int, ...]: """ Optional: decode a chunk key string into chunk coordinates. Not required for normal operation; override if needed for testing or debugging. """ raise NotImplementedError(f"{self.__class__.__name__} does not implement decode_chunk_key.") @abstractmethod def encode_chunk_key(self, chunk_coords: tuple[int, ...]) -> str: """ Encode chunk coordinates into a chunk key string. Must be implemented by subclasses. """ ChunkKeyEncodingLike: TypeAlias = ( dict[str, JSON] | ChunkKeyEncodingParams | ChunkKeyEncoding | NamedConfig[str, Any] ) @dataclass(frozen=True) class DefaultChunkKeyEncoding(ChunkKeyEncoding): name: ClassVar[Literal["default"]] = "default" separator: SeparatorLiteral = "/" def __post_init__(self) -> None: separator_parsed = parse_separator(self.separator) object.__setattr__(self, "separator", separator_parsed) def decode_chunk_key(self, chunk_key: str) -> tuple[int, ...]: if chunk_key == "c": return () return tuple(map(int, chunk_key[1:].split(self.separator))) def encode_chunk_key(self, chunk_coords: tuple[int, ...]) -> str: return self.separator.join(map(str, ("c",) + chunk_coords)) @dataclass(frozen=True) class V2ChunkKeyEncoding(ChunkKeyEncoding): name: ClassVar[Literal["v2"]] = "v2" separator: SeparatorLiteral = "." 
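    # For illustration (derived from encode_chunk_key below): with the default "."
    # separator, chunk coordinates (1, 0, 3) encode to the store key "1.0.3", and the
    # empty coordinate tuple of a zero-dimensional array encodes to "0".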
def __post_init__(self) -> None: separator_parsed = parse_separator(self.separator) object.__setattr__(self, "separator", separator_parsed) def decode_chunk_key(self, chunk_key: str) -> tuple[int, ...]: return tuple(map(int, chunk_key.split(self.separator))) def encode_chunk_key(self, chunk_coords: tuple[int, ...]) -> str: chunk_identifier = self.separator.join(map(str, chunk_coords)) return "0" if chunk_identifier == "" else chunk_identifier def parse_chunk_key_encoding(data: ChunkKeyEncodingLike) -> ChunkKeyEncoding: """ Take an implicit specification of a chunk key encoding and parse it into a ChunkKeyEncoding object. """ if isinstance(data, ChunkKeyEncoding): return data # handle ChunkKeyEncodingParams if "name" in data and "separator" in data: data = {"name": data["name"], "configuration": {"separator": data["separator"]}} # type: ignore[typeddict-item] # Now must be a named config data = cast("dict[str, JSON]", data) name_parsed, _ = parse_named_configuration(data, require_configuration=False) try: chunk_key_encoding = get_chunk_key_encoding_class(name_parsed).from_dict(data) except KeyError as e: raise ValueError(f"Unknown chunk key encoding: {e.args[0]!r}") from e return chunk_key_encoding register_chunk_key_encoding("default", DefaultChunkKeyEncoding) register_chunk_key_encoding("v2", V2ChunkKeyEncoding) zarr-python-3.1.5/src/zarr/core/codec_pipeline.py000066400000000000000000000526231511007055700220370ustar00rootroot00000000000000from __future__ import annotations from dataclasses import dataclass from itertools import islice, pairwise from typing import TYPE_CHECKING, Any, TypeVar from warnings import warn from zarr.abc.codec import ( ArrayArrayCodec, ArrayBytesCodec, ArrayBytesCodecPartialDecodeMixin, ArrayBytesCodecPartialEncodeMixin, BytesBytesCodec, Codec, CodecPipeline, ) from zarr.core.common import concurrent_map from zarr.core.config import config from zarr.core.indexing import SelectorTuple, is_scalar from zarr.errors import ZarrUserWarning from zarr.registry import register_pipeline if TYPE_CHECKING: from collections.abc import Iterable, Iterator from typing import Self from zarr.abc.store import ByteGetter, ByteSetter from zarr.core.array_spec import ArraySpec from zarr.core.buffer import Buffer, BufferPrototype, NDBuffer from zarr.core.chunk_grids import ChunkGrid from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType T = TypeVar("T") U = TypeVar("U") def _unzip2(iterable: Iterable[tuple[T, U]]) -> tuple[list[T], list[U]]: out0: list[T] = [] out1: list[U] = [] for item0, item1 in iterable: out0.append(item0) out1.append(item1) return (out0, out1) def batched(iterable: Iterable[T], n: int) -> Iterable[tuple[T, ...]]: if n < 1: raise ValueError("n must be at least one") it = iter(iterable) while batch := tuple(islice(it, n)): yield batch def resolve_batched(codec: Codec, chunk_specs: Iterable[ArraySpec]) -> Iterable[ArraySpec]: return [codec.resolve_metadata(chunk_spec) for chunk_spec in chunk_specs] def fill_value_or_default(chunk_spec: ArraySpec) -> Any: fill_value = chunk_spec.fill_value if fill_value is None: # Zarr V2 allowed `fill_value` to be null in the metadata. # Zarr V3 requires it to be set. This has already been # validated when decoding the metadata, but we support reading # Zarr V2 data and need to support the case where fill_value # is None. return chunk_spec.dtype.default_scalar() else: return fill_value @dataclass(frozen=True) class BatchedCodecPipeline(CodecPipeline): """Default codec pipeline. 
This batched codec pipeline divides the chunk batches into batches of a configurable batch size ("mini-batch"). Fetching, decoding, encoding and storing are performed in lock step for each mini-batch. Multiple mini-batches are processing concurrently. """ array_array_codecs: tuple[ArrayArrayCodec, ...] array_bytes_codec: ArrayBytesCodec bytes_bytes_codecs: tuple[BytesBytesCodec, ...] batch_size: int def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: return type(self).from_codecs(c.evolve_from_array_spec(array_spec=array_spec) for c in self) @classmethod def from_codecs(cls, codecs: Iterable[Codec], *, batch_size: int | None = None) -> Self: array_array_codecs, array_bytes_codec, bytes_bytes_codecs = codecs_from_list(codecs) return cls( array_array_codecs=array_array_codecs, array_bytes_codec=array_bytes_codec, bytes_bytes_codecs=bytes_bytes_codecs, batch_size=batch_size or config.get("codec_pipeline.batch_size"), ) @property def supports_partial_decode(self) -> bool: """Determines whether the codec pipeline supports partial decoding. Currently, only codec pipelines with a single ArrayBytesCodec that supports partial decoding can support partial decoding. This limitation is due to the fact that ArrayArrayCodecs can change the slice selection leading to non-contiguous slices and BytesBytesCodecs can change the chunk bytes in a way that slice selections cannot be attributed to byte ranges anymore which renders partial decoding infeasible. This limitation may softened in the future.""" return (len(self.array_array_codecs) + len(self.bytes_bytes_codecs)) == 0 and isinstance( self.array_bytes_codec, ArrayBytesCodecPartialDecodeMixin ) @property def supports_partial_encode(self) -> bool: """Determines whether the codec pipeline supports partial encoding. Currently, only codec pipelines with a single ArrayBytesCodec that supports partial encoding can support partial encoding. This limitation is due to the fact that ArrayArrayCodecs can change the slice selection leading to non-contiguous slices and BytesBytesCodecs can change the chunk bytes in a way that slice selections cannot be attributed to byte ranges anymore which renders partial encoding infeasible. 
This limitation may softened in the future.""" return (len(self.array_array_codecs) + len(self.bytes_bytes_codecs)) == 0 and isinstance( self.array_bytes_codec, ArrayBytesCodecPartialEncodeMixin ) def __iter__(self) -> Iterator[Codec]: yield from self.array_array_codecs yield self.array_bytes_codec yield from self.bytes_bytes_codecs def validate( self, *, shape: tuple[int, ...], dtype: ZDType[TBaseDType, TBaseScalar], chunk_grid: ChunkGrid, ) -> None: for codec in self: codec.validate(shape=shape, dtype=dtype, chunk_grid=chunk_grid) def compute_encoded_size(self, byte_length: int, array_spec: ArraySpec) -> int: for codec in self: byte_length = codec.compute_encoded_size(byte_length, array_spec) array_spec = codec.resolve_metadata(array_spec) return byte_length def _codecs_with_resolved_metadata_batched( self, chunk_specs: Iterable[ArraySpec] ) -> tuple[ list[tuple[ArrayArrayCodec, list[ArraySpec]]], tuple[ArrayBytesCodec, list[ArraySpec]], list[tuple[BytesBytesCodec, list[ArraySpec]]], ]: aa_codecs_with_spec: list[tuple[ArrayArrayCodec, list[ArraySpec]]] = [] chunk_specs = list(chunk_specs) for aa_codec in self.array_array_codecs: aa_codecs_with_spec.append((aa_codec, chunk_specs)) chunk_specs = [aa_codec.resolve_metadata(chunk_spec) for chunk_spec in chunk_specs] ab_codec_with_spec = (self.array_bytes_codec, chunk_specs) chunk_specs = [ self.array_bytes_codec.resolve_metadata(chunk_spec) for chunk_spec in chunk_specs ] bb_codecs_with_spec: list[tuple[BytesBytesCodec, list[ArraySpec]]] = [] for bb_codec in self.bytes_bytes_codecs: bb_codecs_with_spec.append((bb_codec, chunk_specs)) chunk_specs = [bb_codec.resolve_metadata(chunk_spec) for chunk_spec in chunk_specs] return (aa_codecs_with_spec, ab_codec_with_spec, bb_codecs_with_spec) async def decode_batch( self, chunk_bytes_and_specs: Iterable[tuple[Buffer | None, ArraySpec]], ) -> Iterable[NDBuffer | None]: chunk_bytes_batch: Iterable[Buffer | None] chunk_bytes_batch, chunk_specs = _unzip2(chunk_bytes_and_specs) ( aa_codecs_with_spec, ab_codec_with_spec, bb_codecs_with_spec, ) = self._codecs_with_resolved_metadata_batched(chunk_specs) for bb_codec, chunk_spec_batch in bb_codecs_with_spec[::-1]: chunk_bytes_batch = await bb_codec.decode( zip(chunk_bytes_batch, chunk_spec_batch, strict=False) ) ab_codec, chunk_spec_batch = ab_codec_with_spec chunk_array_batch = await ab_codec.decode( zip(chunk_bytes_batch, chunk_spec_batch, strict=False) ) for aa_codec, chunk_spec_batch in aa_codecs_with_spec[::-1]: chunk_array_batch = await aa_codec.decode( zip(chunk_array_batch, chunk_spec_batch, strict=False) ) return chunk_array_batch async def decode_partial_batch( self, batch_info: Iterable[tuple[ByteGetter, SelectorTuple, ArraySpec]], ) -> Iterable[NDBuffer | None]: assert self.supports_partial_decode assert isinstance(self.array_bytes_codec, ArrayBytesCodecPartialDecodeMixin) return await self.array_bytes_codec.decode_partial(batch_info) async def encode_batch( self, chunk_arrays_and_specs: Iterable[tuple[NDBuffer | None, ArraySpec]], ) -> Iterable[Buffer | None]: chunk_array_batch: Iterable[NDBuffer | None] chunk_specs: Iterable[ArraySpec] chunk_array_batch, chunk_specs = _unzip2(chunk_arrays_and_specs) for aa_codec in self.array_array_codecs: chunk_array_batch = await aa_codec.encode( zip(chunk_array_batch, chunk_specs, strict=False) ) chunk_specs = resolve_batched(aa_codec, chunk_specs) chunk_bytes_batch = await self.array_bytes_codec.encode( zip(chunk_array_batch, chunk_specs, strict=False) ) chunk_specs = resolve_batched(self.array_bytes_codec, 
chunk_specs) for bb_codec in self.bytes_bytes_codecs: chunk_bytes_batch = await bb_codec.encode( zip(chunk_bytes_batch, chunk_specs, strict=False) ) chunk_specs = resolve_batched(bb_codec, chunk_specs) return chunk_bytes_batch async def encode_partial_batch( self, batch_info: Iterable[tuple[ByteSetter, NDBuffer, SelectorTuple, ArraySpec]], ) -> None: assert self.supports_partial_encode assert isinstance(self.array_bytes_codec, ArrayBytesCodecPartialEncodeMixin) await self.array_bytes_codec.encode_partial(batch_info) async def read_batch( self, batch_info: Iterable[tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple, bool]], out: NDBuffer, drop_axes: tuple[int, ...] = (), ) -> None: if self.supports_partial_decode: chunk_array_batch = await self.decode_partial_batch( [ (byte_getter, chunk_selection, chunk_spec) for byte_getter, chunk_spec, chunk_selection, *_ in batch_info ] ) for chunk_array, (_, chunk_spec, _, out_selection, _) in zip( chunk_array_batch, batch_info, strict=False ): if chunk_array is not None: out[out_selection] = chunk_array else: out[out_selection] = fill_value_or_default(chunk_spec) else: chunk_bytes_batch = await concurrent_map( [(byte_getter, array_spec.prototype) for byte_getter, array_spec, *_ in batch_info], lambda byte_getter, prototype: byte_getter.get(prototype), config.get("async.concurrency"), ) chunk_array_batch = await self.decode_batch( [ (chunk_bytes, chunk_spec) for chunk_bytes, (_, chunk_spec, *_) in zip( chunk_bytes_batch, batch_info, strict=False ) ], ) for chunk_array, (_, chunk_spec, chunk_selection, out_selection, _) in zip( chunk_array_batch, batch_info, strict=False ): if chunk_array is not None: tmp = chunk_array[chunk_selection] if drop_axes != (): tmp = tmp.squeeze(axis=drop_axes) out[out_selection] = tmp else: out[out_selection] = fill_value_or_default(chunk_spec) def _merge_chunk_array( self, existing_chunk_array: NDBuffer | None, value: NDBuffer, out_selection: SelectorTuple, chunk_spec: ArraySpec, chunk_selection: SelectorTuple, is_complete_chunk: bool, drop_axes: tuple[int, ...], ) -> NDBuffer: if ( is_complete_chunk and value.shape == chunk_spec.shape # Guard that this is not a partial chunk at the end with is_complete_chunk=True and value[out_selection].shape == chunk_spec.shape ): return value if existing_chunk_array is None: chunk_array = chunk_spec.prototype.nd_buffer.create( shape=chunk_spec.shape, dtype=chunk_spec.dtype.to_native_dtype(), order=chunk_spec.order, fill_value=fill_value_or_default(chunk_spec), ) else: chunk_array = existing_chunk_array.copy() # make a writable copy if chunk_selection == () or is_scalar( value.as_ndarray_like(), chunk_spec.dtype.to_native_dtype() ): chunk_value = value else: chunk_value = value[out_selection] # handle missing singleton dimensions if drop_axes != (): item = tuple( None # equivalent to np.newaxis if idx in drop_axes else slice(None) for idx in range(chunk_spec.ndim) ) chunk_value = chunk_value[item] chunk_array[chunk_selection] = chunk_value return chunk_array async def write_batch( self, batch_info: Iterable[tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple, bool]], value: NDBuffer, drop_axes: tuple[int, ...] 
= (), ) -> None: if self.supports_partial_encode: # Pass scalar values as is if len(value.shape) == 0: await self.encode_partial_batch( [ (byte_setter, value, chunk_selection, chunk_spec) for byte_setter, chunk_spec, chunk_selection, out_selection, _ in batch_info ], ) else: await self.encode_partial_batch( [ (byte_setter, value[out_selection], chunk_selection, chunk_spec) for byte_setter, chunk_spec, chunk_selection, out_selection, _ in batch_info ], ) else: # Read existing bytes if not total slice async def _read_key( byte_setter: ByteSetter | None, prototype: BufferPrototype ) -> Buffer | None: if byte_setter is None: return None return await byte_setter.get(prototype=prototype) chunk_bytes_batch: Iterable[Buffer | None] chunk_bytes_batch = await concurrent_map( [ ( None if is_complete_chunk else byte_setter, chunk_spec.prototype, ) for byte_setter, chunk_spec, chunk_selection, _, is_complete_chunk in batch_info ], _read_key, config.get("async.concurrency"), ) chunk_array_decoded = await self.decode_batch( [ (chunk_bytes, chunk_spec) for chunk_bytes, (_, chunk_spec, *_) in zip( chunk_bytes_batch, batch_info, strict=False ) ], ) chunk_array_merged = [ self._merge_chunk_array( chunk_array, value, out_selection, chunk_spec, chunk_selection, is_complete_chunk, drop_axes, ) for chunk_array, ( _, chunk_spec, chunk_selection, out_selection, is_complete_chunk, ) in zip(chunk_array_decoded, batch_info, strict=False) ] chunk_array_batch: list[NDBuffer | None] = [] for chunk_array, (_, chunk_spec, *_) in zip( chunk_array_merged, batch_info, strict=False ): if chunk_array is None: chunk_array_batch.append(None) # type: ignore[unreachable] else: if not chunk_spec.config.write_empty_chunks and chunk_array.all_equal( fill_value_or_default(chunk_spec) ): chunk_array_batch.append(None) else: chunk_array_batch.append(chunk_array) chunk_bytes_batch = await self.encode_batch( [ (chunk_array, chunk_spec) for chunk_array, (_, chunk_spec, *_) in zip( chunk_array_batch, batch_info, strict=False ) ], ) async def _write_key(byte_setter: ByteSetter, chunk_bytes: Buffer | None) -> None: if chunk_bytes is None: await byte_setter.delete() else: await byte_setter.set(chunk_bytes) await concurrent_map( [ (byte_setter, chunk_bytes) for chunk_bytes, (byte_setter, *_) in zip( chunk_bytes_batch, batch_info, strict=False ) ], _write_key, config.get("async.concurrency"), ) async def decode( self, chunk_bytes_and_specs: Iterable[tuple[Buffer | None, ArraySpec]], ) -> Iterable[NDBuffer | None]: output: list[NDBuffer | None] = [] for batch_info in batched(chunk_bytes_and_specs, self.batch_size): output.extend(await self.decode_batch(batch_info)) return output async def encode( self, chunk_arrays_and_specs: Iterable[tuple[NDBuffer | None, ArraySpec]], ) -> Iterable[Buffer | None]: output: list[Buffer | None] = [] for single_batch_info in batched(chunk_arrays_and_specs, self.batch_size): output.extend(await self.encode_batch(single_batch_info)) return output async def read( self, batch_info: Iterable[tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple, bool]], out: NDBuffer, drop_axes: tuple[int, ...] = (), ) -> None: await concurrent_map( [ (single_batch_info, out, drop_axes) for single_batch_info in batched(batch_info, self.batch_size) ], self.read_batch, config.get("async.concurrency"), ) async def write( self, batch_info: Iterable[tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple, bool]], value: NDBuffer, drop_axes: tuple[int, ...] 
= (), ) -> None: await concurrent_map( [ (single_batch_info, value, drop_axes) for single_batch_info in batched(batch_info, self.batch_size) ], self.write_batch, config.get("async.concurrency"), ) def codecs_from_list( codecs: Iterable[Codec], ) -> tuple[tuple[ArrayArrayCodec, ...], ArrayBytesCodec, tuple[BytesBytesCodec, ...]]: from zarr.codecs.sharding import ShardingCodec array_array: tuple[ArrayArrayCodec, ...] = () array_bytes_maybe: ArrayBytesCodec | None = None bytes_bytes: tuple[BytesBytesCodec, ...] = () if any(isinstance(codec, ShardingCodec) for codec in codecs) and len(tuple(codecs)) > 1: warn( "Combining a `sharding_indexed` codec disables partial reads and " "writes, which may lead to inefficient performance.", category=ZarrUserWarning, stacklevel=3, ) for prev_codec, cur_codec in pairwise((None, *codecs)): if isinstance(cur_codec, ArrayArrayCodec): if isinstance(prev_codec, ArrayBytesCodec | BytesBytesCodec): msg = ( f"Invalid codec order. ArrayArrayCodec {cur_codec}" "must be preceded by another ArrayArrayCodec. " f"Got {type(prev_codec)} instead." ) raise TypeError(msg) array_array += (cur_codec,) elif isinstance(cur_codec, ArrayBytesCodec): if isinstance(prev_codec, BytesBytesCodec): msg = ( f"Invalid codec order. ArrayBytes codec {cur_codec}" f" must be preceded by an ArrayArrayCodec. Got {type(prev_codec)} instead." ) raise TypeError(msg) if array_bytes_maybe is not None: msg = ( f"Got two instances of ArrayBytesCodec: {array_bytes_maybe} and {cur_codec}. " "Only one array-to-bytes codec is allowed." ) raise ValueError(msg) array_bytes_maybe = cur_codec elif isinstance(cur_codec, BytesBytesCodec): if isinstance(prev_codec, ArrayArrayCodec): msg = ( f"Invalid codec order. BytesBytesCodec {cur_codec}" "must be preceded by either another BytesBytesCodec, or an ArrayBytesCodec. " f"Got {type(prev_codec)} instead." ) bytes_bytes += (cur_codec,) else: raise TypeError if array_bytes_maybe is None: raise ValueError("Required ArrayBytesCodec was not found.") else: return array_array, array_bytes_maybe, bytes_bytes register_pipeline(BatchedCodecPipeline) zarr-python-3.1.5/src/zarr/core/common.py000066400000000000000000000166621511007055700203700ustar00rootroot00000000000000from __future__ import annotations import asyncio import functools import math import operator import warnings from collections.abc import Iterable, Mapping, Sequence from enum import Enum from itertools import starmap from typing import ( TYPE_CHECKING, Any, Final, Generic, Literal, NotRequired, TypedDict, TypeVar, cast, overload, ) from typing_extensions import ReadOnly from zarr.core.config import config as zarr_config from zarr.errors import ZarrRuntimeWarning if TYPE_CHECKING: from collections.abc import Awaitable, Callable, Iterator ZARR_JSON = "zarr.json" ZARRAY_JSON = ".zarray" ZGROUP_JSON = ".zgroup" ZATTRS_JSON = ".zattrs" ZMETADATA_V2_JSON = ".zmetadata" BytesLike = bytes | bytearray | memoryview ShapeLike = Iterable[int] | int # For backwards compatibility ChunkCoords = tuple[int, ...] 
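# --- Illustrative sketch (added commentary, not part of the original source) ---
# ``codecs_from_list`` in ``zarr.core.codec_pipeline`` (defined earlier in this
# archive) partitions a codec chain into (array->array codecs, exactly one
# array->bytes codec, bytes->bytes codecs) and rejects out-of-order chains.
# Assuming the public ``zarr.codecs`` classes, a valid chain could be split like:
#
#     from zarr.codecs import BytesCodec, GzipCodec, TransposeCodec
#     from zarr.core.codec_pipeline import codecs_from_list
#
#     aa, ab, bb = codecs_from_list(
#         [TransposeCodec(order=(1, 0)), BytesCodec(), GzipCodec(level=5)]
#     )
#     # aa -> (TransposeCodec(...),), ab -> BytesCodec(), bb -> (GzipCodec(...),)
#
# A chain such as ``[GzipCodec(), BytesCodec()]`` raises ``TypeError``, because a
# bytes->bytes codec may not precede the array->bytes codec.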
ZarrFormat = Literal[2, 3] NodeType = Literal["array", "group"] JSON = str | int | float | Mapping[str, "JSON"] | Sequence["JSON"] | None MemoryOrder = Literal["C", "F"] AccessModeLiteral = Literal["r", "r+", "a", "w", "w-"] ANY_ACCESS_MODE: Final = "r", "r+", "a", "w", "w-" DimensionNames = Iterable[str | None] | None TName = TypeVar("TName", bound=str) TConfig = TypeVar("TConfig", bound=Mapping[str, object]) class NamedConfig(TypedDict, Generic[TName, TConfig]): """ A typed dictionary representing an object with a name and configuration, where the configuration is an optional mapping of string keys to values, e.g. another typed dictionary or a JSON object. This class is generic with two type parameters: the type of the name (``TName``) and the type of the configuration (``TConfig``). """ name: ReadOnly[TName] """The name of the object.""" configuration: NotRequired[ReadOnly[TConfig]] """The configuration of the object. Not required.""" class NamedRequiredConfig(TypedDict, Generic[TName, TConfig]): """ A typed dictionary representing an object with a name and configuration, where the configuration is a mapping of string keys to values, e.g. another typed dictionary or a JSON object. This class is generic with two type parameters: the type of the name (``TName``) and the type of the configuration (``TConfig``). """ name: ReadOnly[TName] """The name of the object.""" configuration: ReadOnly[TConfig] """The configuration of the object.""" def product(tup: tuple[int, ...]) -> int: return functools.reduce(operator.mul, tup, 1) def ceildiv(a: float, b: float) -> int: if a == 0: return 0 return math.ceil(a / b) T = TypeVar("T", bound=tuple[Any, ...]) V = TypeVar("V") async def concurrent_map( items: Iterable[T], func: Callable[..., Awaitable[V]], limit: int | None = None, ) -> list[V]: if limit is None: return await asyncio.gather(*list(starmap(func, items))) else: sem = asyncio.Semaphore(limit) async def run(item: tuple[Any]) -> V: async with sem: return await func(*item) return await asyncio.gather(*[asyncio.ensure_future(run(item)) for item in items]) E = TypeVar("E", bound=Enum) def enum_names(enum: type[E]) -> Iterator[str]: for item in enum: yield item.name def parse_enum(data: object, cls: type[E]) -> E: if isinstance(data, cls): return data if not isinstance(data, str): raise TypeError(f"Expected str, got {type(data)}") if data in enum_names(cls): return cls(data) raise ValueError(f"Value must be one of {list(enum_names(cls))!r}. Got {data} instead.") def parse_name(data: JSON, expected: str | None = None) -> str: if isinstance(data, str): if expected is None or data == expected: return data raise ValueError(f"Expected '{expected}'. Got {data} instead.") else: raise TypeError(f"Expected a string, got an instance of {type(data)}.") def parse_configuration(data: JSON) -> JSON: if not isinstance(data, dict): raise TypeError(f"Expected dict, got {type(data)}") return data @overload def parse_named_configuration( data: JSON | NamedConfig[str, Any], expected_name: str | None = None ) -> tuple[str, dict[str, JSON]]: ... @overload def parse_named_configuration( data: JSON | NamedConfig[str, Any], expected_name: str | None = None, *, require_configuration: bool = True, ) -> tuple[str, dict[str, JSON] | None]: ... 
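# Illustrative sketch (added commentary, not part of the original source):
# ``parse_named_configuration`` (implemented just below) splits a
# name/configuration mapping into a ``(name, configuration)`` tuple, e.g.
#
#     parse_named_configuration({"name": "gzip", "configuration": {"level": 1}})
#     # -> ("gzip", {"level": 1})
#
#     parse_named_configuration({"name": "crc32c"}, require_configuration=False)
#     # -> ("crc32c", None)
#
# and it raises if ``expected_name`` is given and does not match ``data["name"]``.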
def parse_named_configuration( data: JSON | NamedConfig[str, Any], expected_name: str | None = None, *, require_configuration: bool = True, ) -> tuple[str, JSON | None]: if not isinstance(data, dict): raise TypeError(f"Expected dict, got {type(data)}") if "name" not in data: raise ValueError(f"Named configuration does not have a 'name' key. Got {data}.") name_parsed = parse_name(data["name"], expected_name) if "configuration" in data: configuration_parsed = parse_configuration(data["configuration"]) elif require_configuration: raise ValueError(f"Named configuration does not have a 'configuration' key. Got {data}.") else: configuration_parsed = None return name_parsed, configuration_parsed def parse_shapelike(data: ShapeLike) -> tuple[int, ...]: if isinstance(data, int): if data < 0: raise ValueError(f"Expected a non-negative integer. Got {data} instead") return (data,) try: data_tuple = tuple(data) except TypeError as e: msg = f"Expected an integer or an iterable of integers. Got {data} instead." raise TypeError(msg) from e if not all(isinstance(v, int) for v in data_tuple): msg = f"Expected an iterable of integers. Got {data} instead." raise TypeError(msg) if not all(v > -1 for v in data_tuple): msg = f"Expected all values to be non-negative. Got {data} instead." raise ValueError(msg) return data_tuple def parse_fill_value(data: Any) -> Any: # todo: real validation return data def parse_order(data: Any) -> Literal["C", "F"]: if data in ("C", "F"): return cast("Literal['C', 'F']", data) raise ValueError(f"Expected one of ('C', 'F'), got {data} instead.") def parse_bool(data: Any) -> bool: if isinstance(data, bool): return data raise ValueError(f"Expected bool, got {data} instead.") def _warn_write_empty_chunks_kwarg() -> None: # TODO: link to docs page on array configuration in this message msg = ( "The `write_empty_chunks` keyword argument is deprecated and will be removed in future versions. " "To control whether empty chunks are written to storage, either use the `config` keyword " "argument, as in `config={'write_empty_chunks': True}`," "or change the global 'array.write_empty_chunks' configuration variable." ) warnings.warn(msg, ZarrRuntimeWarning, stacklevel=2) def _warn_order_kwarg() -> None: # TODO: link to docs page on array configuration in this message msg = ( "The `order` keyword argument has no effect for Zarr format 3 arrays. " "To control the memory layout of the array, either use the `config` keyword " "argument, as in `config={'order': 'C'}`," "or change the global 'array.order' configuration variable." ) warnings.warn(msg, ZarrRuntimeWarning, stacklevel=2) def _default_zarr_format() -> ZarrFormat: """Return the default zarr_version""" return cast("ZarrFormat", int(zarr_config.get("default_zarr_format", 3))) zarr-python-3.1.5/src/zarr/core/config.py000066400000000000000000000142131511007055700203330ustar00rootroot00000000000000""" The config module is responsible for managing the configuration of zarr and is based on the Donfig python library. For selecting custom implementations of codecs, pipelines, buffers and ndbuffers, first register the implementations in the registry and then select them in the config. Example: An implementation of the bytes codec in a class ``your.module.NewBytesCodec`` requires the value of ``codecs.bytes`` to be ``your.module.NewBytesCodec``. Donfig can be configured programmatically, by environment variables, or from YAML files in standard locations. 
```python from your.module import NewBytesCodec from zarr.core.config import register_codec, config register_codec("bytes", NewBytesCodec) config.set({"codecs.bytes": "your.module.NewBytesCodec"}) ``` Instead of setting the value programmatically with ``config.set``, you can also set the value with an environment variable. The environment variable ``ZARR_CODECS__BYTES`` can be set to ``your.module.NewBytesCodec``. The double underscore ``__`` is used to indicate nested access. ```bash export ZARR_CODECS__BYTES="your.module.NewBytesCodec" ``` For more information, see the Donfig documentation at https://github.com/pytroll/donfig. """ from __future__ import annotations from typing import TYPE_CHECKING, Any, Literal, cast from donfig import Config as DConfig if TYPE_CHECKING: from donfig.config_obj import ConfigSet class BadConfigError(ValueError): _msg = "bad Config: %r" class Config(DConfig): # type: ignore[misc] """The Config will collect configuration from config files and environment variables Example environment variables: Grabs environment variables of the form "ZARR_FOO__BAR_BAZ=123" and turns these into config variables of the form ``{"foo": {"bar-baz": 123}}`` It transforms the key and value in the following way: - Lower-cases the key text - Treats ``__`` (double-underscore) as nested access - Calls ``ast.literal_eval`` on the value """ def reset(self) -> None: self.clear() self.refresh() def enable_gpu(self) -> ConfigSet: """ Configure Zarr to use GPUs where possible. """ return self.set( {"buffer": "zarr.buffer.gpu.Buffer", "ndbuffer": "zarr.buffer.gpu.NDBuffer"} ) # these keys were removed from the config as part of the 3.1.0 release. # these deprecations should be removed in 3.1.1 or thereabouts. deprecations = { "array.v2_default_compressor.numeric": None, "array.v2_default_compressor.string": None, "array.v2_default_compressor.bytes": None, "array.v2_default_filters.string": None, "array.v2_default_filters.bytes": None, "array.v3_default_filters.numeric": None, "array.v3_default_filters.raw": None, "array.v3_default_filters.bytes": None, "array.v3_default_serializer.numeric": None, "array.v3_default_serializer.string": None, "array.v3_default_serializer.bytes": None, "array.v3_default_compressors.string": None, "array.v3_default_compressors.bytes": None, "array.v3_default_compressors": None, } # The default configuration for zarr config = Config( "zarr", defaults=[ { "default_zarr_format": 3, "array": { "order": "C", "write_empty_chunks": False, "target_shard_size_bytes": None, }, "async": {"concurrency": 10, "timeout": None}, "threading": {"max_workers": None}, "json_indent": 2, "codec_pipeline": { "path": "zarr.core.codec_pipeline.BatchedCodecPipeline", "batch_size": 1, }, "codecs": { "blosc": "zarr.codecs.blosc.BloscCodec", "gzip": "zarr.codecs.gzip.GzipCodec", "zstd": "zarr.codecs.zstd.ZstdCodec", "bytes": "zarr.codecs.bytes.BytesCodec", "endian": "zarr.codecs.bytes.BytesCodec", # compatibility with earlier versions of ZEP1 "crc32c": "zarr.codecs.crc32c_.Crc32cCodec", "sharding_indexed": "zarr.codecs.sharding.ShardingCodec", "transpose": "zarr.codecs.transpose.TransposeCodec", "vlen-utf8": "zarr.codecs.vlen_utf8.VLenUTF8Codec", "vlen-bytes": "zarr.codecs.vlen_utf8.VLenBytesCodec", "numcodecs.bz2": "zarr.codecs.numcodecs.BZ2", "numcodecs.crc32": "zarr.codecs.numcodecs.CRC32", "numcodecs.crc32c": "zarr.codecs.numcodecs.CRC32C", "numcodecs.lz4": "zarr.codecs.numcodecs.LZ4", "numcodecs.lzma": "zarr.codecs.numcodecs.LZMA", "numcodecs.zfpy": "zarr.codecs.numcodecs.ZFPY", 
"numcodecs.adler32": "zarr.codecs.numcodecs.Adler32", "numcodecs.astype": "zarr.codecs.numcodecs.AsType", "numcodecs.bitround": "zarr.codecs.numcodecs.BitRound", "numcodecs.blosc": "zarr.codecs.numcodecs.Blosc", "numcodecs.delta": "zarr.codecs.numcodecs.Delta", "numcodecs.fixedscaleoffset": "zarr.codecs.numcodecs.FixedScaleOffset", "numcodecs.fletcher32": "zarr.codecs.numcodecs.Fletcher32", "numcodecs.gzip": "zarr.codecs.numcodecs.GZip", "numcodecs.jenkins_lookup3": "zarr.codecs.numcodecs.JenkinsLookup3", "numcodecs.pcodec": "zarr.codecs.numcodecs.PCodec", "numcodecs.packbits": "zarr.codecs.numcodecs.PackBits", "numcodecs.shuffle": "zarr.codecs.numcodecs.Shuffle", "numcodecs.quantize": "zarr.codecs.numcodecs.Quantize", "numcodecs.zlib": "zarr.codecs.numcodecs.Zlib", "numcodecs.zstd": "zarr.codecs.numcodecs.Zstd", }, "buffer": "zarr.buffer.cpu.Buffer", "ndbuffer": "zarr.buffer.cpu.NDBuffer", } ], deprecations=deprecations, ) def parse_indexing_order(data: Any) -> Literal["C", "F"]: if data in ("C", "F"): return cast("Literal['C', 'F']", data) msg = f"Expected one of ('C', 'F'), got {data} instead." raise ValueError(msg) zarr-python-3.1.5/src/zarr/core/dtype/000077500000000000000000000000001511007055700176405ustar00rootroot00000000000000zarr-python-3.1.5/src/zarr/core/dtype/__init__.py000066400000000000000000000216521511007055700217570ustar00rootroot00000000000000from __future__ import annotations from collections.abc import Sequence from typing import TYPE_CHECKING, Final, TypeAlias from zarr.core.dtype.common import ( DataTypeValidationError, DTypeJSON, ) from zarr.core.dtype.npy.bool import Bool from zarr.core.dtype.npy.bytes import ( NullTerminatedBytes, NullterminatedBytesJSON_V2, NullTerminatedBytesJSON_V3, RawBytes, RawBytesJSON_V2, RawBytesJSON_V3, VariableLengthBytes, VariableLengthBytesJSON_V2, ) from zarr.core.dtype.npy.complex import Complex64, Complex128 from zarr.core.dtype.npy.float import Float16, Float32, Float64 from zarr.core.dtype.npy.int import Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64 from zarr.core.dtype.npy.structured import Structured, StructuredJSON_V2, StructuredJSON_V3 from zarr.core.dtype.npy.time import ( DateTime64, DateTime64JSON_V2, DateTime64JSON_V3, TimeDelta64, TimeDelta64JSON_V2, TimeDelta64JSON_V3, ) if TYPE_CHECKING: from zarr.core.common import ZarrFormat from collections.abc import Mapping import numpy as np import numpy.typing as npt from zarr.core.common import JSON from zarr.core.dtype.npy.string import ( FixedLengthUTF32, FixedLengthUTF32JSON_V2, FixedLengthUTF32JSON_V3, VariableLengthUTF8, VariableLengthUTF8JSON_V2, ) from zarr.core.dtype.registry import DataTypeRegistry from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType __all__ = [ "Bool", "Complex64", "Complex128", "DataTypeRegistry", "DataTypeValidationError", "DateTime64", "DateTime64JSON_V2", "DateTime64JSON_V3", "FixedLengthUTF32", "FixedLengthUTF32JSON_V2", "FixedLengthUTF32JSON_V3", "Float16", "Float32", "Float64", "Int8", "Int16", "Int32", "Int64", "NullTerminatedBytes", "NullTerminatedBytesJSON_V3", "NullterminatedBytesJSON_V2", "RawBytes", "RawBytesJSON_V2", "RawBytesJSON_V3", "Structured", "StructuredJSON_V2", "StructuredJSON_V3", "TBaseDType", "TBaseScalar", "TimeDelta64", "TimeDelta64", "TimeDelta64JSON_V2", "TimeDelta64JSON_V3", "UInt8", "UInt16", "UInt32", "UInt64", "VariableLengthBytes", "VariableLengthBytesJSON_V2", "VariableLengthUTF8", "VariableLengthUTF8JSON_V2", "ZDType", "data_type_registry", "parse_data_type", "parse_dtype", ] 
data_type_registry = DataTypeRegistry() IntegerDType = Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32 | UInt64 INTEGER_DTYPE: Final = Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64 FloatDType = Float16 | Float32 | Float64 FLOAT_DTYPE: Final = Float16, Float32, Float64 ComplexFloatDType = Complex64 | Complex128 COMPLEX_FLOAT_DTYPE: Final = Complex64, Complex128 StringDType = FixedLengthUTF32 | VariableLengthUTF8 STRING_DTYPE: Final = FixedLengthUTF32, VariableLengthUTF8 TimeDType = DateTime64 | TimeDelta64 TIME_DTYPE: Final = DateTime64, TimeDelta64 BytesDType = RawBytes | NullTerminatedBytes | VariableLengthBytes BYTES_DTYPE: Final = RawBytes, NullTerminatedBytes, VariableLengthBytes AnyDType = ( Bool | IntegerDType | FloatDType | ComplexFloatDType | StringDType | BytesDType | Structured | TimeDType | VariableLengthBytes ) # mypy has trouble inferring the type of variablelengthstring dtype, because its class definition # depends on the installed numpy version. That's why the type: ignore statement is needed here. ANY_DTYPE: Final = ( Bool, *INTEGER_DTYPE, *FLOAT_DTYPE, *COMPLEX_FLOAT_DTYPE, *STRING_DTYPE, *BYTES_DTYPE, Structured, *TIME_DTYPE, VariableLengthBytes, ) # These are aliases for variable-length UTF-8 strings # We handle them when a user requests a data type instead of using NumPy's dtype inferece because # the default NumPy behavior -- to inspect the user-provided array data and choose # an appropriately sized U dtype -- is unworkable for Zarr. VLEN_UTF8_ALIAS: Final = ("str", str, "string") # This type models inputs that can be coerced to a ZDType ZDTypeLike: TypeAlias = npt.DTypeLike | ZDType[TBaseDType, TBaseScalar] | Mapping[str, JSON] | str for dtype in ANY_DTYPE: # mypy does not know that all the elements of ANY_DTYPE are subclasses of ZDType data_type_registry.register(dtype._zarr_v3_name, dtype) # type: ignore[arg-type] # TODO: find a better name for this function def get_data_type_from_native_dtype(dtype: npt.DTypeLike) -> ZDType[TBaseDType, TBaseScalar]: """ Get a data type wrapper (an instance of ``ZDType``) from a native data type, e.g. a numpy dtype. """ if not isinstance(dtype, np.dtype): na_dtype: np.dtype[np.generic] if isinstance(dtype, list): # this is a valid _VoidDTypeLike check na_dtype = np.dtype([tuple(d) for d in dtype]) else: na_dtype = np.dtype(dtype) else: na_dtype = dtype return data_type_registry.match_dtype(dtype=na_dtype) def get_data_type_from_json( dtype_spec: DTypeJSON, *, zarr_format: ZarrFormat ) -> ZDType[TBaseDType, TBaseScalar]: """ Given a JSON representation of a data type and a Zarr format version, attempt to create a ZDType instance from the registered ZDType classes. """ return data_type_registry.match_json(dtype_spec, zarr_format=zarr_format) def parse_data_type( dtype_spec: ZDTypeLike, *, zarr_format: ZarrFormat, ) -> ZDType[TBaseDType, TBaseScalar]: """ Interpret the input as a ZDType. This function wraps ``parse_dtype``. The only difference is the function name. This function may be deprecated in a future version of Zarr Python in favor of ``parse_dtype``. Parameters ---------- dtype_spec : ZDTypeLike The input to be interpreted as a ZDType. This could be a ZDType, which will be returned directly, or a JSON representation of a ZDType, or a native dtype, or a python object that can be converted into a native dtype. zarr_format : ZarrFormat The Zarr format version. 
This parameter is required because this function will attempt to parse the JSON representation of a data type, and the JSON representation of data types varies between Zarr 2 and Zarr 3. Returns ------- ZDType[TBaseDType, TBaseScalar] The ZDType corresponding to the input. Examples -------- ```python from zarr.dtype import parse_data_type import numpy as np parse_data_type("int32", zarr_format=2) # Int32(endianness='little') parse_data_type(np.dtype('S10'), zarr_format=2) # NullTerminatedBytes(length=10) parse_data_type({"name": "numpy.datetime64", "configuration": {"unit": "s", "scale_factor": 10}}, zarr_format=3) # DateTime64(endianness='little', scale_factor=10, unit='s') ``` """ return parse_dtype(dtype_spec, zarr_format=zarr_format) def parse_dtype( dtype_spec: ZDTypeLike, *, zarr_format: ZarrFormat, ) -> ZDType[TBaseDType, TBaseScalar]: """ Convert the input as a ZDType. Parameters ---------- dtype_spec : ZDTypeLike The input to be converted to a ZDType. This could be a ZDType, which will be returned directly, or a JSON representation of a ZDType, or a numpy dtype, or a python object that can be converted into a native dtype. zarr_format : ZarrFormat The Zarr format version. This parameter is required because this function will attempt to parse the JSON representation of a data type, and the JSON representation of data types varies between Zarr 2 and Zarr 3. Returns ------- ZDType[TBaseDType, TBaseScalar] The ZDType corresponding to the input. Examples -------- ```python from zarr.dtype import parse_dtype import numpy as np parse_dtype("int32", zarr_format=2) # Int32(endianness='little') parse_dtype(np.dtype('S10'), zarr_format=2) # NullTerminatedBytes(length=10) parse_dtype({"name": "numpy.datetime64", "configuration": {"unit": "s", "scale_factor": 10}}, zarr_format=3) # DateTime64(endianness='little', scale_factor=10, unit='s') ``` """ if isinstance(dtype_spec, ZDType): return dtype_spec # First attempt to interpret the input as JSON if isinstance(dtype_spec, Mapping | str | Sequence): try: return get_data_type_from_json(dtype_spec, zarr_format=zarr_format) # type: ignore[arg-type] except ValueError: # no data type matched this JSON-like input pass if dtype_spec in VLEN_UTF8_ALIAS: # If the dtype request is one of the aliases for variable-length UTF-8 strings, # return that dtype. 
return VariableLengthUTF8() # type: ignore[return-value] # otherwise, we have either a numpy dtype string, or a zarr v3 dtype string, and in either case # we can create a native dtype from it, and do the dtype inference from that return get_data_type_from_native_dtype(dtype_spec) # type: ignore[arg-type] zarr-python-3.1.5/src/zarr/core/dtype/common.py000066400000000000000000000207131511007055700215050ustar00rootroot00000000000000from __future__ import annotations import warnings from collections.abc import Mapping, Sequence from dataclasses import dataclass from typing import ( ClassVar, Final, Generic, Literal, TypedDict, TypeGuard, TypeVar, ) from typing_extensions import ReadOnly from zarr.core.common import NamedConfig from zarr.errors import UnstableSpecificationWarning EndiannessStr = Literal["little", "big"] ENDIANNESS_STR: Final = "little", "big" SpecialFloatStrings = Literal["NaN", "Infinity", "-Infinity"] SPECIAL_FLOAT_STRINGS: Final = ("NaN", "Infinity", "-Infinity") JSONFloatV2 = float | SpecialFloatStrings JSONFloatV3 = float | SpecialFloatStrings | str ObjectCodecID = Literal["vlen-utf8", "vlen-bytes", "vlen-array", "pickle", "json2", "msgpack2"] # These are the ids of the known object codecs for zarr v2. OBJECT_CODEC_IDS: Final = ("vlen-utf8", "vlen-bytes", "vlen-array", "pickle", "json2", "msgpack2") # This is a wider type than our standard JSON type because we need # to work with typeddict objects which are assignable to Mapping[str, object] DTypeJSON = str | int | float | Sequence["DTypeJSON"] | None | Mapping[str, object] # The DTypeJSON_V2 type exists because ZDType.from_json takes a single argument, which must contain # all the information necessary to decode the data type. Zarr v2 supports multiple distinct # data types that all used the "|O" data type identifier. These data types can only be # discriminated on the basis of their "object codec", i.e. a special data type specific # compressor or filter. So to figure out what data type a zarr v2 array has, we need the # data type identifier from metadata, as well as an object codec id if the data type identifier # is "|O". # So we will pack the name of the dtype alongside the name of the object codec id, if applicable, # in a single dict, and pass that to the data type inference logic. # These type variables have a very wide bound because the individual zdtype # classes can perform a very specific type check. # This is the JSON representation of a structured dtype in zarr v2 StructuredName_V2 = Sequence["str | StructuredName_V2"] # This models the type of the name a dtype might have in zarr v2 array metadata DTypeName_V2 = StructuredName_V2 | str TDTypeNameV2_co = TypeVar("TDTypeNameV2_co", bound=DTypeName_V2, covariant=True) TObjectCodecID_co = TypeVar("TObjectCodecID_co", bound=None | str, covariant=True) class DTypeConfig_V2(TypedDict, Generic[TDTypeNameV2_co, TObjectCodecID_co]): name: ReadOnly[TDTypeNameV2_co] object_codec_id: ReadOnly[TObjectCodecID_co] DTypeSpec_V2 = DTypeConfig_V2[DTypeName_V2, None | str] def check_structured_dtype_v2_inner(data: object) -> TypeGuard[StructuredName_V2]: """ A type guard for the inner elements of a structured dtype. This is a recursive check because the type is itself recursive. This check ensures that all the elements are 2-element sequences beginning with a string and ending with either another string or another 2-element sequence beginning with a string and ending with another instance of that type. 
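    Examples
    --------
    A minimal sketch of the accepted shapes (illustrative only):

    ```python
    from zarr.core.dtype.common import check_structured_dtype_v2_inner

    check_structured_dtype_v2_inner(["f0", "<i4"])            # True
    check_structured_dtype_v2_inner(["f0", ["sub", "<i4"]])   # True (nested field)
    check_structured_dtype_v2_inner("<i4")                    # False (bare string)
    check_structured_dtype_v2_inner(["f0", "<i4", "extra"])   # False (not length 2)
    ```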
""" if isinstance(data, (str, Mapping)): return False if not isinstance(data, Sequence): return False if len(data) != 2: return False if not (isinstance(data[0], str)): return False if isinstance(data[-1], str): return True elif isinstance(data[-1], Sequence): return check_structured_dtype_v2_inner(data[-1]) return False def check_structured_dtype_name_v2(data: Sequence[object]) -> TypeGuard[StructuredName_V2]: """ Check that all the elements of a sequence are valid zarr v2 structured dtype identifiers """ return all(check_structured_dtype_v2_inner(d) for d in data) def check_dtype_name_v2(data: object) -> TypeGuard[DTypeName_V2]: """ Type guard for narrowing the type of a python object to a valid zarr v2 dtype name. """ if isinstance(data, str): return True elif isinstance(data, Sequence): return check_structured_dtype_name_v2(data) return False def check_dtype_spec_v2(data: object) -> TypeGuard[DTypeSpec_V2]: """ Type guard for narrowing a python object to an instance of DTypeSpec_V2 """ if not isinstance(data, Mapping): return False if set(data.keys()) != {"name", "object_codec_id"}: return False if not check_dtype_name_v2(data["name"]): return False return isinstance(data["object_codec_id"], str | None) # By comparison, The JSON representation of a dtype in zarr v3 is much simpler. # It's either a string, or a structured dict DTypeSpec_V3 = str | NamedConfig[str, Mapping[str, object]] def check_dtype_spec_v3(data: object) -> TypeGuard[DTypeSpec_V3]: """ Type guard for narrowing the type of a python object to an instance of DTypeSpec_V3, i.e either a string or a dict with a "name" field that's a string and a "configuration" field that's a mapping with string keys. """ if isinstance(data, str) or ( # noqa: SIM103 isinstance(data, Mapping) and set(data.keys()) == {"name", "configuration"} and isinstance(data["configuration"], Mapping) and all(isinstance(k, str) for k in data["configuration"]) ): return True return False def unpack_dtype_json(data: DTypeSpec_V2 | DTypeSpec_V3) -> DTypeJSON: """ Return the array metadata form of the dtype JSON representation. For the Zarr V3 form of dtype metadata, this is a no-op. For the Zarr V2 form of dtype metadata, this unpacks the dtype name. """ if isinstance(data, Mapping) and set(data.keys()) == {"name", "object_codec_id"}: return data["name"] return data class DataTypeValidationError(ValueError): ... class ScalarTypeValidationError(ValueError): ... @dataclass(frozen=True, kw_only=True) class HasLength: """ A mix-in class for data types with a length attribute, such as fixed-size collections of unicode strings, or bytes. Attributes ---------- length : int The length of the scalars belonging to this data type. Note that this class does not assign a unit to the length. Child classes may assign units. """ length: int @dataclass(frozen=True, kw_only=True) class HasEndianness: """ A mix-in class for data types with an endianness attribute """ endianness: EndiannessStr = "little" @dataclass(frozen=True, kw_only=True) class HasItemSize: """ A mix-in class for data types with an item size attribute. This mix-in bears a property ``item_size``, which denotes the size of each element of the data type, in bytes. """ @property def item_size(self) -> int: raise NotImplementedError @dataclass(frozen=True, kw_only=True) class HasObjectCodec: """ A mix-in class for data types that require an object codec id. This class bears the property ``object_codec_id``, which is the string name of an object codec that is required to encode and decode the data type. 
In zarr-python 2.x certain data types like variable-length strings or variable-length arrays used the catch-all numpy "object" data type for their in-memory representation. But these data types cannot be stored as numpy object data types, because the object data type does not define a fixed memory layout. So these data types required a special codec, called an "object codec", that effectively defined a compact representation for the data type, which was used to encode and decode the data type. Zarr-python 2.x would not allow the creation of arrays with the "object" data type if an object codec was not specified, and thus the name of the object codec is effectively part of the data type model. """ object_codec_id: ClassVar[str] def v3_unstable_dtype_warning(dtype: object) -> None: """ Emit this warning when a data type does not have a stable zarr v3 spec """ msg = ( f"The data type ({dtype}) does not have a Zarr V3 specification. " "That means that the representation of arrays saved with this data type may change without " "warning in a future version of Zarr Python. " "Arrays stored with this data type may be unreadable by other Zarr libraries. " "Use this data type at your own risk! " "Check https://github.com/zarr-developers/zarr-extensions/tree/main/data-types for the " "status of data type specifications for Zarr V3." ) warnings.warn(msg, category=UnstableSpecificationWarning, stacklevel=2) zarr-python-3.1.5/src/zarr/core/dtype/npy/000077500000000000000000000000001511007055700204465ustar00rootroot00000000000000zarr-python-3.1.5/src/zarr/core/dtype/npy/__init__.py000066400000000000000000000000001511007055700225450ustar00rootroot00000000000000zarr-python-3.1.5/src/zarr/core/dtype/npy/bool.py000066400000000000000000000213141511007055700217540ustar00rootroot00000000000000from __future__ import annotations from dataclasses import dataclass from typing import TYPE_CHECKING, ClassVar, Literal, Self, TypeGuard, overload import numpy as np from zarr.core.dtype.common import ( DataTypeValidationError, DTypeConfig_V2, DTypeJSON, HasItemSize, check_dtype_spec_v2, ) from zarr.core.dtype.wrapper import TBaseDType, ZDType if TYPE_CHECKING: from zarr.core.common import JSON, ZarrFormat @dataclass(frozen=True, kw_only=True, slots=True) class Bool(ZDType[np.dtypes.BoolDType, np.bool_], HasItemSize): """ A Zarr data type for arrays containing booleans. Wraps the [`np.dtypes.BoolDType`][numpy.dtypes.BoolDType] data type. Scalars for this data type are instances of [`np.bool_`][numpy.bool_]. Attributes ---------- _zarr_v3_name : Literal["bool"] = "bool" The Zarr v3 name of the dtype. _zarr_v2_name : ``Literal["|b1"]`` = ``"|b1"`` The Zarr v2 name of the dtype, which is also a string representation of the boolean dtype used by NumPy. dtype_cls : ClassVar[type[np.dtypes.BoolDType]] = np.dtypes.BoolDType The NumPy dtype class. References ---------- This class implements the boolean data type defined in Zarr V2 and V3. See the [Zarr V2](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding)and [Zarr V3](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/data-types/index.rst) specification documents for details. """ _zarr_v3_name: ClassVar[Literal["bool"]] = "bool" _zarr_v2_name: ClassVar[Literal["|b1"]] = "|b1" dtype_cls = np.dtypes.BoolDType @classmethod def from_native_dtype(cls, dtype: TBaseDType) -> Self: """ Create an instance of Bool from an instance of np.dtypes.BoolDType. 
Parameters ---------- dtype : TBaseDType The NumPy boolean dtype instance to convert. Returns ------- Bool An instance of Bool. Raises ------ DataTypeValidationError If the provided dtype is not compatible with this ZDType. """ if cls._check_native_dtype(dtype): return cls() raise DataTypeValidationError( f"Invalid data type: {dtype}. Expected an instance of {cls.dtype_cls}" ) def to_native_dtype(self: Self) -> np.dtypes.BoolDType: """ Create a NumPy boolean dtype instance from this ZDType. Returns ------- np.dtypes.BoolDType The NumPy boolean dtype. """ return self.dtype_cls() @classmethod def _check_json_v2( cls, data: DTypeJSON, ) -> TypeGuard[DTypeConfig_V2[Literal["|b1"], None]]: """ Check that the input is a valid JSON representation of a Bool. Parameters ---------- data : DTypeJSON The JSON data to check. Returns ------- ``TypeGuard[DTypeConfig_V2[Literal["|b1"], None]]`` True if the input is a valid JSON representation, False otherwise. """ return ( check_dtype_spec_v2(data) and data["name"] == cls._zarr_v2_name and data["object_codec_id"] is None ) @classmethod def _check_json_v3(cls, data: DTypeJSON) -> TypeGuard[Literal["bool"]]: """ Check that the input is a valid JSON representation of this class in Zarr V3. Parameters ---------- data : DTypeJSON The JSON data to check. Returns ------- bool True if the input is a valid JSON representation, False otherwise. """ return data == cls._zarr_v3_name @classmethod def _from_json_v2(cls, data: DTypeJSON) -> Self: """ Create an instance of Bool from Zarr V2-flavored JSON. Parameters ---------- data : DTypeJSON The JSON data. Returns ------- Bool An instance of Bool. Raises ------ DataTypeValidationError If the input JSON is not a valid representation of this class. """ if cls._check_json_v2(data): return cls() msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected the string {cls._zarr_v2_name!r}" raise DataTypeValidationError(msg) @classmethod def _from_json_v3(cls: type[Self], data: DTypeJSON) -> Self: """ Create an instance of Bool from Zarr V3-flavored JSON. Parameters ---------- data : DTypeJSON The JSON data. Returns ------- Bool An instance of Bool. Raises ------ DataTypeValidationError If the input JSON is not a valid representation of this class. """ if cls._check_json_v3(data): return cls() msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected the string {cls._zarr_v3_name!r}" raise DataTypeValidationError(msg) @overload def to_json(self, zarr_format: Literal[2]) -> DTypeConfig_V2[Literal["|b1"], None]: ... @overload def to_json(self, zarr_format: Literal[3]) -> Literal["bool"]: ... def to_json( self, zarr_format: ZarrFormat ) -> DTypeConfig_V2[Literal["|b1"], None] | Literal["bool"]: """ Serialize this Bool instance to JSON. Parameters ---------- zarr_format : ZarrFormat The Zarr format version (2 or 3). Returns ------- ``DTypeConfig_V2[Literal["|b1"], None] | Literal["bool"]`` The JSON representation of the Bool instance. Raises ------ ValueError If the zarr_format is not 2 or 3. """ if zarr_format == 2: return {"name": self._zarr_v2_name, "object_codec_id": None} elif zarr_format == 3: return self._zarr_v3_name raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def _check_scalar(self, data: object) -> bool: """ Check if the input can be cast to a boolean scalar. Parameters ---------- data : object The data to check. Returns ------- bool True if the input can be cast to a boolean scalar, False otherwise. 
""" return True def cast_scalar(self, data: object) -> np.bool_: """ Cast the input to a numpy boolean scalar. Parameters ---------- data : object The data to cast. Returns ------- bool : np.bool_ The numpy boolean scalar. Raises ------ TypeError If the input cannot be converted to a numpy boolean. """ if self._check_scalar(data): return np.bool_(data) msg = ( # pragma: no cover f"Cannot convert object {data!r} with type {type(data)} to a scalar compatible with the " f"data type {self}." ) raise TypeError(msg) # pragma: no cover def default_scalar(self) -> np.bool_: """ Get the default value for the boolean dtype. Returns ------- bool : np.bool_ The default value. """ return np.False_ def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> bool: """ Convert a scalar to a python bool. Parameters ---------- data : object The value to convert. zarr_format : ZarrFormat The zarr format version. Returns ------- bool The JSON-serializable format. """ return bool(data) def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bool_: """ Read a JSON-serializable value as a numpy boolean scalar. Parameters ---------- data : JSON The JSON-serializable value. zarr_format : ZarrFormat The zarr format version. Returns ------- bool : np.bool_ The numpy boolean scalar. Raises ------ TypeError If the input is not a valid boolean type. """ if self._check_scalar(data): return np.bool_(data) raise TypeError(f"Invalid type: {data}. Expected a boolean.") # pragma: no cover @property def item_size(self) -> int: """ The size of a single scalar in bytes. Returns ------- int The size of a single scalar in bytes. """ return 1 zarr-python-3.1.5/src/zarr/core/dtype/npy/bytes.py000066400000000000000000001160521511007055700221530ustar00rootroot00000000000000from __future__ import annotations import base64 import re from dataclasses import dataclass from typing import ClassVar, Literal, Self, TypedDict, TypeGuard, cast, overload import numpy as np from zarr.core.common import JSON, NamedConfig, ZarrFormat from zarr.core.dtype.common import ( DataTypeValidationError, DTypeConfig_V2, DTypeJSON, HasItemSize, HasLength, HasObjectCodec, check_dtype_spec_v2, v3_unstable_dtype_warning, ) from zarr.core.dtype.npy.common import check_json_str from zarr.core.dtype.wrapper import TBaseDType, ZDType BytesLike = np.bytes_ | str | bytes | int class FixedLengthBytesConfig(TypedDict): """ A configuration for a data type that takes a ``length_bytes`` parameter. Attributes ---------- length_bytes : int The length in bytes of the data associated with this configuration. Examples -------- ```python { "length_bytes": 12 } ``` """ length_bytes: int class NullterminatedBytesJSON_V2(DTypeConfig_V2[str, None]): """ A wrapper around the JSON representation of the ``NullTerminatedBytes`` data type in Zarr V2. The ``name`` field of this class contains the value that would appear under the ``dtype`` field in Zarr V2 array metadata. References ---------- The structure of the ``name`` field is defined in the Zarr V2 [specification document](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding). Examples -------- ```python { "name": "|S10", "object_codec_id": None } ``` """ class NullTerminatedBytesJSON_V3( NamedConfig[Literal["null_terminated_bytes"], FixedLengthBytesConfig] ): """ The JSON representation of the ``NullTerminatedBytes`` data type in Zarr V3. References ---------- This representation is not currently defined in an external specification. 
Examples -------- ```python { "name": "null_terminated_bytes", "configuration": { "length_bytes": 12 } } ``` """ class RawBytesJSON_V2(DTypeConfig_V2[str, None]): """ A wrapper around the JSON representation of the ``RawBytes`` data type in Zarr V2. The ``name`` field of this class contains the value that would appear under the ``dtype`` field in Zarr V2 array metadata. References ---------- The structure of the ``name`` field is defined in the Zarr V2 [specification document](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding). Examples -------- ```python { "name": "|V10", "object_codec_id": None } ``` """ class RawBytesJSON_V3(NamedConfig[Literal["raw_bytes"], FixedLengthBytesConfig]): """ The JSON representation of the ``RawBytes`` data type in Zarr V3. References ---------- This representation is not currently defined in an external specification. Examples -------- ```python { "name": "raw_bytes", "configuration": { "length_bytes": 12 } } ``` """ class VariableLengthBytesJSON_V2(DTypeConfig_V2[Literal["|O"], Literal["vlen-bytes"]]): """ A wrapper around the JSON representation of the ``VariableLengthBytes`` data type in Zarr V2. The ``name`` field of this class contains the value that would appear under the ``dtype`` field in Zarr V2 array metadata. The ``object_codec_id`` field is always ``"vlen-bytes"`` References ---------- The structure of the ``name`` field is defined in the Zarr V2 [specification document](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding). Examples -------- ```python { "name": "|O", "object_codec_id": "vlen-bytes" } ``` """ @dataclass(frozen=True, kw_only=True) class NullTerminatedBytes(ZDType[np.dtypes.BytesDType[int], np.bytes_], HasLength, HasItemSize): """ A Zarr data type for arrays containing fixed-length null-terminated byte sequences. Wraps the [`np.dtypes.BytesDType`][numpy.dtypes.BytesDType] data type. Scalars for this data type are instances of [`np.bytes_`][numpy.bytes_]. This data type is parametrized by an integral length which specifies size in bytes of each scalar. Because this data type uses null-terminated semantics, indexing into NumPy arrays with this data type may return fewer than ``length`` bytes. Attributes ---------- dtype_cls: ClassVar[type[np.dtypes.BytesDType[int]]] = np.dtypes.BytesDType The NumPy data type wrapped by this ZDType. _zarr_v3_name : ClassVar[Literal["null_terminated_bytes"]] length : int The length of the bytes. Notes ----- This data type is designed for compatibility with NumPy arrays that use the NumPy ``bytes`` data type. It may not be desirable for usage outside of that context. If compatibility with the NumPy ``bytes`` data type is not essential, consider using the ``RawBytes`` or ``VariableLengthBytes`` data types instead. """ dtype_cls = np.dtypes.BytesDType _zarr_v3_name: ClassVar[Literal["null_terminated_bytes"]] = "null_terminated_bytes" def __post_init__(self) -> None: """ We don't allow instances of this class with length less than 1 because there is no way such a data type can contain actual data. """ if self.length < 1: raise ValueError(f"length must be >= 1, got {self.length}.") @classmethod def from_native_dtype(cls, dtype: TBaseDType) -> Self: """ Create an instance of NullTerminatedBytes from an instance of np.dtypes.BytesDType. This method checks if the provided data type is an instance of np.dtypes.BytesDType. If so, it returns a new instance of NullTerminatedBytes with a length equal to the length of input data type. 
Parameters ---------- dtype : TBaseDType The native dtype to convert. Returns ------- NullTerminatedBytes An instance of NullTerminatedBytes with the specified length. Raises ------ DataTypeValidationError If the dtype is not compatible with NullTerminatedBytes. """ if cls._check_native_dtype(dtype): return cls(length=dtype.itemsize) raise DataTypeValidationError( f"Invalid data type: {dtype}. Expected an instance of {cls.dtype_cls}" ) def to_native_dtype(self) -> np.dtypes.BytesDType[int]: """ Create a NumPy bytes dtype from this NullTerminatedBytes ZDType. Returns ------- np.dtypes.BytesDType[int] A NumPy data type object representing null-terminated bytes with a specified length. """ return self.dtype_cls(self.length) @classmethod def _check_json_v2(cls, data: DTypeJSON) -> TypeGuard[NullterminatedBytesJSON_V2]: """ Check that the input is a valid JSON representation of NullTerminatedBytes in Zarr V2. The input data must be a mapping that contains a "name" key that matches the pattern "|S" and an "object_codec_id" key that is None. Parameters ---------- data : DTypeJSON The JSON data to check. Returns ------- bool True if the input data is a valid representation, False otherwise. """ return ( check_dtype_spec_v2(data) and isinstance(data["name"], str) and re.match(r"^\|S\d+$", data["name"]) is not None and data["object_codec_id"] is None ) @classmethod def _check_json_v3(cls, data: DTypeJSON) -> TypeGuard[NullTerminatedBytesJSON_V3]: """ Check that the input is a valid JSON representation of this class in Zarr V3. Parameters ---------- data : DTypeJSON The JSON data to check. Returns ------- TypeGuard[NullTerminatedBytesJSON_V3] True if the input is a valid representation of this class in Zarr V3, False otherwise. """ return ( isinstance(data, dict) and set(data.keys()) == {"name", "configuration"} and data["name"] == cls._zarr_v3_name and isinstance(data["configuration"], dict) and "length_bytes" in data["configuration"] and isinstance(data["configuration"]["length_bytes"], int) ) @classmethod def _from_json_v2(cls, data: DTypeJSON) -> Self: """ Create an instance of this class from Zarr V2-flavored JSON. This method checks if the input data is a valid representation of this class in Zarr V2. If so, it returns a new instance of this class with a ``length`` as specified in the input data. Parameters ---------- data : DTypeJSON The JSON data to parse. Returns ------- Self An instance of this data type. Raises ------ DataTypeValidationError If the input data is not a valid representation of this class. """ if cls._check_json_v2(data): name = data["name"] return cls(length=int(name[2:])) msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected a string like '|S1', '|S2', etc" raise DataTypeValidationError(msg) @classmethod def _from_json_v3(cls, data: DTypeJSON) -> Self: """ Create an instance of this class from Zarr V3-flavored JSON. This method checks if the input data is a valid representation of this class in Zarr V3. If so, it returns a new instance of this class with a ``length`` as specified in the input data. Parameters ---------- data : DTypeJSON The JSON data to parse. Returns ------- Self An instance of this data type. Raises ------ DataTypeValidationError If the input data is not a valid representation of this class. """ if cls._check_json_v3(data): return cls(length=data["configuration"]["length_bytes"]) msg = f"Invalid JSON representation of {cls.__name__}. 
Got {data!r}, expected the string {cls._zarr_v3_name!r}" raise DataTypeValidationError(msg) @overload def to_json(self, zarr_format: Literal[2]) -> NullterminatedBytesJSON_V2: ... @overload def to_json(self, zarr_format: Literal[3]) -> NullTerminatedBytesJSON_V3: ... def to_json( self, zarr_format: ZarrFormat ) -> DTypeConfig_V2[str, None] | NullTerminatedBytesJSON_V3: """ Generate a JSON representation of this data type. Parameters ---------- zarr_format : ZarrFormat The zarr format version. Returns ------- NullterminatedBytesJSON_V2 | NullTerminatedBytesJSON_V3 The JSON-serializable representation of the data type """ if zarr_format == 2: return {"name": self.to_native_dtype().str, "object_codec_id": None} elif zarr_format == 3: v3_unstable_dtype_warning(self) return { "name": self._zarr_v3_name, "configuration": {"length_bytes": self.length}, } raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def _check_scalar(self, data: object) -> TypeGuard[BytesLike]: """ Check if the provided data is of type BytesLike. This method is used to verify if the input data can be considered as a scalar of bytes-like type, which includes NumPy bytes, strings, bytes, and integers. Parameters ---------- data : object The data to check. Returns ------- TypeGuard[BytesLike] True if the data is bytes-like, False otherwise. """ return isinstance(data, BytesLike) def _cast_scalar_unchecked(self, data: BytesLike) -> np.bytes_: """ Cast the provided scalar data to [`np.bytes_`][numpy.bytes_], truncating if necessary. Parameters ---------- data : BytesLike The data to cast. Returns ------- bytes : [`np.bytes_`][numpy.bytes_] The casted data as a NumPy bytes scalar. Notes ----- This method does not perform any type checking. The input data must be bytes-like. """ if isinstance(data, int): return self.to_native_dtype().type(str(data)[: self.length]) else: return self.to_native_dtype().type(data[: self.length]) def cast_scalar(self, data: object) -> np.bytes_: """ Attempt to cast a given object to a NumPy bytes scalar. This method first checks if the provided data is a valid scalar that can be converted to a NumPy bytes scalar. If the check succeeds, the unchecked casting operation is performed. If the data is not valid, a TypeError is raised. Parameters ---------- data : object The data to be cast to a NumPy bytes scalar. Returns ------- bytes : [`np.bytes_`][numpy.bytes_] The data cast as a NumPy bytes scalar. Raises ------ TypeError If the data cannot be converted to a NumPy bytes scalar. """ if self._check_scalar(data): return self._cast_scalar_unchecked(data) msg = ( f"Cannot convert object {data!r} with type {type(data)} to a scalar compatible with the " f"data type {self}." ) raise TypeError(msg) def default_scalar(self) -> np.bytes_: """ Return a default scalar value, which for this data type is an empty byte string. Returns ------- bytes : [`np.bytes_`][numpy.bytes_] The default scalar value. """ return np.bytes_(b"") def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> str: """ Convert a scalar to a JSON-serializable string representation. This method encodes the given scalar as a NumPy bytes scalar and then encodes the bytes as a base64-encoded string. Parameters ---------- data : object The scalar to convert. zarr_format : ZarrFormat The zarr format version. Returns ------- str A string representation of the scalar. 
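        Examples
        --------
        A minimal sketch (illustrative only; scalars are truncated to ``length``
        bytes before base64 encoding):

        ```python
        from zarr.core.dtype import NullTerminatedBytes

        dt = NullTerminatedBytes(length=3)
        dt.to_json_scalar(b"abc", zarr_format=3)     # 'YWJj'
        dt.to_json_scalar(b"abcdef", zarr_format=3)  # 'YWJj' (truncated to 3 bytes)
        ```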
""" as_bytes = self.cast_scalar(data) return base64.standard_b64encode(as_bytes).decode("ascii") def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bytes_: """ Read a JSON-serializable value as [`np.bytes_`][numpy.bytes_]. Parameters ---------- data : JSON The JSON-serializable base64-encoded string. zarr_format : ZarrFormat The zarr format version. Returns ------- bytes : [`np.bytes_`][numpy.bytes_] The NumPy bytes scalar obtained from decoding the base64 string. Raises ------ TypeError If the input data is not a base64-encoded string. """ if check_json_str(data): return self.to_native_dtype().type(base64.standard_b64decode(data.encode("ascii"))) raise TypeError( f"Invalid type: {data}. Expected a base64-encoded string." ) # pragma: no cover @property def item_size(self) -> int: """ The size of a single scalar in bytes. Returns ------- int The size of a single scalar in bytes. """ return self.length @dataclass(frozen=True, kw_only=True) class RawBytes(ZDType[np.dtypes.VoidDType[int], np.void], HasLength, HasItemSize): """ A Zarr data type for arrays containing fixed-length sequences of raw bytes. Wraps the NumPy ``void`` data type. Scalars for this data type are instances of [`np.void`][numpy.void]. This data type is parametrized by an integral length which specifies size in bytes of each scalar belonging to this data type. Attributes ---------- dtype_cls: ClassVar[type[np.dtypes.VoidDType[int]]] = np.dtypes.VoidDtype The NumPy data type wrapped by this ZDType. _zarr_v3_name : ClassVar[Literal["raw_bytes"]] length : int The length of the bytes. Notes ----- Although the NumPy "Void" data type is used to create "structured" data types in NumPy, this class does not support structured data types. See the ``Structured`` data type for this functionality. """ # np.dtypes.VoidDType is specified in an odd way in NumPy # it cannot be used to create instances of the dtype # so we have to tell mypy to ignore this here dtype_cls = np.dtypes.VoidDType # type: ignore[assignment] _zarr_v3_name: ClassVar[Literal["raw_bytes"]] = "raw_bytes" def __post_init__(self) -> None: """ We don't allow instances of this class with length less than 1 because there is no way such a data type can contain actual data. """ if self.length < 1: raise ValueError(f"length must be >= 1, got {self.length}.") @classmethod def _check_native_dtype( cls: type[Self], dtype: TBaseDType ) -> TypeGuard[np.dtypes.VoidDType[int]]: """ Check that the input is a NumPy void dtype with no fields. Numpy void dtype comes in two forms: * If the ``fields`` attribute is ``None``, then the dtype represents N raw bytes. * If the ``fields`` attribute is not ``None``, then the dtype represents a structured dtype, In this check we ensure that ``fields`` is ``None``. Parameters ---------- dtype : TDBaseDType The dtype to check. Returns ------- Bool True if the dtype is an instance of np.dtypes.VoidDType with no fields, False otherwise. """ return cls.dtype_cls is type(dtype) and dtype.fields is None @classmethod def from_native_dtype(cls, dtype: TBaseDType) -> Self: """ Create an instance of RawBytes from an instance of np.dtypes.VoidDType. This method checks if the provided data type is compatible with RawBytes. The input must be an instance of np.dtypes.VoidDType, and have no fields. If the input is compatible, this method returns an instance of RawBytes with the specified length. Parameters ---------- dtype : TBaseDType The native dtype to convert. Returns ------- RawBytes An instance of RawBytes with the specified length. 
Raises ------ DataTypeValidationError If the dtype is not compatible with RawBytes. """ if cls._check_native_dtype(dtype): return cls(length=dtype.itemsize) raise DataTypeValidationError( f"Invalid data type: {dtype}. Expected an instance of {cls.dtype_cls}" ) def to_native_dtype(self) -> np.dtypes.VoidDType[int]: """ Create a NumPy void dtype from this RawBytes ZDType. Returns ------- np.dtypes.VoidDType[int] A NumPy data type object representing raw bytes with a specified length. """ # Numpy does not allow creating a void type # by invoking np.dtypes.VoidDType directly return cast("np.dtypes.VoidDType[int]", np.dtype(f"V{self.length}")) @classmethod def _check_json_v2(cls, data: DTypeJSON) -> TypeGuard[RawBytesJSON_V2]: """ Check that the input is a valid representation of this class in Zarr V2. Parameters ---------- data : DTypeJSON The JSON data to check. Returns ------- True if the input is a valid representation of this class in Zarr V3, False otherwise. """ return ( check_dtype_spec_v2(data) and isinstance(data["name"], str) and re.match(r"^\|V\d+$", data["name"]) is not None and data["object_codec_id"] is None ) @classmethod def _check_json_v3(cls, data: DTypeJSON) -> TypeGuard[RawBytesJSON_V3]: """ Check that the input is a valid JSON representation of this class in Zarr V3. Parameters ---------- data : DTypeJSON The JSON data to check. Returns ------- TypeGuard[RawBytesJSON_V3] True if the input is a valid representation of this class in Zarr V3, False otherwise. """ return ( isinstance(data, dict) and set(data.keys()) == {"name", "configuration"} and data["name"] == cls._zarr_v3_name and isinstance(data["configuration"], dict) and set(data["configuration"].keys()) == {"length_bytes"} and isinstance(data["configuration"]["length_bytes"], int) ) @classmethod def _from_json_v2(cls, data: DTypeJSON) -> Self: """ Create an instance of RawBytes from Zarr V2-flavored JSON. This method checks if the input data is a valid representation of RawBytes in Zarr V2. If so, it returns a new instance of RawBytes with a ``length`` as specified in the input data. Parameters ---------- data : DTypeJSON The JSON data to parse. Returns ------- Self An instance of this data type. Raises ------ DataTypeValidationError If the input data is not a valid representation of this class. """ if cls._check_json_v2(data): name = data["name"] return cls(length=int(name[2:])) msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected a string like '|V1', '|V2', etc" raise DataTypeValidationError(msg) @classmethod def _from_json_v3(cls, data: DTypeJSON) -> Self: """ Create an instance of RawBytes from Zarr V3-flavored JSON. This method checks if the input data is a valid representation of RawBytes in Zarr V3. If so, it returns a new instance of RawBytes with a ``length`` as specified in the input data. Parameters ---------- data : DTypeJSON The JSON data to parse. Returns ------- RawBytes An instance of RawBytes. Raises ------ DataTypeValidationError If the input data is not a valid representation of this class. """ if cls._check_json_v3(data): return cls(length=data["configuration"]["length_bytes"]) msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected the string {cls._zarr_v3_name!r}" raise DataTypeValidationError(msg) @overload def to_json(self, zarr_format: Literal[2]) -> RawBytesJSON_V2: ... @overload def to_json(self, zarr_format: Literal[3]) -> RawBytesJSON_V3: ... 
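    # Illustrative sketch (added commentary, not part of the original source):
    # assuming a 4-byte instance, ``to_json`` produces the two metadata forms
    # shown below (the Zarr V3 form also emits an UnstableSpecificationWarning,
    # since this data type has no stable V3 specification yet):
    #
    #     RawBytes(length=4).to_json(zarr_format=2)
    #     # {"name": "|V4", "object_codec_id": None}
    #
    #     RawBytes(length=4).to_json(zarr_format=3)
    #     # {"name": "raw_bytes", "configuration": {"length_bytes": 4}}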
def to_json(self, zarr_format: ZarrFormat) -> RawBytesJSON_V2 | RawBytesJSON_V3: """ Generate a JSON representation of this data type. Parameters ---------- zarr_format : ZarrFormat The zarr format version. Returns ------- RawBytesJSON_V2 | RawBytesJSON_V3 The JSON-serializable representation of the data type. """ if zarr_format == 2: return {"name": self.to_native_dtype().str, "object_codec_id": None} elif zarr_format == 3: v3_unstable_dtype_warning(self) return {"name": self._zarr_v3_name, "configuration": {"length_bytes": self.length}} raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def _check_scalar(self, data: object) -> TypeGuard[np.bytes_ | str | bytes | np.void]: """ Check if the provided data can be cast to np.void. This method is used to verify if the input data can be considered as a scalar of bytes-like type, which includes np.bytes_, np.void, strings, and bytes objects. Parameters ---------- data : object The data to check. Returns ------- TypeGuard[np.bytes_ | str | bytes | np.void] True if the data is void-scalar-like, False otherwise. """ return isinstance(data, np.bytes_ | str | bytes | np.void) def _cast_scalar_unchecked(self, data: object) -> np.void: """ Cast the provided scalar data to np.void. Parameters ---------- data : BytesLike The data to cast. Returns ------- np.void The casted data as a NumPy void scalar. Notes ----- This method does not perform any type checking. The input data must be castable to np.void. """ native_dtype = self.to_native_dtype() # Without the second argument, NumPy will return a void scalar for dtype V1. # The second argument ensures that, if native_dtype is something like V10, # the result will actually be a V10 scalar. return native_dtype.type(data, native_dtype) def cast_scalar(self, data: object) -> np.void: """ Attempt to cast a given object to a NumPy void scalar. This method first checks if the provided data is a valid scalar that can be converted to a NumPy void scalar. If the check succeeds, the unchecked casting operation is performed. If the data is not valid, a TypeError is raised. Parameters ---------- data : object The data to be cast to a NumPy void scalar. Returns ------- np.void The data cast as a NumPy void scalar. Raises ------ TypeError If the data cannot be converted to a NumPy void scalar. """ if self._check_scalar(data): return self._cast_scalar_unchecked(data) msg = ( f"Cannot convert object {data!r} with type {type(data)} to a scalar compatible with the " f"data type {self}." ) raise TypeError(msg) def default_scalar(self) -> np.void: """ Return the default scalar value for this data type. The default scalar is a NumPy void scalar of the same length as the data type, filled with zero bytes. Returns ------- np.void The default scalar value. """ return self.to_native_dtype().type(("\x00" * self.length).encode("ascii")) def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> str: """ Convert a scalar to a JSON-serializable string representation. This method converts the given scalar to bytes and then encodes the bytes as a base64-encoded string. Parameters ---------- data : object The scalar to convert. zarr_format : ZarrFormat The zarr format version. Returns ------- str A string representation of the scalar. """ as_bytes = self.cast_scalar(data) return base64.standard_b64encode(as_bytes.tobytes()).decode("ascii") def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: """ Read a JSON-serializable value as a np.void. 
Parameters ---------- data : JSON The JSON-serializable value. zarr_format : ZarrFormat The zarr format version. Returns ------- np.void The NumPy void scalar. Raises ------ TypeError If the data is not a string, or if the string is not a valid base64 encoding. """ if check_json_str(data): return self.to_native_dtype().type(base64.standard_b64decode(data)) raise TypeError(f"Invalid type: {data}. Expected a string.") # pragma: no cover @property def item_size(self) -> int: """ The size of a single scalar in bytes. Returns ------- int The size of a single scalar in bytes. """ return self.length @dataclass(frozen=True, kw_only=True) class VariableLengthBytes(ZDType[np.dtypes.ObjectDType, bytes], HasObjectCodec): """ A Zarr data type for arrays containing variable-length sequences of bytes. Wraps the NumPy "object" data type. Scalars for this data type are instances of ``bytes``. Attributes ---------- dtype_cls: ClassVar[type[np.dtypes.ObjectDType]] = np.dtypes.ObjectDType The NumPy data type wrapped by this ZDType. _zarr_v3_name: ClassVar[Literal["variable_length_bytes"]] = "variable_length_bytes" The name of this data type in Zarr V3. object_codec_id: ClassVar[Literal["vlen-bytes"]] = "vlen-bytes" The object codec ID for this data type. Notes ----- Because this data type uses the NumPy "object" data type, it does not guarantee a compact memory representation of array data. Therefore a "vlen-bytes" codec is needed to ensure that the array data can be persisted to storage. """ dtype_cls = np.dtypes.ObjectDType _zarr_v3_name: ClassVar[Literal["variable_length_bytes"]] = "variable_length_bytes" object_codec_id: ClassVar[Literal["vlen-bytes"]] = "vlen-bytes" @classmethod def from_native_dtype(cls, dtype: TBaseDType) -> Self: """ Create an instance of VariableLengthBytes from an instance of np.dtypes.ObjectDType. This method checks if the provided data type is an instance of np.dtypes.ObjectDType. If so, it returns an instance of VariableLengthBytes. Parameters ---------- dtype : TBaseDType The native dtype to convert. Returns ------- VariableLengthBytes An instance of VariableLengthBytes. Raises ------ DataTypeValidationError If the dtype is not compatible with VariableLengthBytes. """ if cls._check_native_dtype(dtype): return cls() raise DataTypeValidationError( f"Invalid data type: {dtype}. Expected an instance of {cls.dtype_cls}" ) def to_native_dtype(self) -> np.dtypes.ObjectDType: """ Create a NumPy object dtype from this VariableLengthBytes ZDType. Returns ------- np.dtypes.ObjectDType A NumPy data type object representing variable-length bytes. """ return self.dtype_cls() @classmethod def _check_json_v2( cls, data: DTypeJSON, ) -> TypeGuard[VariableLengthBytesJSON_V2]: """ Check that the input is a valid JSON representation of a NumPy O dtype, and that the object codec id is appropriate for variable-length bytes strings. Parameters ---------- data : DTypeJSON The JSON data to check. Returns ------- True if the input is a valid representation of this class in Zarr V2, False otherwise. """ # Check that the input is a valid JSON representation of a Zarr v2 data type spec. if not check_dtype_spec_v2(data): return False # Check that the object codec id is appropriate for variable-length bytes strings. if data["name"] != "|O": return False return data["object_codec_id"] == cls.object_codec_id @classmethod def _check_json_v3(cls, data: DTypeJSON) -> TypeGuard[Literal["variable_length_bytes"]]: """ Check that the input is a valid JSON representation of this class in Zarr V3. 
Parameters ---------- data : DTypeJSON The JSON data to check. Returns ------- TypeGuard[Literal["variable_length_bytes"]] True if the input is a valid representation of this class in Zarr V3, False otherwise. """ return data in (cls._zarr_v3_name, "bytes") @classmethod def _from_json_v2(cls, data: DTypeJSON) -> Self: """ Create an instance of this VariableLengthBytes from Zarr V2-flavored JSON. This method checks if the input data is a valid representation of this class in Zarr V2. If so, it returns a new instance this class. Parameters ---------- data : DTypeJSON The JSON data to parse. Returns ------- Self An instance of this data type. Raises ------ DataTypeValidationError If the input data is not a valid representation of this class class. """ if cls._check_json_v2(data): return cls() msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected the string '|O' and an object_codec_id of {cls.object_codec_id}" raise DataTypeValidationError(msg) @classmethod def _from_json_v3(cls, data: DTypeJSON) -> Self: """ Create an instance of VariableLengthBytes from Zarr V3-flavored JSON. This method checks if the input data is a valid representation of VariableLengthBytes in Zarr V3. If so, it returns a new instance of VariableLengthBytes. Parameters ---------- data : DTypeJSON The JSON data to parse. Returns ------- VariableLengthBytes An instance of VariableLengthBytes. Raises ------ DataTypeValidationError If the input data is not a valid representation of this class. """ if cls._check_json_v3(data): return cls() msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected the string {cls._zarr_v3_name!r}" raise DataTypeValidationError(msg) @overload def to_json(self, zarr_format: Literal[2]) -> VariableLengthBytesJSON_V2: ... @overload def to_json(self, zarr_format: Literal[3]) -> Literal["variable_length_bytes"]: ... def to_json( self, zarr_format: ZarrFormat ) -> VariableLengthBytesJSON_V2 | Literal["variable_length_bytes"]: """ Convert the variable-length bytes data type to a JSON-serializable form. Parameters ---------- zarr_format : ZarrFormat The zarr format version. Accepted values are 2 and 3. Returns ------- ``DTypeConfig_V2[Literal["|O"], Literal["vlen-bytes"]] | Literal["variable_length_bytes"]`` The JSON-serializable representation of the variable-length bytes data type. For zarr_format 2, returns a dictionary with "name" and "object_codec_id". For zarr_format 3, returns a string identifier "variable_length_bytes". Raises ------ ValueError If zarr_format is not 2 or 3. """ if zarr_format == 2: return {"name": "|O", "object_codec_id": self.object_codec_id} elif zarr_format == 3: v3_unstable_dtype_warning(self) return self._zarr_v3_name raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def default_scalar(self) -> bytes: """ Return the default scalar value for the variable-length bytes data type. Returns ------- bytes The default scalar value, which is an empty byte string. """ return b"" def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> str: """ Convert a scalar to a JSON-serializable string representation. This method encodes the given scalar as bytes and then encodes the bytes as a base64-encoded string. Parameters ---------- data : object The scalar to convert. zarr_format : ZarrFormat The zarr format version. Returns ------- str A string representation of the scalar. 
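        Examples
        --------
        A minimal sketch, assuming ``VariableLengthBytes`` can be imported from
        ``zarr.core.dtype``:

        >>> from zarr.core.dtype import VariableLengthBytes
        >>> VariableLengthBytes().to_json_scalar(b"hello", zarr_format=3)
        'aGVsbG8='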
""" return base64.standard_b64encode(data).decode("ascii") # type: ignore[arg-type] def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> bytes: """ Decode a base64-encoded JSON string to bytes. Parameters ---------- data : JSON The JSON-serializable base64-encoded string. zarr_format : ZarrFormat The zarr format version. Returns ------- bytes The decoded bytes from the base64 string. Raises ------ TypeError If the input data is not a base64-encoded string. """ if check_json_str(data): return base64.standard_b64decode(data.encode("ascii")) raise TypeError(f"Invalid type: {data}. Expected a string.") # pragma: no cover def _check_scalar(self, data: object) -> TypeGuard[BytesLike]: """ Check if the provided data is of type BytesLike. This method is used to verify if the input data can be considered as a scalar of bytes-like type, which includes NumPy bytes, strings, bytes, and integers. Parameters ---------- data : object The data to check. Returns ------- TypeGuard[BytesLike] True if the data is bytes-like, False otherwise. """ return isinstance(data, BytesLike) def _cast_scalar_unchecked(self, data: BytesLike) -> bytes: """ Cast the provided scalar data to bytes. Parameters ---------- data : BytesLike The data to cast. Returns ------- bytes The casted data as bytes. Notes ----- This method does not perform any type checking. The input data must be bytes-like. """ if isinstance(data, str): return bytes(data, encoding="utf-8") return bytes(data) def cast_scalar(self, data: object) -> bytes: """ Attempt to cast a given object to a bytes scalar. This method first checks if the provided data is a valid scalar that can be converted to a bytes scalar. If the check succeeds, the unchecked casting operation is performed. If the data is not valid, a TypeError is raised. Parameters ---------- data : object The data to be cast to a bytes scalar. Returns ------- bytes The data cast as a bytes scalar. Raises ------ TypeError If the data cannot be converted to a bytes scalar. """ if self._check_scalar(data): return self._cast_scalar_unchecked(data) msg = ( f"Cannot convert object {data!r} with type {type(data)} to a scalar compatible with the " f"data type {self}." 
) raise TypeError(msg) zarr-python-3.1.5/src/zarr/core/dtype/npy/common.py000066400000000000000000000337521511007055700223220ustar00rootroot00000000000000from __future__ import annotations import base64 import struct import sys from collections.abc import Sequence from typing import ( TYPE_CHECKING, Any, Final, Literal, NewType, SupportsComplex, SupportsFloat, SupportsIndex, SupportsInt, TypeGuard, TypeVar, ) import numpy as np from zarr.core.dtype.common import ( ENDIANNESS_STR, SPECIAL_FLOAT_STRINGS, EndiannessStr, JSONFloatV2, JSONFloatV3, ) if TYPE_CHECKING: from zarr.core.common import JSON, ZarrFormat IntLike = SupportsInt | SupportsIndex | bytes | str FloatLike = SupportsIndex | SupportsFloat | bytes | str ComplexLike = SupportsFloat | SupportsIndex | SupportsComplex | bytes | str | None DateTimeUnit = Literal[ "Y", "M", "W", "D", "h", "m", "s", "ms", "us", "μs", "ns", "ps", "fs", "as", "generic" ] DATETIME_UNIT: Final = ( "Y", "M", "W", "D", "h", "m", "s", "ms", "us", "μs", "ns", "ps", "fs", "as", "generic", ) IntishFloat = NewType("IntishFloat", float) """A type for floats that represent integers, like 1.0 (but not 1.1).""" IntishStr = NewType("IntishStr", str) """A type for strings that represent integers, like "0" or "42".""" FloatishStr = NewType("FloatishStr", str) """A type for strings that represent floats, like "3.14" or "-2.5".""" NumpyEndiannessStr = Literal[">", "<", "="] NUMPY_ENDIANNESS_STR: Final = ">", "<", "=" TFloatDType_co = TypeVar( "TFloatDType_co", bound=np.dtypes.Float16DType | np.dtypes.Float32DType | np.dtypes.Float64DType, covariant=True, ) TFloatScalar_co = TypeVar( "TFloatScalar_co", bound=np.float16 | np.float32 | np.float64, covariant=True ) TComplexDType_co = TypeVar( "TComplexDType_co", bound=np.dtypes.Complex64DType | np.dtypes.Complex128DType, covariant=True ) TComplexScalar_co = TypeVar("TComplexScalar_co", bound=np.complex64 | np.complex128, covariant=True) def endianness_from_numpy_str(endianness: NumpyEndiannessStr) -> EndiannessStr: """ Convert a numpy endianness string literal to a human-readable literal value. Parameters ---------- endianness : Literal[">", "<", "="] The numpy string representation of the endianness. Returns ------- Endianness The human-readable representation of the endianness. Raises ------ ValueError If the endianness is invalid. """ match endianness: case "=": # Use the local system endianness return sys.byteorder case "<": return "little" case ">": return "big" raise ValueError(f"Invalid endianness: {endianness!r}. Expected one of {NUMPY_ENDIANNESS_STR}") def endianness_to_numpy_str(endianness: EndiannessStr) -> NumpyEndiannessStr: """ Convert an endianness literal to its numpy string representation. Parameters ---------- endianness : Endianness The endianness to convert. Returns ------- Literal[">", "<"] The numpy string representation of the endianness. Raises ------ ValueError If the endianness is invalid. """ match endianness: case "little": return "<" case "big": return ">" raise ValueError( f"Invalid endianness: {endianness!r}. Expected one of {ENDIANNESS_STR} or None" ) def get_endianness_from_numpy_dtype(dtype: np.dtype[np.generic]) -> EndiannessStr: """ Gets the endianness from a numpy dtype that has an endianness. This function will raise a ValueError if the numpy data type does not have a concrete endianness. 
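    Examples
    --------
    A minimal sketch:

    >>> import numpy as np
    >>> get_endianness_from_numpy_dtype(np.dtype(">i4"))
    'big'
    >>> get_endianness_from_numpy_dtype(np.dtype("<f8"))
    'little'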
""" endianness = dtype.byteorder if dtype.byteorder in NUMPY_ENDIANNESS_STR: return endianness_from_numpy_str(endianness) # type: ignore [arg-type] raise ValueError(f"The dtype {dtype} has an unsupported endianness: {endianness}") def float_from_json_v2(data: JSONFloatV2) -> float: """ Convert a JSON float to a float (Zarr v2). Parameters ---------- data : JSONFloat The JSON float to convert. Returns ------- float The float value. """ match data: case "NaN": return float("nan") case "Infinity": return float("inf") case "-Infinity": return float("-inf") case _: return float(data) def float_from_json_v3(data: JSONFloatV3) -> float: """ Convert a JSON float to a float (v3). Parameters ---------- data : JSONFloat The JSON float to convert. Returns ------- float The float value. Notes ----- Zarr V3 allows floats to be stored as hex strings. To quote the spec: "...for float32, "NaN" is equivalent to "0x7fc00000". This representation is the only way to specify a NaN value other than the specific NaN value denoted by "NaN"." """ if isinstance(data, str): if data in SPECIAL_FLOAT_STRINGS: return float_from_json_v2(data) # type: ignore[arg-type] if not data.startswith("0x"): msg = ( f"Invalid float value: {data!r}. Expected a string starting with the hex prefix" " '0x', or one of 'NaN', 'Infinity', or '-Infinity'." ) raise ValueError(msg) if len(data[2:]) == 4: dtype_code = ">e" elif len(data[2:]) == 8: dtype_code = ">f" elif len(data[2:]) == 16: dtype_code = ">d" else: msg = ( f"Invalid hexadecimal float value: {data!r}. " "Expected the '0x' prefix to be followed by 4, 8, or 16 numeral characters" ) raise ValueError(msg) return float(struct.unpack(dtype_code, bytes.fromhex(data[2:]))[0]) return float_from_json_v2(data) def bytes_from_json(data: str, *, zarr_format: ZarrFormat) -> bytes: """ Convert a JSON string to bytes Parameters ---------- data : str The JSON string to convert. zarr_format : ZarrFormat The zarr format version. Returns ------- bytes The bytes. """ if zarr_format == 2: return base64.b64decode(data.encode("ascii")) # TODO: differentiate these as needed. This is a spec question. if zarr_format == 3: return base64.b64decode(data.encode("ascii")) raise ValueError(f"Invalid zarr format: {zarr_format}. Expected 2 or 3.") # pragma: no cover def bytes_to_json(data: bytes, zarr_format: ZarrFormat) -> str: """ Convert bytes to JSON. Parameters ---------- data : bytes The bytes to store. zarr_format : ZarrFormat The zarr format version. Returns ------- str The bytes encoded as ascii using the base64 alphabet. """ # TODO: decide if we are going to make this implementation zarr format-specific return base64.b64encode(data).decode("ascii") def float_to_json_v2(data: float | np.floating[Any]) -> JSONFloatV2: """ Convert a float to JSON (v2). Parameters ---------- data : float or np.floating The float value to convert. Returns ------- JSONFloat The JSON representation of the float. """ if np.isnan(data): return "NaN" elif np.isinf(data): return "Infinity" if data > 0 else "-Infinity" return float(data) def float_to_json_v3(data: float | np.floating[Any]) -> JSONFloatV3: """ Convert a float to JSON (v3). Parameters ---------- data : float or np.floating The float value to convert. Returns ------- JSONFloat The JSON representation of the float. 
""" # v3 can in principle handle distinct NaN values, but numpy does not represent these explicitly # so we just reuse the v2 routine here return float_to_json_v2(data) def complex_float_to_json_v3( data: complex | np.complexfloating[Any, Any], ) -> tuple[JSONFloatV3, JSONFloatV3]: """ Convert a complex number to JSON as defined by the Zarr V3 spec. Parameters ---------- data : complex or np.complexfloating The complex value to convert. Returns ------- tuple[JSONFloat, JSONFloat] The JSON representation of the complex number. """ return float_to_json_v3(data.real), float_to_json_v3(data.imag) def complex_float_to_json_v2( data: complex | np.complexfloating[Any, Any], ) -> tuple[JSONFloatV2, JSONFloatV2]: """ Convert a complex number to JSON as defined by the Zarr V2 spec. Parameters ---------- data : complex | np.complexfloating The complex value to convert. Returns ------- tuple[JSONFloat, JSONFloat] The JSON representation of the complex number. """ return float_to_json_v2(data.real), float_to_json_v2(data.imag) def complex_float_from_json_v2(data: tuple[JSONFloatV2, JSONFloatV2]) -> complex: """ Convert a JSON complex float to a complex number (v2). Parameters ---------- data : tuple[JSONFloat, JSONFloat] The JSON complex float to convert. Returns ------- np.complexfloating The complex number. """ return complex(float_from_json_v2(data[0]), float_from_json_v2(data[1])) def complex_float_from_json_v3(data: tuple[JSONFloatV3, JSONFloatV3]) -> complex: """ Convert a JSON complex float to a complex number (v3). Parameters ---------- data : tuple[JSONFloat, JSONFloat] The JSON complex float to convert. Returns ------- np.complexfloating The complex number. """ return complex(float_from_json_v3(data[0]), float_from_json_v3(data[1])) def check_json_float_v2(data: JSON) -> TypeGuard[JSONFloatV2]: """ Check if a JSON value represents a float (v2). Parameters ---------- data : JSON The JSON value to check. Returns ------- Bool True if the data is a float, False otherwise. """ return data in ("NaN", "Infinity", "-Infinity") or isinstance(data, float | int) def check_json_float_v3(data: JSON) -> TypeGuard[JSONFloatV3]: """ Check if a JSON value represents a float (v3). Parameters ---------- data : JSON The JSON value to check. Returns ------- Bool True if the data is a float, False otherwise. """ return check_json_float_v2(data) or (isinstance(data, str) and data.startswith("0x")) def check_json_complex_float_v2(data: JSON) -> TypeGuard[tuple[JSONFloatV2, JSONFloatV2]]: """ Check if a JSON value represents a complex float, as per the behavior of zarr-python 2.x Parameters ---------- data : JSON The JSON value to check. Returns ------- Bool True if the data is a complex float, False otherwise. """ return ( not isinstance(data, str) and isinstance(data, Sequence) and len(data) == 2 and check_json_float_v2(data[0]) and check_json_float_v2(data[1]) ) def check_json_complex_float_v3(data: JSON) -> TypeGuard[tuple[JSONFloatV3, JSONFloatV3]]: """ Check if a JSON value represents a complex float, as per the zarr v3 spec Parameters ---------- data : JSON The JSON value to check. Returns ------- Bool True if the data is a complex float, False otherwise. """ return ( not isinstance(data, str) and isinstance(data, Sequence) and len(data) == 2 and check_json_float_v3(data[0]) and check_json_float_v3(data[1]) ) def check_json_int(data: JSON) -> TypeGuard[int]: """ Check if a JSON value is an integer. Parameters ---------- data : JSON The JSON value to check. 
Returns ------- Bool True if the data is an integer, False otherwise. """ return bool(isinstance(data, int)) def check_json_intish_float(data: JSON) -> TypeGuard[IntishFloat]: """ Check if a JSON value is an "intish float", i.e. a float that represents an integer, like 0.0. Parameters ---------- data : JSON The JSON value to check. Returns ------- Bool True if the data is an intish float, False otherwise. """ return isinstance(data, float) and data.is_integer() def check_json_intish_str(data: JSON) -> TypeGuard[IntishStr]: """ Check if a JSON value is a string that represents an integer, like "0", "42", or "-5". Parameters ---------- data : JSON The JSON value to check. Returns ------- bool True if the data is a string representing an integer, False otherwise. """ if not isinstance(data, str): return False try: int(data) except ValueError: return False else: return True def check_json_floatish_str(data: JSON) -> TypeGuard[FloatishStr]: """ Check if a JSON value is a string that represents a float, like "3.14", "-2.5", or "0.0". Note: This function is intended to be used AFTER check_json_float_v2/v3, so it only handles regular string representations that those functions don't cover. Parameters ---------- data : JSON The JSON value to check. Returns ------- bool True if the data is a string representing a regular float, False otherwise. """ if not isinstance(data, str): return False try: float(data) except ValueError: return False else: return True def check_json_str(data: JSON) -> TypeGuard[str]: """ Check if a JSON value is a string. Parameters ---------- data : JSON The JSON value to check. Returns ------- Bool True if the data is a string, False otherwise. """ return bool(isinstance(data, str)) def check_json_bool(data: JSON) -> TypeGuard[bool]: """ Check if a JSON value is a boolean. Parameters ---------- data : JSON The JSON value to check. Returns ------- Bool True if the data is a boolean, False otherwise. """ return isinstance(data, bool) zarr-python-3.1.5/src/zarr/core/dtype/npy/complex.py000066400000000000000000000312001511007055700224630ustar00rootroot00000000000000from __future__ import annotations from dataclasses import dataclass from typing import ( TYPE_CHECKING, ClassVar, Literal, Self, TypeGuard, overload, ) import numpy as np from zarr.core.dtype.common import ( DataTypeValidationError, DTypeConfig_V2, DTypeJSON, HasEndianness, HasItemSize, check_dtype_spec_v2, ) from zarr.core.dtype.npy.common import ( ComplexLike, TComplexDType_co, TComplexScalar_co, check_json_complex_float_v2, check_json_complex_float_v3, complex_float_from_json_v2, complex_float_from_json_v3, complex_float_to_json_v2, complex_float_to_json_v3, endianness_to_numpy_str, get_endianness_from_numpy_dtype, ) from zarr.core.dtype.wrapper import TBaseDType, ZDType if TYPE_CHECKING: from zarr.core.common import JSON, ZarrFormat @dataclass(frozen=True) class BaseComplex(ZDType[TComplexDType_co, TComplexScalar_co], HasEndianness, HasItemSize): """ A base class for Zarr data types that wrap NumPy complex float data types. """ # This attribute holds the possible zarr v2 JSON names for the data type _zarr_v2_names: ClassVar[tuple[str, ...]] @classmethod def from_native_dtype(cls, dtype: TBaseDType) -> Self: """ Create an instance of this data type from a NumPy complex dtype. Parameters ---------- dtype : TBaseDType The native dtype to convert. Returns ------- Self An instance of this data type. Raises ------ DataTypeValidationError If the dtype is not compatible with this data type. 
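        Examples
        --------
        A minimal sketch using the ``Complex64`` subclass defined later in this module
        (import path assumed):

        >>> import numpy as np
        >>> from zarr.core.dtype import Complex64
        >>> Complex64.from_native_dtype(np.dtype(">c8")).endianness
        'big'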
""" if cls._check_native_dtype(dtype): return cls(endianness=get_endianness_from_numpy_dtype(dtype)) raise DataTypeValidationError( f"Invalid data type: {dtype}. Expected an instance of {cls.dtype_cls}" ) def to_native_dtype(self) -> TComplexDType_co: """ Convert this class to a NumPy complex dtype with the appropriate byte order. Returns ------- TComplexDType_co A NumPy data type object representing the complex data type with the specified byte order. """ byte_order = endianness_to_numpy_str(self.endianness) return self.dtype_cls().newbyteorder(byte_order) # type: ignore[return-value] @classmethod def _check_json_v2(cls, data: DTypeJSON) -> TypeGuard[DTypeConfig_V2[str, None]]: """ Check that the input is a valid JSON representation of this data type. The input data must be a mapping that contains a "name" key that is one of the strings from cls._zarr_v2_names and an "object_codec_id" key that is None. Parameters ---------- data : DTypeJSON The JSON data to check. Returns ------- bool True if the input is a valid JSON representation, False otherwise. """ return ( check_dtype_spec_v2(data) and data["name"] in cls._zarr_v2_names and data["object_codec_id"] is None ) @classmethod def _check_json_v3(cls, data: DTypeJSON) -> TypeGuard[str]: """ Check that the input is a valid JSON representation of this data type in Zarr V3. This method verifies that the provided data matches the expected Zarr V3 representation, which is the string specified by the class-level attribute _zarr_v3_name. Parameters ---------- data : DTypeJSON The JSON data to check. Returns ------- TypeGuard[str] True if the input is a valid representation of this class in Zarr V3, False otherwise. """ return data == cls._zarr_v3_name @classmethod def _from_json_v2(cls, data: DTypeJSON) -> Self: """ Create an instance of this class from Zarr V2-flavored JSON. Parameters ---------- data : DTypeJSON The JSON data. Returns ------- Self An instance of this class. Raises ------ DataTypeValidationError If the input JSON is not a valid representation of this class. """ if cls._check_json_v2(data): # Going via numpy ensures that we get the endianness correct without # annoying string parsing. name = data["name"] return cls.from_native_dtype(np.dtype(name)) msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected one of the strings {cls._zarr_v2_names}." raise DataTypeValidationError(msg) @classmethod def _from_json_v3(cls, data: DTypeJSON) -> Self: """ Create an instance of this class from Zarr V3-flavored JSON. Parameters ---------- data : DTypeJSON The JSON data. Returns ------- Self An instance of this data type. Raises ------ DataTypeValidationError If the input JSON is not a valid representation of this class. """ if cls._check_json_v3(data): return cls() msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected {cls._zarr_v3_name}." raise DataTypeValidationError(msg) @overload def to_json(self, zarr_format: Literal[2]) -> DTypeConfig_V2[str, None]: ... @overload def to_json(self, zarr_format: Literal[3]) -> str: ... def to_json(self, zarr_format: ZarrFormat) -> DTypeConfig_V2[str, None] | str: """ Serialize this object to a JSON-serializable representation. Parameters ---------- zarr_format : ZarrFormat The Zarr format version. Supported values are 2 and 3. Returns ------- DTypeConfig_V2[str, None] | str If ``zarr_format`` is 2, a dictionary with ``"name"`` and ``"object_codec_id"`` keys is returned. If ``zarr_format`` is 3, a string representation of the complex data type is returned. 
Raises ------ ValueError If `zarr_format` is not 2 or 3. """ if zarr_format == 2: return {"name": self.to_native_dtype().str, "object_codec_id": None} elif zarr_format == 3: return self._zarr_v3_name raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def _check_scalar(self, data: object) -> TypeGuard[ComplexLike]: """ Check that the input is a scalar complex value. Parameters ---------- data : object The value to check. Returns ------- TypeGuard[ComplexLike] True if the input is a scalar complex value, False otherwise. """ return isinstance(data, ComplexLike) def _cast_scalar_unchecked(self, data: ComplexLike) -> TComplexScalar_co: """ Cast the provided scalar data to the native scalar type of this class. Parameters ---------- data : ComplexLike The data to cast. Returns ------- TComplexScalar_co The casted data as a numpy complex scalar. Notes ----- This method does not perform any type checking. The input data must be a scalar complex value. """ return self.to_native_dtype().type(data) # type: ignore[return-value] def cast_scalar(self, data: object) -> TComplexScalar_co: """ Attempt to cast a given object to a numpy complex scalar. Parameters ---------- data : object The data to be cast to a numpy complex scalar. Returns ------- TComplexScalar_co The data cast as a numpy complex scalar. Raises ------ TypeError If the data cannot be converted to a numpy complex scalar. """ if self._check_scalar(data): return self._cast_scalar_unchecked(data) msg = ( f"Cannot convert object {data!r} with type {type(data)} to a scalar compatible with the " f"data type {self}." ) raise TypeError(msg) def default_scalar(self) -> TComplexScalar_co: """ Get the default value, which is 0 cast to this dtype Returns ------- Int scalar The default value. """ return self._cast_scalar_unchecked(0) def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> TComplexScalar_co: """ Read a JSON-serializable value as a numpy float. Parameters ---------- data : JSON The JSON-serializable value. zarr_format : ZarrFormat The zarr format version. Returns ------- TScalar_co The numpy float. """ if zarr_format == 2: if check_json_complex_float_v2(data): return self._cast_scalar_unchecked(complex_float_from_json_v2(data)) raise TypeError( f"Invalid type: {data}. Expected a float or a special string encoding of a float." ) elif zarr_format == 3: if check_json_complex_float_v3(data): return self._cast_scalar_unchecked(complex_float_from_json_v3(data)) raise TypeError( f"Invalid type: {data}. Expected a float or a special string encoding of a float." ) raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> JSON: """ Convert an object to a JSON-serializable float. Parameters ---------- data : _BaseScalar The value to convert. zarr_format : ZarrFormat The zarr format version. Returns ------- JSON The JSON-serializable form of the complex number, which is a list of two floats, each of which is encoding according to a zarr-format-specific encoding. """ if zarr_format == 2: return complex_float_to_json_v2(self.cast_scalar(data)) elif zarr_format == 3: return complex_float_to_json_v3(self.cast_scalar(data)) raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @dataclass(frozen=True, kw_only=True) class Complex64(BaseComplex[np.dtypes.Complex64DType, np.complex64]): """ A Zarr data type for arrays containing 64 bit complex floats. 
Wraps the [`np.dtypes.Complex64DType`][numpy.dtypes.Complex64DType] data type. Scalars for this data type are instances of [`np.complex64`][numpy.complex64]. Attributes ---------- dtype_cls : Type[np.dtypes.Complex64DType] The numpy dtype class for this data type. _zarr_v3_name : ClassVar[Literal["complex64"]] The name of this data type in Zarr V3. _zarr_v2_names : ClassVar[tuple[Literal[">c8"], Literal["c8"], Literal["c8", " int: """ The size of a single scalar in bytes. Returns ------- int The size of a single scalar in bytes. """ return 8 @dataclass(frozen=True, kw_only=True) class Complex128(BaseComplex[np.dtypes.Complex128DType, np.complex128], HasEndianness): """ A Zarr data type for arrays containing 64 bit complex floats. Wraps the [`np.dtypes.Complex128DType`][numpy.dtypes.Complex128DType] data type. Scalars for this data type are instances of [`np.complex128`][numpy.complex128]. Attributes ---------- dtype_cls : Type[np.dtypes.Complex128DType] The numpy dtype class for this data type. _zarr_v3_name : ClassVar[Literal["complex128"]] The name of this data type in Zarr V3. _zarr_v2_names : ClassVar[tuple[Literal[">c16"], Literal["c16"], Literal["c16", " int: """ The size of a single scalar in bytes. Returns ------- int The size of a single scalar in bytes. """ return 16 zarr-python-3.1.5/src/zarr/core/dtype/npy/float.py000066400000000000000000000315431511007055700221330ustar00rootroot00000000000000from __future__ import annotations from dataclasses import dataclass from typing import TYPE_CHECKING, ClassVar, Literal, Self, TypeGuard, overload import numpy as np from zarr.core.dtype.common import ( DataTypeValidationError, DTypeConfig_V2, DTypeJSON, HasEndianness, HasItemSize, check_dtype_spec_v2, ) from zarr.core.dtype.npy.common import ( FloatLike, TFloatDType_co, TFloatScalar_co, check_json_float_v2, check_json_float_v3, check_json_floatish_str, endianness_to_numpy_str, float_from_json_v2, float_from_json_v3, float_to_json_v2, float_to_json_v3, get_endianness_from_numpy_dtype, ) from zarr.core.dtype.wrapper import TBaseDType, ZDType if TYPE_CHECKING: from zarr.core.common import JSON, ZarrFormat @dataclass(frozen=True) class BaseFloat(ZDType[TFloatDType_co, TFloatScalar_co], HasEndianness, HasItemSize): """ A base class for Zarr data types that wrap NumPy float data types. """ # This attribute holds the possible zarr v2 JSON names for the data type _zarr_v2_names: ClassVar[tuple[str, ...]] @classmethod def from_native_dtype(cls, dtype: TBaseDType) -> Self: """ Create an instance of this ZDType from a NumPy data type. Parameters ---------- dtype : TBaseDType The NumPy data type. Returns ------- Self An instance of this data type. """ if cls._check_native_dtype(dtype): return cls(endianness=get_endianness_from_numpy_dtype(dtype)) raise DataTypeValidationError( f"Invalid data type: {dtype}. Expected an instance of {cls.dtype_cls}" ) def to_native_dtype(self) -> TFloatDType_co: """ Convert the wrapped data type to a NumPy data type. Returns ------- TFloatDType_co The NumPy data type. """ byte_order = endianness_to_numpy_str(self.endianness) return self.dtype_cls().newbyteorder(byte_order) # type: ignore[return-value] @classmethod def _check_json_v2(cls, data: DTypeJSON) -> TypeGuard[DTypeConfig_V2[str, None]]: """ Check that the input is a valid JSON representation of this data type. Parameters ---------- data : DTypeJSON The JSON data. Returns ------- TypeGuard[DTypeConfig_V2[str, None]] True if the input is a valid JSON representation of this data type, False otherwise. 
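        Examples
        --------
        A minimal sketch using the ``Float32`` subclass defined later in this module
        (import path assumed):

        >>> from zarr.core.dtype import Float32
        >>> Float32._check_json_v2({"name": "<f4", "object_codec_id": None})
        True
        >>> Float32._check_json_v2({"name": "float32", "object_codec_id": None})
        False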
""" return ( check_dtype_spec_v2(data) and data["name"] in cls._zarr_v2_names and data["object_codec_id"] is None ) @classmethod def _check_json_v3(cls, data: DTypeJSON) -> TypeGuard[str]: """ Check that the input is a valid JSON representation of this class in Zarr V3. Parameters ---------- data : DTypeJSON The JSON data. Returns ------- TypeGuard[str] True if the input is a valid JSON representation of this class, False otherwise. """ return data == cls._zarr_v3_name @classmethod def _from_json_v2(cls, data: DTypeJSON) -> Self: """ Create an instance of this ZDType from Zarr v2-flavored JSON. Parameters ---------- data : DTypeJSON The JSON data. Returns ------- Self An instance of this data type. """ if cls._check_json_v2(data): # Going via NumPy ensures that we get the endianness correct without # annoying string parsing. name = data["name"] return cls.from_native_dtype(np.dtype(name)) msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected one of the strings {cls._zarr_v2_names}." raise DataTypeValidationError(msg) @classmethod def _from_json_v3(cls, data: DTypeJSON) -> Self: """ Create an instance of this ZDType from Zarr v3-flavored JSON. Parameters ---------- data : DTypeJSON The JSON data. Returns ------- Self An instance of this data type. """ if cls._check_json_v3(data): return cls() msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected {cls._zarr_v3_name}." raise DataTypeValidationError(msg) @overload def to_json(self, zarr_format: Literal[2]) -> DTypeConfig_V2[str, None]: ... @overload def to_json(self, zarr_format: Literal[3]) -> str: ... def to_json(self, zarr_format: ZarrFormat) -> DTypeConfig_V2[str, None] | str: """ Convert the wrapped data type to a JSON-serializable form. Parameters ---------- zarr_format : ZarrFormat The zarr format version. Returns ------- DTypeConfig_V2[str, None] or str The JSON-serializable representation of the wrapped data type. Raises ------ ValueError If zarr_format is not 2 or 3. """ if zarr_format == 2: return {"name": self.to_native_dtype().str, "object_codec_id": None} elif zarr_format == 3: return self._zarr_v3_name raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def _check_scalar(self, data: object) -> TypeGuard[FloatLike]: """ Check that the input is a valid scalar value. Parameters ---------- data : object The input to check. Returns ------- TypeGuard[FloatLike] True if the input is a valid scalar value, False otherwise. """ return isinstance(data, FloatLike) def _cast_scalar_unchecked(self, data: FloatLike) -> TFloatScalar_co: """ Cast a scalar value to a NumPy float scalar. Parameters ---------- data : FloatLike The scalar value to cast. Returns ------- TFloatScalar_co The NumPy float scalar. """ return self.to_native_dtype().type(data) # type: ignore[return-value] def cast_scalar(self, data: object) -> TFloatScalar_co: """ Cast a scalar value to a NumPy float scalar. Parameters ---------- data : object The scalar value to cast. Returns ------- TFloatScalar_co The NumPy float scalar. """ if self._check_scalar(data): return self._cast_scalar_unchecked(data) msg = ( f"Cannot convert object {data!r} with type {type(data)} to a scalar compatible with the " f"data type {self}." ) raise TypeError(msg) def default_scalar(self) -> TFloatScalar_co: """ Get the default value, which is 0 cast to this zdtype. Returns ------- TFloatScalar_co The default value. 
""" return self._cast_scalar_unchecked(0) def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> TFloatScalar_co: """ Read a JSON-serializable value as a NumPy float scalar. Parameters ---------- data : JSON The JSON-serializable value. zarr_format : ZarrFormat The zarr format version. Returns ------- TFloatScalar_co The NumPy float scalar. """ if zarr_format == 2: if check_json_float_v2(data): return self._cast_scalar_unchecked(float_from_json_v2(data)) elif check_json_floatish_str(data): return self._cast_scalar_unchecked(float(data)) else: raise TypeError( f"Invalid type: {data}. Expected a float or a special string encoding of a float." ) elif zarr_format == 3: if check_json_float_v3(data): return self._cast_scalar_unchecked(float_from_json_v3(data)) elif check_json_floatish_str(data): return self._cast_scalar_unchecked(float(data)) else: raise TypeError( f"Invalid type: {data}. Expected a float or a special string encoding of a float." ) else: raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> float | str: """ Convert an object to a JSON-serializable float. Parameters ---------- data : _BaseScalar The value to convert. zarr_format : ZarrFormat The zarr format version. Returns ------- JSON The JSON-serializable form of the float, which is potentially a number or a string. See the zarr specifications for details on the JSON encoding for floats. """ if zarr_format == 2: return float_to_json_v2(self.cast_scalar(data)) elif zarr_format == 3: return float_to_json_v3(self.cast_scalar(data)) else: raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @dataclass(frozen=True, kw_only=True) class Float16(BaseFloat[np.dtypes.Float16DType, np.float16]): """ A Zarr data type for arrays containing 16-bit floating point numbers. Wraps the [`np.dtypes.Float16DType`][numpy.dtypes.Float16DType] data type. Scalars for this data type are instances of [`np.float16`][numpy.float16]. Attributes ---------- dtype_cls : Type[np.dtypes.Float16DType] The NumPy dtype class for this data type. References ---------- This class implements the float16 data type defined in Zarr V2 and V3. See the [Zarr V2](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding) and [Zarr V3](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/data-types/index.rst) specification documents for details. """ dtype_cls = np.dtypes.Float16DType _zarr_v3_name = "float16" _zarr_v2_names: ClassVar[tuple[Literal[">f2"], Literal["f2", " int: """ The size of a single scalar in bytes. Returns ------- int The size of a single scalar in bytes. """ return 2 @dataclass(frozen=True, kw_only=True) class Float32(BaseFloat[np.dtypes.Float32DType, np.float32]): """ A Zarr data type for arrays containing 32-bit floating point numbers. Wraps the [`np.dtypes.Float32DType`][numpy.dtypes.Float32DType] data type. Scalars for this data type are instances of [`np.float32`][numpy.float32]. Attributes ---------- dtype_cls : Type[np.dtypes.Float32DType] The NumPy dtype class for this data type. References ---------- This class implements the float32 data type defined in Zarr V2 and V3. See the [Zarr V2](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding) and [Zarr V3](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/data-types/index.rst) specification documents for details. 
""" dtype_cls = np.dtypes.Float32DType _zarr_v3_name = "float32" _zarr_v2_names: ClassVar[tuple[Literal[">f4"], Literal["f4", " int: """ The size of a single scalar in bytes. Returns ------- int The size of a single scalar in bytes. """ return 4 @dataclass(frozen=True, kw_only=True) class Float64(BaseFloat[np.dtypes.Float64DType, np.float64]): """ A Zarr data type for arrays containing 64-bit floating point numbers. Wraps the [`np.dtypes.Float64DType`][numpy.dtypes.Float64DType] data type. Scalars for this data type are instances of [`np.float64`][numpy.float64]. Attributes ---------- dtype_cls : Type[np.dtypes.Float64DType] The NumPy dtype class for this data type. References ---------- This class implements the float64 data type defined in Zarr V2 and V3. See the [Zarr V2](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding) and [Zarr V3](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/data-types/index.rst) specification documents for details. """ dtype_cls = np.dtypes.Float64DType _zarr_v3_name = "float64" _zarr_v2_names: ClassVar[tuple[Literal[">f8"], Literal["f8", " int: """ The size of a single scalar in bytes. Returns ------- int The size of a single scalar in bytes. """ return 8 zarr-python-3.1.5/src/zarr/core/dtype/npy/int.py000066400000000000000000001340231511007055700216150ustar00rootroot00000000000000from __future__ import annotations from dataclasses import dataclass from typing import ( TYPE_CHECKING, ClassVar, Literal, Self, SupportsIndex, SupportsInt, TypeGuard, TypeVar, overload, ) import numpy as np from zarr.core.dtype.common import ( DataTypeValidationError, DTypeConfig_V2, DTypeJSON, HasEndianness, HasItemSize, check_dtype_spec_v2, ) from zarr.core.dtype.npy.common import ( check_json_int, check_json_intish_float, check_json_intish_str, endianness_to_numpy_str, get_endianness_from_numpy_dtype, ) from zarr.core.dtype.wrapper import TBaseDType, ZDType if TYPE_CHECKING: from zarr.core.common import JSON, ZarrFormat _NumpyIntDType = ( np.dtypes.Int8DType | np.dtypes.Int16DType | np.dtypes.Int32DType | np.dtypes.Int64DType | np.dtypes.UInt8DType | np.dtypes.UInt16DType | np.dtypes.UInt32DType | np.dtypes.UInt64DType ) _NumpyIntScalar = ( np.int8 | np.int16 | np.int32 | np.int64 | np.uint8 | np.uint16 | np.uint32 | np.uint64 ) TIntDType_co = TypeVar("TIntDType_co", bound=_NumpyIntDType, covariant=True) TIntScalar_co = TypeVar("TIntScalar_co", bound=_NumpyIntScalar, covariant=True) IntLike = SupportsInt | SupportsIndex | bytes | str @dataclass(frozen=True) class BaseInt(ZDType[TIntDType_co, TIntScalar_co], HasItemSize): """ A base class for integer data types in Zarr. This class provides methods for serialization and deserialization of integer types in both Zarr v2 and v3 formats, as well as methods for checking and casting scalars. """ _zarr_v2_names: ClassVar[tuple[str, ...]] @classmethod def _check_json_v2(cls, data: object) -> TypeGuard[DTypeConfig_V2[str, None]]: """ Check that the input is a valid JSON representation of this integer data type in Zarr V2. This method verifies that the provided data matches the expected Zarr V2 representation for this data type. The input data must be a mapping that contains a "name" key that is one of the strings from cls._zarr_v2_names and an "object_codec_id" key that is None. Parameters ---------- data : object The JSON data to check. Returns ------- TypeGuard[DTypeConfig_V2[str, None]] True if the input is a valid representation of this class in Zarr V2, False otherwise. 
""" return ( check_dtype_spec_v2(data) and data["name"] in cls._zarr_v2_names and data["object_codec_id"] is None ) @classmethod def _check_json_v3(cls, data: object) -> TypeGuard[str]: """ Check that the input is a valid JSON representation of this class in Zarr V3. Parameters ---------- data : object The JSON data to check. Returns ------- TypeGuard[str] True if the input is a valid representation of this class in Zarr v3, False otherwise. """ return data == cls._zarr_v3_name def _check_scalar(self, data: object) -> TypeGuard[IntLike]: """ Check if the input object is of an IntLike type. This method verifies whether the provided data can be considered as an integer-like value, which includes objects supporting integer conversion. Parameters ---------- data : object The data to check. Returns ------- TypeGuard[IntLike] True if the data is IntLike, False otherwise. """ return isinstance(data, IntLike) def _cast_scalar_unchecked(self, data: IntLike) -> TIntScalar_co: """ Casts a given scalar value to the native integer scalar type without type checking. Parameters ---------- data : IntLike The scalar value to cast. Returns ------- TIntScalar_co The casted integer scalar of the native dtype. """ return self.to_native_dtype().type(data) # type: ignore[return-value] def cast_scalar(self, data: object) -> TIntScalar_co: """ Attempt to cast a given object to a NumPy integer scalar. Parameters ---------- data : object The data to be cast to a NumPy integer scalar. Returns ------- TIntScalar_co The data cast as a NumPy integer scalar. Raises ------ TypeError If the data cannot be converted to a NumPy integer scalar. """ if self._check_scalar(data): return self._cast_scalar_unchecked(data) msg = ( f"Cannot convert object {data!r} with type {type(data)} to a scalar compatible with the " f"data type {self}." ) raise TypeError(msg) def default_scalar(self) -> TIntScalar_co: """ Get the default value, which is 0 cast to this dtype. Returns ------- TIntScalar_co The default value. """ return self._cast_scalar_unchecked(0) def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> TIntScalar_co: """ Read a JSON-serializable value as a NumPy int scalar. Parameters ---------- data : JSON The JSON-serializable value. zarr_format : ZarrFormat The Zarr format version. Returns ------- TIntScalar_co The NumPy int scalar. Raises ------ TypeError If the input is not a valid integer type. """ if check_json_int(data): return self._cast_scalar_unchecked(data) if check_json_intish_float(data): return self._cast_scalar_unchecked(int(data)) if check_json_intish_str(data): return self._cast_scalar_unchecked(int(data)) raise TypeError(f"Invalid type: {data}. Expected an integer.") def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> int: """ Convert an object to a JSON serializable scalar. For the integer data types, the JSON form is a plain integer. Parameters ---------- data : object The value to convert. zarr_format : ZarrFormat The Zarr format version. Returns ------- int The JSON-serializable form of the scalar. """ return int(self.cast_scalar(data)) @dataclass(frozen=True, kw_only=True) class Int8(BaseInt[np.dtypes.Int8DType, np.int8]): """ A Zarr data type for arrays containing 8-bit signed integers. Wraps the [`np.dtypes.Int8DType`][numpy.dtypes.Int8DType] data type. Scalars for this data type are instances of [`np.int8`][numpy.int8]. Attributes ---------- dtype_cls : np.dtypes.Int8DType The class of the underlying NumPy dtype. 
References ---------- This class implements the 8-bit signed integer data type defined in Zarr V2 and V3. See the [Zarr V2](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding) and [Zarr V3](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/data-types/index.rst) specification documents for details. """ dtype_cls = np.dtypes.Int8DType _zarr_v3_name: ClassVar[Literal["int8"]] = "int8" _zarr_v2_names: ClassVar[tuple[Literal["|i1"]]] = ("|i1",) @classmethod def from_native_dtype(cls, dtype: TBaseDType) -> Self: """ Create an Int8 from a np.dtype('int8') instance. Parameters ---------- dtype : TBaseDType The np.dtype('int8') instance. Returns ------- Self An instance of this data type. Raises ------ DataTypeValidationError If the input data type is not a valid representation of this class Int8. """ if cls._check_native_dtype(dtype): return cls() raise DataTypeValidationError( f"Invalid data type: {dtype}. Expected an instance of {cls.dtype_cls}" ) def to_native_dtype(self: Self) -> np.dtypes.Int8DType: """ Convert the Int8 instance to a np.dtype('int8') instance. Returns ------- np.dtypes.Int8DType The np.dtype('int8') instance. """ return self.dtype_cls() @classmethod def _from_json_v2(cls, data: DTypeJSON) -> Self: """ Create an Int8 from Zarr V2-flavored JSON. Parameters ---------- data : DTypeJSON The JSON data. Returns ------- Self An instance of this data type. Raises ------ DataTypeValidationError If the input JSON is not a valid representation of this class Int8. """ if cls._check_json_v2(data): return cls() msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected the string {cls._zarr_v2_names[0]!r}" raise DataTypeValidationError(msg) @classmethod def _from_json_v3(cls, data: DTypeJSON) -> Self: """ Create an Int8 from Zarr V3-flavored JSON. Parameters ---------- data : DTypeJSON The JSON data. Returns ------- Self An instance of this data type. Raises ------ DataTypeValidationError If the input JSON is not a valid representation of this class Int8. """ if cls._check_json_v3(data): return cls() msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected the string {cls._zarr_v3_name!r}" raise DataTypeValidationError(msg) @overload def to_json(self, zarr_format: Literal[2]) -> DTypeConfig_V2[Literal["|i1"], None]: ... @overload def to_json(self, zarr_format: Literal[3]) -> Literal["int8"]: ... def to_json( self, zarr_format: ZarrFormat ) -> DTypeConfig_V2[Literal["|i1"], None] | Literal["int8"]: """ Convert the data type to a JSON-serializable form. Parameters ---------- zarr_format : ZarrFormat The Zarr format version. Returns ------- ``DTypeConfig_V2[Literal["|i1"], None] | Literal["int8"]`` The JSON-serializable representation of the data type. Raises ------ ValueError If the zarr_format is not 2 or 3. """ if zarr_format == 2: return {"name": self._zarr_v2_names[0], "object_codec_id": None} elif zarr_format == 3: return self._zarr_v3_name raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @property def item_size(self) -> int: """ The size of a single scalar in bytes. Returns ------- int The size of a single scalar in bytes. """ return 1 @dataclass(frozen=True, kw_only=True) class UInt8(BaseInt[np.dtypes.UInt8DType, np.uint8]): """ A Zarr data type for arrays containing 8-bit unsigned integers. Wraps the [`np.dtypes.UInt8DType`][numpy.dtypes.UInt8DType] data type. Scalars for this data type are instances of [`np.uint8`][numpy.uint8]. 
Attributes ---------- dtype_cls : np.dtypes.UInt8DType The class of the underlying NumPy dtype. References ---------- This class implements the 8-bit unsigned integer data type defined in Zarr V2 and V3. See the [Zarr V2](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding) and [Zarr V3](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/data-types/index.rst) specification documents for details. """ dtype_cls = np.dtypes.UInt8DType _zarr_v3_name: ClassVar[Literal["uint8"]] = "uint8" _zarr_v2_names: ClassVar[tuple[Literal["|u1"]]] = ("|u1",) @classmethod def from_native_dtype(cls, dtype: TBaseDType) -> Self: """ Create a UInt8 from a np.dtype('uint8') instance. """ if cls._check_native_dtype(dtype): return cls() raise DataTypeValidationError( f"Invalid data type: {dtype}. Expected an instance of {cls.dtype_cls}" ) def to_native_dtype(self: Self) -> np.dtypes.UInt8DType: """ Create a NumPy unsigned 8-bit integer dtype instance from this UInt8 ZDType. Returns ------- np.dtypes.UInt8DType The NumPy unsigned 8-bit integer dtype. """ return self.dtype_cls() @classmethod def _from_json_v2(cls, data: DTypeJSON) -> Self: """ Create an instance of this data type from Zarr V2-flavored JSON. Parameters ---------- data : DTypeJSON The JSON data. Returns ------- Self An instance of this data type. Raises ------ DataTypeValidationError If the input JSON is not a valid representation of this class. """ if cls._check_json_v2(data): return cls() msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected the string {cls._zarr_v2_names[0]!r}" raise DataTypeValidationError(msg) @classmethod def _from_json_v3(cls, data: DTypeJSON) -> Self: """ Create an instance of this data type from Zarr V3-flavored JSON. Parameters ---------- data : DTypeJSON The JSON data. Returns ------- Self An instance of this data type. Raises ------ DataTypeValidationError If the input JSON is not a valid representation of this class. """ if cls._check_json_v3(data): return cls() msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected the string {cls._zarr_v3_name!r}" raise DataTypeValidationError(msg) @overload def to_json(self, zarr_format: Literal[2]) -> DTypeConfig_V2[Literal["|u1"], None]: ... @overload def to_json(self, zarr_format: Literal[3]) -> Literal["uint8"]: ... def to_json( self, zarr_format: ZarrFormat ) -> DTypeConfig_V2[Literal["|u1"], None] | Literal["uint8"]: """ Convert the data type to a JSON-serializable form. Parameters ---------- zarr_format : ZarrFormat The Zarr format version. Supported values are 2 and 3. Returns ------- ``DTypeConfig_V2[Literal["|u1"], None] | Literal["uint8"]`` The JSON-serializable representation of the data type. Raises ------ ValueError If `zarr_format` is not 2 or 3. """ if zarr_format == 2: # For Zarr format version 2, return a dictionary with the name and object codec ID. return {"name": self._zarr_v2_names[0], "object_codec_id": None} elif zarr_format == 3: # For Zarr format version 3, return the v3 name as a string. return self._zarr_v3_name # Raise an error if the zarr_format is neither 2 nor 3. raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @property def item_size(self) -> int: """ The size of a single scalar in bytes. Returns ------- int The size of a single scalar in bytes. 
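        Examples
        --------
        A minimal sketch (import path assumed):

        >>> from zarr.core.dtype import UInt8
        >>> UInt8().item_size
        1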
""" return 1 @dataclass(frozen=True, kw_only=True) class Int16(BaseInt[np.dtypes.Int16DType, np.int16], HasEndianness): """ A Zarr data type for arrays containing 16-bit signed integers. Wraps the [`np.dtypes.Int16DType`][numpy.dtypes.Int16DType] data type. Scalars for this data type are instances of [`np.int16`][numpy.int16]. Attributes ---------- dtype_cls : np.dtypes.Int16DType The class of the underlying NumPy dtype. References ---------- This class implements the 16-bit signed integer data type defined in Zarr V2 and V3. See the [Zarr V2](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding) and [Zarr V3](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/data-types/index.rst) specification documents for details. """ dtype_cls = np.dtypes.Int16DType _zarr_v3_name: ClassVar[Literal["int16"]] = "int16" _zarr_v2_names: ClassVar[tuple[Literal[">i2"], Literal["i2", " Self: """ Create an instance of this data type from a np.dtype('int16') instance. Parameters ---------- dtype : np.dtype The instance of np.dtype('int16') to create from. Returns ------- Self An instance of this data type. Raises ------ DataTypeValidationError If the input data type is not an instance of np.dtype('int16'). """ if cls._check_native_dtype(dtype): return cls(endianness=get_endianness_from_numpy_dtype(dtype)) raise DataTypeValidationError( f"Invalid data type: {dtype}. Expected an instance of {cls.dtype_cls}" ) def to_native_dtype(self) -> np.dtypes.Int16DType: """ Convert the data type to a np.dtype('int16') instance. Returns ------- np.dtype The np.dtype('int16') instance. """ byte_order = endianness_to_numpy_str(self.endianness) return self.dtype_cls().newbyteorder(byte_order) @classmethod def _from_json_v2(cls, data: DTypeJSON) -> Self: """ Create an instance of this data type from Zarr V2-flavored JSON. Parameters ---------- data : DTypeJSON The JSON data. Returns ------- Self An instance of this data type. Raises ------ DataTypeValidationError If the input JSON is not a valid representation of this class. """ if cls._check_json_v2(data): # Going via NumPy ensures that we get the endianness correct without # annoying string parsing. name = data["name"] return cls.from_native_dtype(np.dtype(name)) msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected one of the strings {cls._zarr_v2_names!r}." raise DataTypeValidationError(msg) @classmethod def _from_json_v3(cls, data: DTypeJSON) -> Self: """ Create an instance of this data type from Zarr V3-flavored JSON. Parameters ---------- data : DTypeJSON The JSON data. Returns ------- Self An instance of this data type. Raises ------ DataTypeValidationError If the input JSON is not a valid representation of this class. """ if cls._check_json_v3(data): return cls() msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected the string {cls._zarr_v3_name!r}" raise DataTypeValidationError(msg) @overload def to_json(self, zarr_format: Literal[2]) -> DTypeConfig_V2[Literal[">i2", " Literal["int16"]: ... def to_json( self, zarr_format: ZarrFormat ) -> DTypeConfig_V2[Literal[">i2", "i2", " int: """ The size of a single scalar in bytes. Returns ------- int The size of a single scalar in bytes. """ return 2 @dataclass(frozen=True, kw_only=True) class UInt16(BaseInt[np.dtypes.UInt16DType, np.uint16], HasEndianness): """ A Zarr data type for arrays containing 16-bit unsigned integers. Wraps the [`np.dtypes.UInt16DType`][numpy.dtypes.UInt16DType] data type. 
Scalars for this data type are instances of [`np.uint16`][numpy.uint16]. Attributes ---------- dtype_cls : np.dtypes.UInt16DType The class of the underlying NumPy dtype. References ---------- This class implements the unsigned 16-bit unsigned integer data type defined in Zarr V2 and V3. See the [Zarr V2](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding) and [Zarr V3](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/data-types/index.rst) specification documents for details. """ dtype_cls = np.dtypes.UInt16DType _zarr_v3_name: ClassVar[Literal["uint16"]] = "uint16" _zarr_v2_names: ClassVar[tuple[Literal[">u2"], Literal["u2", " Self: """ Create an instance of this data type from a np.dtype('uint16') instance. Parameters ---------- dtype : np.dtype The NumPy data type. Returns ------- Self An instance of this data type. Raises ------ DataTypeValidationError If the input data type is not an instance of np.dtype('uint16'). """ if cls._check_native_dtype(dtype): return cls(endianness=get_endianness_from_numpy_dtype(dtype)) raise DataTypeValidationError( f"Invalid data type: {dtype}. Expected an instance of {cls.dtype_cls}" ) def to_native_dtype(self) -> np.dtypes.UInt16DType: """ Convert the data type to a np.dtype('uint16') instance. Returns ------- np.dtype The np.dtype('uint16') instance. """ byte_order = endianness_to_numpy_str(self.endianness) return self.dtype_cls().newbyteorder(byte_order) @classmethod def _from_json_v2(cls, data: DTypeJSON) -> Self: """ Create an instance of this data type from Zarr V2-flavored JSON. Parameters ---------- data : DTypeJSON The JSON data. Returns ------- Self An instance of this data type. Raises ------ DataTypeValidationError If the input JSON is not a valid representation of this class. """ if cls._check_json_v2(data): # Going via NumPy ensures that we get the endianness correct without # annoying string parsing. name = data["name"] return cls.from_native_dtype(np.dtype(name)) msg = f"Invalid JSON representation of UInt16. Got {data!r}, expected one of the strings {cls._zarr_v2_names}." raise DataTypeValidationError(msg) @classmethod def _from_json_v3(cls, data: DTypeJSON) -> Self: """ Create an instance of this data type from Zarr V3-flavored JSON. Parameters ---------- data : DTypeJSON The JSON data. Returns ------- Self An instance of this data type. Raises ------ DataTypeValidationError If the input JSON is not a valid representation of this class. """ if cls._check_json_v3(data): return cls() msg = f"Invalid JSON representation of UInt16. Got {data!r}, expected the string {cls._zarr_v3_name!r}" raise DataTypeValidationError(msg) @overload def to_json(self, zarr_format: Literal[2]) -> DTypeConfig_V2[Literal[">u2", " Literal["uint16"]: ... def to_json( self, zarr_format: ZarrFormat ) -> DTypeConfig_V2[Literal[">u2", "u2", " int: """ The size of a single scalar in bytes. Returns ------- int The size of a single scalar in bytes. """ return 2 @dataclass(frozen=True, kw_only=True) class Int32(BaseInt[np.dtypes.Int32DType, np.int32], HasEndianness): """ A Zarr data type for arrays containing 32-bit signed integers. Wraps the [`np.dtypes.Int32DType`][numpy.dtypes.Int32DType] data type. Scalars for this data type are instances of [`np.int32`][numpy.int32]. Attributes ---------- dtype_cls : np.dtypes.Int32DType The class of the underlying NumPy dtype. References ---------- This class implements the 32-bit signed integer data type defined in Zarr V2 and V3. 
See the [Zarr V2](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding) and [Zarr V3](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/data-types/index.rst) specification documents for details. """ dtype_cls = np.dtypes.Int32DType _zarr_v3_name: ClassVar[Literal["int32"]] = "int32" _zarr_v2_names: ClassVar[tuple[Literal[">i4"], Literal["i4", " TypeGuard[np.dtypes.Int32DType]: """ A type guard that checks if the input is assignable to the type of ``cls.dtype_class`` This method is overridden for this particular data type because of a Windows-specific issue where np.dtype('i') creates an instance of ``np.dtypes.IntDType``, rather than an instance of ``np.dtypes.Int32DType``, even though both represent 32-bit signed integers. Parameters ---------- dtype : TDType The dtype to check. Returns ------- Bool True if the dtype matches, False otherwise. """ return super()._check_native_dtype(dtype) or dtype == np.dtypes.Int32DType() @classmethod def from_native_dtype(cls: type[Self], dtype: TBaseDType) -> Self: """ Create an Int32 from a np.dtype('int32') instance. Parameters ---------- dtype : TBaseDType The np.dtype('int32') instance. Returns ------- Self An instance of this data type. Raises ------ DataTypeValidationError If the input JSON is not a valid representation of this class Int32. """ if cls._check_native_dtype(dtype): return cls(endianness=get_endianness_from_numpy_dtype(dtype)) raise DataTypeValidationError( f"Invalid data type: {dtype}. Expected an instance of {cls.dtype_cls}" ) def to_native_dtype(self: Self) -> np.dtypes.Int32DType: """ Convert the Int32 instance to a np.dtype('int32') instance. Returns ------- np.dtypes.Int32DType The np.dtype('int32') instance. """ byte_order = endianness_to_numpy_str(self.endianness) return self.dtype_cls().newbyteorder(byte_order) @classmethod def _from_json_v2(cls, data: DTypeJSON) -> Self: """ Create an Int32 from Zarr V2-flavored JSON. Parameters ---------- data : DTypeJSON The JSON data. Returns ------- Self An instance of this data type. Raises ------ DataTypeValidationError If the input JSON is not a valid representation of this class Int32. """ if cls._check_json_v2(data): # Going via NumPy ensures that we get the endianness correct without # annoying string parsing. name = data["name"] return cls.from_native_dtype(np.dtype(name)) msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected one of the strings {cls._zarr_v2_names!r}." raise DataTypeValidationError(msg) @classmethod def _from_json_v3(cls, data: DTypeJSON) -> Self: """ Create an Int32 from Zarr V3-flavored JSON. Parameters ---------- data : DTypeJSON The JSON data. Returns ------- Self An instance of this data type. Raises ------ DataTypeValidationError If the input JSON is not a valid representation of this class Int32. """ if cls._check_json_v3(data): return cls() msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected the string {cls._zarr_v3_name!r}" raise DataTypeValidationError(msg) @overload def to_json(self, zarr_format: Literal[2]) -> DTypeConfig_V2[Literal[">i4", " Literal["int32"]: ... def to_json( self, zarr_format: ZarrFormat ) -> DTypeConfig_V2[Literal[">i4", "i4", " int: """ The size of a single scalar in bytes. Returns ------- int The size of a single scalar in bytes. """ return 4 @dataclass(frozen=True, kw_only=True) class UInt32(BaseInt[np.dtypes.UInt32DType, np.uint32], HasEndianness): """ A Zarr data type for arrays containing 32-bit unsigned integers. 
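    The short sketch below illustrates how byte order is carried by this data type. It
    assumes that ``UInt32`` can be imported from ``zarr.core.dtype`` and that it follows
    the same ``to_json`` conventions as the other fixed-size integer types in this module;
    both are assumptions made only for the example.

    ```python
    import numpy as np

    from zarr.core.dtype import UInt32  # assumed import location

    big = UInt32.from_native_dtype(np.dtype(">u4"))
    big.to_native_dtype()       # dtype('>u4')
    big.to_json(zarr_format=2)  # {'name': '>u4', 'object_codec_id': None}
    ```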
Wraps the [`np.dtypes.UInt32DType`][numpy.dtypes.UInt32DType] data type. Scalars for this data type are instances of [`np.uint32`][numpy.uint32]. Attributes ---------- dtype_cls : np.dtypes.UInt32DType The class of the underlying NumPy dtype. References ---------- This class implements the 32-bit unsigned integer data type defined in Zarr V2 and V3. See the [Zarr V2](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding) and [Zarr V3](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/data-types/index.rst) specification documents for details. """ dtype_cls = np.dtypes.UInt32DType _zarr_v3_name: ClassVar[Literal["uint32"]] = "uint32" _zarr_v2_names: ClassVar[tuple[Literal[">u4"], Literal["u4", " Self: """ Create a UInt32 from a np.dtype('uint32') instance. Parameters ---------- dtype : TBaseDType The NumPy data type. Returns ------- Self An instance of this data type. Raises ------ DataTypeValidationError If the input data type is not a valid representation of this class 32-bit unsigned integer. """ if cls._check_native_dtype(dtype): return cls(endianness=get_endianness_from_numpy_dtype(dtype)) raise DataTypeValidationError( f"Invalid data type: {dtype}. Expected an instance of {cls.dtype_cls}" ) def to_native_dtype(self) -> np.dtypes.UInt32DType: """ Create a NumPy unsigned 32-bit integer dtype instance from this UInt32 ZDType. Returns ------- np.dtypes.UInt32DType The NumPy unsigned 32-bit integer dtype. """ byte_order = endianness_to_numpy_str(self.endianness) return self.dtype_cls().newbyteorder(byte_order) @classmethod def _from_json_v2(cls, data: DTypeJSON) -> Self: """ Create an instance of this data type from Zarr V2-flavored JSON. Parameters ---------- data : DTypeJSON The JSON data. Returns ------- Self An instance of this data type. Raises ------ DataTypeValidationError If the input JSON is not a valid representation of this class 32-bit unsigned integer. """ if cls._check_json_v2(data): # Going via NumPy ensures that we get the endianness correct without # annoying string parsing. name = data["name"] return cls.from_native_dtype(np.dtype(name)) msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected one of the strings {cls._zarr_v2_names}." raise DataTypeValidationError(msg) @classmethod def _from_json_v3(cls, data: DTypeJSON) -> Self: """ Create an instance of this data type from Zarr V3-flavored JSON. Parameters ---------- data : DTypeJSON The JSON data. Returns ------- Self An instance of this data type. Raises ------ DataTypeValidationError If the input JSON is not a valid representation of this class 32-bit unsigned integer. """ if cls._check_json_v3(data): return cls() msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected the string {cls._zarr_v3_name!r}" raise DataTypeValidationError(msg) @overload def to_json(self, zarr_format: Literal[2]) -> DTypeConfig_V2[Literal[">u4", " Literal["uint32"]: ... def to_json( self, zarr_format: ZarrFormat ) -> DTypeConfig_V2[Literal[">u4", "u4", " int: """ The size of a single scalar in bytes. Returns ------- int The size of a single scalar in bytes. """ return 4 @dataclass(frozen=True, kw_only=True) class Int64(BaseInt[np.dtypes.Int64DType, np.int64], HasEndianness): """ A Zarr data type for arrays containing 64-bit signed integers. Wraps the [`np.dtypes.Int64DType`][numpy.dtypes.Int64DType] data type. Scalars for this data type are instances of [`np.int64`][numpy.int64]. 
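    A brief, illustrative sketch is shown below; it assumes that ``Int64`` can be imported
    from ``zarr.core.dtype``, which is an assumption made only for the example.

    ```python
    import numpy as np

    from zarr.core.dtype import Int64  # assumed import location

    zdtype = Int64.from_native_dtype(np.dtype("<i8"))
    zdtype.to_json(zarr_format=3)  # 'int64'
    zdtype.item_size               # 8
    ```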
Attributes ---------- dtype_cls : np.dtypes.Int64DType The class of the underlying NumPy dtype. References ---------- This class implements the 64-bit signed integer data type defined in Zarr V2 and V3. See the [Zarr V2](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding) and [Zarr V3](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/data-types/index.rst) specification documents for details. """ dtype_cls = np.dtypes.Int64DType _zarr_v3_name: ClassVar[Literal["int64"]] = "int64" _zarr_v2_names: ClassVar[tuple[Literal[">i8"], Literal["i8", " Self: """ Create an Int64 from a np.dtype('int64') instance. Parameters ---------- dtype : TBaseDType The NumPy data type. Returns ------- Self An instance of this data type. Raises ------ DataTypeValidationError If the input data type is not a valid representation of this class 64-bit signed integer. """ if cls._check_native_dtype(dtype): return cls(endianness=get_endianness_from_numpy_dtype(dtype)) raise DataTypeValidationError( f"Invalid data type: {dtype}. Expected an instance of {cls.dtype_cls}" ) def to_native_dtype(self) -> np.dtypes.Int64DType: """ Create a NumPy signed 64-bit integer dtype instance from this Int64 ZDType. Returns ------- np.dtypes.Int64DType The NumPy signed 64-bit integer dtype. """ byte_order = endianness_to_numpy_str(self.endianness) return self.dtype_cls().newbyteorder(byte_order) @classmethod def _from_json_v2(cls, data: DTypeJSON) -> Self: """ Create an instance of this data type from Zarr V2-flavored JSON. Parameters ---------- data : DTypeJSON The JSON data. Returns ------- Self An instance of this data type. Raises ------ DataTypeValidationError If the input JSON is not a valid representation of this class 64-bit signed integer. """ if cls._check_json_v2(data): # Going via NumPy ensures that we get the endianness correct without # annoying string parsing. name = data["name"] return cls.from_native_dtype(np.dtype(name)) msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected one of the strings {cls._zarr_v2_names}." raise DataTypeValidationError(msg) @classmethod def _from_json_v3(cls, data: DTypeJSON) -> Self: """ Create an instance of this data type from Zarr V3-flavored JSON. Parameters ---------- data : DTypeJSON The JSON data. Returns ------- Self An instance of this data type. Raises ------ DataTypeValidationError If the input JSON is not a valid representation of this class 64-bit signed integer. """ if cls._check_json_v3(data): return cls() msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected the string {cls._zarr_v3_name!r}" raise DataTypeValidationError(msg) @overload def to_json(self, zarr_format: Literal[2]) -> DTypeConfig_V2[Literal[">i8", " Literal["int64"]: ... def to_json( self, zarr_format: ZarrFormat ) -> DTypeConfig_V2[Literal[">i8", "i8", " int: """ The size of a single scalar in bytes. Returns ------- int The size of a single scalar in bytes. """ return 8 @dataclass(frozen=True, kw_only=True) class UInt64(BaseInt[np.dtypes.UInt64DType, np.uint64], HasEndianness): """ A Zarr data type for arrays containing 64-bit unsigned integers. Wraps the [`np.dtypes.UInt64DType`][numpy.dtypes.UInt64DType] data type. Scalars for this data type are instances of [`np.uint64`][numpy.uint64]. Attributes ---------- dtype_cls: np.dtypes.UInt64DType The class of the underlying NumPy dtype. References ---------- This class implements the unsigned 64-bit integer data type defined in Zarr V2 and V3. 
    See the [Zarr V2](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding)
    and [Zarr V3](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v3/data-types/index.rst)
    specification documents for details.
    """

    dtype_cls = np.dtypes.UInt64DType
    _zarr_v3_name: ClassVar[Literal["uint64"]] = "uint64"
    _zarr_v2_names: ClassVar[tuple[Literal[">u8"], Literal["<u8"]]] = (">u8", "<u8")

    def to_native_dtype(self) -> np.dtypes.UInt64DType:
        """
        Convert the data type to a native NumPy dtype.

        Returns
        -------
        np.dtypes.UInt64DType
            The native NumPy dtype.
        """
        byte_order = endianness_to_numpy_str(self.endianness)
        return self.dtype_cls().newbyteorder(byte_order)

    @classmethod
    def _from_json_v2(cls, data: DTypeJSON) -> Self:
        """
        Create an instance of this data type from Zarr V2-flavored JSON.

        Parameters
        ----------
        data : DTypeJSON
            The JSON data.

        Returns
        -------
        Self
            An instance of this data type.

        Raises
        ------
        DataTypeValidationError
            If the input JSON is not a valid representation of this class unsigned 64-bit integer.
        """
        if cls._check_json_v2(data):
            # Going via NumPy ensures that we get the endianness correct without
            # annoying string parsing.
            name = data["name"]
            return cls.from_native_dtype(np.dtype(name))
        msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected one of the strings {cls._zarr_v2_names}."
        raise DataTypeValidationError(msg)

    @classmethod
    def _from_json_v3(cls, data: DTypeJSON) -> Self:
        """
        Create an instance of this data type from Zarr V3-flavored JSON.

        Parameters
        ----------
        data : DTypeJSON
            The JSON data.

        Returns
        -------
        Self
            An instance of this data type.

        Raises
        ------
        DataTypeValidationError
            If the input JSON is not a valid representation of this class unsigned 64-bit integer.
        """
        if cls._check_json_v3(data):
            return cls()
        msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected the string {cls._zarr_v3_name!r}"
        raise DataTypeValidationError(msg)

    @overload
    def to_json(self, zarr_format: Literal[2]) -> DTypeConfig_V2[Literal[">u8", "<u8"], None]: ...
    @overload
    def to_json(self, zarr_format: Literal[3]) -> Literal["uint64"]: ...

    def to_json(
        self, zarr_format: ZarrFormat
    ) -> DTypeConfig_V2[Literal[">u8", "<u8"], None] | Literal["uint64"]:
        """
        Convert the data type to a JSON-serializable form.

        Parameters
        ----------
        zarr_format : ZarrFormat
            The Zarr format version.

        Returns
        -------
        ``DTypeConfig_V2[Literal[">u8", "<u8"], None] | Literal["uint64"]``
            The JSON-serializable representation of the data type.

        Raises
        ------
        ValueError
            If the zarr_format is not 2 or 3.
        """
        if zarr_format == 2:
            return {"name": self.to_native_dtype().str, "object_codec_id": None}
        elif zarr_format == 3:
            return self._zarr_v3_name
        raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}")  # pragma: no cover

    @classmethod
    def from_native_dtype(cls, dtype: TBaseDType) -> Self:
        """
        Create an instance of this data type from a native NumPy dtype.

        Parameters
        ----------
        dtype : TBaseDType
            The native NumPy dtype.

        Returns
        -------
        Self
            An instance of this data type.

        Raises
        ------
        DataTypeValidationError
            If the input dtype is not a valid representation of this class unsigned 64-bit integer.
        """
        if cls._check_native_dtype(dtype):
            return cls(endianness=get_endianness_from_numpy_dtype(dtype))
        raise DataTypeValidationError(
            f"Invalid data type: {dtype}. Expected an instance of {cls.dtype_cls}"
        )

    @property
    def item_size(self) -> int:
        """
        The size of a single scalar in bytes.

        Returns
        -------
        int
            The size of a single scalar in bytes.
""" return 8 zarr-python-3.1.5/src/zarr/core/dtype/npy/string.py000066400000000000000000000557251511007055700223440ustar00rootroot00000000000000from __future__ import annotations import re from dataclasses import dataclass from typing import ( TYPE_CHECKING, ClassVar, Literal, Protocol, Self, TypedDict, TypeGuard, overload, runtime_checkable, ) import numpy as np from zarr.core.common import NamedConfig from zarr.core.dtype.common import ( DataTypeValidationError, DTypeConfig_V2, DTypeJSON, HasEndianness, HasItemSize, HasLength, HasObjectCodec, check_dtype_spec_v2, v3_unstable_dtype_warning, ) from zarr.core.dtype.npy.common import ( check_json_str, endianness_to_numpy_str, get_endianness_from_numpy_dtype, ) from zarr.core.dtype.wrapper import TDType_co, ZDType if TYPE_CHECKING: from zarr.core.common import JSON, ZarrFormat from zarr.core.dtype.wrapper import TBaseDType _NUMPY_SUPPORTS_VLEN_STRING = hasattr(np.dtypes, "StringDType") @runtime_checkable class SupportsStr(Protocol): def __str__(self) -> str: ... class LengthBytesConfig(TypedDict): """ Configuration for a fixed-length string data type in Zarr V3. Attributes ---------- length_bytes : int The length in bytes of the data associated with this configuration. """ length_bytes: int class FixedLengthUTF32JSON_V2(DTypeConfig_V2[str, None]): """ A wrapper around the JSON representation of the ``FixedLengthUTF32`` data type in Zarr V2. The ``name`` field of this class contains the value that would appear under the ``dtype`` field in Zarr V2 array metadata. References ---------- The structure of the ``name`` field is defined in the Zarr V2 [specification document](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding). Examples -------- ```python { "name": " None: """ We don't allow instances of this class with length less than 1 because there is no way such a data type can contain actual data. """ if self.length < 1: raise ValueError(f"length must be >= 1, got {self.length}.") @classmethod def from_native_dtype(cls, dtype: TBaseDType) -> Self: """ Create a FixedLengthUTF32 from a NumPy data type. Parameters ---------- dtype : TBaseDType The NumPy data type. Returns ------- Self An instance of this data type. """ if cls._check_native_dtype(dtype): endianness = get_endianness_from_numpy_dtype(dtype) return cls( length=dtype.itemsize // (cls.code_point_bytes), endianness=endianness, ) raise DataTypeValidationError( f"Invalid data type: {dtype}. Expected an instance of {cls.dtype_cls}" ) def to_native_dtype(self) -> np.dtypes.StrDType[int]: """ Convert the FixedLengthUTF32 instance to a NumPy data type. Returns ------- np.dtypes.StrDType[int] The NumPy data type. """ byte_order = endianness_to_numpy_str(self.endianness) return self.dtype_cls(self.length).newbyteorder(byte_order) @classmethod def _check_json_v2(cls, data: DTypeJSON) -> TypeGuard[FixedLengthUTF32JSON_V2]: """ Check that the input is a valid JSON representation of a NumPy U dtype. Parameters ---------- data : DTypeJSON The JSON data. Returns ------- TypeGuard[FixedLengthUTF32JSON_V2] Whether the input is a valid JSON representation of a NumPy U dtype. """ return ( check_dtype_spec_v2(data) and isinstance(data["name"], str) and re.match(r"^[><]U\d+$", data["name"]) is not None and data["object_codec_id"] is None ) @classmethod def _check_json_v3(cls, data: DTypeJSON) -> TypeGuard[FixedLengthUTF32JSON_V3]: """ Check that the input is a valid JSON representation of this class in Zarr V3. Parameters ---------- data : DTypeJSON The JSON data. 
Returns ------- TypeGuard[FixedLengthUTF32JSONV3] Whether the input is a valid JSON representation of a NumPy U dtype. """ return ( isinstance(data, dict) and set(data.keys()) == {"name", "configuration"} and data["name"] == cls._zarr_v3_name and "configuration" in data and isinstance(data["configuration"], dict) and set(data["configuration"].keys()) == {"length_bytes"} and isinstance(data["configuration"]["length_bytes"], int) ) @overload def to_json(self, zarr_format: Literal[2]) -> DTypeConfig_V2[str, None]: ... @overload def to_json(self, zarr_format: Literal[3]) -> FixedLengthUTF32JSON_V3: ... def to_json( self, zarr_format: ZarrFormat ) -> DTypeConfig_V2[str, None] | FixedLengthUTF32JSON_V3: """ Convert the FixedLengthUTF32 instance to a JSON representation. Parameters ---------- zarr_format : ZarrFormat The Zarr format to use. Returns ------- DTypeConfig_V2[str, None] | FixedLengthUTF32JSON_V3 The JSON representation of the data type. """ if zarr_format == 2: return {"name": self.to_native_dtype().str, "object_codec_id": None} elif zarr_format == 3: v3_unstable_dtype_warning(self) return { "name": self._zarr_v3_name, "configuration": {"length_bytes": self.length * self.code_point_bytes}, } raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod def _from_json_v2(cls, data: DTypeJSON) -> Self: """ Create a FixedLengthUTF32 from a JSON representation of a NumPy U dtype. Parameters ---------- data : DTypeJSON The JSON data. Returns ------- Self An instance of this data type. """ if cls._check_json_v2(data): # Construct the NumPy dtype instead of string parsing. name = data["name"] return cls.from_native_dtype(np.dtype(name)) raise DataTypeValidationError( f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected a string representation of a NumPy U dtype." ) @classmethod def _from_json_v3(cls, data: DTypeJSON) -> Self: """ Create a FixedLengthUTF32 from a JSON representation of a NumPy U dtype. Parameters ---------- data : DTypeJSON The JSON data. Returns ------- Self An instance of this data type. """ if cls._check_json_v3(data): return cls(length=data["configuration"]["length_bytes"] // cls.code_point_bytes) msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected {cls._zarr_v3_name}." raise DataTypeValidationError(msg) def default_scalar(self) -> np.str_: """ Return the default scalar value for this data type. Returns ------- ``np.str_`` The default scalar value. """ return np.str_("") def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> str: """ Convert the scalar value to a JSON representation. Parameters ---------- data : object The scalar value. zarr_format : ZarrFormat The Zarr format to use. Returns ------- str The JSON representation of the scalar value. """ return str(data) def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.str_: """ Convert the JSON representation of a scalar value to the native scalar value. Parameters ---------- data : JSON The JSON data. zarr_format : ZarrFormat The Zarr format to use. Returns ------- ``np.str_`` The native scalar value. """ if check_json_str(data): return self.to_native_dtype().type(data) raise TypeError(f"Invalid type: {data}. Expected a string.") # pragma: no cover def _check_scalar(self, data: object) -> TypeGuard[SupportsStr]: """ Check that the input is a valid scalar value for this data type. Parameters ---------- data : object The scalar value. 
Returns ------- TypeGuard[SupportsStr] Whether the input is a valid scalar value for this data type. """ # this is generous for backwards compatibility return isinstance(data, SupportsStr) def cast_scalar(self, data: object) -> np.str_: """ Cast the scalar value to the native scalar value. Parameters ---------- data : object The scalar value. Returns ------- ``np.str_`` The native scalar value. """ if self._check_scalar(data): # We explicitly truncate before casting because of the following NumPy behavior: # >>> x = np.dtype('U3').type('hello world') # >>> x # np.str_('hello world') # >>> x.dtype # dtype('U11') return self.to_native_dtype().type(str(data)[: self.length]) msg = ( # pragma: no cover f"Cannot convert object {data!r} with type {type(data)} to a scalar compatible with the " f"data type {self}." ) raise TypeError(msg) # pragma: no-cover @property def item_size(self) -> int: """ The size of a single scalar in bytes. Returns ------- int The size of a single scalar in bytes. """ return self.length * self.code_point_bytes def check_vlen_string_json_scalar(data: object) -> TypeGuard[int | str | float]: """ Check if the input is a valid JSON scalar for a variable-length string. This function is generous for backwards compatibility, as Zarr Python v2 would use ints for variable-length string fill values. Parameters ---------- data : object The JSON value to check. Returns ------- TypeGuard[int | str | float] True if the input is a valid scalar for a variable-length string. """ return isinstance(data, int | str | float) class VariableLengthUTF8JSON_V2(DTypeConfig_V2[Literal["|O"], Literal["vlen-utf8"]]): """ A wrapper around the JSON representation of the ``VariableLengthUTF8`` data type in Zarr V2. The ``name`` field of this class contains the value that would appear under the ``dtype`` field in Zarr V2 array metadata. The ``object_codec_id`` field is always ``"vlen-utf8"``. References ---------- The structure of the ``name`` field is defined in the Zarr V2 [specification document](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding). Examples -------- ```python { "name": "|O", "object_codec_id": "vlen-utf8" } ``` """ # VariableLengthUTF8 is defined in two places, conditioned on the version of NumPy. # If NumPy 2 is installed, then VariableLengthUTF8 is defined with the NumPy variable length # string dtype as the native dtype. Otherwise, VariableLengthUTF8 is defined with the NumPy object # dtype as the native dtype. class UTF8Base(ZDType[TDType_co, str], HasObjectCodec): """ A base class for variable-length UTF-8 string data types. Not intended for direct use, but as a base for concrete implementations. Attributes ---------- object_codec_id : ClassVar[Literal["vlen-utf8"]] The object codec ID for this data type. References ---------- This data type does not have a Zarr V3 specification. The Zarr V2 data type specification can be found [here](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding). """ _zarr_v3_name: ClassVar[Literal["string"]] = "string" object_codec_id: ClassVar[Literal["vlen-utf8"]] = "vlen-utf8" @classmethod def from_native_dtype(cls, dtype: TBaseDType) -> Self: """ Create an instance of this data type from a compatible NumPy data type. Parameters ---------- dtype : TBaseDType The native data type. Returns ------- Self An instance of this data type. Raises ------ DataTypeValidationError If the input is not compatible with this data type. 
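        Examples
        --------
        A minimal sketch, assuming NumPy 2 is installed so that the concrete
        ``VariableLengthUTF8`` subclass defined later in this module wraps
        ``np.dtypes.StringDType`` (with older NumPy the native dtype is
        ``np.dtypes.ObjectDType`` instead); the import location is also an assumption.

        ```python
        import numpy as np

        from zarr.core.dtype import VariableLengthUTF8  # assumed import location

        VariableLengthUTF8.from_native_dtype(np.dtypes.StringDType())
        # VariableLengthUTF8()
        ```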
""" if cls._check_native_dtype(dtype): return cls() raise DataTypeValidationError( f"Invalid data type: {dtype}. Expected an instance of {cls.dtype_cls}" ) @classmethod def _check_json_v2( cls, data: DTypeJSON, ) -> TypeGuard[VariableLengthUTF8JSON_V2]: """ "Check if the input is a valid JSON representation of a variable-length UTF-8 string dtype for Zarr v2." Parameters ---------- data : DTypeJSON The JSON data to check. Returns ------- ``TypeGuard[VariableLengthUTF8JSON_V2]`` Whether the input is a valid JSON representation of a NumPy "object" data type, and that the object codec id is appropriate for variable-length UTF-8 strings. """ return ( check_dtype_spec_v2(data) and data["name"] == "|O" and data["object_codec_id"] == cls.object_codec_id ) @classmethod def _check_json_v3(cls, data: DTypeJSON) -> TypeGuard[Literal["variable_length_utf8"]]: """ Check that the input is a valid JSON representation of this class in Zarr V3. Parameters ---------- data : DTypeJSON The JSON data to check. Returns ------- TypeGuard[Literal["variable_length_utf8"]] Whether the input is a valid JSON representation of a variable length UTF-8 string data type. """ return data == cls._zarr_v3_name @classmethod def _from_json_v2(cls, data: DTypeJSON) -> Self: """ Create an instance of this class from a JSON representation of a NumPy "object" dtype. Parameters ---------- data : DTypeJSON The JSON data to create an instance from. Returns ------- Self An instance of this data type. """ if cls._check_json_v2(data): return cls() msg = ( f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected the string '|O'" ) raise DataTypeValidationError(msg) @classmethod def _from_json_v3(cls, data: DTypeJSON) -> Self: """ Create an instance of this class from a JSON representation of a variable length UTF-8 string data type. Parameters ---------- data : DTypeJSON The JSON data to create an instance from. Returns ------- Self An instance of this data type. """ if cls._check_json_v3(data): return cls() msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected {cls._zarr_v3_name}." raise DataTypeValidationError(msg) @overload def to_json(self, zarr_format: Literal[2]) -> VariableLengthUTF8JSON_V2: ... @overload def to_json(self, zarr_format: Literal[3]) -> Literal["string"]: ... def to_json(self, zarr_format: ZarrFormat) -> VariableLengthUTF8JSON_V2 | Literal["string"]: """ Convert this data type to a JSON representation. Parameters ---------- zarr_format : int The zarr format to use for the JSON representation. Returns ------- ``VariableLengthUTF8JSON_V2 | Literal["string"]`` The JSON representation of this data type. """ if zarr_format == 2: return {"name": "|O", "object_codec_id": self.object_codec_id} elif zarr_format == 3: return self._zarr_v3_name raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def default_scalar(self) -> str: """ Return the default scalar value for this data type. Returns ------- str The default scalar value. """ return "" def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> str: """ Convert a scalar value to a JSON representation. Parameters ---------- data : object The scalar value to convert. zarr_format : int The zarr format to use for the JSON representation. Returns ------- str The JSON representation of the scalar value. """ if self._check_scalar(data): return self._cast_scalar_unchecked(data) raise TypeError(f"Invalid type: {data}. 
Expected a string.") def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> str: """ Convert a JSON representation of a scalar value to the native scalar type. Parameters ---------- data : JSON The JSON representation of the scalar value. zarr_format : int The zarr format to use for the JSON representation. Returns ------- str The native scalar type of the scalar value. """ if not check_vlen_string_json_scalar(data): raise TypeError(f"Invalid type: {data}. Expected a string or number.") return str(data) def _check_scalar(self, data: object) -> TypeGuard[SupportsStr]: """ Check that the input is a valid scalar value for this data type. Parameters ---------- data : object The scalar value to check. Returns ------- TypeGuard[SupportsStr] Whether the input is a valid scalar value for this data type. """ return isinstance(data, SupportsStr) def _cast_scalar_unchecked(self, data: SupportsStr) -> str: """ Cast a scalar value to a string. Parameters ---------- data : object The scalar value to cast. Returns ------- str The string representation of the scalar value. """ return str(data) def cast_scalar(self, data: object) -> str: """ Cast an object to a string. Parameters ---------- data : object The value to cast. Returns ------- str The input cast to str. """ if self._check_scalar(data): return self._cast_scalar_unchecked(data) msg = ( # pragma: no cover f"Cannot convert object {data!r} with type {type(data)} to a scalar compatible with the " f"data type {self}." ) raise TypeError(msg) # pragma: no cover if _NUMPY_SUPPORTS_VLEN_STRING: @dataclass(frozen=True, kw_only=True) class VariableLengthUTF8(UTF8Base[np.dtypes.StringDType]): # type: ignore[type-var] """ A Zarr data type for arrays containing variable-length UTF-8 strings. Wraps the ``np.dtypes.StringDType`` data type. Scalars for this data type are instances of ``str``. Attributes ---------- dtype_cls : Type[np.dtypes.StringDType] The NumPy dtype class for this data type. _zarr_v3_name : ClassVar[Literal["variable_length_utf8"]] = "variable_length_utf8" The name of this data type in Zarr V3. object_codec_id : ClassVar[Literal["vlen-utf8"]] = "vlen-utf8" The object codec ID for this data type. """ dtype_cls = np.dtypes.StringDType def to_native_dtype(self) -> np.dtypes.StringDType: """ Create a NumPy string dtype from this VariableLengthUTF8 ZDType. Returns ------- np.dtypes.StringDType The NumPy string dtype. """ return self.dtype_cls() else: # Numpy pre-2 does not have a variable length string dtype, so we use the Object dtype instead. @dataclass(frozen=True, kw_only=True) class VariableLengthUTF8(UTF8Base[np.dtypes.ObjectDType]): # type: ignore[no-redef] """ A Zarr data type for arrays containing variable-length UTF-8 strings. Wraps the ``np.dtypes.ObjectDType`` data type. Scalars for this data type are instances of ``str``. Attributes ---------- dtype_cls : Type[np.dtypes.ObjectDType] The NumPy dtype class for this data type. _zarr_v3_name : ClassVar[Literal["variable_length_utf8"]] = "variable_length_utf8" The name of this data type in Zarr V3. object_codec_id : ClassVar[Literal["vlen-utf8"]] = "vlen-utf8" The object codec ID for this data type. """ dtype_cls = np.dtypes.ObjectDType def to_native_dtype(self) -> np.dtypes.ObjectDType: """ Create a NumPy object dtype from this VariableLengthUTF8 ZDType. Returns ------- np.dtypes.ObjectDType The NumPy object dtype. 
""" return self.dtype_cls() zarr-python-3.1.5/src/zarr/core/dtype/npy/structured.py000066400000000000000000000357441511007055700232410ustar00rootroot00000000000000from __future__ import annotations from collections.abc import Sequence from dataclasses import dataclass from typing import TYPE_CHECKING, ClassVar, Literal, Self, TypeGuard, cast, overload import numpy as np from zarr.core.common import NamedConfig from zarr.core.dtype.common import ( DataTypeValidationError, DTypeConfig_V2, DTypeJSON, HasItemSize, StructuredName_V2, check_dtype_spec_v2, check_structured_dtype_name_v2, v3_unstable_dtype_warning, ) from zarr.core.dtype.npy.common import ( bytes_from_json, bytes_to_json, check_json_str, ) from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType if TYPE_CHECKING: from zarr.core.common import JSON, ZarrFormat StructuredScalarLike = list[object] | tuple[object, ...] | bytes | int class StructuredJSON_V2(DTypeConfig_V2[StructuredName_V2, None]): """ A wrapper around the JSON representation of the ``Structured`` data type in Zarr V2. The ``name`` field is a sequence of sequences, where each inner sequence has two values: the field name and the data type name for that field (which could be another sequence). The data type names are strings, and the object codec ID is always None. References ---------- The structure of the ``name`` field is defined in the Zarr V2 [specification document](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding). Examples -------- ```python { "name": [ ["f0", " None: if len(self.fields) < 1: raise ValueError(f"must have at least one field. Got {self.fields!r}") @classmethod def _check_native_dtype(cls, dtype: TBaseDType) -> TypeGuard[np.dtypes.VoidDType[int]]: """ Check that this dtype is a numpy structured dtype Parameters ---------- dtype : np.dtypes.DTypeLike The dtype to check. Returns ------- TypeGuard[np.dtypes.VoidDType] True if the dtype matches, False otherwise. """ return isinstance(dtype, cls.dtype_cls) and dtype.fields is not None @classmethod def from_native_dtype(cls, dtype: TBaseDType) -> Self: """ Create a Structured ZDType from a native NumPy data type. Parameters ---------- dtype : TBaseDType The native data type. Returns ------- Self An instance of this data type. Raises ------ DataTypeValidationError If the input data type is not an instance of np.dtypes.VoidDType with a non-null ``fields`` attribute. Notes ----- This method attempts to resolve the fields of the structured dtype using the data type registry. """ from zarr.core.dtype import get_data_type_from_native_dtype fields: list[tuple[str, ZDType[TBaseDType, TBaseScalar]]] = [] if cls._check_native_dtype(dtype): # fields of a structured numpy dtype are either 2-tuples or 3-tuples. we only # care about the first element in either case. for key, (dtype_instance, *_) in dtype.fields.items(): # type: ignore[union-attr] dtype_wrapped = get_data_type_from_native_dtype(dtype_instance) fields.append((key, dtype_wrapped)) return cls(fields=tuple(fields)) raise DataTypeValidationError( f"Invalid data type: {dtype}. Expected an instance of {cls.dtype_cls}" ) def to_native_dtype(self) -> np.dtypes.VoidDType[int]: """ Convert the structured Zarr data type to a native NumPy void dtype. This method constructs a NumPy dtype with fields corresponding to the fields of the structured Zarr data type, by converting each field's data type to its native dtype representation. 
Returns ------- np.dtypes.VoidDType[int] The native NumPy void dtype representing the structured data type. """ return cast( "np.dtypes.VoidDType[int]", np.dtype([(key, dtype.to_native_dtype()) for (key, dtype) in self.fields]), ) @classmethod def _check_json_v2( cls, data: DTypeJSON, ) -> TypeGuard[StructuredJSON_V2]: """ Check if the input is a valid JSON representation of a Structured data type for Zarr V2. The input data must be a mapping that contains a "name" key that is not a str, and an "object_codec_id" key that is None. Parameters ---------- data : DTypeJSON The JSON data to check. Returns ------- TypeGuard[StructuredJSON_V2] True if the input is a valid JSON representation of a Structured data type for Zarr V2, False otherwise. """ return ( check_dtype_spec_v2(data) and not isinstance(data["name"], str) and check_structured_dtype_name_v2(data["name"]) and data["object_codec_id"] is None ) @classmethod def _check_json_v3(cls, data: DTypeJSON) -> TypeGuard[StructuredJSON_V3]: """ Check that the input is a valid JSON representation of this class in Zarr V3. Parameters ---------- data : DTypeJSON The JSON data to check. Returns ------- TypeGuard[StructuredJSON_V3] True if the input is a valid JSON representation of a structured data type for Zarr V3, False otherwise. """ return ( isinstance(data, dict) and set(data.keys()) == {"name", "configuration"} and data["name"] == cls._zarr_v3_name and isinstance(data["configuration"], dict) and set(data["configuration"].keys()) == {"fields"} ) @classmethod def _from_json_v2(cls, data: DTypeJSON) -> Self: # avoid circular import from zarr.core.dtype import get_data_type_from_json if cls._check_json_v2(data): # structured dtypes are constructed directly from a list of lists # note that we do not handle the object codec here! this will prevent structured # dtypes from containing object dtypes. return cls( fields=tuple( # type: ignore[misc] ( # type: ignore[misc] f_name, get_data_type_from_json( {"name": f_dtype, "object_codec_id": None}, zarr_format=2 ), ) for f_name, f_dtype in data["name"] ) ) msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected a JSON array of arrays" raise DataTypeValidationError(msg) @classmethod def _from_json_v3(cls, data: DTypeJSON) -> Self: # avoid circular import from zarr.core.dtype import get_data_type_from_json if cls._check_json_v3(data): config = data["configuration"] meta_fields = config["fields"] return cls( fields=tuple( (f_name, get_data_type_from_json(f_dtype, zarr_format=3)) # type: ignore[misc] for f_name, f_dtype in meta_fields ) ) msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected a JSON object with the key {cls._zarr_v3_name!r}" raise DataTypeValidationError(msg) @overload def to_json(self, zarr_format: Literal[2]) -> StructuredJSON_V2: ... @overload def to_json(self, zarr_format: Literal[3]) -> StructuredJSON_V3: ... def to_json(self, zarr_format: ZarrFormat) -> StructuredJSON_V2 | StructuredJSON_V3: """ Convert the structured data type to a JSON-serializable form. Parameters ---------- zarr_format : ZarrFormat The Zarr format version. Accepted values are 2 and 3. Returns ------- StructuredJSON_V2 | StructuredJSON_V3 The JSON representation of the structured data type. Raises ------ ValueError If the zarr_format is not 2 or 3. 
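        Examples
        --------
        A small, illustrative sketch. It assumes that ``Structured``, ``Int32`` and
        ``Int64`` can be imported from ``zarr.core.dtype`` and that the integer types
        default to little-endian; both points are assumptions made only for the example.

        ```python
        from zarr.core.dtype import Int32, Int64, Structured  # assumed import location

        zdtype = Structured(fields=(("x", Int32()), ("y", Int64())))
        zdtype.to_json(zarr_format=2)
        # {'name': [['x', '<i4'], ['y', '<i8']], 'object_codec_id': None}
        ```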
""" if zarr_format == 2: fields = [ [f_name, f_dtype.to_json(zarr_format=zarr_format)["name"]] for f_name, f_dtype in self.fields ] return {"name": fields, "object_codec_id": None} elif zarr_format == 3: v3_unstable_dtype_warning(self) fields = [ [f_name, f_dtype.to_json(zarr_format=zarr_format)] # type: ignore[list-item] for f_name, f_dtype in self.fields ] base_dict = { "name": self._zarr_v3_name, "configuration": {"fields": fields}, } return cast("StructuredJSON_V3", base_dict) raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def _check_scalar(self, data: object) -> TypeGuard[StructuredScalarLike]: # TODO: implement something more precise here! """ Check that the input is a valid scalar value for this structured data type. Parameters ---------- data : object The scalar value to check. Returns ------- TypeGuard[StructuredScalarLike] Whether the input is a valid scalar value for this structured data type. """ return isinstance(data, (bytes, list, tuple, int, np.void)) def _cast_scalar_unchecked(self, data: StructuredScalarLike) -> np.void: """ Cast a python object to a numpy structured scalar without type checking. Parameters ---------- data : StructuredScalarLike The data to cast. Returns ------- np.void The casted data as a numpy structured scalar. Notes ----- This method does not perform any type checking. The input data must be castable to a numpy structured scalar. """ na_dtype = self.to_native_dtype() if isinstance(data, bytes): res = np.frombuffer(data, dtype=na_dtype)[0] elif isinstance(data, list | tuple): res = np.array([tuple(data)], dtype=na_dtype)[0] else: res = np.array([data], dtype=na_dtype)[0] return cast("np.void", res) def cast_scalar(self, data: object) -> np.void: """ Cast a Python object to a NumPy structured scalar. This function attempts to cast the provided data to a NumPy structured scalar. If the data is compatible with the structured scalar type, it is cast without type checking. Otherwise, a TypeError is raised. Parameters ---------- data : object The data to be cast to a NumPy structured scalar. Returns ------- np.void The data cast as a NumPy structured scalar. Raises ------ TypeError If the data cannot be converted to a NumPy structured scalar. """ if self._check_scalar(data): return self._cast_scalar_unchecked(data) msg = ( f"Cannot convert object {data!r} with type {type(data)} to a scalar compatible with the " f"data type {self}." ) raise TypeError(msg) def default_scalar(self) -> np.void: """ Get the default scalar value for this structured data type. Returns ------- np.void The default scalar value, which is the scalar representation of 0 cast to this structured data type. """ return self._cast_scalar_unchecked(0) def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: """ Read a JSON-serializable value as a NumPy structured scalar. Parameters ---------- data : JSON The JSON-serializable value. zarr_format : ZarrFormat The zarr format version. Returns ------- np.void The NumPy structured scalar. Raises ------ TypeError If the input is not a base64-encoded string. """ if check_json_str(data): as_bytes = bytes_from_json(data, zarr_format=zarr_format) dtype = self.to_native_dtype() return cast("np.void", np.array([as_bytes]).view(dtype)[0]) raise TypeError(f"Invalid type: {data}. Expected a string.") def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> str: """ Convert a scalar to a JSON-serializable string representation. 
Parameters ---------- data : object The scalar to convert. zarr_format : ZarrFormat The zarr format version. Returns ------- str A string representation of the scalar, which is a base64-encoded string of the bytes that make up the scalar. """ return bytes_to_json(self.cast_scalar(data).tobytes(), zarr_format) @property def item_size(self) -> int: """ The size of a single scalar in bytes. Returns ------- int The size of a single scalar in bytes. """ return self.to_native_dtype().itemsize zarr-python-3.1.5/src/zarr/core/dtype/npy/time.py000066400000000000000000000664211511007055700217670ustar00rootroot00000000000000from __future__ import annotations from dataclasses import dataclass from datetime import datetime, timedelta from typing import ( TYPE_CHECKING, ClassVar, Literal, Self, TypedDict, TypeGuard, TypeVar, cast, get_args, overload, ) import numpy as np from typing_extensions import ReadOnly from zarr.core.common import NamedConfig from zarr.core.dtype.common import ( DataTypeValidationError, DTypeConfig_V2, DTypeJSON, HasEndianness, HasItemSize, check_dtype_spec_v2, ) from zarr.core.dtype.npy.common import ( DATETIME_UNIT, DateTimeUnit, check_json_int, endianness_to_numpy_str, get_endianness_from_numpy_dtype, ) from zarr.core.dtype.wrapper import TBaseDType, ZDType if TYPE_CHECKING: from zarr.core.common import JSON, ZarrFormat TimeDeltaLike = str | int | bytes | np.timedelta64 | timedelta | None DateTimeLike = str | int | bytes | np.datetime64 | datetime | None def datetime_from_int(data: int, *, unit: DateTimeUnit, scale_factor: int) -> np.datetime64: """ Convert an integer to a datetime64. Parameters ---------- data : int The integer to convert. unit : DateTimeUnit The unit of the datetime64. scale_factor : int The scale factor of the datetime64. Returns ------- numpy.datetime64 The datetime64 value. """ dtype_name = f"datetime64[{scale_factor}{unit}]" return cast("np.datetime64", np.int64(data).view(dtype_name)) def datetimelike_to_int(data: np.datetime64 | np.timedelta64) -> int: """ Convert a datetime64 or a timedelta64 to an integer. Parameters ---------- data : np.datetime64 | numpy.timedelta64 The value to convert. Returns ------- int An integer representation of the scalar. """ return data.view(np.int64).item() def check_json_time(data: JSON) -> TypeGuard[Literal["NaT"] | int]: """ Type guard to check if the input JSON data is the literal string "NaT" or an integer. """ return check_json_int(data) or data == "NaT" BaseTimeDType_co = TypeVar( "BaseTimeDType_co", bound=np.dtypes.TimeDelta64DType | np.dtypes.DateTime64DType, covariant=True, ) BaseTimeScalar_co = TypeVar( "BaseTimeScalar_co", bound=np.timedelta64 | np.datetime64, covariant=True ) class TimeConfig(TypedDict): """ The configuration for the numpy.timedelta64 or numpy.datetime64 data type in Zarr V3. Attributes ---------- unit : ReadOnly[DateTimeUnit] A string encoding a unit of time. scale_factor : ReadOnly[int] A scale factor. Examples -------- ```python {"unit": "ms", "scale_factor": 1} ``` """ unit: ReadOnly[DateTimeUnit] scale_factor: ReadOnly[int] class DateTime64JSON_V3(NamedConfig[Literal["numpy.datetime64"], TimeConfig]): """ The JSON representation of the ``numpy.datetime64`` data type in Zarr V3. References ---------- This representation is defined in the ``numpy.datetime64`` [specification document](https://zarr-specs.readthedocs.io/en/latest/spec/v3/datatypes.html#numpy-datetime64). 
Examples -------- ```python { "name": "numpy.datetime64", "configuration": { "unit": "ms", "scale_factor": 1 } } ``` """ class TimeDelta64JSON_V3(NamedConfig[Literal["numpy.timedelta64"], TimeConfig]): """ The JSON representation of the ``TimeDelta64`` data type in Zarr V3. References ---------- This representation is defined in the numpy.timedelta64 [specification document](https://zarr-specs.readthedocs.io/en/latest/spec/v3/datatypes.html#numpy-timedelta64). Examples -------- ```python { "name": "numpy.timedelta64", "configuration": { "unit": "ms", "scale_factor": 1 } } ``` """ class TimeDelta64JSON_V2(DTypeConfig_V2[str, None]): """ A wrapper around the JSON representation of the ``TimeDelta64`` data type in Zarr V2. The ``name`` field of this class contains the value that would appear under the ``dtype`` field in Zarr V2 array metadata. References ---------- The structure of the ``name`` field is defined in the Zarr V2 [specification document](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding). Examples -------- ```python { "name": " None: if self.scale_factor < 1: raise ValueError(f"scale_factor must be > 0, got {self.scale_factor}.") if self.scale_factor >= 2**31: raise ValueError(f"scale_factor must be < 2147483648, got {self.scale_factor}.") if self.unit not in get_args(DateTimeUnit): raise ValueError(f"unit must be one of {get_args(DateTimeUnit)}, got {self.unit!r}.") @classmethod def from_native_dtype(cls, dtype: TBaseDType) -> Self: """ Create an instance of this class from a native NumPy data type. Parameters ---------- dtype : TBaseDType The native NumPy dtype to convert. Returns ------- Self An instance of this data type. Raises ------ DataTypeValidationError If the dtype is not a valid representation of this class. """ if cls._check_native_dtype(dtype): unit, scale_factor = np.datetime_data(dtype.name) unit = cast("DateTimeUnit", unit) return cls( unit=unit, scale_factor=scale_factor, endianness=get_endianness_from_numpy_dtype(dtype), ) raise DataTypeValidationError( f"Invalid data type: {dtype}. Expected an instance of {cls.dtype_cls}" ) def to_native_dtype(self) -> BaseTimeDType_co: # Numpy does not allow creating datetime64 or timedelta64 via # np.dtypes.{dtype_name}() # so we use np.dtype with a formatted string. """ Convert this data type to a NumPy temporal data type with the appropriate unit and scale factor. Returns ------- BaseTimeDType_co A NumPy data type object representing the time data type with the specified unit, scale factor, and byte order. """ dtype_string = f"{self._numpy_name}[{self.scale_factor}{self.unit}]" return np.dtype(dtype_string).newbyteorder(endianness_to_numpy_str(self.endianness)) # type: ignore[return-value] def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> int: """ Convert a python object to a JSON representation of a datetime64 or timedelta64 scalar. Parameters ---------- data : object The python object to convert. zarr_format : ZarrFormat The Zarr format version (2 or 3). Returns ------- int The JSON representation of the scalar. """ return datetimelike_to_int(data) # type: ignore[arg-type] @property def item_size(self) -> int: """ The size of a single scalar in bytes. Returns ------- int The size of a single scalar in bytes. """ return 8 @dataclass(frozen=True, kw_only=True, slots=True) class TimeDelta64(TimeDTypeBase[np.dtypes.TimeDelta64DType, np.timedelta64], HasEndianness): """ A Zarr data type for arrays containing NumPy TimeDelta64 data. 
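    The brief sketch below illustrates how ``unit`` and ``scale_factor`` map onto the
    native NumPy dtype and the Zarr V3 metadata; it assumes that ``TimeDelta64`` can be
    imported from ``zarr.core.dtype``, which is an assumption made only for the example.

    ```python
    from zarr.core.dtype import TimeDelta64  # assumed import location

    zdtype = TimeDelta64(unit="s", scale_factor=1)
    zdtype.to_native_dtype()  # dtype('<m8[s]')
    zdtype.to_json(zarr_format=3)
    # {'name': 'numpy.timedelta64', 'configuration': {'unit': 's', 'scale_factor': 1}}
    ```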
Wraps the ``np.dtypesTimeDelta64DType`` data type. Scalars for this data type are instances of `np.timedelta64`. Attributes ---------- dtype_cls : Type[np.dtypesTimeDelta64DType] The NumPy dtype class for this data type. scale_factor : int The scale factor for this data type. unit : DateTimeUnit The unit for this data type. References ---------- The Zarr V2 representation of this data type is defined in the Zarr V2 [specification document](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding). The Zarr V3 representation of this data type is defined in the ``numpy.timedelta64`` [specification document](https://github.com/zarr-developers/zarr-extensions/tree/main/data-types/numpy.timedelta64) """ # mypy infers the type of np.dtypes.TimeDelta64DType to be # "Callable[[Literal['Y', 'M', 'W', 'D'] | Literal['h', 'm', 's', 'ms', 'us', 'ns', 'ps', 'fs', 'as']], Never]" dtype_cls = np.dtypes.TimeDelta64DType # type: ignore[assignment] unit: DateTimeUnit = "generic" scale_factor: int = 1 _zarr_v3_name: ClassVar[Literal["numpy.timedelta64"]] = "numpy.timedelta64" _zarr_v2_names: ClassVar[tuple[Literal[">m8"], Literal["m8", " TypeGuard[TimeDelta64JSON_V2]: """ Validate that the provided JSON input accurately represents a NumPy timedelta64 data type, which could be in the form of strings like "m8[10s]". This method serves as a type guard, helping to refine the type of unknown JSON input by confirming its adherence to the expected format for NumPy timedelta64 data types. The JSON input should contain a "name" key with a value that matches the expected string pattern for NumPy timedelta64 data types. The pattern includes an optional unit enclosed within square brackets, following the base type identifier. Returns ------- bool True if the JSON input is a valid representation of this class, otherwise False. """ if not check_dtype_spec_v2(data): return False name = data["name"] # match m[M], etc # consider making this a standalone function if not isinstance(name, str): return False if not name.startswith(cls._zarr_v2_names): return False if len(name) == 3: # no unit, and # we already checked that this string is either m8 return True else: return name[4:-1].endswith(DATETIME_UNIT) and name[-1] == "]" @classmethod def _check_json_v3(cls, data: DTypeJSON) -> TypeGuard[DateTime64JSON_V3]: """ Check that the input is a valid JSON representation of this class in Zarr V3. Returns ------- TypeGuard[DateTime64JSON_V3] True if the JSON input is a valid representation of this class, otherwise False. """ return ( isinstance(data, dict) and set(data.keys()) == {"name", "configuration"} and data["name"] == cls._zarr_v3_name and isinstance(data["configuration"], dict) and set(data["configuration"].keys()) == {"unit", "scale_factor"} ) @classmethod def _from_json_v2(cls, data: DTypeJSON) -> Self: """ Create a TimeDelta64 from a Zarr V2-flavored JSON. Parameters ---------- data : DTypeJSON The JSON data. Returns ------- TimeDelta64 An instance of TimeDelta64. Raises ------ DataTypeValidationError If the input JSON is not a valid representation of this class. """ if cls._check_json_v2(data): name = data["name"] return cls.from_native_dtype(np.dtype(name)) msg = ( f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected a string " f"representation of an instance of {cls.dtype_cls}" ) raise DataTypeValidationError(msg) @classmethod def _from_json_v3(cls, data: DTypeJSON) -> Self: """ Create a TimeDelta64 from a Zarr V3-flavored JSON. 
The JSON representation of a TimeDelta64 in Zarr V3 is a dict with a 'name' key with the value 'numpy.timedelta64', and a 'configuration' key with a value of a dict with a 'unit' key and a 'scale_factor' key. For example: ```json { "name": "numpy.timedelta64", "configuration": { "unit": "generic", "scale_factor": 1 } } ``` """ if cls._check_json_v3(data): unit = data["configuration"]["unit"] scale_factor = data["configuration"]["scale_factor"] return cls(unit=unit, scale_factor=scale_factor) msg = ( f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected a dict " f"with a 'name' key with the value 'numpy.timedelta64', " "and a 'configuration' key with a value of a dict with a 'unit' key and a " "'scale_factor' key" ) raise DataTypeValidationError(msg) @overload def to_json(self, zarr_format: Literal[2]) -> TimeDelta64JSON_V2: ... @overload def to_json(self, zarr_format: Literal[3]) -> TimeDelta64JSON_V3: ... def to_json(self, zarr_format: ZarrFormat) -> TimeDelta64JSON_V2 | TimeDelta64JSON_V3: """ Serialize this data type to JSON. Parameters ---------- zarr_format : ZarrFormat The Zarr format version (2 or 3). Returns ------- TimeDelta64JSON_V2 | TimeDelta64JSON_V3 The JSON representation of the data type. Raises ------ ValueError If the zarr_format is not 2 or 3. """ if zarr_format == 2: name = self.to_native_dtype().str return {"name": name, "object_codec_id": None} elif zarr_format == 3: return { "name": self._zarr_v3_name, "configuration": {"unit": self.unit, "scale_factor": self.scale_factor}, } raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def _check_scalar(self, data: object) -> TypeGuard[TimeDeltaLike]: """ Check if the input is a scalar of this data type. Parameters ---------- data : object The object to check. Returns ------- TypeGuard[TimeDeltaLike] True if the input is a scalar of this data type, False otherwise. """ if data is None: return True return isinstance(data, str | int | bytes | np.timedelta64 | timedelta) def _cast_scalar_unchecked(self, data: TimeDeltaLike) -> np.timedelta64: """ Cast the provided scalar input to a numpy timedelta64 without any type checking. This method assumes that the input data is already a valid scalar of this data type, and does not perform any validation or type checks. It directly casts the input to a numpy timedelta64 scalar using the unit and scale factor defined in the class. Parameters ---------- data : TimeDeltaLike The scalar input data to cast. Returns ------- numpy.timedelta64 The input data cast as a numpy timedelta64 scalar. """ return self.to_native_dtype().type(data, f"{self.scale_factor}{self.unit}") def cast_scalar(self, data: object) -> np.timedelta64: """ Cast the input to a numpy timedelta64 scalar. If the input is not a scalar of this data type, raise a TypeError. """ if self._check_scalar(data): return self._cast_scalar_unchecked(data) msg = ( f"Cannot convert object {data!r} with type {type(data)} to a scalar compatible with the " f"data type {self}." ) raise TypeError(msg) def default_scalar(self) -> np.timedelta64: """ Return a default scalar of this data type. This method provides a default value for the timedelta64 scalar, which is a 'Not-a-Time' (NaT) value. """ return np.timedelta64("NaT") def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.timedelta64: """ Create a scalar of this data type from JSON input. Parameters ---------- data : JSON The JSON representation of the scalar value. 
zarr_format : ZarrFormat The zarr format to use for the JSON representation. Returns ------- numpy.timedelta64 The scalar value of this data type. Raises ------ TypeError If the input JSON is not a valid representation of a scalar for this data type. """ if check_json_time(data): return self.to_native_dtype().type(data, f"{self.scale_factor}{self.unit}") raise TypeError(f"Invalid type: {data}. Expected an integer.") # pragma: no cover @dataclass(frozen=True, kw_only=True, slots=True) class DateTime64(TimeDTypeBase[np.dtypes.DateTime64DType, np.datetime64], HasEndianness): """ A Zarr data type for arrays containing NumPy Datetime64 data. Wraps the ``np.dtypes.DateTime64DType`` data type. Scalars for this data type are instances of ``np.datetime64``. Attributes ---------- dtype_cls : Type[np.dtypes.DateTime64DType] The numpy dtype class for this data type. unit : DateTimeUnit The unit of time for this data type. scale_factor : int The scale factor for the time unit. References ---------- The Zarr V2 representation of this data type is defined in the Zarr V2 [specification document](https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding). The Zarr V3 representation of this data type is defined in the ``numpy.datetime64`` [specification document](https://github.com/zarr-developers/zarr-extensions/tree/main/data-types/numpy.datetime64) """ dtype_cls = np.dtypes.DateTime64DType # type: ignore[assignment] _zarr_v3_name: ClassVar[Literal["numpy.datetime64"]] = "numpy.datetime64" _zarr_v2_names: ClassVar[tuple[Literal[">M8"], Literal["<M8"]]] = (">M8", "<M8") unit: DateTimeUnit = "generic" scale_factor: int = 1 @classmethod def _check_json_v2(cls, data: DTypeJSON) -> TypeGuard[DateTime64JSON_V2]: """ Check that the input is a valid JSON representation of this data type. Parameters ---------- data : DTypeJSON The JSON data to check. Returns ------- TypeGuard[DateTime64JSON_V2] True if the input is a valid JSON representation of a NumPy datetime64 data type, otherwise False. """ if not check_dtype_spec_v2(data): return False name = data["name"] if not isinstance(name, str): return False if not name.startswith(cls._zarr_v2_names): return False if len(name) == 3: # no unit, and # we already checked that this string is either >M8 or <M8 return True else: return name[4:-1].endswith(DATETIME_UNIT) and name[-1] == "]" @classmethod def _check_json_v3(cls, data: DTypeJSON) -> TypeGuard[DateTime64JSON_V3]: """ Check that the input is a valid JSON representation of this class in Zarr V3. Parameters ---------- data : DTypeJSON The JSON data to check. Returns ------- TypeGuard[DateTime64JSON_V3] True if the input is a valid JSON representation of a numpy datetime64 data type in Zarr V3, False otherwise. """ return ( isinstance(data, dict) and set(data.keys()) == {"name", "configuration"} and data["name"] == cls._zarr_v3_name and isinstance(data["configuration"], dict) and set(data["configuration"].keys()) == {"unit", "scale_factor"} ) @classmethod def _from_json_v2(cls, data: DTypeJSON) -> Self: """ Create an instance of this data type from a Zarr V2-flavored JSON representation. This method checks if the provided JSON data is a valid representation of this class. If valid, it creates an instance using the native NumPy dtype. Otherwise, it raises a DataTypeValidationError. Parameters ---------- data : DTypeJSON The JSON data to parse. Returns ------- Self An instance of this data type. Raises ------ DataTypeValidationError If the input JSON is not a valid representation of this class.
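Examples
--------
For illustration, a Zarr V2 metadata entry of the following form is accepted (the byte order and unit shown here are arbitrary):

```python
DateTime64._from_json_v2({"name": "<M8[s]", "object_codec_id": None})
# returns a DateTime64 with unit="s", scale_factor=1, and little-endian byte order
```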
""" if cls._check_json_v2(data): name = data["name"] return cls.from_native_dtype(np.dtype(name)) msg = ( f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected a string " f"representation of an instance of {cls.dtype_cls}" ) raise DataTypeValidationError(msg) @classmethod def _from_json_v3(cls, data: DTypeJSON) -> Self: """ Create an instance of this data type from a Zarr V3-flavored JSON representation. This method checks if the provided JSON data is a valid representation of this class. If valid, it creates an instance using the native NumPy dtype. Otherwise, it raises a DataTypeValidationError. Parameters ---------- data : DTypeJSON The JSON data to parse. Returns ------- Self An instance of this data type. Raises ------ DataTypeValidationError If the input JSON is not a valid representation of this class. """ if cls._check_json_v3(data): unit = data["configuration"]["unit"] scale_factor = data["configuration"]["scale_factor"] return cls(unit=unit, scale_factor=scale_factor) msg = ( f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected a dict " f"with a 'name' key with the value 'numpy.datetime64', " "and a 'configuration' key with a value of a dict with a 'unit' key and a " "'scale_factor' key" ) raise DataTypeValidationError(msg) @overload def to_json(self, zarr_format: Literal[2]) -> DateTime64JSON_V2: ... @overload def to_json(self, zarr_format: Literal[3]) -> DateTime64JSON_V3: ... def to_json(self, zarr_format: ZarrFormat) -> DateTime64JSON_V2 | DateTime64JSON_V3: """ Serialize this data type to JSON. Parameters ---------- zarr_format : ZarrFormat The Zarr format version (2 or 3). Returns ------- DateTime64JSON_V2 | DateTime64JSON_V3 The JSON representation of the data type. Raises ------ ValueError If the zarr_format is not 2 or 3. """ if zarr_format == 2: name = self.to_native_dtype().str return {"name": name, "object_codec_id": None} elif zarr_format == 3: return { "name": self._zarr_v3_name, "configuration": {"unit": self.unit, "scale_factor": self.scale_factor}, } raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def _check_scalar(self, data: object) -> TypeGuard[DateTimeLike]: """ Check if the input is convertible to a scalar of this data type. Parameters ---------- data : object The object to check. Returns ------- TypeGuard[DateTimeLike] True if the input is a scalar of this data type, False otherwise. """ if data is None: return True return isinstance(data, str | int | bytes | np.datetime64 | datetime) def _cast_scalar_unchecked(self, data: DateTimeLike) -> np.datetime64: """ Cast the input to a scalar of this data type without any type checking. Parameters ---------- data : DateTimeLike The scalar data to cast. Returns ------- numpy.datetime64 The input cast to a NumPy datetime scalar. """ return self.to_native_dtype().type(data, f"{self.scale_factor}{self.unit}") def cast_scalar(self, data: object) -> np.datetime64: """ Cast the input to a scalar of this data type after a type check. Parameters ---------- data : object The scalar value to cast. Returns ------- numpy.datetime64 The input cast to a NumPy datetime scalar. Raises ------ TypeError If the data cannot be converted to a numpy datetime scalar. """ if self._check_scalar(data): return self._cast_scalar_unchecked(data) msg = ( f"Cannot convert object {data!r} with type {type(data)} to a scalar compatible with the " f"data type {self}." 
) raise TypeError(msg) def default_scalar(self) -> np.datetime64: """ Return the default scalar value for this data type. Returns ------- numpy.datetime64 The default scalar value, which is a 'Not-a-Time' (NaT) value """ return np.datetime64("NaT") def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.datetime64: """ Read a JSON-serializable value as a scalar. Parameters ---------- data : JSON The JSON-serializable value. zarr_format : ZarrFormat The zarr format version. Returns ------- numpy.datetime64 The numpy datetime scalar. Raises ------ TypeError If the input is not a valid integer type. """ if check_json_time(data): return self._cast_scalar_unchecked(data) raise TypeError(f"Invalid type: {data}. Expected an integer.") # pragma: no cover zarr-python-3.1.5/src/zarr/core/dtype/registry.py000066400000000000000000000157111511007055700220670ustar00rootroot00000000000000from __future__ import annotations import contextlib from dataclasses import dataclass, field from typing import TYPE_CHECKING, Self import numpy as np from zarr.core.dtype.common import ( DataTypeValidationError, DTypeJSON, ) if TYPE_CHECKING: from importlib.metadata import EntryPoint from zarr.core.common import ZarrFormat from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType # This class is different from the other registry classes, which inherit from # dict. IMO it's simpler to just do a dataclass. But long-term we should # have just 1 registry class in use. @dataclass(frozen=True, kw_only=True) class DataTypeRegistry: """ A registry for ZDType classes. This registry is a mapping from Zarr data type names to their corresponding ZDType classes. Attributes ---------- contents : dict[str, type[ZDType[TBaseDType, TBaseScalar]]] The mapping from Zarr data type names to their corresponding ZDType classes. """ contents: dict[str, type[ZDType[TBaseDType, TBaseScalar]]] = field( default_factory=dict, init=False ) _lazy_load_list: list[EntryPoint] = field(default_factory=list, init=False) def _lazy_load(self) -> None: """ Load all data types from the lazy load list and register them with the registry. After loading, clear the lazy load list. """ for e in self._lazy_load_list: self.register(e.load()._zarr_v3_name, e.load()) self._lazy_load_list.clear() def register(self: Self, key: str, cls: type[ZDType[TBaseDType, TBaseScalar]]) -> None: """ Register a data type with the registry. Parameters ---------- key : str The Zarr V3 name of the data type. cls : type[ZDType[TBaseDType, TBaseScalar]] The class of the data type to register. Notes ----- This method is idempotent. If the data type is already registered, this method does nothing. """ if key not in self.contents or self.contents[key] != cls: self.contents[key] = cls def unregister(self, key: str) -> None: """ Unregister a data type from the registry. Parameters ---------- key : str The key associated with the ZDType class to be unregistered. Returns ------- None Raises ------ KeyError If the data type is not found in the registry. """ if key in self.contents: del self.contents[key] else: raise KeyError(f"Data type '{key}' not found in registry.") def get(self, key: str) -> type[ZDType[TBaseDType, TBaseScalar]]: """ Retrieve a registered ZDType class by its key. Parameters ---------- key : str The key associated with the desired ZDType class. Returns ------- type[ZDType[TBaseDType, TBaseScalar]] The ZDType class registered under the given key. Raises ------ KeyError If the key is not found in the registry. 
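Examples
--------
A minimal sketch using a standalone registry; ``DateTime64`` is used purely as an example of a registered data type, and its import path is assumed to follow this package's layout:

```python
from zarr.core.dtype import DateTime64
from zarr.core.dtype.registry import DataTypeRegistry

registry = DataTypeRegistry()
# register the class under its Zarr V3 name, then look it up by that key
registry.register(DateTime64._zarr_v3_name, DateTime64)
assert registry.get("numpy.datetime64") is DateTime64
```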
""" return self.contents[key] def match_dtype(self, dtype: TBaseDType) -> ZDType[TBaseDType, TBaseScalar]: """ Match a native data type, e.g. a NumPy data type, to a registered ZDType. Parameters ---------- dtype : TBaseDType The native data type to match. Returns ------- ZDType[TBaseDType, TBaseScalar] The matched ZDType corresponding to the provided NumPy data type. Raises ------ ValueError If the data type is a NumPy "Object" type, which is ambiguous, or if multiple or no Zarr data types are found that match the provided dtype. Notes ----- This function attempts to resolve a Zarr data type from a given native data type. If the dtype is a NumPy "Object" data type, it raises a ValueError, as this type can represent multiple Zarr data types. In such cases, a specific Zarr data type should be explicitly constructed instead of relying on dynamic resolution. If multiple matches are found, it will also raise a ValueError. In this case conflicting data types must be unregistered, or the Zarr data type should be explicitly constructed. """ if dtype == np.dtype("O"): msg = ( f"Zarr data type resolution from {dtype} failed. " 'Attempted to resolve a zarr data type from a numpy "Object" data type, which is ' 'ambiguous, as multiple zarr data types can be represented by the numpy "Object" ' "data type. " "In this case you should construct your array by providing a specific Zarr data " 'type. For a list of Zarr data types that are compatible with the numpy "Object"' "data type, see https://github.com/zarr-developers/zarr-python/issues/3117" ) raise ValueError(msg) matched: list[ZDType[TBaseDType, TBaseScalar]] = [] for val in self.contents.values(): with contextlib.suppress(DataTypeValidationError): matched.append(val.from_native_dtype(dtype)) if len(matched) == 1: return matched[0] elif len(matched) > 1: msg = ( f"Zarr data type resolution from {dtype} failed. " f"Multiple data type wrappers found that match dtype '{dtype}': {matched}. " "You should unregister one of these data types, or avoid Zarr data type inference " "entirely by providing a specific Zarr data type when creating your array." "For more information, see https://github.com/zarr-developers/zarr-python/issues/3117" ) raise ValueError(msg) raise ValueError(f"No Zarr data type found that matches dtype '{dtype!r}'") def match_json( self, data: DTypeJSON, *, zarr_format: ZarrFormat ) -> ZDType[TBaseDType, TBaseScalar]: """ Match a JSON representation of a data type to a registered ZDType. Parameters ---------- data : DTypeJSON The JSON representation of a data type to match. zarr_format : ZarrFormat The Zarr format version to consider when matching data types. Returns ------- ZDType[TBaseDType, TBaseScalar] The matched ZDType corresponding to the JSON representation. Raises ------ ValueError If no matching Zarr data type is found for the given JSON data. """ for val in self.contents.values(): try: return val.from_json(data, zarr_format=zarr_format) except DataTypeValidationError: pass raise ValueError(f"No Zarr data type found that matches {data!r}") zarr-python-3.1.5/src/zarr/core/dtype/wrapper.py000066400000000000000000000227751511007055700217070ustar00rootroot00000000000000""" Wrapper for native array data types. The ``ZDType`` class is an abstract base class for wrapping native array data types, e.g. NumPy dtypes. ``ZDType`` provides a common interface for working with data types in a way that is independent of the underlying data type system. The wrapper class encapsulates a native data type. 
Instances of the class can be created from a native data type instance, and a native data type instance can be created from an instance of the wrapper class. The wrapper class is responsible for: - Serializing and deserializing a native data type to Zarr V2 or Zarr V3 metadata. This ensures that the data type can be properly stored and retrieved from array metadata. - Serializing and deserializing scalar values to Zarr V2 or Zarr V3 metadata. This is important for storing a fill value for an array in a manner that is valid for the data type. You can add support for a new data type in Zarr by subclassing ``ZDType`` wrapper class and adapt its methods to support your native data type. The wrapper class must be added to a data type registry (defined elsewhere) before array creation routines or array reading routines can use your new data type. """ from __future__ import annotations from abc import ABC, abstractmethod from dataclasses import dataclass from typing import ( TYPE_CHECKING, ClassVar, Generic, Literal, Self, TypeGuard, TypeVar, overload, ) import numpy as np if TYPE_CHECKING: from zarr.core.common import JSON, ZarrFormat from zarr.core.dtype.common import DTypeJSON, DTypeSpec_V2, DTypeSpec_V3 # This the upper bound for the scalar types we support. It's numpy scalars + str, # because the new variable-length string dtype in numpy does not have a corresponding scalar type TBaseScalar = np.generic | str | bytes # This is the bound for the dtypes that we support. If we support non-numpy dtypes, # then this bound will need to be widened. TBaseDType = np.dtype[np.generic] # These two type parameters are covariant because we want # x : ZDType[BaseDType, BaseScalar] = ZDType[SubDType, SubScalar] # to type check TScalar_co = TypeVar("TScalar_co", bound=TBaseScalar, covariant=True) TDType_co = TypeVar("TDType_co", bound=TBaseDType, covariant=True) @dataclass(frozen=True, kw_only=True, slots=True) class ZDType(ABC, Generic[TDType_co, TScalar_co]): """ Abstract base class for wrapping native array data types, e.g. numpy dtypes Attributes ---------- dtype_cls : ClassVar[type[TDType]] The wrapped dtype class. This is a class variable. _zarr_v3_name : ClassVar[str] The name given to the data type by a Zarr v3 data type specification. This is a class variable, and it should generally be unique across different data types. """ # this class will create a native data type dtype_cls: ClassVar[type[TDType_co]] _zarr_v3_name: ClassVar[str] @classmethod def _check_native_dtype(cls: type[Self], dtype: TBaseDType) -> TypeGuard[TDType_co]: """ Check that a native data type matches the dtype_cls class attribute. Used as a type guard. Parameters ---------- dtype : TDType The dtype to check. Returns ------- Bool True if the dtype matches, False otherwise. """ return type(dtype) is cls.dtype_cls @classmethod @abstractmethod def from_native_dtype(cls: type[Self], dtype: TBaseDType) -> Self: """ Create a ZDType instance from a native data type. This method is used when taking a user-provided native data type, like a NumPy data type, and creating the corresponding ZDType instance from them. Parameters ---------- dtype : TDType The native data type object to wrap. Returns ------- Self The ZDType that wraps the native data type. Raises ------ TypeError If the native data type is not consistent with the wrapped data type. """ raise NotImplementedError # pragma: no cover @abstractmethod def to_native_dtype(self: Self) -> TDType_co: """ Return an instance of the wrapped data type. 
This operation inverts ``from_native_dtype``. Returns ------- TDType The native data type wrapped by this ZDType. """ raise NotImplementedError # pragma: no cover @classmethod @abstractmethod def _from_json_v2(cls: type[Self], data: DTypeJSON) -> Self: raise NotImplementedError # pragma: no cover @classmethod @abstractmethod def _from_json_v3(cls: type[Self], data: DTypeJSON) -> Self: raise NotImplementedError # pragma: no cover @classmethod def from_json(cls: type[Self], data: DTypeJSON, *, zarr_format: ZarrFormat) -> Self: """ Create an instance of this ZDType from JSON data. Parameters ---------- data : DTypeJSON The JSON representation of the data type. zarr_format : ZarrFormat The zarr format version. Returns ------- Self An instance of this data type. """ if zarr_format == 2: return cls._from_json_v2(data) if zarr_format == 3: return cls._from_json_v3(data) raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @overload def to_json(self, zarr_format: Literal[2]) -> DTypeSpec_V2: ... @overload def to_json(self, zarr_format: Literal[3]) -> DTypeSpec_V3: ... @abstractmethod def to_json(self, zarr_format: ZarrFormat) -> DTypeSpec_V2 | DTypeSpec_V3: """ Serialize this ZDType to JSON. Parameters ---------- zarr_format : ZarrFormat The zarr format version. Returns ------- DTypeJSON_V2 | DTypeJSON_V3 The JSON-serializable representation of the wrapped data type """ raise NotImplementedError # pragma: no cover @abstractmethod def _check_scalar(self, data: object) -> bool: """ Check that an python object is a valid scalar value for the wrapped data type. Parameters ---------- data : object A value to check. Returns ------- Bool True if the object is valid, False otherwise. """ raise NotImplementedError # pragma: no cover @abstractmethod def cast_scalar(self, data: object) -> TScalar_co: """ Cast a python object to the wrapped scalar type. The type of the provided scalar is first checked for compatibility. If it's incompatible with the associated scalar type, a ``TypeError`` will be raised. Parameters ---------- data : object The python object to cast. Returns ------- TScalar The cast value. """ raise NotImplementedError # pragma: no cover @abstractmethod def default_scalar(self) -> TScalar_co: """ Get the default scalar value for the wrapped data type. This is a method, rather than an attribute, because the default value for some data types depends on parameters that are not known until a concrete data type is wrapped. For example, data types parametrized by a length like fixed-length strings or bytes will generate scalars consistent with that length. Returns ------- TScalar The default value for this data type. """ raise NotImplementedError # pragma: no cover @abstractmethod def from_json_scalar(self: Self, data: JSON, *, zarr_format: ZarrFormat) -> TScalar_co: """ Read a JSON-serializable value as a scalar. Parameters ---------- data : JSON A JSON representation of a scalar value. zarr_format : ZarrFormat The zarr format version. This is specified because the JSON serialization of scalars differs between Zarr V2 and Zarr V3. Returns ------- TScalar The deserialized scalar value. """ raise NotImplementedError # pragma: no cover @abstractmethod def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> JSON: """ Serialize a python object to the JSON representation of a scalar. The value will first be cast to the scalar type associated with this ZDType, then serialized to JSON. Parameters ---------- data : object The value to convert. 
zarr_format : ZarrFormat The zarr format version. This is specified because the JSON serialization of scalars differs between Zarr V2 and Zarr V3. Returns ------- JSON The JSON-serialized scalar. """ raise NotImplementedError # pragma: no cover def scalar_failed_type_check_msg( cls_instance: ZDType[TBaseDType, TBaseScalar], bad_scalar: object ) -> str: """ Generate an error message reporting that a particular value failed a type check when attempting to cast that value to a scalar. """ return ( f"The value {bad_scalar!r} failed a type check. " f"It cannot be safely cast to a scalar compatible with {cls_instance}. " f"Consult the documentation for {cls_instance} to determine the possible values that can " "be cast to scalars of the wrapped data type." ) zarr-python-3.1.5/src/zarr/core/group.py000066400000000000000000004437131511007055700202350ustar00rootroot00000000000000from __future__ import annotations import asyncio import itertools import json import logging import unicodedata import warnings from collections import defaultdict from dataclasses import asdict, dataclass, field, fields, replace from itertools import accumulate from typing import TYPE_CHECKING, Literal, TypeVar, assert_never, cast, overload import numpy as np import numpy.typing as npt from typing_extensions import deprecated import zarr.api.asynchronous as async_api from zarr.abc.metadata import Metadata from zarr.abc.store import Store, set_or_delete from zarr.core._info import GroupInfo from zarr.core.array import ( DEFAULT_FILL_VALUE, Array, AsyncArray, CompressorLike, CompressorsLike, FiltersLike, SerializerLike, ShardsLike, _parse_deprecated_compressor, create_array, ) from zarr.core.attributes import Attributes from zarr.core.buffer import default_buffer_prototype from zarr.core.common import ( JSON, ZARR_JSON, ZARRAY_JSON, ZATTRS_JSON, ZGROUP_JSON, ZMETADATA_V2_JSON, DimensionNames, NodeType, ShapeLike, ZarrFormat, parse_shapelike, ) from zarr.core.config import config from zarr.core.metadata import ArrayV2Metadata, ArrayV3Metadata from zarr.core.metadata.io import save_metadata from zarr.core.sync import SyncMixin, sync from zarr.errors import ( ContainsArrayError, ContainsGroupError, GroupNotFoundError, MetadataValidationError, ZarrDeprecationWarning, ZarrUserWarning, ) from zarr.storage import StoreLike, StorePath from zarr.storage._common import ensure_no_existing_node, make_store_path from zarr.storage._utils import _join_paths, _normalize_path_keys, normalize_path if TYPE_CHECKING: from collections.abc import ( AsyncGenerator, AsyncIterator, Coroutine, Generator, Iterable, Iterator, Mapping, ) from typing import Any from zarr.core.array_spec import ArrayConfigLike from zarr.core.buffer import Buffer, BufferPrototype from zarr.core.chunk_key_encodings import ChunkKeyEncodingLike from zarr.core.common import MemoryOrder from zarr.core.dtype import ZDTypeLike from zarr.types import AnyArray, AnyAsyncArray, ArrayV2, ArrayV3, AsyncArrayV2, AsyncArrayV3 logger = logging.getLogger("zarr.group") DefaultT = TypeVar("DefaultT") def parse_zarr_format(data: Any) -> ZarrFormat: """Parse the zarr_format field from metadata.""" if data in (2, 3): return cast("ZarrFormat", data) msg = f"Invalid zarr_format. Expected one of 2 or 3. Got {data}." raise ValueError(msg) def parse_node_type(data: Any) -> NodeType: """Parse the node_type field from metadata.""" if data in ("array", "group"): return cast("Literal['array', 'group']", data) msg = f"Invalid value for 'node_type'. Expected 'array' or 'group'. Got '{data}'." 
raise MetadataValidationError(msg) # todo: convert None to empty dict def parse_attributes(data: Any) -> dict[str, Any]: """Parse the attributes field from metadata.""" if data is None: return {} elif isinstance(data, dict) and all(isinstance(k, str) for k in data): return data msg = f"Expected dict with string keys. Got {type(data)} instead." raise TypeError(msg) @overload def _parse_async_node(node: AsyncArrayV3) -> ArrayV3: ... @overload def _parse_async_node(node: AsyncArrayV2) -> ArrayV2: ... @overload def _parse_async_node(node: AsyncGroup) -> Group: ... def _parse_async_node( node: AnyAsyncArray | AsyncGroup, ) -> AnyArray | Group: """Wrap an AsyncArray in an Array, or an AsyncGroup in a Group.""" if isinstance(node, AsyncArray): return Array(node) elif isinstance(node, AsyncGroup): return Group(node) else: raise TypeError(f"Unknown node type, got {type(node)}") @dataclass(frozen=True) class ConsolidatedMetadata: """ Consolidated Metadata for this Group. This stores the metadata of child nodes below this group. Any child groups will have their consolidated metadata set appropriately. """ metadata: dict[str, ArrayV2Metadata | ArrayV3Metadata | GroupMetadata] kind: Literal["inline"] = "inline" must_understand: Literal[False] = False def to_dict(self) -> dict[str, JSON]: return { "kind": self.kind, "must_understand": self.must_understand, "metadata": { k: v.to_dict() for k, v in sorted( self.flattened_metadata.items(), key=lambda item: ( item[0].count("/"), unicodedata.normalize("NFKC", item[0]).casefold(), ), ) }, } @classmethod def from_dict(cls, data: dict[str, JSON]) -> ConsolidatedMetadata: data = dict(data) kind = data.get("kind") if kind != "inline": raise ValueError(f"Consolidated metadata kind='{kind}' is not supported.") raw_metadata = data.get("metadata") if not isinstance(raw_metadata, dict): raise TypeError(f"Unexpected type for 'metadata': {type(raw_metadata)}") metadata: dict[str, ArrayV2Metadata | ArrayV3Metadata | GroupMetadata] = {} if raw_metadata: for k, v in raw_metadata.items(): if not isinstance(v, dict): raise TypeError( f"Invalid value for metadata items. key='{k}', type='{type(v).__name__}'" ) # zarr_format is present in v2 and v3. zarr_format = parse_zarr_format(v["zarr_format"]) if zarr_format == 3: node_type = parse_node_type(v.get("node_type", None)) if node_type == "group": metadata[k] = GroupMetadata.from_dict(v) elif node_type == "array": metadata[k] = ArrayV3Metadata.from_dict(v) else: assert_never(node_type) elif zarr_format == 2: if "shape" in v: metadata[k] = ArrayV2Metadata.from_dict(v) else: metadata[k] = GroupMetadata.from_dict(v) else: assert_never(zarr_format) cls._flat_to_nested(metadata) return cls(metadata=metadata) @staticmethod def _flat_to_nested( metadata: dict[str, ArrayV2Metadata | ArrayV3Metadata | GroupMetadata], ) -> None: """ Convert a flat metadata representation to a nested one. Notes ----- Flat metadata is used when persisting the consolidated metadata. The keys include the full path, not just the node name. The key prefixes can be used to determine which nodes are children of which other nodes. Nested metadata is used in-memory. The outermost level will only have the *immediate* children of the Group. All nested child groups will be stored under the consolidated metadata of their immediate parent. 
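Examples
--------
A small sketch of the transformation, using group metadata only for brevity (array metadata is moved in the same way; the input dict is modified in place):

```python
from zarr.core.group import ConsolidatedMetadata, GroupMetadata

flat = {"a": GroupMetadata(), "a/b": GroupMetadata()}
ConsolidatedMetadata._flat_to_nested(flat)
# "a/b" has been moved under its parent: flat now has the single key "a",
# and flat["a"].consolidated_metadata.metadata == {"b": GroupMetadata()}
```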
""" # We have a flat mapping from {k: v} where the keys include the *full* # path segment: # { # "/a/b": { group_metadata }, # "/a/b/array-0": { array_metadata }, # "/a/b/array-1": { array_metadata }, # } # # We want to reorganize the metadata such that each Group contains the # array metadata of its immediate children. # In the example, the group at `/a/b` will have consolidated metadata # for its children `array-0` and `array-1`. # # metadata = dict(metadata) keys = sorted(metadata, key=lambda k: k.count("/")) grouped = { k: list(v) for k, v in itertools.groupby(keys, key=lambda k: k.rsplit("/", 1)[0]) } # we go top down and directly manipulate metadata. for key, children_keys in grouped.items(): # key is a key like "a", "a/b", "a/b/c" # The basic idea is to find the immediate parent (so "", "a", or "a/b") # and update that node's consolidated metadata to include the metadata # in children_keys *prefixes, name = key.split("/") parent = metadata while prefixes: # e.g. a/b/c has a parent "a/b". Walk through to get # metadata["a"]["b"] part = prefixes.pop(0) # we can assume that parent[part] here is a group # otherwise we wouldn't have a node with this `part` prefix. # We can also assume that the parent node will have consolidated metadata, # because we're walking top to bottom. parent = parent[part].consolidated_metadata.metadata # type: ignore[union-attr] node = parent[name] children_keys = list(children_keys) if isinstance(node, ArrayV2Metadata | ArrayV3Metadata): # These are already present, either thanks to being an array in the # root, or by being collected as a child in the else clause continue children_keys = list(children_keys) # We pop from metadata, since we're *moving* this under group children = { child_key.split("/")[-1]: metadata.pop(child_key) for child_key in children_keys if child_key != key } parent[name] = replace( node, consolidated_metadata=ConsolidatedMetadata(metadata=children) ) @property def flattened_metadata(self) -> dict[str, ArrayV2Metadata | ArrayV3Metadata | GroupMetadata]: """ Return the flattened representation of Consolidated Metadata. The returned dictionary will have a key for each child node in the hierarchy under this group. Under the default (nested) representation available through ``self.metadata``, the dictionary only contains keys for immediate children. The keys of the dictionary will include the full path to a child node from the current group, where segments are joined by ``/``. 
Examples -------- ```python from zarr.core.group import ConsolidatedMetadata, GroupMetadata cm = ConsolidatedMetadata( metadata={ "group-0": GroupMetadata( consolidated_metadata=ConsolidatedMetadata( { "group-0-0": GroupMetadata(), } ) ), "group-1": GroupMetadata(), } ) # {'group-0': GroupMetadata(attributes={}, zarr_format=3, consolidated_metadata=None, node_type='group'), # 'group-0/group-0-0': GroupMetadata(attributes={}, zarr_format=3, consolidated_metadata=None, node_type='group'), # 'group-1': GroupMetadata(attributes={}, zarr_format=3, consolidated_metadata=None, node_type='group')} ``` """ metadata = {} def flatten( key: str, group: GroupMetadata | ArrayV2Metadata | ArrayV3Metadata ) -> dict[str, ArrayV2Metadata | ArrayV3Metadata | GroupMetadata]: children: dict[str, ArrayV2Metadata | ArrayV3Metadata | GroupMetadata] = {} if isinstance(group, ArrayV2Metadata | ArrayV3Metadata): children[key] = group else: if group.consolidated_metadata and group.consolidated_metadata.metadata is not None: children[key] = replace( group, consolidated_metadata=ConsolidatedMetadata(metadata={}) ) for name, val in group.consolidated_metadata.metadata.items(): full_key = f"{key}/{name}" if isinstance(val, GroupMetadata): children.update(flatten(full_key, val)) else: children[full_key] = val else: children[key] = replace(group, consolidated_metadata=None) return children for k, v in self.metadata.items(): metadata.update(flatten(k, v)) return metadata @dataclass(frozen=True) class GroupMetadata(Metadata): """ Metadata for a Group. """ attributes: dict[str, Any] = field(default_factory=dict) zarr_format: ZarrFormat = 3 consolidated_metadata: ConsolidatedMetadata | None = None node_type: Literal["group"] = field(default="group", init=False) def to_buffer_dict(self, prototype: BufferPrototype) -> dict[str, Buffer]: json_indent = config.get("json_indent") if self.zarr_format == 3: return { ZARR_JSON: prototype.buffer.from_bytes( json.dumps(self.to_dict(), indent=json_indent, allow_nan=True).encode() ) } else: items = { ZGROUP_JSON: prototype.buffer.from_bytes( json.dumps({"zarr_format": self.zarr_format}, indent=json_indent).encode() ), ZATTRS_JSON: prototype.buffer.from_bytes( json.dumps(self.attributes, indent=json_indent, allow_nan=True).encode() ), } if self.consolidated_metadata: d = { ZGROUP_JSON: {"zarr_format": self.zarr_format}, ZATTRS_JSON: self.attributes, } consolidated_metadata = self.consolidated_metadata.to_dict()["metadata"] assert isinstance(consolidated_metadata, dict) for k, v in consolidated_metadata.items(): attrs = v.pop("attributes", {}) d[f"{k}/{ZATTRS_JSON}"] = attrs if "shape" in v: # it's an array d[f"{k}/{ZARRAY_JSON}"] = v else: d[f"{k}/{ZGROUP_JSON}"] = { "zarr_format": self.zarr_format, "consolidated_metadata": { "metadata": {}, "must_understand": False, "kind": "inline", }, } items[ZMETADATA_V2_JSON] = prototype.buffer.from_bytes( json.dumps( {"metadata": d, "zarr_consolidated_format": 1}, allow_nan=True ).encode() ) return items def __init__( self, attributes: dict[str, Any] | None = None, zarr_format: ZarrFormat = 3, consolidated_metadata: ConsolidatedMetadata | None = None, ) -> None: attributes_parsed = parse_attributes(attributes) zarr_format_parsed = parse_zarr_format(zarr_format) object.__setattr__(self, "attributes", attributes_parsed) object.__setattr__(self, "zarr_format", zarr_format_parsed) object.__setattr__(self, "consolidated_metadata", consolidated_metadata) @classmethod def from_dict(cls, data: dict[str, Any]) -> GroupMetadata: data = dict(data) assert 
data.pop("node_type", None) in ("group", None) consolidated_metadata = data.pop("consolidated_metadata", None) if consolidated_metadata: data["consolidated_metadata"] = ConsolidatedMetadata.from_dict(consolidated_metadata) zarr_format = data.get("zarr_format") if zarr_format == 2 or zarr_format is None: # zarr v2 allowed arbitrary keys here. # We don't want the GroupMetadata constructor to fail just because someone put an # extra key in the metadata. expected = {x.name for x in fields(cls)} data = {k: v for k, v in data.items() if k in expected} return cls(**data) def to_dict(self) -> dict[str, Any]: result = asdict(replace(self, consolidated_metadata=None)) if self.consolidated_metadata is not None: result["consolidated_metadata"] = self.consolidated_metadata.to_dict() else: # Leave consolidated metadata unset if it's None result.pop("consolidated_metadata") return result @dataclass(frozen=True) class ImplicitGroupMarker(GroupMetadata): """ Marker for an implicit group. Instances of this class are only used in the context of group creation as a placeholder to represent groups that should only be created if they do not already exist in storage """ @dataclass(frozen=True) class AsyncGroup: """ Asynchronous Group object. """ metadata: GroupMetadata store_path: StorePath # TODO: make this correct and work # TODO: ensure that this can be bound properly to subclass of AsyncGroup @classmethod async def from_store( cls, store: StoreLike, *, attributes: dict[str, Any] | None = None, overwrite: bool = False, zarr_format: ZarrFormat = 3, ) -> AsyncGroup: store_path = await make_store_path(store) if overwrite: if store_path.store.supports_deletes: await store_path.delete_dir() else: await ensure_no_existing_node(store_path, zarr_format=zarr_format) else: await ensure_no_existing_node(store_path, zarr_format=zarr_format) attributes = attributes or {} group = cls( metadata=GroupMetadata(attributes=attributes, zarr_format=zarr_format), store_path=store_path, ) await group._save_metadata(ensure_parents=True) return group @classmethod async def open( cls, store: StoreLike, zarr_format: ZarrFormat | None = 3, use_consolidated: bool | str | None = None, ) -> AsyncGroup: """Open a new AsyncGroup Parameters ---------- store : StoreLike zarr_format : {2, 3}, optional use_consolidated : bool or str, default None Whether to use consolidated metadata. By default, consolidated metadata is used if it's present in the store (in the ``zarr.json`` for Zarr format 3 and in the ``.zmetadata`` file for Zarr format 2) and the Store supports it. To explicitly require consolidated metadata, set ``use_consolidated=True``. In this case, if the Store doesn't support consolidation or consolidated metadata is not found, a ``ValueError`` exception is raised. To explicitly *not* use consolidated metadata, set ``use_consolidated=False``, which will fall back to using the regular, non consolidated metadata. Zarr format 2 allowed configuring the key storing the consolidated metadata (``.zmetadata`` by default). Specify the custom key as ``use_consolidated`` to load consolidated metadata from a non-default key. """ store_path = await make_store_path(store) if not store_path.store.supports_consolidated_metadata: # Fail if consolidated metadata was requested but the Store doesn't support it if use_consolidated: store_name = type(store_path.store).__name__ raise ValueError( f"The Zarr store in use ({store_name}) doesn't support consolidated metadata." 
) # if use_consolidated was None (optional), the Store dictates it doesn't want consolidation use_consolidated = False consolidated_key = ZMETADATA_V2_JSON if (zarr_format == 2 or zarr_format is None) and isinstance(use_consolidated, str): consolidated_key = use_consolidated if zarr_format == 2: paths = [store_path / ZGROUP_JSON, store_path / ZATTRS_JSON] if use_consolidated or use_consolidated is None: paths.append(store_path / consolidated_key) zgroup_bytes, zattrs_bytes, *rest = await asyncio.gather( *[path.get() for path in paths] ) if zgroup_bytes is None: raise FileNotFoundError(store_path) if use_consolidated or use_consolidated is None: maybe_consolidated_metadata_bytes = rest[0] else: maybe_consolidated_metadata_bytes = None elif zarr_format == 3: zarr_json_bytes = await (store_path / ZARR_JSON).get() if zarr_json_bytes is None: raise FileNotFoundError(store_path) elif zarr_format is None: ( zarr_json_bytes, zgroup_bytes, zattrs_bytes, maybe_consolidated_metadata_bytes, ) = await asyncio.gather( (store_path / ZARR_JSON).get(), (store_path / ZGROUP_JSON).get(), (store_path / ZATTRS_JSON).get(), (store_path / str(consolidated_key)).get(), ) if zarr_json_bytes is not None and zgroup_bytes is not None: # warn and favor v3 msg = f"Both zarr.json (Zarr format 3) and .zgroup (Zarr format 2) metadata objects exist at {store_path}. Zarr format 3 will be used." warnings.warn(msg, category=ZarrUserWarning, stacklevel=1) if zarr_json_bytes is None and zgroup_bytes is None: raise FileNotFoundError( f"could not find zarr.json or .zgroup objects in {store_path}" ) # set zarr_format based on which keys were found if zarr_json_bytes is not None: zarr_format = 3 else: zarr_format = 2 else: msg = f"Invalid value for 'zarr_format'. Expected 2, 3, or None. Got '{zarr_format}'." # type: ignore[unreachable] raise MetadataValidationError(msg) if zarr_format == 2: # this is checked above, asserting here for mypy assert zgroup_bytes is not None if use_consolidated and maybe_consolidated_metadata_bytes is None: # the user requested consolidated metadata, but it was missing raise ValueError(consolidated_key) elif use_consolidated is False: # the user explicitly opted out of consolidated_metadata. # Discard anything we might have read. maybe_consolidated_metadata_bytes = None return cls._from_bytes_v2( store_path, zgroup_bytes, zattrs_bytes, maybe_consolidated_metadata_bytes ) else: # V3 groups are comprised of a zarr.json object assert zarr_json_bytes is not None if not isinstance(use_consolidated, bool | None): raise TypeError("use_consolidated must be a bool or None for Zarr format 3.") return cls._from_bytes_v3( store_path, zarr_json_bytes, use_consolidated=use_consolidated, ) @classmethod def _from_bytes_v2( cls, store_path: StorePath, zgroup_bytes: Buffer, zattrs_bytes: Buffer | None, consolidated_metadata_bytes: Buffer | None, ) -> AsyncGroup: # V2 groups are comprised of a .zgroup and .zattrs objects zgroup = json.loads(zgroup_bytes.to_bytes()) zattrs = json.loads(zattrs_bytes.to_bytes()) if zattrs_bytes is not None else {} group_metadata = {**zgroup, "attributes": zattrs} if consolidated_metadata_bytes is not None: v2_consolidated_metadata = json.loads(consolidated_metadata_bytes.to_bytes()) v2_consolidated_metadata = v2_consolidated_metadata["metadata"] # We already read zattrs and zgroup. Should we ignore these? 
v2_consolidated_metadata.pop(".zattrs", None) v2_consolidated_metadata.pop(".zgroup", None) consolidated_metadata: defaultdict[str, dict[str, Any]] = defaultdict(dict) # keys like air/.zarray, air/.zattrs for k, v in v2_consolidated_metadata.items(): path, kind = k.rsplit("/.", 1) if kind == "zarray": consolidated_metadata[path].update(v) elif kind == "zattrs": consolidated_metadata[path]["attributes"] = v elif kind == "zgroup": consolidated_metadata[path].update(v) else: raise ValueError(f"Invalid file type '{kind}' at path '{path}") group_metadata["consolidated_metadata"] = { "metadata": dict(consolidated_metadata), "kind": "inline", "must_understand": False, } return cls.from_dict(store_path, group_metadata) @classmethod def _from_bytes_v3( cls, store_path: StorePath, zarr_json_bytes: Buffer, use_consolidated: bool | None, ) -> AsyncGroup: group_metadata = json.loads(zarr_json_bytes.to_bytes()) if use_consolidated and group_metadata.get("consolidated_metadata") is None: msg = f"Consolidated metadata requested with 'use_consolidated=True' but not found in '{store_path.path}'." raise ValueError(msg) elif use_consolidated is False: # Drop consolidated metadata if it's there. group_metadata.pop("consolidated_metadata", None) return cls.from_dict(store_path, group_metadata) @classmethod def from_dict( cls, store_path: StorePath, data: dict[str, Any], ) -> AsyncGroup: node_type = data.pop("node_type", None) if node_type == "array": msg = f"An array already exists in store {store_path.store} at path {store_path.path}." raise ContainsArrayError(msg) elif node_type not in ("group", None): msg = f"Node type in metadata ({node_type}) is not 'group'" raise GroupNotFoundError(msg) return cls( metadata=GroupMetadata.from_dict(data), store_path=store_path, ) async def setitem(self, key: str, value: Any) -> None: """ Fastpath for creating a new array New arrays will be created with default array settings for the array type. Parameters ---------- key : str Array name value : array-like Array data """ path = self.store_path / key await async_api.save_array( store=path, arr=value, zarr_format=self.metadata.zarr_format, overwrite=True ) async def getitem( self, key: str, ) -> AnyAsyncArray | AsyncGroup: """ Get a subarray or subgroup from the group. Parameters ---------- key : str Array or group name Returns ------- AsyncArray or AsyncGroup """ store_path = self.store_path / key logger.debug("key=%s, store_path=%s", key, store_path) # Consolidated metadata lets us avoid some I/O operations so try that first. if self.metadata.consolidated_metadata is not None: return self._getitem_consolidated(store_path, key, prefix=self.name) try: return await get_node( store=store_path.store, path=store_path.path, zarr_format=self.metadata.zarr_format ) except FileNotFoundError as e: raise KeyError(key) from e def _getitem_consolidated( self, store_path: StorePath, key: str, prefix: str ) -> AnyAsyncArray | AsyncGroup: # getitem, in the special case where we have consolidated metadata. # Note that this is a regular def (non async) function. # This shouldn't do any additional I/O. # the caller needs to verify this! assert self.metadata.consolidated_metadata is not None # we support nested getitems like group/subgroup/array indexers = normalize_path(key).split("/") indexers.reverse() metadata: ArrayV2Metadata | ArrayV3Metadata | GroupMetadata = self.metadata while indexers: indexer = indexers.pop() if isinstance(metadata, ArrayV2Metadata | ArrayV3Metadata): # we've indexed into an array with group["array/subarray"]. 
Invalid. raise KeyError(key) if metadata.consolidated_metadata is None: # we've indexed into a group without consolidated metadata. # This isn't normal; typically, consolidated metadata # will include explicit markers for when there are no child # nodes as metadata={}. # We have some freedom in exactly how we interpret this case. # For now, we treat None as the same as {}, i.e. we don't # have any children. raise KeyError(key) try: metadata = metadata.consolidated_metadata.metadata[indexer] except KeyError as e: # The Group Metadata has consolidated metadata, but the key # isn't present. We trust this to mean that the key isn't in # the hierarchy, and *don't* fall back to checking the store. msg = f"'{key}' not found in consolidated metadata." raise KeyError(msg) from e # update store_path to ensure that AsyncArray/Group.name is correct if prefix != "/": key = "/".join([prefix.lstrip("/"), key]) store_path = StorePath(store=store_path.store, path=key) if isinstance(metadata, GroupMetadata): return AsyncGroup(metadata=metadata, store_path=store_path) else: return AsyncArray(metadata=metadata, store_path=store_path) async def delitem(self, key: str) -> None: """Delete a group member. Parameters ---------- key : str Array or group name """ store_path = self.store_path / key await store_path.delete_dir() if self.metadata.consolidated_metadata: self.metadata.consolidated_metadata.metadata.pop(key, None) await self._save_metadata() async def get( self, key: str, default: DefaultT | None = None ) -> AnyAsyncArray | AsyncGroup | DefaultT | None: """Obtain a group member, returning default if not found. Parameters ---------- key : str Group member name. default : object Default value to return if key is not found (default: None). Returns ------- object Group member (AsyncArray or AsyncGroup) or default if not found. """ try: return await self.getitem(key) except KeyError: return default async def _save_metadata(self, ensure_parents: bool = False) -> None: await save_metadata(self.store_path, self.metadata, ensure_parents=ensure_parents) @property def path(self) -> str: """Storage path.""" return self.store_path.path @property def name(self) -> str: """Group name following h5py convention.""" if self.path: # follow h5py convention: add leading slash name = self.path if name[0] != "/": name = "/" + name return name return "/" @property def basename(self) -> str: """Final component of name.""" return self.name.split("/")[-1] @property def attrs(self) -> dict[str, Any]: return self.metadata.attributes @property def info(self) -> Any: """ Return a visual representation of the statically known information about a group. Note that this doesn't include dynamic information, like the number of child Groups or Arrays. Returns ------- GroupInfo Related ------- [zarr.AsyncGroup.info_complete][] All information about a group, including dynamic information """ if self.metadata.consolidated_metadata: members = list(self.metadata.consolidated_metadata.flattened_metadata.values()) else: members = None return self._info(members=members) async def info_complete(self) -> Any: """ Return all the information for a group. This includes dynamic information like the number of child Groups or Arrays. If this group doesn't contain consolidated metadata then this will need to read from the backing Store. 
Returns ------- GroupInfo Related ------- [zarr.AsyncGroup.info][] """ members = [x[1].metadata async for x in self.members(max_depth=None)] return self._info(members=members) def _info( self, members: list[ArrayV2Metadata | ArrayV3Metadata | GroupMetadata] | None = None ) -> Any: kwargs = {} if members is not None: kwargs["_count_members"] = len(members) count_arrays = 0 count_groups = 0 for member in members: if isinstance(member, GroupMetadata): count_groups += 1 else: count_arrays += 1 kwargs["_count_arrays"] = count_arrays kwargs["_count_groups"] = count_groups return GroupInfo( _name=self.store_path.path, _read_only=self.read_only, _store_type=type(self.store_path.store).__name__, _zarr_format=self.metadata.zarr_format, # maybe do a typeddict **kwargs, # type: ignore[arg-type] ) @property def store(self) -> Store: return self.store_path.store @property def read_only(self) -> bool: # Backwards compatibility for 2.x return self.store_path.read_only @property def synchronizer(self) -> None: # Backwards compatibility for 2.x # Not implemented in 3.x yet. return None async def create_group( self, name: str, *, overwrite: bool = False, attributes: dict[str, Any] | None = None, ) -> AsyncGroup: """Create a sub-group. Parameters ---------- name : str Group name. overwrite : bool, optional If True, do not raise an error if the group already exists. attributes : dict, optional Group attributes. Returns ------- g : AsyncGroup """ attributes = attributes or {} return await type(self).from_store( self.store_path / name, attributes=attributes, overwrite=overwrite, zarr_format=self.metadata.zarr_format, ) async def require_group(self, name: str, overwrite: bool = False) -> AsyncGroup: """Obtain a sub-group, creating one if it doesn't exist. Parameters ---------- name : str Group name. overwrite : bool, optional Overwrite any existing group with given `name` if present. Returns ------- g : AsyncGroup """ if overwrite: # TODO: check that overwrite=True errors if an array exists where the group is being created grp = await self.create_group(name, overwrite=True) else: try: item: AsyncGroup | AnyAsyncArray = await self.getitem(name) if not isinstance(item, AsyncGroup): raise TypeError( f"Incompatible object ({item.__class__.__name__}) already exists" ) assert isinstance(item, AsyncGroup) # make mypy happy grp = item except KeyError: grp = await self.create_group(name) return grp async def require_groups(self, *names: str) -> tuple[AsyncGroup, ...]: """Convenience method to require multiple groups in a single call. Parameters ---------- *names : str Group names. Returns ------- Tuple[AsyncGroup, ...] """ if not names: return () return tuple(await asyncio.gather(*(self.require_group(name) for name in names))) async def create_array( self, name: str, *, shape: ShapeLike | None = None, dtype: ZDTypeLike | None = None, data: np.ndarray[Any, np.dtype[Any]] | None = None, chunks: tuple[int, ...] | Literal["auto"] = "auto", shards: ShardsLike | None = None, filters: FiltersLike = "auto", compressors: CompressorsLike = "auto", compressor: CompressorLike = "auto", serializer: SerializerLike = "auto", fill_value: Any | None = DEFAULT_FILL_VALUE, order: MemoryOrder | None = None, attributes: dict[str, JSON] | None = None, chunk_key_encoding: ChunkKeyEncodingLike | None = None, dimension_names: DimensionNames = None, storage_options: dict[str, Any] | None = None, overwrite: bool = False, config: ArrayConfigLike | None = None, write_data: bool = True, ) -> AnyAsyncArray: """Create an array within this group. 
This method lightly wraps [zarr.core.array.create_array][]. Parameters ---------- name : str The name of the array relative to the group. If ``path`` is ``None``, the array will be located at the root of the store. shape : tuple[int, ...] Shape of the array. dtype : npt.DTypeLike Data type of the array. chunks : tuple[int, ...], optional Chunk shape of the array. If not specified, defaults are guessed based on the shape and dtype. shards : tuple[int, ...], optional Shard shape of the array. The default value of ``None`` results in no sharding at all. filters : Iterable[Codec] | Literal["auto"], optional Iterable of filters to apply to each chunk of the array, in order, before serializing that chunk to bytes. For Zarr format 3, a "filter" is a codec that takes an array and returns an array, and these values must be instances of [`zarr.abc.codec.ArrayArrayCodec`][], or dict representations of [`zarr.abc.codec.ArrayArrayCodec`][]. For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the order of your filters is consistent with the behavior of each filter. The default value of ``"auto"`` instructs Zarr to use a default based on the data type of the array and the Zarr format specified. For all data types in Zarr V3, and most data types in Zarr V2, the default filters are empty. The only cases where default filters are not empty are when the Zarr format is 2, and the data type is a variable-length data type like [`zarr.dtype.VariableLengthUTF8`][] or [`zarr.dtype.VariableLengthBytes`][]. In these cases, the default filters contain a single element which is a codec specific to that particular data type. To create an array with no filters, provide an empty iterable or the value ``None``. compressors : Iterable[Codec], optional List of compressors to apply to the array. Compressors are applied in order, after any filters are applied (if any are specified) and the data is serialized into bytes. For Zarr format 3, a "compressor" is a codec that takes a bytestream, and returns another bytestream. Multiple compressors may be provided for Zarr format 3. If no ``compressors`` are provided, a default set of compressors will be used. These defaults can be changed by modifying the value of ``array.v3_default_compressors`` in [`zarr.config`][zarr.config]. Use ``None`` to omit default compressors. For Zarr format 2, a "compressor" can be any numcodecs codec. Only a single compressor may be provided for Zarr format 2. If no ``compressor`` is provided, a default compressor will be used. This default can be changed by modifying the value of ``array.v2_default_compressor`` in [`zarr.config`][zarr.config]. Use ``None`` to omit the default compressor. compressor : Codec, optional Deprecated in favor of ``compressors``. serializer : dict[str, JSON] | ArrayBytesCodec, optional Array-to-bytes codec to use for encoding the array data. Zarr format 3 only. Zarr format 2 arrays use implicit array-to-bytes conversion. If no ``serializer`` is provided, a default serializer will be used. These defaults can be changed by modifying the value of ``array.v3_default_serializer`` in [`zarr.config`][zarr.config]. fill_value : Any, optional Fill value for the array. order : {"C", "F"}, optional The memory order of the array (default is "C"). For Zarr format 2, this parameter sets the memory order of the array. For Zarr format 3, this parameter is deprecated, because memory order is a runtime parameter for Zarr format 3 arrays. The recommended way to specify the memory order for Zarr format 3 arrays is via the ``config`` parameter, e.g. ``{'order': 'C'}``.
If no ``order`` is provided, a default order will be used. This default can be changed by modifying the value of ``array.order`` in [`zarr.config`][zarr.config]. attributes : dict, optional Attributes for the array. chunk_key_encoding : ChunkKeyEncoding, optional A specification of how the chunk keys are represented in storage. For Zarr format 3, the default is ``{"name": "default", "separator": "/"}}``. For Zarr format 2, the default is ``{"name": "v2", "separator": "."}}``. dimension_names : Iterable[str], optional The names of the dimensions (default is None). Zarr format 3 only. Zarr format 2 arrays should not use this parameter. storage_options : dict, optional If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. overwrite : bool, default False Whether to overwrite an array with the same name in the store, if one exists. config : ArrayConfig or ArrayConfigLike, optional Runtime configuration for the array. write_data : bool If a pre-existing array-like object was provided to this function via the ``data`` parameter then ``write_data`` determines whether the values in that array-like object should be written to the Zarr array created by this function. If ``write_data`` is ``False``, then the array will be left empty. Returns ------- AsyncArray """ compressors = _parse_deprecated_compressor( compressor, compressors, zarr_format=self.metadata.zarr_format ) return await create_array( store=self.store_path, name=name, shape=shape, dtype=dtype, data=data, chunks=chunks, shards=shards, filters=filters, compressors=compressors, serializer=serializer, fill_value=fill_value, order=order, zarr_format=self.metadata.zarr_format, attributes=attributes, chunk_key_encoding=chunk_key_encoding, dimension_names=dimension_names, storage_options=storage_options, overwrite=overwrite, config=config, write_data=write_data, ) @deprecated("Use AsyncGroup.create_array instead.", category=ZarrDeprecationWarning) async def create_dataset(self, name: str, *, shape: ShapeLike, **kwargs: Any) -> AnyAsyncArray: """Create an array. !!! warning "Deprecated" `AsyncGroup.create_dataset()` is deprecated since v3.0.0 and will be removed in v3.1.0. Use `AsyncGroup.create_array` instead. Arrays are known as "datasets" in HDF5 terminology. For compatibility with h5py, Zarr groups also implement the [zarr.AsyncGroup.require_dataset][] method. Parameters ---------- name : str Array name. **kwargs : dict Additional arguments passed to [zarr.AsyncGroup.create_array][]. Returns ------- a : AsyncArray """ data = kwargs.pop("data", None) # create_dataset in zarr 2.x requires shape but not dtype if data is # provided. Allow this configuration by inferring dtype from data if # necessary and passing it to create_array if "dtype" not in kwargs and data is not None: kwargs["dtype"] = data.dtype array = await self.create_array(name, shape=shape, **kwargs) if data is not None: await array.setitem(slice(None), data) return array @deprecated("Use AsyncGroup.require_array instead.", category=ZarrDeprecationWarning) async def require_dataset( self, name: str, *, shape: tuple[int, ...], dtype: npt.DTypeLike = None, exact: bool = False, **kwargs: Any, ) -> AnyAsyncArray: """Obtain an array, creating if it doesn't exist. !!! warning "Deprecated" `AsyncGroup.require_dataset()` is deprecated since v3.0.0 and will be removed in v3.1.0. Use `AsyncGroup.require_dataset` instead. Arrays are known as "datasets" in HDF5 terminology. 
For compatibility with h5py, Zarr groups also implement the [zarr.AsyncGroup.create_dataset][] method. Other `kwargs` are as per [zarr.AsyncGroup.create_dataset][]. Parameters ---------- name : str Array name. shape : int or tuple of ints Array shape. dtype : str or dtype, optional NumPy dtype. exact : bool, optional If True, require `dtype` to match exactly. If False, require only that `dtype` can be cast from the array dtype. Returns ------- a : AsyncArray """ return await self.require_array(name, shape=shape, dtype=dtype, exact=exact, **kwargs) async def require_array( self, name: str, *, shape: ShapeLike, dtype: npt.DTypeLike = None, exact: bool = False, **kwargs: Any, ) -> AnyAsyncArray: """Obtain an array, creating if it doesn't exist. Other `kwargs` are as per [zarr.AsyncGroup.create_array][]. Parameters ---------- name : str Array name. shape : int or tuple of ints Array shape. dtype : str or dtype, optional NumPy dtype. exact : bool, optional If True, require `dtype` to match exactly. If False, require only that `dtype` can be cast from the array dtype. Returns ------- a : AsyncArray """ try: ds = await self.getitem(name) if not isinstance(ds, AsyncArray): raise TypeError(f"Incompatible object ({ds.__class__.__name__}) already exists") shape = parse_shapelike(shape) if shape != ds.shape: raise TypeError(f"Incompatible shape ({ds.shape} vs {shape})") dtype = np.dtype(dtype) if exact: if ds.dtype != dtype: raise TypeError(f"Incompatible dtype ({ds.dtype} vs {dtype})") else: if not np.can_cast(ds.dtype, dtype): raise TypeError(f"Incompatible dtype ({ds.dtype} vs {dtype})") except KeyError: ds = await self.create_array(name, shape=shape, dtype=dtype, **kwargs) return ds async def update_attributes(self, new_attributes: dict[str, Any]) -> AsyncGroup: """Update group attributes. Parameters ---------- new_attributes : dict New attributes to set on the group. Returns ------- self : AsyncGroup """ self.metadata.attributes.update(new_attributes) # Write new metadata await self._save_metadata() return self def __repr__(self) -> str: return f"<AsyncGroup {self.store_path}>" async def nmembers( self, max_depth: int | None = 0, ) -> int: """Count the number of members in this group. Parameters ---------- max_depth : int, default 0 The maximum number of levels of the hierarchy to include. By default (``max_depth=0``), only immediate children are included. Set ``max_depth=None`` to include all nodes, and some positive integer to consider children within that many levels of the root Group. Returns ------- count : int """ # check if we can use consolidated metadata, which requires that we have non-None # consolidated metadata at all points in the hierarchy. if self.metadata.consolidated_metadata is not None: if max_depth is not None and max_depth < 0: raise ValueError(f"max_depth must be None or >= 0. Got '{max_depth}' instead") if max_depth is None: return len(self.metadata.consolidated_metadata.flattened_metadata) else: return len( [ x for x in self.metadata.consolidated_metadata.flattened_metadata if x.count("/") <= max_depth ] ) # TODO: consider using aioitertools.builtins.sum for this # return await aioitertools.builtins.sum((1 async for _ in self.members()), start=0) n = 0 async for _ in self.members(max_depth=max_depth): n += 1 return n async def members( self, max_depth: int | None = 0, *, use_consolidated_for_children: bool = True, ) -> AsyncGenerator[ tuple[str, AnyAsyncArray | AsyncGroup], None, ]: """ Returns an AsyncGenerator over the arrays and groups contained in this group.
This method requires that `store_path.store` supports directory listing. The results are not guaranteed to be ordered. Parameters ---------- max_depth : int, default 0 The maximum number of levels of the hierarchy to include. By default, (``max_depth=0``) only immediate children are included. Set ``max_depth=None`` to include all nodes, and some positive integer to consider children within that many levels of the root Group. use_consolidated_for_children : bool, default True Whether to use the consolidated metadata of child groups loaded from the store. Note that this only affects groups loaded from the store. If the current Group already has consolidated metadata, it will always be used. Returns ------- path: A string giving the path to the target, relative to the Group ``self``. value: AsyncArray or AsyncGroup The AsyncArray or AsyncGroup that is a child of ``self``. """ if max_depth is not None and max_depth < 0: raise ValueError(f"max_depth must be None or >= 0. Got '{max_depth}' instead") async for item in self._members( max_depth=max_depth, use_consolidated_for_children=use_consolidated_for_children ): yield item def _members_consolidated( self, max_depth: int | None, prefix: str = "" ) -> Generator[ tuple[str, AnyAsyncArray | AsyncGroup], None, ]: consolidated_metadata = self.metadata.consolidated_metadata do_recursion = max_depth is None or max_depth > 0 # we kind of just want the top-level keys. if consolidated_metadata is not None: for key in consolidated_metadata.metadata: obj = self._getitem_consolidated( self.store_path, key, prefix=self.name ) # Metadata -> Group/Array key = f"{prefix}/{key}".lstrip("/") yield key, obj if do_recursion and isinstance(obj, AsyncGroup): if max_depth is None: new_depth = None else: new_depth = max_depth - 1 yield from obj._members_consolidated(new_depth, prefix=key) async def _members( self, max_depth: int | None, *, use_consolidated_for_children: bool = True ) -> AsyncGenerator[tuple[str, AnyAsyncArray | AsyncGroup], None]: skip_keys: tuple[str, ...] if self.metadata.zarr_format == 2: skip_keys = (".zattrs", ".zgroup", ".zarray", ".zmetadata") elif self.metadata.zarr_format == 3: skip_keys = ("zarr.json",) else: raise ValueError(f"Unknown Zarr format: {self.metadata.zarr_format}") if self.metadata.consolidated_metadata is not None: members = self._members_consolidated(max_depth=max_depth) for member in members: yield member return if not self.store_path.store.supports_listing: msg = ( f"The store associated with this group ({type(self.store_path.store)}) " "does not support listing, " "specifically via the `list_dir` method. " "This function requires a store that supports listing." ) raise ValueError(msg) # enforce a concurrency limit by passing a semaphore to all the recursive functions semaphore = asyncio.Semaphore(config.get("async.concurrency")) async for member in _iter_members_deep( self, max_depth=max_depth, skip_keys=skip_keys, semaphore=semaphore, use_consolidated_for_children=use_consolidated_for_children, ): yield member async def create_hierarchy( self, nodes: dict[str, ArrayV2Metadata | ArrayV3Metadata | GroupMetadata], *, overwrite: bool = False, ) -> AsyncIterator[tuple[str, AsyncGroup | AnyAsyncArray]]: """ Create a hierarchy of arrays or groups rooted at this group. This function will parse its input to ensure that the hierarchy is complete. Any implicit groups will be inserted as needed. For example, an input like ```{'a/b': GroupMetadata}``` will be parsed to ```{'': GroupMetadata, 'a': GroupMetadata, 'b': Groupmetadata}```. 
Explicitly specifying a root group, e.g. with ``nodes = {'': GroupMetadata()}`` is an error because this group instance is the root group. After input parsing, this function then creates all the nodes in the hierarchy concurrently. Arrays and Groups are yielded in the order they are created. This order is not stable and should not be relied on. Parameters ---------- nodes : dict[str, GroupMetadata | ArrayV3Metadata | ArrayV2Metadata] A dictionary defining the hierarchy. The keys are the paths of the nodes in the hierarchy, relative to the path of the group. The values are instances of ``GroupMetadata`` or ``ArrayMetadata``. Note that all values must have the same ``zarr_format`` as the parent group -- it is an error to mix zarr versions in the same hierarchy. Leading "/" characters from keys will be removed. overwrite : bool Whether to overwrite existing nodes. Defaults to ``False``, in which case an error is raised instead of overwriting an existing array or group. This function will not erase an existing group unless that group is explicitly named in ``nodes``. If ``nodes`` defines implicit groups, e.g. ``{`'a/b/c': GroupMetadata}``, and a group already exists at path ``a``, then this function will leave the group at ``a`` as-is. Yields ------ tuple[str, AsyncArray | AsyncGroup]. """ # check that all the nodes have the same zarr_format as Self prefix = self.path nodes_parsed = {} for key, value in nodes.items(): if value.zarr_format != self.metadata.zarr_format: msg = ( "The zarr_format of the nodes must be the same as the parent group. " f"The node at {key} has zarr_format {value.zarr_format}, but the parent group" f" has zarr_format {self.metadata.zarr_format}." ) raise ValueError(msg) if normalize_path(key) == "": msg = ( "The input defines a root node, but a root node already exists, namely this Group instance." "It is an error to use this method to create a root node. " "Remove the root node from the input dict, or use a function like " "create_rooted_hierarchy to create a rooted hierarchy." ) raise ValueError(msg) else: nodes_parsed[_join_paths([prefix, key])] = value async for key, node in create_hierarchy( store=self.store, nodes=nodes_parsed, overwrite=overwrite, ): if prefix == "": out_key = key else: out_key = key.removeprefix(prefix + "/") yield out_key, node async def keys(self) -> AsyncGenerator[str, None]: """Iterate over member names.""" async for key, _ in self.members(): yield key async def contains(self, member: str) -> bool: """Check if a member exists in the group. Parameters ---------- member : str Member name. Returns ------- bool """ # TODO: this can be made more efficient. 
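# The check below resolves ``member`` via ``getitem`` and interprets a ``KeyError``
# as "not present", so it may fetch and parse the member's metadata just to answer
# a membership query.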
try: await self.getitem(member) except KeyError: return False else: return True async def groups(self) -> AsyncGenerator[tuple[str, AsyncGroup], None]: """Iterate over subgroups.""" async for name, value in self.members(): if isinstance(value, AsyncGroup): yield name, value async def group_keys(self) -> AsyncGenerator[str, None]: """Iterate over group names.""" async for key, _ in self.groups(): yield key async def group_values(self) -> AsyncGenerator[AsyncGroup, None]: """Iterate over group values.""" async for _, group in self.groups(): yield group async def arrays( self, ) -> AsyncGenerator[tuple[str, AnyAsyncArray], None]: """Iterate over arrays.""" async for key, value in self.members(): if isinstance(value, AsyncArray): yield key, value async def array_keys(self) -> AsyncGenerator[str, None]: """Iterate over array names.""" async for key, _ in self.arrays(): yield key async def array_values( self, ) -> AsyncGenerator[AnyAsyncArray, None]: """Iterate over array values.""" async for _, array in self.arrays(): yield array async def tree(self, expand: bool | None = None, level: int | None = None) -> Any: """ Return a tree-like representation of a hierarchy. This requires the optional ``rich`` dependency. Parameters ---------- expand : bool, optional This keyword is not yet supported. A NotImplementedError is raised if it's used. level : int, optional The maximum depth below this Group to display in the tree. Returns ------- TreeRepr A pretty-printable object displaying the hierarchy. """ from zarr.core._tree import group_tree_async if expand is not None: raise NotImplementedError("'expand' is not yet implemented.") return await group_tree_async(self, max_depth=level) async def empty(self, *, name: str, shape: tuple[int, ...], **kwargs: Any) -> AnyAsyncArray: """Create an empty array with the specified shape in this Group. The contents will be filled with the array's fill value or zeros if no fill value is provided. Parameters ---------- name : str Name of the array. shape : int or tuple of int Shape of the empty array. **kwargs Keyword arguments passed to [zarr.api.asynchronous.create][]. Notes ----- The contents of an empty Zarr array are not defined. On attempting to retrieve data from an empty Zarr array, any values may be returned, and these are not guaranteed to be stable from one access to the next. """ return await async_api.empty(shape=shape, store=self.store_path, path=name, **kwargs) async def zeros(self, *, name: str, shape: tuple[int, ...], **kwargs: Any) -> AnyAsyncArray: """Create an array, with zero being used as the default value for uninitialized portions of the array. Parameters ---------- name : str Name of the array. shape : int or tuple of int Shape of the empty array. **kwargs Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- AsyncArray The new array. """ return await async_api.zeros(shape=shape, store=self.store_path, path=name, **kwargs) async def ones(self, *, name: str, shape: tuple[int, ...], **kwargs: Any) -> AnyAsyncArray: """Create an array, with one being used as the default value for uninitialized portions of the array. Parameters ---------- name : str Name of the array. shape : int or tuple of int Shape of the empty array. **kwargs Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- AsyncArray The new array. 
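Examples
--------
A minimal usage sketch (added for illustration; the in-memory store and the
array name used here are arbitrary choices). It shows one way to call this
coroutine from synchronous code via ``asyncio.run``:

>>> import asyncio
>>> import zarr
>>> from zarr.api.asynchronous import group
>>> async def example():
...     g = await group(store=zarr.storage.MemoryStore())
...     arr = await g.ones(name="ones", shape=(4,))
...     return await arr.getitem(slice(None))
>>> data = asyncio.run(example())
>>> data.shape
(4,)
>>> float(data[0])
1.0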
""" return await async_api.ones(shape=shape, store=self.store_path, path=name, **kwargs) async def full( self, *, name: str, shape: tuple[int, ...], fill_value: Any | None, **kwargs: Any ) -> AnyAsyncArray: """Create an array, with "fill_value" being used as the default value for uninitialized portions of the array. Parameters ---------- name : str Name of the array. shape : int or tuple of int Shape of the empty array. fill_value : scalar Value to fill the array with. **kwargs Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- AsyncArray The new array. """ return await async_api.full( shape=shape, fill_value=fill_value, store=self.store_path, path=name, **kwargs, ) async def empty_like( self, *, name: str, data: async_api.ArrayLike, **kwargs: Any ) -> AnyAsyncArray: """Create an empty sub-array like `data`. The contents will be filled with the array's fill value or zeros if no fill value is provided. Parameters ---------- name : str Name of the array. data : array-like The array to create an empty array like. **kwargs Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- AsyncArray The new array. """ return await async_api.empty_like(a=data, store=self.store_path, path=name, **kwargs) async def zeros_like( self, *, name: str, data: async_api.ArrayLike, **kwargs: Any ) -> AnyAsyncArray: """Create a sub-array of zeros like `data`. Parameters ---------- name : str Name of the array. data : array-like The array to create the new array like. **kwargs Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- AsyncArray The new array. """ return await async_api.zeros_like(a=data, store=self.store_path, path=name, **kwargs) async def ones_like( self, *, name: str, data: async_api.ArrayLike, **kwargs: Any ) -> AnyAsyncArray: """Create a sub-array of ones like `data`. Parameters ---------- name : str Name of the array. data : array-like The array to create the new array like. **kwargs Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- AsyncArray The new array. """ return await async_api.ones_like(a=data, store=self.store_path, path=name, **kwargs) async def full_like( self, *, name: str, data: async_api.ArrayLike, **kwargs: Any ) -> AnyAsyncArray: """Create a sub-array like `data` filled with the `fill_value` of `data` . Parameters ---------- name : str Name of the array. data : array-like The array to create the new array like. **kwargs Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- AsyncArray The new array. """ return await async_api.full_like(a=data, store=self.store_path, path=name, **kwargs) async def move(self, source: str, dest: str) -> None: """Move a sub-group or sub-array from one path to another. Notes ----- Not implemented """ raise NotImplementedError @dataclass(frozen=True) class Group(SyncMixin): """ A Zarr group. """ _async_group: AsyncGroup @classmethod def from_store( cls, store: StoreLike, *, attributes: dict[str, Any] | None = None, zarr_format: ZarrFormat = 3, overwrite: bool = False, ) -> Group: """Instantiate a group from an initialized store. Parameters ---------- store : StoreLike StoreLike containing the Group. See the [storage documentation in the user guide][user-guide-store-like] for a description of all valid StoreLike values. attributes : dict, optional A dictionary of JSON-serializable values with user-defined attributes. zarr_format : {2, 3}, optional Zarr storage format version. 
overwrite : bool, optional If True, do not raise an error if the group already exists. Returns ------- Group Group instantiated from the store. Raises ------ ContainsArrayError, ContainsGroupError, ContainsArrayAndGroupError """ attributes = attributes or {} obj = sync( AsyncGroup.from_store( store, attributes=attributes, overwrite=overwrite, zarr_format=zarr_format, ), ) return cls(obj) @classmethod def open( cls, store: StoreLike, zarr_format: ZarrFormat | None = 3, ) -> Group: """Open a group from an initialized store. Parameters ---------- store : StoreLike Store containing the Group. See the [storage documentation in the user guide][user-guide-store-like] for a description of all valid StoreLike values. zarr_format : {2, 3, None}, optional Zarr storage format version. Returns ------- Group Group instantiated from the store. """ obj = sync(AsyncGroup.open(store, zarr_format=zarr_format)) return cls(obj) def __getitem__(self, path: str) -> AnyArray | Group: """Obtain a group member. Parameters ---------- path : str Group member name. Returns ------- Array | Group Group member (Array or Group) at the specified key Examples -------- ```python import zarr from zarr.core.group import Group group = Group.from_store(zarr.storage.MemoryStore()) group.create_array(name="subarray", shape=(10,), chunks=(10,), dtype="float64") group.create_group(name="subgroup").create_array(name="subarray", shape=(10,), chunks=(10,), dtype="float64") group["subarray"] # group["subgroup"] # group["subgroup"]["subarray"] # ``` """ obj = self._sync(self._async_group.getitem(path)) if isinstance(obj, AsyncArray): return Array(obj) else: return Group(obj) def get(self, path: str, default: DefaultT | None = None) -> AnyArray | Group | DefaultT | None: """Obtain a group member, returning default if not found. Parameters ---------- path : str Group member name. default : object Default value to return if key is not found (default: None). Returns ------- object Group member (Array or Group) or default if not found. Examples -------- ```python import zarr from zarr.core.group import Group group = Group.from_store(zarr.storage.MemoryStore()) group.create_array(name="subarray", shape=(10,), chunks=(10,), dtype="float64") group.create_group(name="subgroup") group.get("subarray") # group.get("subgroup") # group.get("nonexistent", None) # None ``` """ try: return self[path] except KeyError: return default def __delitem__(self, key: str) -> None: """Delete a group member. Parameters ---------- key : str Group member name. Examples -------- >>> import zarr >>> group = Group.from_store(zarr.storage.MemoryStore() >>> group.create_array(name="subarray", shape=(10,), chunks=(10,)) >>> del group["subarray"] >>> "subarray" in group False """ self._sync(self._async_group.delitem(key)) def __iter__(self) -> Iterator[str]: """Return an iterator over group member names. Examples -------- >>> import zarr >>> g1 = zarr.group() >>> g2 = g1.create_group('foo') >>> g3 = g1.create_group('bar') >>> d1 = g1.create_array('baz', shape=(10,), chunks=(10,)) >>> d2 = g1.create_array('quux', shape=(10,), chunks=(10,)) >>> for name in g1: ... print(name) baz bar foo quux """ yield from self.keys() def __len__(self) -> int: """Number of members.""" return self.nmembers() def __setitem__(self, key: str, value: Any) -> None: """Fastpath for creating a new array. New arrays will be created using default settings for the array type. If you need to create an array with custom settings, use the `create_array` method. Parameters ---------- key : str Array name. 
value : Any Array data. Examples -------- >>> import zarr >>> group = zarr.group() >>> group["foo"] = zarr.zeros((10,)) >>> group["foo"] """ self._sync(self._async_group.setitem(key, value)) def __repr__(self) -> str: return f"<Group {self.store_path}>" async def update_attributes_async(self, new_attributes: dict[str, Any]) -> Group: """Update the attributes of this group. Examples -------- >>> import zarr >>> group = zarr.group() >>> await group.update_attributes_async({"foo": "bar"}) >>> group.attrs.asdict() {'foo': 'bar'} """ new_metadata = replace(self.metadata, attributes=new_attributes) # Write new metadata to_save = new_metadata.to_buffer_dict(default_buffer_prototype()) awaitables = [set_or_delete(self.store_path / key, value) for key, value in to_save.items()] await asyncio.gather(*awaitables) async_group = replace(self._async_group, metadata=new_metadata) return replace(self, _async_group=async_group) @property def store_path(self) -> StorePath: """Path-like interface for the Store.""" return self._async_group.store_path @property def metadata(self) -> GroupMetadata: """Group metadata.""" return self._async_group.metadata @property def path(self) -> str: """Storage path.""" return self._async_group.path @property def name(self) -> str: """Group name following h5py convention.""" return self._async_group.name @property def basename(self) -> str: """Final component of name.""" return self._async_group.basename @property def attrs(self) -> Attributes: """Attributes of this Group.""" return Attributes(self) @property def info(self) -> Any: """ Return the statically known information for a group. Returns ------- GroupInfo Related ------- [zarr.Group.info_complete][] All information about a group, including dynamic information like the children members. """ return self._async_group.info def info_complete(self) -> Any: """ Return information for a group. If this group doesn't contain consolidated metadata then this will need to read from the backing Store. Returns ------- GroupInfo Related ------- [zarr.Group.info][] """ return self._sync(self._async_group.info_complete()) @property def store(self) -> Store: # Backwards compatibility for 2.x return self._async_group.store @property def read_only(self) -> bool: # Backwards compatibility for 2.x return self._async_group.read_only @property def synchronizer(self) -> None: # Backwards compatibility for 2.x # Not implemented in 3.x yet. return self._async_group.synchronizer def update_attributes(self, new_attributes: dict[str, Any]) -> Group: """Update the attributes of this group. Examples -------- >>> import zarr >>> group = zarr.group() >>> group.update_attributes({"foo": "bar"}) >>> group.attrs.asdict() {'foo': 'bar'} """ self._sync(self._async_group.update_attributes(new_attributes)) return self def nmembers(self, max_depth: int | None = 0) -> int: """Count the number of members in this group. Parameters ---------- max_depth : int, default 0 The maximum number of levels of the hierarchy to include. By default (``max_depth=0``), only immediate children are included. Set ``max_depth=None`` to include all nodes, and some positive integer to consider children within that many levels of the root Group. Returns ------- count : int """ return self._sync(self._async_group.nmembers(max_depth=max_depth)) def members( self, max_depth: int | None = 0, *, use_consolidated_for_children: bool = True ) -> tuple[tuple[str, AnyArray | Group], ...]: """ Returns a tuple of (path, node) pairs for the arrays and groups contained in this group.
This method requires that `store_path.store` supports directory listing. The results are not guaranteed to be ordered. Parameters ---------- max_depth : int, default 0 The maximum number of levels of the hierarchy to include. By default, (``max_depth=0``) only immediate children are included. Set ``max_depth=None`` to include all nodes, and some positive integer to consider children within that many levels of the root Group. use_consolidated_for_children : bool, default True Whether to use the consolidated metadata of child groups loaded from the store. Note that this only affects groups loaded from the store. If the current Group already has consolidated metadata, it will always be used. Returns ------- path: A string giving the path to the target, relative to the Group ``self``. value: AsyncArray or AsyncGroup The AsyncArray or AsyncGroup that is a child of ``self``. """ _members = self._sync_iter(self._async_group.members(max_depth=max_depth)) return tuple((kv[0], _parse_async_node(kv[1])) for kv in _members) def create_hierarchy( self, nodes: dict[str, ArrayV2Metadata | ArrayV3Metadata | GroupMetadata], *, overwrite: bool = False, ) -> Iterator[tuple[str, Group | AnyArray]]: """ Create a hierarchy of arrays or groups rooted at this group. This function will parse its input to ensure that the hierarchy is complete. Any implicit groups will be inserted as needed. For example, an input like ```{'a/b': GroupMetadata}``` will be parsed to ```{'': GroupMetadata, 'a': GroupMetadata, 'b': Groupmetadata}```. Explicitly specifying a root group, e.g. with ``nodes = {'': GroupMetadata()}`` is an error because this group instance is the root group. After input parsing, this function then creates all the nodes in the hierarchy concurrently. Arrays and Groups are yielded in the order they are created. This order is not stable and should not be relied on. Parameters ---------- nodes : dict[str, GroupMetadata | ArrayV3Metadata | ArrayV2Metadata] A dictionary defining the hierarchy. The keys are the paths of the nodes in the hierarchy, relative to the path of the group. The values are instances of ``GroupMetadata`` or ``ArrayMetadata``. Note that all values must have the same ``zarr_format`` as the parent group -- it is an error to mix zarr versions in the same hierarchy. Leading "/" characters from keys will be removed. overwrite : bool Whether to overwrite existing nodes. Defaults to ``False``, in which case an error is raised instead of overwriting an existing array or group. This function will not erase an existing group unless that group is explicitly named in ``nodes``. If ``nodes`` defines implicit groups, e.g. ``{`'a/b/c': GroupMetadata}``, and a group already exists at path ``a``, then this function will leave the group at ``a`` as-is. Yields ------ tuple[str, Array | Group]. Examples -------- >>> import zarr >>> from zarr.core.group import GroupMetadata >>> root = zarr.create_group(store={}) >>> for key, val in root.create_hierarchy({'a/b/c': GroupMetadata()}): ... print(key, val) ... """ for key, node in self._sync_iter( self._async_group.create_hierarchy(nodes, overwrite=overwrite) ): yield (key, _parse_async_node(node)) def keys(self) -> Generator[str, None]: """Return an iterator over group member names. Examples -------- >>> import zarr >>> g1 = zarr.group() >>> g2 = g1.create_group('foo') >>> g3 = g1.create_group('bar') >>> d1 = g1.create_array('baz', shape=(10,), chunks=(10,)) >>> d2 = g1.create_array('quux', shape=(10,), chunks=(10,)) >>> for name in g1.keys(): ... 
print(name) baz bar foo quux """ yield from self._sync_iter(self._async_group.keys()) def __contains__(self, member: str) -> bool: """Test for group membership. Examples -------- >>> import zarr >>> g1 = zarr.group() >>> g2 = g1.create_group('foo') >>> d1 = g1.create_array('bar', shape=(10,), chunks=(10,)) >>> 'foo' in g1 True >>> 'bar' in g1 True >>> 'baz' in g1 False """ return self._sync(self._async_group.contains(member)) def groups(self) -> Generator[tuple[str, Group], None]: """Return the sub-groups of this group as a generator of (name, group) pairs. Examples -------- >>> import zarr >>> group = zarr.group() >>> group.create_group("subgroup") >>> for name, subgroup in group.groups(): ... print(name, subgroup) subgroup """ for name, async_group in self._sync_iter(self._async_group.groups()): yield name, Group(async_group) def group_keys(self) -> Generator[str, None]: """Return an iterator over group member names. Examples -------- >>> import zarr >>> group = zarr.group() >>> group.create_group("subgroup") >>> for name in group.group_keys(): ... print(name) subgroup """ for name, _ in self.groups(): yield name def group_values(self) -> Generator[Group, None]: """Return an iterator over group members. Examples -------- >>> import zarr >>> group = zarr.group() >>> group.create_group("subgroup") >>> for subgroup in group.group_values(): ... print(subgroup) """ for _, group in self.groups(): yield group def arrays(self) -> Generator[tuple[str, AnyArray], None]: """Return the sub-arrays of this group as a generator of (name, array) pairs Examples -------- >>> import zarr >>> group = zarr.group() >>> group.create_array("subarray", shape=(10,), chunks=(10,)) >>> for name, subarray in group.arrays(): ... print(name, subarray) subarray """ for name, async_array in self._sync_iter(self._async_group.arrays()): yield name, Array(async_array) def array_keys(self) -> Generator[str, None]: """Return an iterator over group member names. Examples -------- >>> import zarr >>> group = zarr.group() >>> group.create_array("subarray", shape=(10,), chunks=(10,)) >>> for name in group.array_keys(): ... print(name) subarray """ for name, _ in self.arrays(): yield name def array_values(self) -> Generator[AnyArray, None]: """Return an iterator over group members. Examples -------- >>> import zarr >>> group = zarr.group() >>> group.create_array("subarray", shape=(10,), chunks=(10,)) >>> for subarray in group.array_values(): ... print(subarray) """ for _, array in self.arrays(): yield array def tree(self, expand: bool | None = None, level: int | None = None) -> Any: """ Return a tree-like representation of a hierarchy. This requires the optional ``rich`` dependency. Parameters ---------- expand : bool, optional This keyword is not yet supported. A NotImplementedError is raised if it's used. level : int, optional The maximum depth below this Group to display in the tree. Returns ------- TreeRepr A pretty-printable object displaying the hierarchy. """ return self._sync(self._async_group.tree(expand=expand, level=level)) def create_group(self, name: str, **kwargs: Any) -> Group: """Create a sub-group. Parameters ---------- name : str Name of the new subgroup. Returns ------- Group Examples -------- >>> import zarr >>> group = zarr.group() >>> subgroup = group.create_group("subgroup") >>> subgroup """ return Group(self._sync(self._async_group.create_group(name, **kwargs))) def require_group(self, name: str, **kwargs: Any) -> Group: """Obtain a sub-group, creating one if it doesn't exist. 
Parameters ---------- name : str Group name. Returns ------- g : Group """ return Group(self._sync(self._async_group.require_group(name, **kwargs))) def require_groups(self, *names: str) -> tuple[Group, ...]: """Convenience method to require multiple groups in a single call. Parameters ---------- *names : str Group names. Returns ------- groups : tuple of Groups """ return tuple(map(Group, self._sync(self._async_group.require_groups(*names)))) def create( self, name: str, *, shape: ShapeLike | None = None, dtype: ZDTypeLike | None = None, data: np.ndarray[Any, np.dtype[Any]] | None = None, chunks: tuple[int, ...] | Literal["auto"] = "auto", shards: ShardsLike | None = None, filters: FiltersLike = "auto", compressors: CompressorsLike = "auto", compressor: CompressorLike = "auto", serializer: SerializerLike = "auto", fill_value: Any | None = DEFAULT_FILL_VALUE, order: MemoryOrder | None = None, attributes: dict[str, JSON] | None = None, chunk_key_encoding: ChunkKeyEncodingLike | None = None, dimension_names: DimensionNames = None, storage_options: dict[str, Any] | None = None, overwrite: bool = False, config: ArrayConfigLike | None = None, write_data: bool = True, ) -> AnyArray: """Create an array within this group. This method lightly wraps [`zarr.core.array.create_array`][]. Parameters ---------- name : str The name of the array relative to the group. If ``path`` is ``None``, the array will be located at the root of the store. shape : ShapeLike, optional Shape of the array. Must be ``None`` if ``data`` is provided. dtype : npt.DTypeLike | None Data type of the array. Must be ``None`` if ``data`` is provided. data : Array-like data to use for initializing the array. If this parameter is provided, the ``shape`` and ``dtype`` parameters must be ``None``. chunks : tuple[int, ...], optional Chunk shape of the array. If not specified, default are guessed based on the shape and dtype. shards : tuple[int, ...], optional Shard shape of the array. The default value of ``None`` results in no sharding at all. filters : Iterable[Codec] | Literal["auto"], optional Iterable of filters to apply to each chunk of the array, in order, before serializing that chunk to bytes. For Zarr format 3, a "filter" is a codec that takes an array and returns an array, and these values must be instances of [`zarr.abc.codec.ArrayArrayCodec`][], or a dict representations of [`zarr.abc.codec.ArrayArrayCodec`][]. For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the the order if your filters is consistent with the behavior of each filter. The default value of ``"auto"`` instructs Zarr to use a default used based on the data type of the array and the Zarr format specified. For all data types in Zarr V3, and most data types in Zarr V2, the default filters are empty. The only cases where default filters are not empty is when the Zarr format is 2, and the data type is a variable-length data type like [`zarr.dtype.VariableLengthUTF8`][] or [`zarr.dtype.VariableLengthUTF8`][]. In these cases, the default filters contains a single element which is a codec specific to that particular data type. To create an array with no filters, provide an empty iterable or the value ``None``. compressors : Iterable[Codec], optional List of compressors to apply to the array. Compressors are applied in order, and after any filters are applied (if any are specified) and the data is serialized into bytes. For Zarr format 3, a "compressor" is a codec that takes a bytestream, and returns another bytestream. 
Multiple compressors my be provided for Zarr format 3. If no ``compressors`` are provided, a default set of compressors will be used. These defaults can be changed by modifying the value of ``array.v3_default_compressors`` in [`zarr.config`][]. Use ``None`` to omit default compressors. For Zarr format 2, a "compressor" can be any numcodecs codec. Only a single compressor may be provided for Zarr format 2. If no ``compressor`` is provided, a default compressor will be used. in [`zarr.config`][]. Use ``None`` to omit the default compressor. compressor : Codec, optional Deprecated in favor of ``compressors``. serializer : dict[str, JSON] | ArrayBytesCodec, optional Array-to-bytes codec to use for encoding the array data. Zarr format 3 only. Zarr format 2 arrays use implicit array-to-bytes conversion. If no ``serializer`` is provided, a default serializer will be used. These defaults can be changed by modifying the value of ``array.v3_default_serializer`` in [`zarr.config`][]. fill_value : Any, optional Fill value for the array. order : {"C", "F"}, optional The memory of the array (default is "C"). For Zarr format 2, this parameter sets the memory order of the array. For Zarr format 3, this parameter is deprecated, because memory order is a runtime parameter for Zarr format 3 arrays. The recommended way to specify the memory order for Zarr format 3 arrays is via the ``config`` parameter, e.g. ``{'config': 'C'}``. If no ``order`` is provided, a default order will be used. This default can be changed by modifying the value of ``array.order`` in [`zarr.config`][]. attributes : dict, optional Attributes for the array. chunk_key_encoding : ChunkKeyEncoding, optional A specification of how the chunk keys are represented in storage. For Zarr format 3, the default is ``{"name": "default", "separator": "/"}}``. For Zarr format 2, the default is ``{"name": "v2", "separator": "."}}``. dimension_names : Iterable[str], optional The names of the dimensions (default is None). Zarr format 3 only. Zarr format 2 arrays should not use this parameter. storage_options : dict, optional If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. overwrite : bool, default False Whether to overwrite an array with the same name in the store, if one exists. config : ArrayConfig or ArrayConfigLike, optional Runtime configuration for the array. write_data : bool If a pre-existing array-like object was provided to this function via the ``data`` parameter then ``write_data`` determines whether the values in that array-like object should be written to the Zarr array created by this function. If ``write_data`` is ``False``, then the array will be left empty. Returns ------- AsyncArray """ return self.create_array( name, shape=shape, dtype=dtype, data=data, chunks=chunks, shards=shards, filters=filters, compressors=compressors, compressor=compressor, serializer=serializer, fill_value=fill_value, order=order, attributes=attributes, chunk_key_encoding=chunk_key_encoding, dimension_names=dimension_names, storage_options=storage_options, overwrite=overwrite, config=config, write_data=write_data, ) def create_array( self, name: str, *, shape: ShapeLike | None = None, dtype: ZDTypeLike | None = None, data: np.ndarray[Any, np.dtype[Any]] | None = None, chunks: tuple[int, ...] 
| Literal["auto"] = "auto", shards: ShardsLike | None = None, filters: FiltersLike = "auto", compressors: CompressorsLike = "auto", compressor: CompressorLike = "auto", serializer: SerializerLike = "auto", fill_value: Any | None = DEFAULT_FILL_VALUE, order: MemoryOrder | None = None, attributes: dict[str, JSON] | None = None, chunk_key_encoding: ChunkKeyEncodingLike | None = None, dimension_names: DimensionNames = None, storage_options: dict[str, Any] | None = None, overwrite: bool = False, config: ArrayConfigLike | None = None, write_data: bool = True, ) -> AnyArray: """Create an array within this group. This method lightly wraps [zarr.core.array.create_array][]. Parameters ---------- name : str The name of the array relative to the group. If ``path`` is ``None``, the array will be located at the root of the store. shape : ShapeLike, optional Shape of the array. Must be ``None`` if ``data`` is provided. dtype : npt.DTypeLike | None Data type of the array. Must be ``None`` if ``data`` is provided. data : Array-like data to use for initializing the array. If this parameter is provided, the ``shape`` and ``dtype`` parameters must be ``None``. chunks : tuple[int, ...], optional Chunk shape of the array. If not specified, default are guessed based on the shape and dtype. shards : tuple[int, ...], optional Shard shape of the array. The default value of ``None`` results in no sharding at all. filters : Iterable[Codec] | Literal["auto"], optional Iterable of filters to apply to each chunk of the array, in order, before serializing that chunk to bytes. For Zarr format 3, a "filter" is a codec that takes an array and returns an array, and these values must be instances of [`zarr.abc.codec.ArrayArrayCodec`][], or a dict representations of [`zarr.abc.codec.ArrayArrayCodec`][]. For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the the order if your filters is consistent with the behavior of each filter. The default value of ``"auto"`` instructs Zarr to use a default used based on the data type of the array and the Zarr format specified. For all data types in Zarr V3, and most data types in Zarr V2, the default filters are empty. The only cases where default filters are not empty is when the Zarr format is 2, and the data type is a variable-length data type like [`zarr.dtype.VariableLengthUTF8`][] or [`zarr.dtype.VariableLengthUTF8`][]. In these cases, the default filters contains a single element which is a codec specific to that particular data type. To create an array with no filters, provide an empty iterable or the value ``None``. compressors : Iterable[Codec], optional List of compressors to apply to the array. Compressors are applied in order, and after any filters are applied (if any are specified) and the data is serialized into bytes. For Zarr format 3, a "compressor" is a codec that takes a bytestream, and returns another bytestream. Multiple compressors my be provided for Zarr format 3. If no ``compressors`` are provided, a default set of compressors will be used. These defaults can be changed by modifying the value of ``array.v3_default_compressors`` in [`zarr.config`][zarr.config]. Use ``None`` to omit default compressors. For Zarr format 2, a "compressor" can be any numcodecs codec. Only a single compressor may be provided for Zarr format 2. If no ``compressor`` is provided, a default compressor will be used. in [`zarr.config`][zarr.config]. Use ``None`` to omit the default compressor. compressor : Codec, optional Deprecated in favor of ``compressors``. 
serializer : dict[str, JSON] | ArrayBytesCodec, optional Array-to-bytes codec to use for encoding the array data. Zarr format 3 only. Zarr format 2 arrays use implicit array-to-bytes conversion. If no ``serializer`` is provided, a default serializer will be used. These defaults can be changed by modifying the value of ``array.v3_default_serializer`` in [`zarr.config`][zarr.config]. fill_value : Any, optional Fill value for the array. order : {"C", "F"}, optional The memory of the array (default is "C"). For Zarr format 2, this parameter sets the memory order of the array. For Zarr format 3, this parameter is deprecated, because memory order is a runtime parameter for Zarr format 3 arrays. The recommended way to specify the memory order for Zarr format 3 arrays is via the ``config`` parameter, e.g. ``{'config': 'C'}``. If no ``order`` is provided, a default order will be used. This default can be changed by modifying the value of ``array.order`` in [`zarr.config`][zarr.config]. attributes : dict, optional Attributes for the array. chunk_key_encoding : ChunkKeyEncoding, optional A specification of how the chunk keys are represented in storage. For Zarr format 3, the default is ``{"name": "default", "separator": "/"}}``. For Zarr format 2, the default is ``{"name": "v2", "separator": "."}}``. dimension_names : Iterable[str], optional The names of the dimensions (default is None). Zarr format 3 only. Zarr format 2 arrays should not use this parameter. storage_options : dict, optional If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. overwrite : bool, default False Whether to overwrite an array with the same name in the store, if one exists. config : ArrayConfig or ArrayConfigLike, optional Runtime configuration for the array. write_data : bool If a pre-existing array-like object was provided to this function via the ``data`` parameter then ``write_data`` determines whether the values in that array-like object should be written to the Zarr array created by this function. If ``write_data`` is ``False``, then the array will be left empty. Returns ------- AsyncArray """ compressors = _parse_deprecated_compressor( compressor, compressors, zarr_format=self.metadata.zarr_format ) return Array( self._sync( self._async_group.create_array( name=name, shape=shape, dtype=dtype, data=data, chunks=chunks, shards=shards, fill_value=fill_value, attributes=attributes, chunk_key_encoding=chunk_key_encoding, compressors=compressors, serializer=serializer, dimension_names=dimension_names, order=order, filters=filters, overwrite=overwrite, storage_options=storage_options, config=config, write_data=write_data, ) ) ) @deprecated("Use Group.create_array instead.", category=ZarrDeprecationWarning) def create_dataset(self, name: str, **kwargs: Any) -> AnyArray: """Create an array. !!! warning "Deprecated" `Group.create_dataset()` is deprecated since v3.0.0 and will be removed in v3.1.0. Use `Group.create_array` instead. Arrays are known as "datasets" in HDF5 terminology. For compatibility with h5py, Zarr groups also implement the [zarr.Group.require_dataset][] method. Parameters ---------- name : str Array name. 
**kwargs : dict Additional arguments passed to [zarr.Group.create_array][] Returns ------- a : Array """ return Array(self._sync(self._async_group.create_dataset(name, **kwargs))) @deprecated("Use Group.require_array instead.", category=ZarrDeprecationWarning) def require_dataset(self, name: str, *, shape: ShapeLike, **kwargs: Any) -> AnyArray: """Obtain an array, creating if it doesn't exist. !!! warning "Deprecated" `Group.require_dataset()` is deprecated since v3.0.0 and will be removed in v3.1.0. Use `Group.require_array` instead. Arrays are known as "datasets" in HDF5 terminology. For compatibility with h5py, Zarr groups also implement the [zarr.Group.create_dataset][] method. Other `kwargs` are as per [zarr.Group.create_dataset][]. Parameters ---------- name : str Array name. **kwargs : See [zarr.Group.create_dataset][]. Returns ------- a : Array """ return Array(self._sync(self._async_group.require_array(name, shape=shape, **kwargs))) def require_array(self, name: str, *, shape: ShapeLike, **kwargs: Any) -> AnyArray: """Obtain an array, creating if it doesn't exist. Other `kwargs` are as per [zarr.Group.create_array][]. Parameters ---------- name : str Array name. **kwargs : See [zarr.Group.create_array][]. Returns ------- a : Array """ return Array(self._sync(self._async_group.require_array(name, shape=shape, **kwargs))) def empty(self, *, name: str, shape: tuple[int, ...], **kwargs: Any) -> AnyArray: """Create an empty array with the specified shape in this Group. The contents will be filled with the array's fill value or zeros if no fill value is provided. Parameters ---------- name : str Name of the array. shape : int or tuple of int Shape of the empty array. **kwargs Keyword arguments passed to [zarr.api.asynchronous.create][]. Notes ----- The contents of an empty Zarr array are not defined. On attempting to retrieve data from an empty Zarr array, any values may be returned, and these are not guaranteed to be stable from one access to the next. """ return Array(self._sync(self._async_group.empty(name=name, shape=shape, **kwargs))) def zeros(self, *, name: str, shape: tuple[int, ...], **kwargs: Any) -> AnyArray: """Create an array, with zero being used as the default value for uninitialized portions of the array. Parameters ---------- name : str Name of the array. shape : int or tuple of int Shape of the empty array. **kwargs Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- Array The new array. """ return Array(self._sync(self._async_group.zeros(name=name, shape=shape, **kwargs))) def ones(self, *, name: str, shape: tuple[int, ...], **kwargs: Any) -> AnyArray: """Create an array, with one being used as the default value for uninitialized portions of the array. Parameters ---------- name : str Name of the array. shape : int or tuple of int Shape of the empty array. **kwargs Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- Array The new array. """ return Array(self._sync(self._async_group.ones(name=name, shape=shape, **kwargs))) def full( self, *, name: str, shape: tuple[int, ...], fill_value: Any | None, **kwargs: Any ) -> AnyArray: """Create an array, with "fill_value" being used as the default value for uninitialized portions of the array. Parameters ---------- name : str Name of the array. shape : int or tuple of int Shape of the empty array. fill_value : scalar Value to fill the array with. **kwargs Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- Array The new array. 
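Examples
--------
A minimal usage sketch (added for illustration; the array name, dtype, and
fill value below are arbitrary choices):

>>> import zarr
>>> group = zarr.group()
>>> arr = group.full(name="filled", shape=(3,), fill_value=7, dtype="int32")
>>> arr[:].tolist()
[7, 7, 7]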
""" return Array( self._sync( self._async_group.full(name=name, shape=shape, fill_value=fill_value, **kwargs) ) ) def empty_like(self, *, name: str, data: async_api.ArrayLike, **kwargs: Any) -> AnyArray: """Create an empty sub-array like `data`. The contents will be filled with the array's fill value or zeros if no fill value is provided. Parameters ---------- name : str Name of the array. data : array-like The array to create an empty array like. **kwargs Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- Array The new array. Notes ----- The contents of an empty Zarr array are not defined. On attempting to retrieve data from an empty Zarr array, any values may be returned, and these are not guaranteed to be stable from one access to the next. """ return Array(self._sync(self._async_group.empty_like(name=name, data=data, **kwargs))) def zeros_like(self, *, name: str, data: async_api.ArrayLike, **kwargs: Any) -> AnyArray: """Create a sub-array of zeros like `data`. Parameters ---------- name : str Name of the array. data : array-like The array to create the new array like. **kwargs Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- Array The new array. """ return Array(self._sync(self._async_group.zeros_like(name=name, data=data, **kwargs))) def ones_like(self, *, name: str, data: async_api.ArrayLike, **kwargs: Any) -> AnyArray: """Create a sub-array of ones like `data`. Parameters ---------- name : str Name of the array. data : array-like The array to create the new array like. **kwargs Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- Array The new array. """ return Array(self._sync(self._async_group.ones_like(name=name, data=data, **kwargs))) def full_like(self, *, name: str, data: async_api.ArrayLike, **kwargs: Any) -> AnyArray: """Create a sub-array like `data` filled with the `fill_value` of `data` . Parameters ---------- name : str Name of the array. data : array-like The array to create the new array like. **kwargs Keyword arguments passed to [zarr.api.asynchronous.create][]. Returns ------- Array The new array. """ return Array(self._sync(self._async_group.full_like(name=name, data=data, **kwargs))) def move(self, source: str, dest: str) -> None: """Move a sub-group or sub-array from one path to another. Notes ----- Not implemented """ return self._sync(self._async_group.move(source, dest)) @deprecated("Use Group.create_array instead.", category=ZarrDeprecationWarning) def array( self, name: str, *, shape: ShapeLike, dtype: npt.DTypeLike, chunks: tuple[int, ...] | Literal["auto"] = "auto", shards: tuple[int, ...] | Literal["auto"] | None = None, filters: FiltersLike = "auto", compressors: CompressorsLike = "auto", compressor: CompressorLike = None, serializer: SerializerLike = "auto", fill_value: Any | None = DEFAULT_FILL_VALUE, order: MemoryOrder | None = None, attributes: dict[str, JSON] | None = None, chunk_key_encoding: ChunkKeyEncodingLike | None = None, dimension_names: DimensionNames = None, storage_options: dict[str, Any] | None = None, overwrite: bool = False, config: ArrayConfigLike | None = None, data: npt.ArrayLike | None = None, ) -> AnyArray: """Create an array within this group. !!! warning "Deprecated" `Group.array()` is deprecated since v3.0.0 and will be removed in a future release. Use `Group.create_array` instead. This method lightly wraps [zarr.core.array.create_array][]. Parameters ---------- name : str The name of the array relative to the group. 
If ``path`` is ``None``, the array will be located at the root of the store. shape : tuple[int, ...] Shape of the array. dtype : npt.DTypeLike Data type of the array. chunks : tuple[int, ...], optional Chunk shape of the array. If not specified, default are guessed based on the shape and dtype. shards : tuple[int, ...], optional Shard shape of the array. The default value of ``None`` results in no sharding at all. filters : Iterable[Codec] | Literal["auto"], optional Iterable of filters to apply to each chunk of the array, in order, before serializing that chunk to bytes. For Zarr format 3, a "filter" is a codec that takes an array and returns an array, and these values must be instances of [`zarr.abc.codec.ArrayArrayCodec`][], or a dict representations of [`zarr.abc.codec.ArrayArrayCodec`][]. For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the the order if your filters is consistent with the behavior of each filter. The default value of ``"auto"`` instructs Zarr to use a default used based on the data type of the array and the Zarr format specified. For all data types in Zarr V3, and most data types in Zarr V2, the default filters are empty. The only cases where default filters are not empty is when the Zarr format is 2, and the data type is a variable-length data type like [`zarr.dtype.VariableLengthUTF8`][] or [`zarr.dtype.VariableLengthUTF8`][]. In these cases, the default filters contains a single element which is a codec specific to that particular data type. To create an array with no filters, provide an empty iterable or the value ``None``. compressors : Iterable[Codec], optional List of compressors to apply to the array. Compressors are applied in order, and after any filters are applied (if any are specified) and the data is serialized into bytes. For Zarr format 3, a "compressor" is a codec that takes a bytestream, and returns another bytestream. Multiple compressors my be provided for Zarr format 3. If no ``compressors`` are provided, a default set of compressors will be used. These defaults can be changed by modifying the value of ``array.v3_default_compressors`` in [`zarr.config`][zarr.config]. Use ``None`` to omit default compressors. For Zarr format 2, a "compressor" can be any numcodecs codec. Only a single compressor may be provided for Zarr format 2. If no ``compressor`` is provided, a default compressor will be used. in [`zarr.config`][zarr.config]. Use ``None`` to omit the default compressor. compressor : Codec, optional Deprecated in favor of ``compressors``. serializer : dict[str, JSON] | ArrayBytesCodec, optional Array-to-bytes codec to use for encoding the array data. Zarr format 3 only. Zarr format 2 arrays use implicit array-to-bytes conversion. If no ``serializer`` is provided, a default serializer will be used. These defaults can be changed by modifying the value of ``array.v3_default_serializer`` in [`zarr.config`][zarr.config]. fill_value : Any, optional Fill value for the array. order : {"C", "F"}, optional The memory of the array (default is "C"). For Zarr format 2, this parameter sets the memory order of the array. For Zarr format 3, this parameter is deprecated, because memory order is a runtime parameter for Zarr format 3 arrays. The recommended way to specify the memory order for Zarr format 3 arrays is via the ``config`` parameter, e.g. ``{'config': 'C'}``. If no ``order`` is provided, a default order will be used. This default can be changed by modifying the value of ``array.order`` in [`zarr.config`][zarr.config]. 
attributes : dict, optional Attributes for the array. chunk_key_encoding : ChunkKeyEncoding, optional A specification of how the chunk keys are represented in storage. For Zarr format 3, the default is ``{"name": "default", "separator": "/"}``. For Zarr format 2, the default is ``{"name": "v2", "separator": "."}``. dimension_names : Iterable[str], optional The names of the dimensions (default is None). Zarr format 3 only. Zarr format 2 arrays should not use this parameter. storage_options : dict, optional If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. overwrite : bool, default False Whether to overwrite an array with the same name in the store, if one exists. config : ArrayConfig or ArrayConfigLike, optional Runtime configuration for the array. data : array_like The data to fill the array with. Returns ------- Array The new array. """ compressors = _parse_deprecated_compressor(compressor, compressors) return Array( self._sync( self._async_group.create_dataset( name=name, shape=shape, dtype=dtype, chunks=chunks, shards=shards, fill_value=fill_value, attributes=attributes, chunk_key_encoding=chunk_key_encoding, compressors=compressors, serializer=serializer, dimension_names=dimension_names, order=order, filters=filters, overwrite=overwrite, storage_options=storage_options, config=config, data=data, ) ) ) async def create_hierarchy( *, store: Store, nodes: dict[str, GroupMetadata | ArrayV2Metadata | ArrayV3Metadata], overwrite: bool = False, ) -> AsyncIterator[tuple[str, AsyncGroup | AnyAsyncArray]]: """ Create a complete zarr hierarchy from a collection of metadata objects. This function will parse its input to ensure that the hierarchy is complete. Any implicit groups will be inserted as needed. For example, an input like ```{'a/b': GroupMetadata}``` will be parsed to ```{'': GroupMetadata, 'a': GroupMetadata, 'a/b': GroupMetadata}``` After input parsing, this function then creates all the nodes in the hierarchy concurrently. Arrays and Groups are yielded in the order they are created. This order is not stable and should not be relied on. Parameters ---------- store : Store The storage backend to use. nodes : dict[str, GroupMetadata | ArrayV3Metadata | ArrayV2Metadata] A dictionary defining the hierarchy. The keys are the paths of the nodes in the hierarchy, relative to the root of the ``Store``. The root of the store can be specified with the empty string ``''``. The values are instances of ``GroupMetadata`` or ``ArrayMetadata``. Note that all values must have the same ``zarr_format`` -- it is an error to mix zarr versions in the same hierarchy. Leading "/" characters from keys will be removed. overwrite : bool Whether to overwrite existing nodes. Defaults to ``False``, in which case an error is raised instead of overwriting an existing array or group. This function will not erase an existing group unless that group is explicitly named in ``nodes``. If ``nodes`` defines implicit groups, e.g. ``{'a/b/c': GroupMetadata}``, and a group already exists at path ``a``, then this function will leave the group at ``a`` as-is. Yields ------ tuple[str, AsyncGroup | AsyncArray] This function yields (path, node) pairs, in the order the nodes were created. Examples -------- >>> from zarr.api.asynchronous import create_hierarchy >>> from zarr.storage import MemoryStore >>> from zarr.core.group import GroupMetadata >>> import asyncio >>> store = MemoryStore() >>> nodes = {'a': GroupMetadata(attributes={'name': 'leaf'})} >>> async def run(): ... 
print(dict([x async for x in create_hierarchy(store=store, nodes=nodes)])) >>> asyncio.run(run()) # {'a': , '': } """ # normalize the keys to be valid paths nodes_normed_keys = _normalize_path_keys(nodes) # ensure that all nodes have the same zarr_format, and add implicit groups as needed nodes_parsed = _parse_hierarchy_dict(data=nodes_normed_keys) redundant_implicit_groups = [] # empty hierarchies should be a no-op if len(nodes_parsed) > 0: # figure out which zarr format we are using zarr_format = next(iter(nodes_parsed.values())).zarr_format # check which implicit groups will require materialization implicit_group_keys = tuple( filter(lambda k: isinstance(nodes_parsed[k], ImplicitGroupMarker), nodes_parsed) ) # read potential group metadata for each implicit group maybe_extant_group_coros = ( _read_group_metadata(store, k, zarr_format=zarr_format) for k in implicit_group_keys ) maybe_extant_groups = await asyncio.gather( *maybe_extant_group_coros, return_exceptions=True ) for key, value in zip(implicit_group_keys, maybe_extant_groups, strict=True): if isinstance(value, BaseException): if isinstance(value, FileNotFoundError): # this is fine -- there was no group there, so we will create one pass else: raise value else: # a loop exists already at ``key``, so we can avoid creating anything there redundant_implicit_groups.append(key) if overwrite: # we will remove any nodes that collide with arrays and non-implicit groups defined in # nodes # track the keys of nodes we need to delete to_delete_keys = [] to_delete_keys.extend( [k for k, v in nodes_parsed.items() if k not in implicit_group_keys] ) await asyncio.gather(*(store.delete_dir(key) for key in to_delete_keys)) else: # This type is long. coros: ( Generator[Coroutine[Any, Any, ArrayV2Metadata | GroupMetadata], None, None] | Generator[Coroutine[Any, Any, ArrayV3Metadata | GroupMetadata], None, None] ) if zarr_format == 2: coros = (_read_metadata_v2(store=store, path=key) for key in nodes_parsed) elif zarr_format == 3: coros = (_read_metadata_v3(store=store, path=key) for key in nodes_parsed) else: # pragma: no cover raise ValueError(f"Invalid zarr_format: {zarr_format}") # pragma: no cover extant_node_query = dict( zip( nodes_parsed.keys(), await asyncio.gather(*coros, return_exceptions=True), strict=False, ) ) # iterate over the existing arrays / groups and figure out which of them conflict # with the arrays / groups we want to create for key, extant_node in extant_node_query.items(): proposed_node = nodes_parsed[key] if isinstance(extant_node, BaseException): if isinstance(extant_node, FileNotFoundError): # ignore FileNotFoundError, because they represent nodes we can safely create pass else: # Any other exception is a real error raise extant_node else: # this is a node that already exists, but a node with the same key was specified # in nodes_parsed. if isinstance(extant_node, GroupMetadata): # a group already exists where we want to create a group if isinstance(proposed_node, ImplicitGroupMarker): # we have proposed an implicit group, which is OK -- we will just skip # creating this particular metadata document redundant_implicit_groups.append(key) else: # we have proposed an explicit group, which is an error, given that a # group already exists. msg = f"A group exists in store {store!r} at path {key!r}." raise ContainsGroupError(msg) elif isinstance(extant_node, ArrayV2Metadata | ArrayV3Metadata): # we are trying to overwrite an existing array. this is an error. msg = f"An array exists in store {store!r} at path {key!r}." 
raise ContainsArrayError(msg) nodes_explicit: dict[str, GroupMetadata | ArrayV2Metadata | ArrayV3Metadata] = {} for k, v in nodes_parsed.items(): if k not in redundant_implicit_groups: if isinstance(v, ImplicitGroupMarker): nodes_explicit[k] = GroupMetadata(zarr_format=v.zarr_format) else: nodes_explicit[k] = v async for key, node in create_nodes(store=store, nodes=nodes_explicit): yield key, node async def create_nodes( *, store: Store, nodes: dict[str, GroupMetadata | ArrayV2Metadata | ArrayV3Metadata], ) -> AsyncIterator[tuple[str, AsyncGroup | AnyAsyncArray]]: """Create a collection of arrays and / or groups concurrently. Note: no attempt is made to validate that these arrays and / or groups collectively form a valid Zarr hierarchy. It is the responsibility of the caller of this function to ensure that the ``nodes`` parameter satisfies any correctness constraints. Parameters ---------- store : Store The storage backend to use. nodes : dict[str, GroupMetadata | ArrayV3Metadata | ArrayV2Metadata] A dictionary defining the hierarchy. The keys are the paths of the nodes in the hierarchy, and the values are the metadata of the nodes. The metadata must be either an instance of GroupMetadata, ArrayV3Metadata or ArrayV2Metadata. Yields ------ AsyncGroup | AsyncArray The created nodes in the order they are created. """ # Note: the only way to alter this value is via the config. If that's undesirable for some reason, # then we should consider adding a keyword argument this this function semaphore = asyncio.Semaphore(config.get("async.concurrency")) create_tasks: list[Coroutine[None, None, str]] = [] for key, value in nodes.items(): # make the key absolute create_tasks.extend(_persist_metadata(store, key, value, semaphore=semaphore)) created_object_keys = [] for coro in asyncio.as_completed(create_tasks): created_key = await coro # we need this to track which metadata documents were written so that we can yield a # complete v2 Array / Group class after both .zattrs and the metadata JSON was created. created_object_keys.append(created_key) # get the node name from the object key if len(created_key.split("/")) == 1: # this is the root node meta_out = nodes[""] node_name = "" else: # turn "foo/" into "foo" node_name = created_key[: created_key.rfind("/")] meta_out = nodes[node_name] if meta_out.zarr_format == 3: yield node_name, _build_node(store=store, path=node_name, metadata=meta_out) else: # For zarr v2 # we only want to yield when both the metadata and attributes are created # so we track which keys have been created, and wait for both the meta key and # the attrs key to be created before yielding back the AsyncArray / AsyncGroup attrs_done = _join_paths([node_name, ZATTRS_JSON]) in created_object_keys if isinstance(meta_out, GroupMetadata): meta_done = _join_paths([node_name, ZGROUP_JSON]) in created_object_keys else: meta_done = _join_paths([node_name, ZARRAY_JSON]) in created_object_keys if meta_done and attrs_done: yield node_name, _build_node(store=store, path=node_name, metadata=meta_out) continue def _get_roots( data: Iterable[str], ) -> tuple[str, ...]: """ Return the keys of the root(s) of the hierarchy. A root is a key with the fewest number of path segments. 
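For example, given the keys ``('a', 'a/b', 'c')``, the roots are ``('a', 'c')``; if the empty string ``''`` is present in the input, it is the sole root. 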
""" if "" in data: return ("",) keys_split = sorted((key.split("/") for key in data), key=len) groups: defaultdict[int, list[str]] = defaultdict(list) for key_split in keys_split: groups[len(key_split)].append("/".join(key_split)) return tuple(groups[min(groups.keys())]) def _parse_hierarchy_dict( *, data: Mapping[str, ImplicitGroupMarker | GroupMetadata | ArrayV2Metadata | ArrayV3Metadata], ) -> dict[str, ImplicitGroupMarker | GroupMetadata | ArrayV2Metadata | ArrayV3Metadata]: """ Take an input with type Mapping[str, ArrayMetadata | GroupMetadata] and parse it into a dict of str: node pairs that models a valid, complete Zarr hierarchy. If the input represents a complete Zarr hierarchy, i.e. one with no implicit groups, then return a dict with the exact same data as the input. Otherwise, return a dict derived from the input with GroupMetadata inserted as needed to make the hierarchy complete. For example, an input of {'a/b': ArrayMetadata} is incomplete, because it references two groups (the root group '' and a group at 'a') that are not specified in the input. Applying this function to that input will result in a return value of {'': GroupMetadata, 'a': GroupMetadata, 'a/b': ArrayMetadata}, i.e. the implied groups were added. The input is also checked for the following conditions; an error is raised if any are violated: - No arrays can contain group or arrays (i.e., all arrays must be leaf nodes). - All arrays and groups must have the same ``zarr_format`` value. This function ensures that the input is transformed into a specification of a complete and valid Zarr hierarchy. """ # ensure that all nodes have the same zarr format data_purified = _ensure_consistent_zarr_format(data) # ensure that keys are normalized to zarr paths data_normed_keys = _normalize_path_keys(data_purified) # insert an implicit root group if a root was not specified # but not if an empty dict was provided, because any empty hierarchy has no nodes if len(data_normed_keys) > 0 and "" not in data_normed_keys: z_format = next(iter(data_normed_keys.values())).zarr_format data_normed_keys = data_normed_keys | {"": ImplicitGroupMarker(zarr_format=z_format)} out: dict[str, GroupMetadata | ArrayV2Metadata | ArrayV3Metadata] = {**data_normed_keys} for k, v in data_normed_keys.items(): key_split = k.split("/") # get every parent path *subpaths, _ = accumulate(key_split, lambda a, b: _join_paths([a, b])) for subpath in subpaths: # If a component is not already in the output dict, add ImplicitGroupMetadata if subpath not in out: out[subpath] = ImplicitGroupMarker(zarr_format=v.zarr_format) else: if not isinstance(out[subpath], GroupMetadata | ImplicitGroupMarker): msg = ( f"The node at {subpath} contains other nodes, but it is not a Zarr group. " "This is invalid. Only Zarr groups can contain other nodes." ) raise ValueError(msg) return out def _ensure_consistent_zarr_format( data: Mapping[str, GroupMetadata | ArrayV2Metadata | ArrayV3Metadata], ) -> Mapping[str, GroupMetadata | ArrayV2Metadata] | Mapping[str, GroupMetadata | ArrayV3Metadata]: """ Ensure that all values of the input dict have the same zarr format. If any do not, then a value error is raised. """ observed_zarr_formats: dict[ZarrFormat, list[str]] = {2: [], 3: []} for k, v in data.items(): observed_zarr_formats[v.zarr_format].append(k) if len(observed_zarr_formats[2]) > 0 and len(observed_zarr_formats[3]) > 0: msg = ( "Got data with both Zarr v2 and Zarr v3 nodes, which is invalid. " f"The following keys map to Zarr v2 nodes: {observed_zarr_formats.get(2)}. 
" f"The following keys map to Zarr v3 nodes: {observed_zarr_formats.get(3)}." "Ensure that all nodes have the same Zarr format." ) raise ValueError(msg) return cast( "Mapping[str, GroupMetadata | ArrayV2Metadata] | Mapping[str, GroupMetadata | ArrayV3Metadata]", data, ) async def _getitem_semaphore( node: AsyncGroup, key: str, semaphore: asyncio.Semaphore | None ) -> AnyAsyncArray | AsyncGroup: """ Wrap Group.getitem with an optional semaphore. If the semaphore parameter is an asyncio.Semaphore instance, then the getitem operation is performed inside an async context manager provided by that semaphore. If the semaphore parameter is None, then getitem is invoked without a context manager. """ if semaphore is not None: async with semaphore: return await node.getitem(key) else: return await node.getitem(key) async def _iter_members( node: AsyncGroup, skip_keys: tuple[str, ...], semaphore: asyncio.Semaphore | None, ) -> AsyncGenerator[tuple[str, AnyAsyncArray | AsyncGroup], None]: """ Iterate over the arrays and groups contained in a group. Parameters ---------- node : AsyncGroup The group to traverse. skip_keys : tuple[str, ...] A tuple of keys to skip when iterating over the possible members of the group. semaphore : asyncio.Semaphore | None An optional semaphore to use for concurrency control. Yields ------ tuple[str, AnyAsyncArray | AsyncGroup] """ # retrieve keys from storage keys = [key async for key in node.store.list_dir(node.path)] keys_filtered = tuple(filter(lambda v: v not in skip_keys, keys)) node_tasks = tuple( asyncio.create_task(_getitem_semaphore(node, key, semaphore), name=key) for key in keys_filtered ) for fetched_node_coro in asyncio.as_completed(node_tasks): try: fetched_node = await fetched_node_coro except KeyError as e: # keyerror is raised when `key` names an object (in the object storage sense), # as opposed to a prefix, in the store under the prefix associated with this group # in which case `key` cannot be the name of a sub-array or sub-group. warnings.warn( f"Object at {e.args[0]} is not recognized as a component of a Zarr hierarchy.", ZarrUserWarning, stacklevel=1, ) continue match fetched_node: case AsyncArray() | AsyncGroup(): yield fetched_node.basename, fetched_node case _: raise ValueError(f"Unexpected type: {type(fetched_node)}") async def _iter_members_deep( group: AsyncGroup, *, max_depth: int | None, skip_keys: tuple[str, ...], semaphore: asyncio.Semaphore | None = None, use_consolidated_for_children: bool = True, ) -> AsyncGenerator[tuple[str, AnyAsyncArray | AsyncGroup], None]: """ Iterate over the arrays and groups contained in a group, and optionally the arrays and groups contained in those groups. Parameters ---------- group : AsyncGroup The group to traverse. max_depth : int | None The maximum depth of recursion. skip_keys : tuple[str, ...] A tuple of keys to skip when iterating over the possible members of the group. semaphore : asyncio.Semaphore | None An optional semaphore to use for concurrency control. use_consolidated_for_children : bool, default True Whether to use the consolidated metadata of child groups loaded from the store. Note that this only affects groups loaded from the store. If the current Group already has consolidated metadata, it will always be used. 
Yields ------ tuple[str, AnyAsyncArray | AsyncGroup] """ to_recurse = {} do_recursion = max_depth is None or max_depth > 0 if max_depth is None: new_depth = None else: new_depth = max_depth - 1 async for name, node in _iter_members(group, skip_keys=skip_keys, semaphore=semaphore): is_group = isinstance(node, AsyncGroup) if ( is_group and not use_consolidated_for_children and node.metadata.consolidated_metadata is not None ): node = cast("AsyncGroup", node) # We've decided not to trust consolidated metadata at this point, because we're # reconsolidating the metadata, for example. node = replace(node, metadata=replace(node.metadata, consolidated_metadata=None)) yield name, node if is_group and do_recursion: node = cast("AsyncGroup", node) to_recurse[name] = _iter_members_deep( node, max_depth=new_depth, skip_keys=skip_keys, semaphore=semaphore ) for prefix, subgroup_iter in to_recurse.items(): async for name, node in subgroup_iter: key = f"{prefix}/{name}".lstrip("/") yield key, node async def _read_metadata_v3(store: Store, path: str) -> ArrayV3Metadata | GroupMetadata: """ Given a store_path, return ArrayV3Metadata or GroupMetadata defined by the metadata document stored at store_path.path / zarr.json. If no such document is found, raise a FileNotFoundError. """ zarr_json_bytes = await store.get( _join_paths([path, ZARR_JSON]), prototype=default_buffer_prototype() ) if zarr_json_bytes is None: raise FileNotFoundError(path) else: zarr_json = json.loads(zarr_json_bytes.to_bytes()) return _build_metadata_v3(zarr_json) async def _read_metadata_v2(store: Store, path: str) -> ArrayV2Metadata | GroupMetadata: """ Given a store_path, return ArrayV2Metadata or GroupMetadata defined by the metadata document stored at store_path.path / (.zgroup | .zarray). If no such document is found, raise a FileNotFoundError. """ # TODO: consider first fetching array metadata, and only fetching group metadata when we don't # find an array zarray_bytes, zgroup_bytes, zattrs_bytes = await asyncio.gather( store.get(_join_paths([path, ZARRAY_JSON]), prototype=default_buffer_prototype()), store.get(_join_paths([path, ZGROUP_JSON]), prototype=default_buffer_prototype()), store.get(_join_paths([path, ZATTRS_JSON]), prototype=default_buffer_prototype()), ) if zattrs_bytes is None: zattrs = {} else: zattrs = json.loads(zattrs_bytes.to_bytes()) # TODO: decide how to handle finding both array and group metadata. The spec does not seem to # consider this situation. A practical approach would be to ignore that combination, and only # return the array metadata. 
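# In the code below, array metadata (.zarray) takes precedence: if both documents are # present, the group metadata (.zgroup) is ignored. 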
if zarray_bytes is not None: zmeta = json.loads(zarray_bytes.to_bytes()) else: if zgroup_bytes is None: # neither .zarray or .zgroup were found results in KeyError raise FileNotFoundError(path) else: zmeta = json.loads(zgroup_bytes.to_bytes()) return _build_metadata_v2(zmeta, zattrs) async def _read_group_metadata_v2(store: Store, path: str) -> GroupMetadata: """ Read group metadata or error """ meta = await _read_metadata_v2(store=store, path=path) if not isinstance(meta, GroupMetadata): raise FileNotFoundError(f"Group metadata was not found in {store} at {path}") return meta async def _read_group_metadata_v3(store: Store, path: str) -> GroupMetadata: """ Read group metadata or error """ meta = await _read_metadata_v3(store=store, path=path) if not isinstance(meta, GroupMetadata): raise FileNotFoundError(f"Group metadata was not found in {store} at {path}") return meta async def _read_group_metadata( store: Store, path: str, *, zarr_format: ZarrFormat ) -> GroupMetadata: if zarr_format == 2: return await _read_group_metadata_v2(store=store, path=path) return await _read_group_metadata_v3(store=store, path=path) def _build_metadata_v3(zarr_json: dict[str, JSON]) -> ArrayV3Metadata | GroupMetadata: """ Convert a dict representation of Zarr V3 metadata into the corresponding metadata class. """ if "node_type" not in zarr_json: msg = "Required key 'node_type' is missing from the provided metadata document." raise MetadataValidationError(msg) match zarr_json: case {"node_type": "array"}: return ArrayV3Metadata.from_dict(zarr_json) case {"node_type": "group"}: return GroupMetadata.from_dict(zarr_json) case _: # pragma: no cover raise ValueError( "invalid value for `node_type` key in metadata document" ) # pragma: no cover def _build_metadata_v2( zarr_json: dict[str, JSON], attrs_json: dict[str, JSON] ) -> ArrayV2Metadata | GroupMetadata: """ Convert a dict representation of Zarr V2 metadata into the corresponding metadata class. """ match zarr_json: case {"shape": _}: return ArrayV2Metadata.from_dict(zarr_json | {"attributes": attrs_json}) case _: # pragma: no cover return GroupMetadata.from_dict(zarr_json | {"attributes": attrs_json}) @overload def _build_node(*, store: Store, path: str, metadata: ArrayV2Metadata) -> AsyncArrayV2: ... @overload def _build_node(*, store: Store, path: str, metadata: ArrayV3Metadata) -> AsyncArrayV3: ... @overload def _build_node(*, store: Store, path: str, metadata: GroupMetadata) -> AsyncGroup: ... def _build_node( *, store: Store, path: str, metadata: ArrayV3Metadata | ArrayV2Metadata | GroupMetadata ) -> AnyAsyncArray | AsyncGroup: """ Take a metadata object and return a node (AsyncArray or AsyncGroup). """ store_path = StorePath(store=store, path=path) match metadata: case ArrayV2Metadata() | ArrayV3Metadata(): return AsyncArray(metadata, store_path=store_path) case GroupMetadata(): return AsyncGroup(metadata, store_path=store_path) case _: # pragma: no cover raise ValueError(f"Unexpected metadata type: {type(metadata)}") # pragma: no cover async def _get_node_v2(store: Store, path: str) -> AsyncArrayV2 | AsyncGroup: """ Read a Zarr v2 AsyncArray or AsyncGroup from a path in a Store. Parameters ---------- store : Store The store-like object to read from. path : str The path to the node to read. 
Returns ------- AsyncArray | AsyncGroup """ metadata = await _read_metadata_v2(store=store, path=path) return _build_node(store=store, path=path, metadata=metadata) async def _get_node_v3(store: Store, path: str) -> AsyncArrayV3 | AsyncGroup: """ Read a Zarr v3 AsyncArray or AsyncGroup from a path in a Store. Parameters ---------- store : Store The store-like object to read from. path : str The path to the node to read. Returns ------- AsyncArray | AsyncGroup """ metadata = await _read_metadata_v3(store=store, path=path) return _build_node(store=store, path=path, metadata=metadata) async def get_node(store: Store, path: str, zarr_format: ZarrFormat) -> AnyAsyncArray | AsyncGroup: """ Get an AsyncArray or AsyncGroup from a path in a Store. Parameters ---------- store : Store The store-like object to read from. path : str The path to the node to read. zarr_format : {2, 3} The zarr format of the node to read. Returns ------- AsyncArray | AsyncGroup """ match zarr_format: case 2: return await _get_node_v2(store=store, path=path) case 3: return await _get_node_v3(store=store, path=path) case _: # pragma: no cover raise ValueError(f"Unexpected zarr format: {zarr_format}") # pragma: no cover async def _set_return_key( *, store: Store, key: str, value: Buffer, semaphore: asyncio.Semaphore | None = None ) -> str: """ Write a value to storage at the given key. The key is returned. Useful when saving values via routines that return results in execution order, like asyncio.as_completed, because in this case we need to know which key was saved in order to yield the right object to the caller. Parameters ---------- store : Store The store to save the value to. key : str The key to save the value to. value : Buffer The value to save. semaphore : asyncio.Semaphore | None An optional semaphore to use to limit the number of concurrent writes. """ if semaphore is not None: async with semaphore: await store.set(key, value) else: await store.set(key, value) return key def _persist_metadata( store: Store, path: str, metadata: ArrayV2Metadata | ArrayV3Metadata | GroupMetadata, semaphore: asyncio.Semaphore | None = None, ) -> tuple[Coroutine[None, None, str], ...]: """ Prepare to save a metadata document to storage, returning a tuple of coroutines that must be awaited. """ to_save = metadata.to_buffer_dict(default_buffer_prototype()) return tuple( _set_return_key(store=store, key=_join_paths([path, key]), value=value, semaphore=semaphore) for key, value in to_save.items() ) async def create_rooted_hierarchy( *, store: Store, nodes: dict[str, GroupMetadata | ArrayV2Metadata | ArrayV3Metadata], overwrite: bool = False, ) -> AsyncGroup | AnyAsyncArray: """ Create an ``AsyncGroup`` or ``AsyncArray`` from a store and a dict of metadata documents. This function ensures that its input contains a specification of a root node, calls ``create_hierarchy`` to create nodes, and returns the root node of the hierarchy. """ roots = _get_roots(nodes.keys()) if len(roots) != 1: msg = ( "The input does not specify a root node. " "This function can only create hierarchies that contain a root node, which is " "defined as a group that is ancestral to all the other arrays and " "groups in the hierarchy, or a single array." 
) raise ValueError(msg) else: root_key = roots[0] nodes_created = [ x async for x in create_hierarchy(store=store, nodes=nodes, overwrite=overwrite) ] return dict(nodes_created)[root_key] zarr-python-3.1.5/src/zarr/core/indexing.py000066400000000000000000001560741511007055700207070ustar00rootroot00000000000000from __future__ import annotations import itertools import math import numbers import operator from collections.abc import Iterator, Sequence from dataclasses import dataclass from enum import Enum from functools import reduce from types import EllipsisType from typing import ( TYPE_CHECKING, Any, Generic, Literal, NamedTuple, Protocol, TypeAlias, TypeGuard, TypeVar, cast, runtime_checkable, ) import numpy as np import numpy.typing as npt from zarr.core.common import ceildiv, product from zarr.core.metadata import T_ArrayMetadata from zarr.errors import ( ArrayIndexError, BoundsCheckError, NegativeStepError, VindexInvalidSelectionError, ) if TYPE_CHECKING: from zarr.core.array import AsyncArray from zarr.core.buffer import NDArrayLikeOrScalar from zarr.core.chunk_grids import ChunkGrid from zarr.types import AnyArray IntSequence = list[int] | npt.NDArray[np.intp] ArrayOfIntOrBool = npt.NDArray[np.intp] | npt.NDArray[np.bool_] BasicSelector = int | slice | EllipsisType Selector = BasicSelector | ArrayOfIntOrBool BasicSelection = BasicSelector | tuple[BasicSelector, ...] # also used for BlockIndex CoordinateSelection = IntSequence | tuple[IntSequence, ...] MaskSelection = npt.NDArray[np.bool_] OrthogonalSelection = Selector | tuple[Selector, ...] Selection = BasicSelection | CoordinateSelection | MaskSelection | OrthogonalSelection CoordinateSelectionNormalized = tuple[npt.NDArray[np.intp], ...] SelectionNormalized = tuple[Selector, ...] | ArrayOfIntOrBool SelectionWithFields = Selection | str | Sequence[str] SelectorTuple = tuple[Selector, ...] | npt.NDArray[np.intp] | slice Fields = str | list[str] | tuple[str, ...] def err_too_many_indices(selection: Any, shape: tuple[int, ...]) -> None: raise IndexError(f"too many indices for array; expected {len(shape)}, got {len(selection)}") def _zarr_array_to_int_or_bool_array(arr: AnyArray) -> npt.NDArray[np.intp] | npt.NDArray[np.bool_]: if arr.dtype.kind in ("i", "b"): return np.asarray(arr) else: raise IndexError( f"Invalid array dtype: {arr.dtype}. Arrays used as indices must be of integer or boolean type" ) @runtime_checkable class Indexer(Protocol): shape: tuple[int, ...] drop_axes: tuple[int, ...] def __iter__(self) -> Iterator[ChunkProjection]: ... _ArrayIndexingOrder: TypeAlias = Literal["lexicographic"] def _iter_grid( grid_shape: Sequence[int], *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None, order: _ArrayIndexingOrder = "lexicographic", ) -> Iterator[tuple[int, ...]]: """ Iterate over the elements of grid of integers, with the option to restrict the domain of iteration to a contiguous subregion of that grid. Parameters ---------- grid_shape : Sequence[int] The size of the domain to iterate over. origin : Sequence[int] | None, default=None The first coordinate of the domain to return. selection_shape : Sequence[int] | None, default=None The shape of the selection. order : Literal["lexicographic"], default="lexicographic" The linear indexing order to use. 
Returns ------- Iterator[tuple[int, ...]] An iterator over tuples of integers Examples -------- ```python from zarr.core.indexing import _iter_grid tuple(_iter_grid((1,))) # ((0,),) tuple(_iter_grid((2,3))) # ((0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)) tuple(_iter_grid((2,3), origin=(1,1))) # ((1, 1), (1, 2)) tuple(_iter_grid((2,3), origin=(0,0), selection_shape=(2,2))) # ((0, 0), (0, 1), (1, 0), (1, 1)) ``` """ if origin is None: origin_parsed = (0,) * len(grid_shape) else: if len(origin) != len(grid_shape): msg = ( "Shape and origin parameters must have the same length." f"Got {len(grid_shape)} elements in shape, but {len(origin)} elements in origin." ) raise ValueError(msg) origin_parsed = tuple(origin) if selection_shape is None: selection_shape_parsed = tuple( g - o for o, g in zip(origin_parsed, grid_shape, strict=True) ) else: selection_shape_parsed = tuple(selection_shape) if order == "lexicographic": dimensions: tuple[range, ...] = () for idx, (o, gs, ss) in enumerate( zip(origin_parsed, grid_shape, selection_shape_parsed, strict=True) ): if o + ss > gs: raise IndexError( f"Invalid selection shape ({ss}) for origin ({o}) and grid shape ({gs}) at axis {idx}." ) dimensions += (range(o, o + ss),) return itertools.product(*(dimensions)) else: msg = f"Indexing order {order} is not supported at this time." # type: ignore[unreachable] # pragma: no cover raise NotImplementedError(msg) # pragma: no cover def _iter_regions( domain_shape: Sequence[int], region_shape: Sequence[int], *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None, order: _ArrayIndexingOrder = "lexicographic", trim_excess: bool = True, ) -> Iterator[tuple[slice, ...]]: """ Iterate over contiguous regions on a grid of integers, with the option to restrict the domain of iteration to a contiguous subregion of that grid. Parameters ---------- domain_shape : Sequence[int] The size of the domain to iterate over. region_shape : Sequence[int] The shape of the region to iterate over. origin : Sequence[int] | None, default=None The location, in grid coordinates, of the first region to return. selection_shape : Sequence[int] | None, default=None The shape of the selection, in grid coordinates. order : Literal["lexicographic"], default="lexicographic" The linear indexing order to use. 
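trim_excess : bool, default=True If True, regions that extend past the end of the domain are clipped to the domain boundary; if False, the final region along a dimension may extend beyond ``domain_shape``. 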
Yields ------ Iterator[tuple[slice, ...]] An iterator over tuples of slices, where each slice spans a separate contiguous region Examples -------- ```python from zarr.core.indexing import _iter_regions tuple(_iter_regions((1,), (1,))) # ((slice(0, 1, 1),),) tuple(_iter_regions((2, 3), (1, 2))) # ((slice(0, 1, 1), slice(0, 2, 1)), (slice(1, 2, 1), slice(0, 2, 1))) tuple(_iter_regions((2,3), (1,2), origin=(1,1))) # ((slice(1, 2, 1), slice(1, 3, 1)), (slice(2, 3, 1), slice(1, 3, 1))) tuple(_iter_regions((2,3), (1,2), origin=(0,0), selection_shape=(2,2))) # ((slice(0, 1, 1), slice(0, 2, 1)), (slice(1, 2, 1), slice(0, 2, 1))) ``` """ grid_shape = tuple(ceildiv(d, s) for d, s in zip(domain_shape, region_shape, strict=True)) for grid_position in _iter_grid( grid_shape=grid_shape, origin=origin, selection_shape=selection_shape, order=order ): out: list[slice] = [] for g_pos, r_shape, d_shape in zip(grid_position, region_shape, domain_shape, strict=True): start = g_pos * r_shape stop = start + r_shape if trim_excess: stop = min(stop, d_shape) out.append(slice(start, stop, 1)) yield tuple(out) def is_integer(x: Any) -> TypeGuard[int]: """True if x is an integer (both pure Python or NumPy).""" return isinstance(x, numbers.Integral) and not is_bool(x) def is_bool(x: Any) -> TypeGuard[bool | np.bool_]: """True if x is a boolean (both pure Python or NumPy).""" return type(x) in [bool, np.bool_] def is_integer_list(x: Any) -> TypeGuard[list[int]]: """True if x is a list of integers.""" return isinstance(x, list) and len(x) > 0 and all(is_integer(i) for i in x) def is_bool_list(x: Any) -> TypeGuard[list[bool | np.bool_]]: """True if x is a list of boolean.""" return isinstance(x, list) and len(x) > 0 and all(is_bool(i) for i in x) def is_integer_array(x: Any, ndim: int | None = None) -> TypeGuard[npt.NDArray[np.intp]]: t = not np.isscalar(x) and hasattr(x, "shape") and hasattr(x, "dtype") and x.dtype.kind in "ui" if ndim is not None: t = t and hasattr(x, "shape") and len(x.shape) == ndim return t def is_bool_array(x: Any, ndim: int | None = None) -> TypeGuard[npt.NDArray[np.bool_]]: t = hasattr(x, "shape") and hasattr(x, "dtype") and x.dtype == bool if ndim is not None: t = t and hasattr(x, "shape") and len(x.shape) == ndim return t def is_int_or_bool_iterable(x: Any) -> bool: return is_integer_list(x) or is_integer_array(x) or is_bool_array(x) or is_bool_list(x) def is_scalar(value: Any, dtype: np.dtype[Any]) -> bool: if np.isscalar(value): return True if hasattr(value, "shape") and value.shape == (): return True return isinstance(value, tuple) and dtype.names is not None and len(value) == len(dtype.names) def is_pure_fancy_indexing(selection: Any, ndim: int) -> bool: """Check whether a selection contains only scalars or integer/bool array-likes. Parameters ---------- selection : tuple, slice, or scalar A valid selection value for indexing into arrays. Returns ------- is_pure : bool True if the selection is a pure fancy indexing expression (ie not mixed with boolean or slices). """ if is_bool_array(selection): # is mask selection return True if ndim == 1 and ( is_integer_list(selection) or is_integer_array(selection) or is_bool_list(selection) ): return True # if not, we go through the normal path below, because a 1-tuple # of integers is also allowed. 
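# e.g. for a 2-d array, (np.array([0, 2]), np.array([1, 3])) is pure fancy indexing, # whereas (np.array([0, 2]), slice(None)) mixes in a slice and is not. 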
no_slicing = ( isinstance(selection, tuple) and len(selection) == ndim and not (any(isinstance(elem, slice) or elem is Ellipsis for elem in selection)) ) return ( no_slicing and all( is_integer(elem) or is_integer_list(elem) or is_integer_array(elem) for elem in selection ) and any(is_integer_list(elem) or is_integer_array(elem) for elem in selection) ) def is_pure_orthogonal_indexing(selection: Selection, ndim: int) -> TypeGuard[OrthogonalSelection]: if not ndim: return False selection_normalized = (selection,) if not isinstance(selection, tuple) else selection # Case 1: Selection contains of iterable of integers or boolean if len(selection_normalized) == ndim and all( is_int_or_bool_iterable(s) for s in selection_normalized ): return True # Case 2: selection contains either zero or one integer iterables. # All other selection elements are slices or integers return ( len(selection_normalized) <= ndim and sum(is_int_or_bool_iterable(s) for s in selection_normalized) <= 1 and all( is_int_or_bool_iterable(s) or isinstance(s, int | slice) for s in selection_normalized ) ) def get_chunk_shape(chunk_grid: ChunkGrid) -> tuple[int, ...]: from zarr.core.chunk_grids import RegularChunkGrid assert isinstance(chunk_grid, RegularChunkGrid), ( "Only regular chunk grid is supported, currently." ) return chunk_grid.chunk_shape def normalize_integer_selection(dim_sel: int, dim_len: int) -> int: # normalize type to int dim_sel = int(dim_sel) # handle wraparound if dim_sel < 0: dim_sel = dim_len + dim_sel # handle out of bounds if dim_sel >= dim_len or dim_sel < 0: msg = f"index out of bounds for dimension with length {dim_len}" raise BoundsCheckError(msg) return dim_sel class ChunkDimProjection(NamedTuple): """A mapping from chunk to output array for a single dimension. Attributes ---------- dim_chunk_ix Index of chunk. dim_chunk_sel Selection of items from chunk array. dim_out_sel Selection of items in target (output) array. 
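is_complete_chunk True if a complete chunk is indexed. 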
""" dim_chunk_ix: int dim_chunk_sel: Selector dim_out_sel: Selector | None is_complete_chunk: bool @dataclass(frozen=True) class IntDimIndexer: dim_sel: int dim_len: int dim_chunk_len: int nitems: int = 1 def __init__(self, dim_sel: int, dim_len: int, dim_chunk_len: int) -> None: object.__setattr__(self, "dim_sel", normalize_integer_selection(dim_sel, dim_len)) object.__setattr__(self, "dim_len", dim_len) object.__setattr__(self, "dim_chunk_len", dim_chunk_len) def __iter__(self) -> Iterator[ChunkDimProjection]: dim_chunk_ix = self.dim_sel // self.dim_chunk_len dim_offset = dim_chunk_ix * self.dim_chunk_len dim_chunk_sel = self.dim_sel - dim_offset dim_out_sel = None is_complete_chunk = self.dim_chunk_len == 1 yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel, is_complete_chunk) @dataclass(frozen=True) class SliceDimIndexer: dim_len: int dim_chunk_len: int nitems: int nchunks: int start: int stop: int step: int def __init__(self, dim_sel: slice, dim_len: int, dim_chunk_len: int) -> None: # normalize start, stop, step = dim_sel.indices(dim_len) if step < 1: raise NegativeStepError("only slices with step >= 1 are supported.") object.__setattr__(self, "start", start) object.__setattr__(self, "stop", stop) object.__setattr__(self, "step", step) object.__setattr__(self, "dim_len", dim_len) object.__setattr__(self, "dim_chunk_len", dim_chunk_len) object.__setattr__(self, "nitems", max(0, ceildiv((stop - start), step))) object.__setattr__(self, "nchunks", ceildiv(dim_len, dim_chunk_len)) def __iter__(self) -> Iterator[ChunkDimProjection]: # figure out the range of chunks we need to visit dim_chunk_ix_from = 0 if self.start == 0 else self.start // self.dim_chunk_len dim_chunk_ix_to = ceildiv(self.stop, self.dim_chunk_len) # iterate over chunks in range for dim_chunk_ix in range(dim_chunk_ix_from, dim_chunk_ix_to): # compute offsets for chunk within overall array dim_offset = dim_chunk_ix * self.dim_chunk_len dim_limit = min(self.dim_len, (dim_chunk_ix + 1) * self.dim_chunk_len) # determine chunk length, accounting for trailing chunk dim_chunk_len = dim_limit - dim_offset if self.start < dim_offset: # selection starts before current chunk dim_chunk_sel_start = 0 remainder = (dim_offset - self.start) % self.step if remainder: dim_chunk_sel_start += self.step - remainder # compute number of previous items, provides offset into output array dim_out_offset = ceildiv((dim_offset - self.start), self.step) else: # selection starts within current chunk dim_chunk_sel_start = self.start - dim_offset dim_out_offset = 0 if self.stop > dim_limit: # selection ends after current chunk dim_chunk_sel_stop = dim_chunk_len else: # selection ends within current chunk dim_chunk_sel_stop = self.stop - dim_offset dim_chunk_sel = slice(dim_chunk_sel_start, dim_chunk_sel_stop, self.step) dim_chunk_nitems = ceildiv((dim_chunk_sel_stop - dim_chunk_sel_start), self.step) # If there are no elements on the selection within this chunk, then skip if dim_chunk_nitems == 0: continue dim_out_sel = slice(dim_out_offset, dim_out_offset + dim_chunk_nitems) is_complete_chunk = ( dim_chunk_sel_start == 0 and (self.stop >= dim_limit) and self.step in [1, None] ) yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel, is_complete_chunk) def check_selection_length(selection: SelectionNormalized, shape: tuple[int, ...]) -> None: if len(selection) > len(shape): err_too_many_indices(selection, shape) def replace_ellipsis(selection: Any, shape: tuple[int, ...]) -> SelectionNormalized: selection = 
ensure_tuple(selection) # count number of ellipsis present n_ellipsis = sum(1 for i in selection if i is Ellipsis) if n_ellipsis > 1: # more than 1 is an error raise IndexError("an index can only have a single ellipsis ('...')") elif n_ellipsis == 1: # locate the ellipsis, count how many items to left and right n_items_l = selection.index(Ellipsis) # items to left of ellipsis n_items_r = len(selection) - (n_items_l + 1) # items to right of ellipsis n_items = len(selection) - 1 # all non-ellipsis items if n_items >= len(shape): # ellipsis does nothing, just remove it selection = tuple(i for i in selection if i != Ellipsis) else: # replace ellipsis with as many slices are needed for number of dims new_item = selection[:n_items_l] + ((slice(None),) * (len(shape) - n_items)) if n_items_r: new_item += selection[-n_items_r:] selection = new_item # fill out selection if not completely specified if len(selection) < len(shape): selection += (slice(None),) * (len(shape) - len(selection)) # check selection not too long check_selection_length(selection, shape) return cast("SelectionNormalized", selection) def replace_lists(selection: SelectionNormalized) -> SelectionNormalized: return tuple( np.asarray(dim_sel) if isinstance(dim_sel, list) else dim_sel for dim_sel in selection ) T = TypeVar("T") def ensure_tuple(v: Any) -> SelectionNormalized: if not isinstance(v, tuple): v = (v,) return cast("SelectionNormalized", v) class ChunkProjection(NamedTuple): """A mapping of items from chunk to output array. Can be used to extract items from the chunk array for loading into an output array. Can also be used to extract items from a value array for setting/updating in a chunk array. Attributes ---------- chunk_coords Indices of chunk. chunk_selection Selection of items from chunk array. out_selection Selection of items in target (output) array. is_complete_chunk: True if a complete chunk is indexed """ chunk_coords: tuple[int, ...] chunk_selection: tuple[Selector, ...] | npt.NDArray[np.intp] out_selection: tuple[Selector, ...] | npt.NDArray[np.intp] | slice is_complete_chunk: bool def is_slice(s: Any) -> TypeGuard[slice]: return isinstance(s, slice) def is_contiguous_slice(s: Any) -> TypeGuard[slice]: return is_slice(s) and (s.step is None or s.step == 1) def is_positive_slice(s: Any) -> TypeGuard[slice]: return is_slice(s) and (s.step is None or s.step >= 1) def is_contiguous_selection(selection: Any) -> TypeGuard[slice]: selection = ensure_tuple(selection) return all((is_integer_array(s) or is_contiguous_slice(s) or s == Ellipsis) for s in selection) def is_basic_selection(selection: Any) -> TypeGuard[BasicSelection]: selection = ensure_tuple(selection) return all(is_integer(s) or is_positive_slice(s) for s in selection) @dataclass(frozen=True) class BasicIndexer(Indexer): dim_indexers: list[IntDimIndexer | SliceDimIndexer] shape: tuple[int, ...] drop_axes: tuple[int, ...] 
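# A basic selection (integers and slices only) is decomposed into one indexer per dimension; # iterating yields one ChunkProjection per chunk touched by the selection. 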
def __init__( self, selection: BasicSelection, shape: tuple[int, ...], chunk_grid: ChunkGrid, ) -> None: chunk_shape = get_chunk_shape(chunk_grid) # handle ellipsis selection_normalized = replace_ellipsis(selection, shape) # setup per-dimension indexers dim_indexers: list[IntDimIndexer | SliceDimIndexer] = [] for dim_sel, dim_len, dim_chunk_len in zip( selection_normalized, shape, chunk_shape, strict=True ): dim_indexer: IntDimIndexer | SliceDimIndexer if is_integer(dim_sel): dim_indexer = IntDimIndexer(dim_sel, dim_len, dim_chunk_len) elif is_slice(dim_sel): dim_indexer = SliceDimIndexer(dim_sel, dim_len, dim_chunk_len) else: raise IndexError( "unsupported selection item for basic indexing; " f"expected integer or slice, got {type(dim_sel)!r}" ) dim_indexers.append(dim_indexer) object.__setattr__(self, "dim_indexers", dim_indexers) object.__setattr__( self, "shape", tuple(s.nitems for s in self.dim_indexers if not isinstance(s, IntDimIndexer)), ) object.__setattr__(self, "drop_axes", ()) def __iter__(self) -> Iterator[ChunkProjection]: for dim_projections in itertools.product(*self.dim_indexers): chunk_coords = tuple(p.dim_chunk_ix for p in dim_projections) chunk_selection = tuple(p.dim_chunk_sel for p in dim_projections) out_selection = tuple( p.dim_out_sel for p in dim_projections if p.dim_out_sel is not None ) is_complete_chunk = all(p.is_complete_chunk for p in dim_projections) yield ChunkProjection(chunk_coords, chunk_selection, out_selection, is_complete_chunk) @dataclass(frozen=True) class BoolArrayDimIndexer: dim_sel: npt.NDArray[np.bool_] dim_len: int dim_chunk_len: int nchunks: int chunk_nitems: npt.NDArray[Any] chunk_nitems_cumsum: npt.NDArray[Any] nitems: int dim_chunk_ixs: npt.NDArray[np.intp] def __init__(self, dim_sel: npt.NDArray[np.bool_], dim_len: int, dim_chunk_len: int) -> None: # check number of dimensions if not is_bool_array(dim_sel, 1): raise IndexError("Boolean arrays in an orthogonal selection must be 1-dimensional only") # check shape if dim_sel.shape[0] != dim_len: raise IndexError( f"Boolean array has the wrong length for dimension; expected {dim_len}, got {dim_sel.shape[0]}" ) # precompute number of selected items for each chunk nchunks = ceildiv(dim_len, dim_chunk_len) chunk_nitems = np.zeros(nchunks, dtype="i8") for dim_chunk_ix in range(nchunks): dim_offset = dim_chunk_ix * dim_chunk_len chunk_nitems[dim_chunk_ix] = np.count_nonzero( dim_sel[dim_offset : dim_offset + dim_chunk_len] ) chunk_nitems_cumsum = np.cumsum(chunk_nitems) nitems = chunk_nitems_cumsum[-1] dim_chunk_ixs = np.nonzero(chunk_nitems)[0] # store attributes object.__setattr__(self, "dim_sel", dim_sel) object.__setattr__(self, "dim_len", dim_len) object.__setattr__(self, "dim_chunk_len", dim_chunk_len) object.__setattr__(self, "nchunks", nchunks) object.__setattr__(self, "chunk_nitems", chunk_nitems) object.__setattr__(self, "chunk_nitems_cumsum", chunk_nitems_cumsum) object.__setattr__(self, "nitems", nitems) object.__setattr__(self, "dim_chunk_ixs", dim_chunk_ixs) def __iter__(self) -> Iterator[ChunkDimProjection]: # iterate over chunks with at least one item for dim_chunk_ix in self.dim_chunk_ixs: # find region in chunk dim_offset = dim_chunk_ix * self.dim_chunk_len dim_chunk_sel = self.dim_sel[dim_offset : dim_offset + self.dim_chunk_len] # pad out if final chunk if dim_chunk_sel.shape[0] < self.dim_chunk_len: tmp = np.zeros(self.dim_chunk_len, dtype=bool) tmp[: dim_chunk_sel.shape[0]] = dim_chunk_sel dim_chunk_sel = tmp # find region in output if dim_chunk_ix == 0: start = 0 else: start 
= self.chunk_nitems_cumsum[dim_chunk_ix - 1] stop = self.chunk_nitems_cumsum[dim_chunk_ix] dim_out_sel = slice(start, stop) is_complete_chunk = False # TODO yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel, is_complete_chunk) class Order(Enum): """ Enum for indexing order. """ UNKNOWN = 0 INCREASING = 1 DECREASING = 2 UNORDERED = 3 @staticmethod def check(a: npt.NDArray[Any]) -> Order: diff = np.diff(a) diff_positive = diff >= 0 n_diff_positive = np.count_nonzero(diff_positive) all_increasing = n_diff_positive == len(diff_positive) any_increasing = n_diff_positive > 0 if all_increasing: order = Order.INCREASING elif any_increasing: order = Order.UNORDERED else: order = Order.DECREASING return order def wraparound_indices(x: npt.NDArray[Any], dim_len: int) -> None: loc_neg = x < 0 if np.any(loc_neg): x[loc_neg] += dim_len def boundscheck_indices(x: npt.NDArray[Any], dim_len: int) -> None: if np.any(x < 0) or np.any(x >= dim_len): msg = f"index out of bounds for dimension with length {dim_len}" raise BoundsCheckError(msg) @dataclass(frozen=True) class IntArrayDimIndexer: """Integer array selection against a single dimension.""" dim_len: int dim_chunk_len: int nchunks: int nitems: int order: Order dim_sel: npt.NDArray[np.intp] dim_out_sel: npt.NDArray[np.intp] chunk_nitems: int dim_chunk_ixs: npt.NDArray[np.intp] chunk_nitems_cumsum: npt.NDArray[np.intp] def __init__( self, dim_sel: npt.NDArray[np.intp], dim_len: int, dim_chunk_len: int, wraparound: bool = True, boundscheck: bool = True, order: Order = Order.UNKNOWN, ) -> None: # ensure 1d array dim_sel = np.asanyarray(dim_sel) if not is_integer_array(dim_sel, 1): raise IndexError("integer arrays in an orthogonal selection must be 1-dimensional only") nitems = len(dim_sel) nchunks = ceildiv(dim_len, dim_chunk_len) # handle wraparound if wraparound: wraparound_indices(dim_sel, dim_len) # handle out of bounds if boundscheck: boundscheck_indices(dim_sel, dim_len) # determine which chunk is needed for each selection item # note: for dense integer selections, the division operation here is the # bottleneck dim_sel_chunk = dim_sel // dim_chunk_len # determine order of indices if order == Order.UNKNOWN: order = Order.check(dim_sel) order = Order(order) if order == Order.INCREASING: dim_out_sel = None elif order == Order.DECREASING: dim_sel = dim_sel[::-1] # TODO should be possible to do this without creating an arange dim_out_sel = np.arange(nitems - 1, -1, -1) else: # sort indices to group by chunk dim_out_sel = np.argsort(dim_sel_chunk) dim_sel = np.take(dim_sel, dim_out_sel) # precompute number of selected items for each chunk chunk_nitems = np.bincount(dim_sel_chunk, minlength=nchunks) # find chunks that we need to visit dim_chunk_ixs = np.nonzero(chunk_nitems)[0] # compute offsets into the output array chunk_nitems_cumsum = np.cumsum(chunk_nitems) # store attributes object.__setattr__(self, "dim_len", dim_len) object.__setattr__(self, "dim_chunk_len", dim_chunk_len) object.__setattr__(self, "nchunks", nchunks) object.__setattr__(self, "nitems", nitems) object.__setattr__(self, "order", order) object.__setattr__(self, "dim_sel", dim_sel) object.__setattr__(self, "dim_out_sel", dim_out_sel) object.__setattr__(self, "chunk_nitems", chunk_nitems) object.__setattr__(self, "dim_chunk_ixs", dim_chunk_ixs) object.__setattr__(self, "chunk_nitems_cumsum", chunk_nitems_cumsum) def __iter__(self) -> Iterator[ChunkDimProjection]: for dim_chunk_ix in self.dim_chunk_ixs: dim_out_sel: slice | npt.NDArray[np.intp] # find region in output if 
dim_chunk_ix == 0: start = 0 else: start = self.chunk_nitems_cumsum[dim_chunk_ix - 1] stop = self.chunk_nitems_cumsum[dim_chunk_ix] if self.order == Order.INCREASING: dim_out_sel = slice(start, stop) else: dim_out_sel = self.dim_out_sel[start:stop] # find region in chunk dim_offset = dim_chunk_ix * self.dim_chunk_len dim_chunk_sel = self.dim_sel[start:stop] - dim_offset is_complete_chunk = False # TODO yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel, is_complete_chunk) def slice_to_range(s: slice, length: int) -> range: return range(*s.indices(length)) def ix_(selection: Any, shape: tuple[int, ...]) -> npt.NDArray[np.intp]: """Convert an orthogonal selection to a numpy advanced (fancy) selection, like ``numpy.ix_`` but with support for slices and single ints.""" # normalisation selection = replace_ellipsis(selection, shape) # replace slice and int as these are not supported by numpy.ix_ selection = [ slice_to_range(dim_sel, dim_len) if isinstance(dim_sel, slice) else [dim_sel] if is_integer(dim_sel) else dim_sel for dim_sel, dim_len in zip(selection, shape, strict=True) ] # now get numpy to convert to a coordinate selection selection = np.ix_(*selection) return cast("npt.NDArray[np.intp]", selection) def oindex(a: npt.NDArray[Any], selection: Selection) -> npt.NDArray[Any]: """Implementation of orthogonal indexing with slices and ints.""" selection = replace_ellipsis(selection, a.shape) drop_axes = tuple(i for i, s in enumerate(selection) if is_integer(s)) selection = ix_(selection, a.shape) result = a[selection] if drop_axes: result = result.squeeze(axis=drop_axes) return result def oindex_set(a: npt.NDArray[Any], selection: Selection, value: Any) -> None: selection = replace_ellipsis(selection, a.shape) drop_axes = tuple(i for i, s in enumerate(selection) if is_integer(s)) selection = ix_(selection, a.shape) if not np.isscalar(value) and drop_axes: value = np.asanyarray(value) value_selection: list[Selector | None] = [slice(None)] * len(a.shape) for i in drop_axes: value_selection[i] = np.newaxis value = value[tuple(value_selection)] a[selection] = value @dataclass(frozen=True) class OrthogonalIndexer(Indexer): dim_indexers: list[IntDimIndexer | SliceDimIndexer | IntArrayDimIndexer | BoolArrayDimIndexer] shape: tuple[int, ...] chunk_shape: tuple[int, ...] is_advanced: bool drop_axes: tuple[int, ...] 
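# An orthogonal (outer) selection is decomposed into one indexer per dimension; the cartesian # product of the per-dimension projections determines which chunks are visited. 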
def __init__(self, selection: Selection, shape: tuple[int, ...], chunk_grid: ChunkGrid) -> None: chunk_shape = get_chunk_shape(chunk_grid) # handle ellipsis selection = replace_ellipsis(selection, shape) # normalize list to array selection = replace_lists(selection) # setup per-dimension indexers dim_indexers: list[ IntDimIndexer | SliceDimIndexer | IntArrayDimIndexer | BoolArrayDimIndexer ] = [] for dim_sel, dim_len, dim_chunk_len in zip(selection, shape, chunk_shape, strict=True): dim_indexer: IntDimIndexer | SliceDimIndexer | IntArrayDimIndexer | BoolArrayDimIndexer if is_integer(dim_sel): dim_indexer = IntDimIndexer(dim_sel, dim_len, dim_chunk_len) elif isinstance(dim_sel, slice): dim_indexer = SliceDimIndexer(dim_sel, dim_len, dim_chunk_len) elif is_integer_array(dim_sel): dim_indexer = IntArrayDimIndexer(dim_sel, dim_len, dim_chunk_len) elif is_bool_array(dim_sel): dim_indexer = BoolArrayDimIndexer(dim_sel, dim_len, dim_chunk_len) else: raise IndexError( "unsupported selection item for orthogonal indexing; " "expected integer, slice, integer array or Boolean " f"array, got {type(dim_sel)!r}" ) dim_indexers.append(dim_indexer) shape = tuple(s.nitems for s in dim_indexers if not isinstance(s, IntDimIndexer)) is_advanced = not is_basic_selection(selection) if is_advanced: drop_axes = tuple( i for i, dim_indexer in enumerate(dim_indexers) if isinstance(dim_indexer, IntDimIndexer) ) else: drop_axes = () object.__setattr__(self, "dim_indexers", dim_indexers) object.__setattr__(self, "shape", shape) object.__setattr__(self, "chunk_shape", chunk_shape) object.__setattr__(self, "is_advanced", is_advanced) object.__setattr__(self, "drop_axes", drop_axes) def __iter__(self) -> Iterator[ChunkProjection]: for dim_projections in itertools.product(*self.dim_indexers): chunk_coords = tuple(p.dim_chunk_ix for p in dim_projections) chunk_selection: tuple[Selector, ...] | npt.NDArray[Any] = tuple( p.dim_chunk_sel for p in dim_projections ) out_selection: tuple[Selector, ...] | npt.NDArray[Any] = tuple( p.dim_out_sel for p in dim_projections if p.dim_out_sel is not None ) # handle advanced indexing arrays orthogonally if self.is_advanced: # N.B., numpy doesn't support orthogonal indexing directly as yet, # so need to work around via np.ix_. Also np.ix_ does not support a # mixture of arrays and slices or integers, so need to convert slices # and integers into ranges. chunk_selection = ix_(chunk_selection, self.chunk_shape) # special case for non-monotonic indices if not is_basic_selection(out_selection): out_selection = ix_(out_selection, self.shape) is_complete_chunk = all(p.is_complete_chunk for p in dim_projections) yield ChunkProjection(chunk_coords, chunk_selection, out_selection, is_complete_chunk) @dataclass(frozen=True) class OIndex: array: AnyArray # TODO: develop Array generic and move zarr.Array[np.intp] | zarr.Array[np.bool_] to ArrayOfIntOrBool def __getitem__(self, selection: OrthogonalSelection | AnyArray) -> NDArrayLikeOrScalar: from zarr.core.array import Array # if input is a Zarr array, we materialize it now. 
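# (i.e. it is converted to an in-memory NumPy integer or boolean array so it can be used as an index) 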
if isinstance(selection, Array): selection = _zarr_array_to_int_or_bool_array(selection) fields, new_selection = pop_fields(selection) new_selection = ensure_tuple(new_selection) new_selection = replace_lists(new_selection) return self.array.get_orthogonal_selection( cast("OrthogonalSelection", new_selection), fields=fields ) def __setitem__(self, selection: OrthogonalSelection, value: npt.ArrayLike) -> None: fields, new_selection = pop_fields(selection) new_selection = ensure_tuple(new_selection) new_selection = replace_lists(new_selection) return self.array.set_orthogonal_selection( cast("OrthogonalSelection", new_selection), value, fields=fields ) @dataclass(frozen=True) class AsyncOIndex(Generic[T_ArrayMetadata]): array: AsyncArray[T_ArrayMetadata] async def getitem(self, selection: OrthogonalSelection | AnyArray) -> NDArrayLikeOrScalar: from zarr.core.array import Array # if input is a Zarr array, we materialize it now. if isinstance(selection, Array): selection = _zarr_array_to_int_or_bool_array(selection) fields, new_selection = pop_fields(selection) new_selection = ensure_tuple(new_selection) new_selection = replace_lists(new_selection) return await self.array.get_orthogonal_selection( cast(OrthogonalSelection, new_selection), fields=fields ) @dataclass(frozen=True) class BlockIndexer(Indexer): dim_indexers: list[SliceDimIndexer] shape: tuple[int, ...] drop_axes: tuple[int, ...] def __init__( self, selection: BasicSelection, shape: tuple[int, ...], chunk_grid: ChunkGrid ) -> None: chunk_shape = get_chunk_shape(chunk_grid) # handle ellipsis selection_normalized = replace_ellipsis(selection, shape) # normalize list to array selection_normalized = replace_lists(selection_normalized) # setup per-dimension indexers dim_indexers = [] for dim_sel, dim_len, dim_chunk_size in zip( selection_normalized, shape, chunk_shape, strict=True ): dim_numchunks = int(np.ceil(dim_len / dim_chunk_size)) if is_integer(dim_sel): if dim_sel < 0: dim_sel = dim_numchunks + dim_sel start = dim_sel * dim_chunk_size stop = start + dim_chunk_size slice_ = slice(start, stop) elif is_slice(dim_sel): start = dim_sel.start if dim_sel.start is not None else 0 stop = dim_sel.stop if dim_sel.stop is not None else dim_numchunks if dim_sel.step not in {1, None}: raise IndexError( "unsupported selection item for block indexing; " f"expected integer or slice with step=1, got {type(dim_sel)!r}" ) # Can't reuse wraparound_indices because it expects a numpy array # We have integers here. 
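# handle negative block indices by wrapping around the number of chunks, e.g. for a dimension # with 10 chunks, block index -1 refers to the last chunk (chunk 9) 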
if start < 0: start = dim_numchunks + start if stop < 0: stop = dim_numchunks + stop start *= dim_chunk_size stop *= dim_chunk_size slice_ = slice(start, stop) else: raise IndexError( "unsupported selection item for block indexing; " f"expected integer or slice, got {type(dim_sel)!r}" ) dim_indexer = SliceDimIndexer(slice_, dim_len, dim_chunk_size) dim_indexers.append(dim_indexer) if start >= dim_len or start < 0: msg = f"index out of bounds for dimension with length {dim_len}" raise BoundsCheckError(msg) shape = tuple(s.nitems for s in dim_indexers) object.__setattr__(self, "dim_indexers", dim_indexers) object.__setattr__(self, "shape", shape) object.__setattr__(self, "drop_axes", ()) def __iter__(self) -> Iterator[ChunkProjection]: for dim_projections in itertools.product(*self.dim_indexers): chunk_coords = tuple(p.dim_chunk_ix for p in dim_projections) chunk_selection = tuple(p.dim_chunk_sel for p in dim_projections) out_selection = tuple( p.dim_out_sel for p in dim_projections if p.dim_out_sel is not None ) is_complete_chunk = all(p.is_complete_chunk for p in dim_projections) yield ChunkProjection(chunk_coords, chunk_selection, out_selection, is_complete_chunk) @dataclass(frozen=True) class BlockIndex: array: AnyArray def __getitem__(self, selection: BasicSelection) -> NDArrayLikeOrScalar: fields, new_selection = pop_fields(selection) new_selection = ensure_tuple(new_selection) new_selection = replace_lists(new_selection) return self.array.get_block_selection(cast("BasicSelection", new_selection), fields=fields) def __setitem__(self, selection: BasicSelection, value: npt.ArrayLike) -> None: fields, new_selection = pop_fields(selection) new_selection = ensure_tuple(new_selection) new_selection = replace_lists(new_selection) return self.array.set_block_selection( cast("BasicSelection", new_selection), value, fields=fields ) def is_coordinate_selection( selection: SelectionNormalized, shape: tuple[int, ...] ) -> TypeGuard[CoordinateSelectionNormalized]: return ( isinstance(selection, tuple) and len(selection) == len(shape) and all(is_integer(dim_sel) or is_integer_array(dim_sel) for dim_sel in selection) ) def is_mask_selection(selection: Selection, shape: tuple[int, ...]) -> TypeGuard[MaskSelection]: return ( isinstance(selection, tuple) and len(selection) == 1 and is_bool_array(selection[0]) and selection[0].shape == shape ) @dataclass(frozen=True) class CoordinateIndexer(Indexer): sel_shape: tuple[int, ...] selection: CoordinateSelectionNormalized sel_sort: npt.NDArray[np.intp] | None chunk_nitems_cumsum: npt.NDArray[np.intp] chunk_rixs: npt.NDArray[np.intp] chunk_mixs: tuple[npt.NDArray[np.intp], ...] shape: tuple[int, ...] chunk_shape: tuple[int, ...] drop_axes: tuple[int, ...] def __init__( self, selection: CoordinateSelection, shape: tuple[int, ...], chunk_grid: ChunkGrid ) -> None: chunk_shape = get_chunk_shape(chunk_grid) cdata_shape: tuple[int, ...] 
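        # A hedged usage sketch (not part of this class): coordinate indexing pairs
        # the per-dimension index arrays elementwise, e.g.
        #
        #     z.vindex[[0, 2], [1, 3]]   # the two points (0, 1) and (2, 3)
        #
        # This indexer then groups the selected points by the chunk that contains
        # them, so each chunk is visited only once.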
if shape == (): cdata_shape = (1,) else: cdata_shape = tuple(math.ceil(s / c) for s, c in zip(shape, chunk_shape, strict=True)) nchunks = reduce(operator.mul, cdata_shape, 1) # some initial normalization selection_normalized = cast("CoordinateSelectionNormalized", ensure_tuple(selection)) selection_normalized = tuple( np.asarray([i]) if is_integer(i) else i for i in selection_normalized ) selection_normalized = cast( "CoordinateSelectionNormalized", replace_lists(selection_normalized) ) # validation if not is_coordinate_selection(selection_normalized, shape): raise IndexError( "invalid coordinate selection; expected one integer " "(coordinate) array per dimension of the target array, " f"got {selection!r}" ) # handle wraparound, boundscheck for dim_sel, dim_len in zip(selection_normalized, shape, strict=True): # handle wraparound wraparound_indices(dim_sel, dim_len) # handle out of bounds boundscheck_indices(dim_sel, dim_len) # compute chunk index for each point in the selection chunks_multi_index = tuple( dim_sel // dim_chunk_len for (dim_sel, dim_chunk_len) in zip(selection_normalized, chunk_shape, strict=True) ) # broadcast selection - this will raise error if array dimensions don't match selection_broadcast = tuple(np.broadcast_arrays(*selection_normalized)) chunks_multi_index_broadcast = np.broadcast_arrays(*chunks_multi_index) # remember shape of selection, because we will flatten indices for processing sel_shape = selection_broadcast[0].shape or (1,) # flatten selection selection_broadcast = tuple(dim_sel.reshape(-1) for dim_sel in selection_broadcast) chunks_multi_index_broadcast = tuple( dim_chunks.reshape(-1) for dim_chunks in chunks_multi_index_broadcast ) # ravel chunk indices chunks_raveled_indices = np.ravel_multi_index( chunks_multi_index_broadcast, dims=cdata_shape ) # group points by chunk if np.any(np.diff(chunks_raveled_indices) < 0): # optimisation, only sort if needed sel_sort = np.argsort(chunks_raveled_indices) selection_broadcast = tuple(dim_sel[sel_sort] for dim_sel in selection_broadcast) else: sel_sort = None shape = selection_broadcast[0].shape or (1,) # precompute number of selected items for each chunk chunk_nitems = np.bincount(chunks_raveled_indices, minlength=nchunks) chunk_nitems_cumsum = np.cumsum(chunk_nitems) # locate the chunks we need to process chunk_rixs = np.nonzero(chunk_nitems)[0] # unravel chunk indices chunk_mixs = np.unravel_index(chunk_rixs, cdata_shape) object.__setattr__(self, "sel_shape", sel_shape) object.__setattr__(self, "selection", selection_broadcast) object.__setattr__(self, "sel_sort", sel_sort) object.__setattr__(self, "chunk_nitems_cumsum", chunk_nitems_cumsum) object.__setattr__(self, "chunk_rixs", chunk_rixs) object.__setattr__(self, "chunk_mixs", chunk_mixs) object.__setattr__(self, "chunk_shape", chunk_shape) object.__setattr__(self, "shape", shape) object.__setattr__(self, "drop_axes", ()) def __iter__(self) -> Iterator[ChunkProjection]: # iterate over chunks for i, chunk_rix in enumerate(self.chunk_rixs): chunk_coords = tuple(m[i] for m in self.chunk_mixs) if chunk_rix == 0: start = 0 else: start = self.chunk_nitems_cumsum[chunk_rix - 1] stop = self.chunk_nitems_cumsum[chunk_rix] out_selection: slice | npt.NDArray[np.intp] if self.sel_sort is None: out_selection = slice(start, stop) else: out_selection = self.sel_sort[start:stop] chunk_offsets = tuple( dim_chunk_ix * dim_chunk_len for dim_chunk_ix, dim_chunk_len in zip(chunk_coords, self.chunk_shape, strict=True) ) chunk_selection = tuple( dim_sel[start:stop] - 
dim_chunk_offset for (dim_sel, dim_chunk_offset) in zip(self.selection, chunk_offsets, strict=True) ) is_complete_chunk = False # TODO yield ChunkProjection(chunk_coords, chunk_selection, out_selection, is_complete_chunk) @dataclass(frozen=True) class MaskIndexer(CoordinateIndexer): def __init__( self, selection: MaskSelection, shape: tuple[int, ...], chunk_grid: ChunkGrid ) -> None: # some initial normalization selection_normalized = cast("tuple[MaskSelection]", ensure_tuple(selection)) selection_normalized = cast("tuple[MaskSelection]", replace_lists(selection_normalized)) # validation if not is_mask_selection(selection_normalized, shape): raise IndexError( "invalid mask selection; expected one Boolean (mask)" f"array with the same shape as the target array, got {selection_normalized!r}" ) # convert to indices selection_indices = np.nonzero(selection_normalized[0]) # delegate the rest to superclass super().__init__(selection_indices, shape, chunk_grid) @dataclass(frozen=True) class VIndex: array: AnyArray # TODO: develop Array generic and move zarr.Array[np.intp] | zarr.Array[np.bool_] to ArrayOfIntOrBool def __getitem__( self, selection: CoordinateSelection | MaskSelection | AnyArray ) -> NDArrayLikeOrScalar: from zarr.core.array import Array # if input is a Zarr array, we materialize it now. if isinstance(selection, Array): selection = _zarr_array_to_int_or_bool_array(selection) fields, new_selection = pop_fields(selection) new_selection = ensure_tuple(new_selection) new_selection = replace_lists(new_selection) if is_coordinate_selection(new_selection, self.array.shape): return self.array.get_coordinate_selection(new_selection, fields=fields) elif is_mask_selection(new_selection, self.array.shape): return self.array.get_mask_selection(new_selection, fields=fields) else: msg = ( "unsupported selection type for vectorized indexing; only " "coordinate selection (tuple of integer arrays) and mask selection " f"(single Boolean array) are supported; got {new_selection!r}" ) raise VindexInvalidSelectionError(msg) def __setitem__( self, selection: CoordinateSelection | MaskSelection, value: npt.ArrayLike ) -> None: fields, new_selection = pop_fields(selection) new_selection = ensure_tuple(new_selection) new_selection = replace_lists(new_selection) if is_coordinate_selection(new_selection, self.array.shape): self.array.set_coordinate_selection(new_selection, value, fields=fields) elif is_mask_selection(new_selection, self.array.shape): self.array.set_mask_selection(new_selection, value, fields=fields) else: msg = ( "unsupported selection type for vectorized indexing; only " "coordinate selection (tuple of integer arrays) and mask selection " f"(single Boolean array) are supported; got {new_selection!r}" ) raise VindexInvalidSelectionError(msg) @dataclass(frozen=True) class AsyncVIndex(Generic[T_ArrayMetadata]): array: AsyncArray[T_ArrayMetadata] # TODO: develop Array generic and move zarr.Array[np.intp] | zarr.Array[np.bool_] to ArrayOfIntOrBool async def getitem( self, selection: CoordinateSelection | MaskSelection | AnyArray ) -> NDArrayLikeOrScalar: # TODO deduplicate these internals with the sync version of getitem # TODO requires solving this circular sync issue: https://github.com/zarr-developers/zarr-python/pull/3083#discussion_r2230737448 from zarr.core.array import Array # if input is a Zarr array, we materialize it now. 
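        # Illustration (hedged): for a hypothetical (3, 3) array, a coordinate
        # selection such as (np.array([0, 2]), np.array([1, 0])) addresses the points
        # (0, 1) and (2, 0), while a mask selection is a single (3, 3) Boolean array;
        # any other input raises VindexInvalidSelectionError below.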
if isinstance(selection, Array): selection = _zarr_array_to_int_or_bool_array(selection) fields, new_selection = pop_fields(selection) new_selection = ensure_tuple(new_selection) new_selection = replace_lists(new_selection) if is_coordinate_selection(new_selection, self.array.shape): return await self.array.get_coordinate_selection(new_selection, fields=fields) elif is_mask_selection(new_selection, self.array.shape): return await self.array.get_mask_selection(new_selection, fields=fields) else: msg = ( "unsupported selection type for vectorized indexing; only " "coordinate selection (tuple of integer arrays) and mask selection " f"(single Boolean array) are supported; got {new_selection!r}" ) raise VindexInvalidSelectionError(msg) def check_fields(fields: Fields | None, dtype: np.dtype[Any]) -> np.dtype[Any]: # early out if fields is None: return dtype # check type if not isinstance(fields, str | list | tuple): raise IndexError( f"'fields' argument must be a string or list of strings; found {type(fields)!r}" ) if fields: if dtype.names is None: raise IndexError("invalid 'fields' argument, array does not have any fields") try: if isinstance(fields, str): # single field selection out_dtype = dtype[fields] else: # multiple field selection out_dtype = np.dtype([(f, dtype[f]) for f in fields]) except KeyError as e: raise IndexError(f"invalid 'fields' argument, field not found: {e!r}") from e else: return out_dtype else: return dtype def check_no_multi_fields(fields: Fields | None) -> Fields | None: if isinstance(fields, list): if len(fields) == 1: return fields[0] elif len(fields) > 1: raise IndexError("multiple fields are not supported for this operation") return fields def pop_fields(selection: SelectionWithFields) -> tuple[Fields | None, Selection]: if isinstance(selection, str): # single field selection return selection, () elif not isinstance(selection, tuple): # single selection item, no fields # leave selection as-is return None, cast("Selection", selection) else: # multiple items, split fields from selection items fields: Fields = [f for f in selection if isinstance(f, str)] fields = fields[0] if len(fields) == 1 else fields selection_tuple = tuple(s for s in selection if not isinstance(s, str)) selection = cast( "Selection", selection_tuple[0] if len(selection_tuple) == 1 else selection_tuple ) return fields, selection def make_slice_selection(selection: Any) -> list[slice]: ls: list[slice] = [] for dim_selection in selection: if is_integer(dim_selection): ls.append(slice(int(dim_selection), int(dim_selection) + 1, 1)) elif isinstance(dim_selection, np.ndarray): if len(dim_selection) == 1: ls.append(slice(int(dim_selection[0]), int(dim_selection[0]) + 1, 1)) else: raise ArrayIndexError else: ls.append(dim_selection) return ls def decode_morton(z: int, chunk_shape: tuple[int, ...]) -> tuple[int, ...]: # Inspired by compressed morton code as implemented in Neuroglancer # https://github.com/google/neuroglancer/blob/master/src/neuroglancer/datasource/precomputed/volume.md#compressed-morton-code bits = tuple(math.ceil(math.log2(c)) for c in chunk_shape) max_coords_bits = max(bits) input_bit = 0 input_value = z out = [0] * len(chunk_shape) for coord_bit in range(max_coords_bits): for dim in range(len(chunk_shape)): if coord_bit < bits[dim]: bit = (input_value >> input_bit) & 1 out[dim] |= bit << coord_bit input_bit += 1 return tuple(out) def morton_order_iter(chunk_shape: tuple[int, ...]) -> Iterator[tuple[int, ...]]: i = 0 order: list[tuple[int, ...]] = [] while len(order) < 
product(chunk_shape): m = decode_morton(i, chunk_shape) if m not in order and all(x < y for x, y in zip(m, chunk_shape, strict=False)): order.append(m) i += 1 for j in range(product(chunk_shape)): yield order[j] def c_order_iter(chunks_per_shard: tuple[int, ...]) -> Iterator[tuple[int, ...]]: return itertools.product(*(range(x) for x in chunks_per_shard)) def get_indexer( selection: SelectionWithFields, shape: tuple[int, ...], chunk_grid: ChunkGrid ) -> Indexer: _, pure_selection = pop_fields(selection) if is_pure_fancy_indexing(pure_selection, len(shape)): new_selection = ensure_tuple(selection) new_selection = replace_lists(new_selection) if is_coordinate_selection(new_selection, shape): return CoordinateIndexer(cast("CoordinateSelection", selection), shape, chunk_grid) elif is_mask_selection(new_selection, shape): return MaskIndexer(cast("MaskSelection", selection), shape, chunk_grid) else: msg = ( "unsupported selection type for vectorized indexing; only " "coordinate selection (tuple of integer arrays) and mask selection " f"(single Boolean array) are supported; got {new_selection!r}" ) raise VindexInvalidSelectionError(msg) elif is_pure_orthogonal_indexing(pure_selection, len(shape)): return OrthogonalIndexer(cast("OrthogonalSelection", selection), shape, chunk_grid) else: return BasicIndexer(cast("BasicSelection", selection), shape, chunk_grid) zarr-python-3.1.5/src/zarr/core/metadata/000077500000000000000000000000001511007055700202735ustar00rootroot00000000000000zarr-python-3.1.5/src/zarr/core/metadata/__init__.py000066400000000000000000000010331511007055700224010ustar00rootroot00000000000000from typing import TypeAlias, TypeVar from .v2 import ArrayV2Metadata, ArrayV2MetadataDict from .v3 import ArrayMetadataJSON_V3, ArrayV3Metadata ArrayMetadata: TypeAlias = ArrayV2Metadata | ArrayV3Metadata ArrayMetadataDict: TypeAlias = ArrayV2MetadataDict | ArrayMetadataJSON_V3 T_ArrayMetadata = TypeVar("T_ArrayMetadata", ArrayV2Metadata, ArrayV3Metadata, covariant=True) __all__ = [ "ArrayMetadata", "ArrayMetadataDict", "ArrayMetadataJSON_V3", "ArrayV2Metadata", "ArrayV2MetadataDict", "ArrayV3Metadata", ] zarr-python-3.1.5/src/zarr/core/metadata/common.py000066400000000000000000000003771511007055700221440ustar00rootroot00000000000000from __future__ import annotations from typing import TYPE_CHECKING if TYPE_CHECKING: from zarr.core.common import JSON def parse_attributes(data: dict[str, JSON] | None) -> dict[str, JSON]: if data is None: return {} return data zarr-python-3.1.5/src/zarr/core/metadata/io.py000066400000000000000000000061721511007055700212620ustar00rootroot00000000000000from __future__ import annotations import asyncio from typing import TYPE_CHECKING from zarr.abc.store import set_or_delete from zarr.core.buffer.core import default_buffer_prototype from zarr.errors import ContainsArrayError from zarr.storage._common import StorePath, ensure_no_existing_node if TYPE_CHECKING: from zarr.core.common import ZarrFormat from zarr.core.group import GroupMetadata from zarr.core.metadata import ArrayMetadata def _build_parents(store_path: StorePath, zarr_format: ZarrFormat) -> dict[str, GroupMetadata]: from zarr.core.group import GroupMetadata path = store_path.path if not path: return {} required_parts = path.split("/")[:-1] # the root group parents = {"": GroupMetadata(zarr_format=zarr_format)} for i, part in enumerate(required_parts): parent_path = "/".join(required_parts[:i] + [part]) parents[parent_path] = GroupMetadata(zarr_format=zarr_format) return parents async def save_metadata( 
store_path: StorePath, metadata: ArrayMetadata | GroupMetadata, ensure_parents: bool = False ) -> None: """Asynchronously save the array or group metadata. Parameters ---------- store_path : StorePath Location to save metadata. metadata : ArrayMetadata | GroupMetadata Metadata to save. ensure_parents : bool, optional Create any missing parent groups, and check no existing parents are arrays. Raises ------ ValueError """ to_save = metadata.to_buffer_dict(default_buffer_prototype()) set_awaitables = [set_or_delete(store_path / key, value) for key, value in to_save.items()] if ensure_parents: # To enable zarr.create(store, path="a/b/c"), we need to create all the intermediate groups. parents = _build_parents(store_path, metadata.zarr_format) ensure_array_awaitables = [] for parent_path, parent_metadata in parents.items(): parent_store_path = StorePath(store_path.store, parent_path) # Error if an array already exists at any parent location. Only groups can have child nodes. ensure_array_awaitables.append( ensure_no_existing_node( parent_store_path, parent_metadata.zarr_format, node_type="array" ) ) set_awaitables.extend( [ (parent_store_path / key).set_if_not_exists(value) for key, value in parent_metadata.to_buffer_dict( default_buffer_prototype() ).items() ] ) # Checks for parent arrays must happen first, before any metadata is modified try: await asyncio.gather(*ensure_array_awaitables) except ContainsArrayError as e: # clear awaitables to avoid RuntimeWarning: coroutine was never awaited for awaitable in set_awaitables: awaitable.close() raise ValueError( f"A parent of {store_path} is an array - only groups may have child nodes." ) from e await asyncio.gather(*set_awaitables) zarr-python-3.1.5/src/zarr/core/metadata/v2.py000066400000000000000000000304151511007055700211770ustar00rootroot00000000000000from __future__ import annotations import warnings from collections.abc import Iterable, Sequence from functools import cached_property from typing import TYPE_CHECKING, Any, TypeAlias, TypedDict, cast from zarr.abc.metadata import Metadata from zarr.abc.numcodec import Numcodec, _is_numcodec from zarr.core.chunk_grids import RegularChunkGrid from zarr.core.dtype import get_data_type_from_json from zarr.core.dtype.common import OBJECT_CODEC_IDS, DTypeSpec_V2 from zarr.errors import ZarrUserWarning from zarr.registry import get_numcodec if TYPE_CHECKING: from typing import Literal, Self import numpy.typing as npt from zarr.core.buffer import Buffer, BufferPrototype from zarr.core.dtype.wrapper import ( TBaseDType, TBaseScalar, TDType_co, TScalar_co, ZDType, ) import json from dataclasses import dataclass, field, fields, replace import numpy as np from zarr.core.array_spec import ArrayConfig, ArraySpec from zarr.core.chunk_key_encodings import parse_separator from zarr.core.common import ( JSON, ZARRAY_JSON, ZATTRS_JSON, MemoryOrder, parse_shapelike, ) from zarr.core.config import config, parse_indexing_order from zarr.core.metadata.common import parse_attributes class ArrayV2MetadataDict(TypedDict): """ A typed dictionary model for Zarr format 2 metadata. """ zarr_format: Literal[2] attributes: dict[str, JSON] # Union of acceptable types for v2 compressors CompressorLikev2: TypeAlias = dict[str, JSON] | Numcodec | None @dataclass(frozen=True, kw_only=True) class ArrayV2Metadata(Metadata): shape: tuple[int, ...] chunks: tuple[int, ...] dtype: ZDType[TBaseDType, TBaseScalar] fill_value: int | float | str | bytes | None = None order: MemoryOrder = "C" filters: tuple[Numcodec, ...] 
| None = None dimension_separator: Literal[".", "/"] = "." compressor: Numcodec | None attributes: dict[str, JSON] = field(default_factory=dict) zarr_format: Literal[2] = field(init=False, default=2) def __init__( self, *, shape: tuple[int, ...], dtype: ZDType[TDType_co, TScalar_co], chunks: tuple[int, ...], fill_value: Any, order: MemoryOrder, dimension_separator: Literal[".", "/"] = ".", compressor: CompressorLikev2 = None, filters: Iterable[Numcodec | dict[str, JSON]] | None = None, attributes: dict[str, JSON] | None = None, ) -> None: """ Metadata for a Zarr format 2 array. """ shape_parsed = parse_shapelike(shape) chunks_parsed = parse_shapelike(chunks) compressor_parsed = parse_compressor(compressor) order_parsed = parse_indexing_order(order) dimension_separator_parsed = parse_separator(dimension_separator) filters_parsed = parse_filters(filters) fill_value_parsed: TBaseScalar | None if fill_value is not None: fill_value_parsed = dtype.cast_scalar(fill_value) else: fill_value_parsed = fill_value attributes_parsed = parse_attributes(attributes) object.__setattr__(self, "shape", shape_parsed) object.__setattr__(self, "dtype", dtype) object.__setattr__(self, "chunks", chunks_parsed) object.__setattr__(self, "compressor", compressor_parsed) object.__setattr__(self, "order", order_parsed) object.__setattr__(self, "dimension_separator", dimension_separator_parsed) object.__setattr__(self, "filters", filters_parsed) object.__setattr__(self, "fill_value", fill_value_parsed) object.__setattr__(self, "attributes", attributes_parsed) # ensure that the metadata document is consistent _ = parse_metadata(self) @property def ndim(self) -> int: return len(self.shape) @cached_property def chunk_grid(self) -> RegularChunkGrid: return RegularChunkGrid(chunk_shape=self.chunks) @property def shards(self) -> tuple[int, ...] | None: return None def to_buffer_dict(self, prototype: BufferPrototype) -> dict[str, Buffer]: zarray_dict = self.to_dict() zattrs_dict = zarray_dict.pop("attributes", {}) json_indent = config.get("json_indent") return { ZARRAY_JSON: prototype.buffer.from_bytes( json.dumps(zarray_dict, indent=json_indent, allow_nan=True).encode() ), ZATTRS_JSON: prototype.buffer.from_bytes( json.dumps(zattrs_dict, indent=json_indent, allow_nan=True).encode() ), } @classmethod def from_dict(cls, data: dict[str, Any]) -> ArrayV2Metadata: # Make a copy to protect the original from modification. _data = data.copy() # Check that the zarr_format attribute is correct. _ = parse_zarr_format(_data.pop("zarr_format")) # To resolve a numpy object dtype array, we need to search for an object codec, # which could be in filters or as a compressor. # we will reference a hard-coded collection of object codec ids for this search. 
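        # Illustrative example (hedged): a v2 document with "dtype": "|O" and
        # "filters": [{"id": "vlen-utf8"}] resolves to a variable-length string data
        # type, assuming "vlen-utf8" is among the hard-coded object codec ids.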
_filters, _compressor = (data.get("filters"), data.get("compressor")) if _filters is not None: _filters = cast("tuple[dict[str, JSON], ...]", _filters) object_codec_id = get_object_codec_id(tuple(_filters) + (_compressor,)) else: object_codec_id = get_object_codec_id((_compressor,)) # we add a layer of indirection here around the dtype attribute of the array metadata # because we also need to know the object codec id, if any, to resolve the data type dtype_spec: DTypeSpec_V2 = { "name": data["dtype"], "object_codec_id": object_codec_id, } dtype = get_data_type_from_json(dtype_spec, zarr_format=2) _data["dtype"] = dtype fill_value_encoded = _data.get("fill_value") if fill_value_encoded is not None: fill_value = dtype.from_json_scalar(fill_value_encoded, zarr_format=2) _data["fill_value"] = fill_value # zarr v2 allowed arbitrary keys here. # We don't want the ArrayV2Metadata constructor to fail just because someone put an # extra key in the metadata. expected = {x.name for x in fields(cls)} expected |= {"dtype", "chunks"} # check if `filters` is an empty sequence; if so use None instead and raise a warning filters = _data.get("filters") if ( isinstance(filters, Sequence) and not isinstance(filters, (str, bytes)) and len(filters) == 0 ): msg = ( "Found an empty list of filters in the array metadata document. " "This is contrary to the Zarr V2 specification, and will cause an error in the future. " "Use None (or Null in a JSON document) instead of an empty list of filters." ) warnings.warn(msg, ZarrUserWarning, stacklevel=1) _data["filters"] = None _data = {k: v for k, v in _data.items() if k in expected} return cls(**_data) def to_dict(self) -> dict[str, JSON]: zarray_dict = super().to_dict() if _is_numcodec(zarray_dict["compressor"]): codec_config = zarray_dict["compressor"].get_config() # Hotfix for https://github.com/zarr-developers/zarr-python/issues/2647 if codec_config["id"] == "zstd" and not codec_config.get("checksum", False): codec_config.pop("checksum") zarray_dict["compressor"] = codec_config if zarray_dict["filters"] is not None: raw_filters = zarray_dict["filters"] # TODO: remove this when we can stratically type the output JSON data structure # entirely if not isinstance(raw_filters, list | tuple): raise TypeError("Invalid type for filters. 
Expected a list or tuple.") new_filters = [] for f in raw_filters: if _is_numcodec(f): new_filters.append(f.get_config()) else: new_filters.append(f) zarray_dict["filters"] = new_filters # serialize the fill value after dtype-specific JSON encoding if self.fill_value is not None: fill_value = self.dtype.to_json_scalar(self.fill_value, zarr_format=2) zarray_dict["fill_value"] = fill_value # pull the "name" attribute out of the dtype spec returned by self.dtype.to_json zarray_dict["dtype"] = self.dtype.to_json(zarr_format=2)["name"] return zarray_dict def get_chunk_spec( self, _chunk_coords: tuple[int, ...], array_config: ArrayConfig, prototype: BufferPrototype ) -> ArraySpec: return ArraySpec( shape=self.chunks, dtype=self.dtype, fill_value=self.fill_value, config=array_config, prototype=prototype, ) def encode_chunk_key(self, chunk_coords: tuple[int, ...]) -> str: chunk_identifier = self.dimension_separator.join(map(str, chunk_coords)) return "0" if chunk_identifier == "" else chunk_identifier def update_shape(self, shape: tuple[int, ...]) -> Self: return replace(self, shape=shape) def update_attributes(self, attributes: dict[str, JSON]) -> Self: return replace(self, attributes=attributes) def parse_dtype(data: npt.DTypeLike) -> np.dtype[Any]: if isinstance(data, list): # this is a valid _VoidDTypeLike check data = [tuple(d) for d in data] return np.dtype(data) def parse_zarr_format(data: object) -> Literal[2]: if data == 2: return 2 raise ValueError(f"Invalid value. Expected 2. Got {data}.") def parse_filters(data: object) -> tuple[Numcodec, ...] | None: """ Parse a potential tuple of filters """ out: list[Numcodec] = [] if data is None: return data if isinstance(data, Iterable): for idx, val in enumerate(data): if _is_numcodec(val): out.append(val) elif isinstance(val, dict): out.append(get_numcodec(val)) # type: ignore[arg-type] else: msg = f"Invalid filter at index {idx}. Expected a numcodecs.abc.Codec or a dict representation of numcodecs.abc.Codec. Got {type(val)} instead." raise TypeError(msg) if len(out) == 0: # Per the v2 spec, an empty tuple is not allowed -- use None to express "no filters" return None else: return tuple(out) # take a single codec instance and wrap it in a tuple if _is_numcodec(data): return (data,) msg = f"Invalid filters. Expected None, an iterable of numcodecs.abc.Codec or dict representations of numcodecs.abc.Codec. Got {type(data)} instead." raise TypeError(msg) def parse_compressor(data: object) -> Numcodec | None: """ Parse a potential compressor. """ if data is None or _is_numcodec(data): return data if isinstance(data, dict): return get_numcodec(data) # type: ignore[arg-type] msg = f"Invalid compressor. Expected None, a numcodecs.abc.Codec, or a dict representation of a numcodecs.abc.Codec. Got {type(data)} instead." raise ValueError(msg) def parse_metadata(data: ArrayV2Metadata) -> ArrayV2Metadata: if (l_chunks := len(data.chunks)) != (l_shape := len(data.shape)): msg = ( f"The `shape` and `chunks` attributes must have the same length. " f"`chunks` has length {l_chunks}, but `shape` has length {l_shape}." ) raise ValueError(msg) return data def get_object_codec_id(maybe_object_codecs: Sequence[JSON]) -> str | None: """ Inspect a sequence of codecs / filters for an "object codec", i.e. a codec that can serialize object arrays to contiguous bytes. Zarr python maintains a hard-coded set of object codec ids. If any element from the input has an id that matches one of the hard-coded object codec ids, that id is returned immediately. 
""" object_codec_id = None for maybe_object_codec in maybe_object_codecs: if ( isinstance(maybe_object_codec, dict) and maybe_object_codec.get("id") in OBJECT_CODEC_IDS ): return cast("str", maybe_object_codec["id"]) return object_codec_id zarr-python-3.1.5/src/zarr/core/metadata/v3.py000066400000000000000000000415411511007055700212020ustar00rootroot00000000000000from __future__ import annotations from collections.abc import Mapping from typing import TYPE_CHECKING, NotRequired, TypedDict, TypeGuard, cast from zarr.abc.metadata import Metadata from zarr.core.buffer.core import default_buffer_prototype from zarr.core.dtype import VariableLengthUTF8, ZDType, get_data_type_from_json from zarr.core.dtype.common import check_dtype_spec_v3 if TYPE_CHECKING: from typing import Self from zarr.core.buffer import Buffer, BufferPrototype from zarr.core.chunk_grids import ChunkGrid from zarr.core.common import JSON from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar import json from collections.abc import Iterable from dataclasses import dataclass, field, replace from typing import Any, Literal from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec, Codec from zarr.core.array_spec import ArrayConfig, ArraySpec from zarr.core.chunk_grids import ChunkGrid, RegularChunkGrid from zarr.core.chunk_key_encodings import ( ChunkKeyEncoding, ChunkKeyEncodingLike, parse_chunk_key_encoding, ) from zarr.core.common import ( JSON, ZARR_JSON, DimensionNames, NamedConfig, parse_named_configuration, parse_shapelike, ) from zarr.core.config import config from zarr.core.metadata.common import parse_attributes from zarr.errors import MetadataValidationError, NodeTypeValidationError, UnknownCodecError from zarr.registry import get_codec_class def parse_zarr_format(data: object) -> Literal[3]: if data == 3: return 3 msg = f"Invalid value for 'zarr_format'. Expected '3'. Got '{data}'." raise MetadataValidationError(msg) def parse_node_type_array(data: object) -> Literal["array"]: if data == "array": return "array" msg = f"Invalid value for 'node_type'. Expected 'array'. Got '{data}'." raise NodeTypeValidationError(msg) def parse_codecs(data: object) -> tuple[Codec, ...]: out: tuple[Codec, ...] 
= () if not isinstance(data, Iterable): raise TypeError(f"Expected iterable, got {type(data)}") for c in data: if isinstance( c, ArrayArrayCodec | ArrayBytesCodec | BytesBytesCodec ): # Can't use Codec here because of mypy limitation out += (c,) else: name_parsed, _ = parse_named_configuration(c, require_configuration=False) try: out += (get_codec_class(name_parsed).from_dict(c),) except KeyError as e: raise UnknownCodecError(f"Unknown codec: {e.args[0]!r}") from e return out def validate_array_bytes_codec(codecs: tuple[Codec, ...]) -> ArrayBytesCodec: # ensure that we have at least one ArrayBytesCodec abcs: list[ArrayBytesCodec] = [codec for codec in codecs if isinstance(codec, ArrayBytesCodec)] if len(abcs) == 0: raise ValueError("At least one ArrayBytesCodec is required.") elif len(abcs) > 1: raise ValueError("Only one ArrayBytesCodec is allowed.") return abcs[0] def validate_codecs(codecs: tuple[Codec, ...], dtype: ZDType[TBaseDType, TBaseScalar]) -> None: """Check that the codecs are valid for the given dtype""" from zarr.codecs.sharding import ShardingCodec abc = validate_array_bytes_codec(codecs) # Recursively resolve array-bytes codecs within sharding codecs while isinstance(abc, ShardingCodec): abc = validate_array_bytes_codec(abc.codecs) # we need to have special codecs if we are decoding vlen strings or bytestrings # TODO: use codec ID instead of class name codec_class_name = abc.__class__.__name__ # TODO: Fix typing here if isinstance(dtype, VariableLengthUTF8) and not codec_class_name == "VLenUTF8Codec": # type: ignore[unreachable] raise ValueError( f"For string dtype, ArrayBytesCodec must be `VLenUTF8Codec`, got `{codec_class_name}`." ) def parse_dimension_names(data: object) -> tuple[str | None, ...] | None: if data is None: return data elif isinstance(data, Iterable) and all(isinstance(x, type(None) | str) for x in data): return tuple(data) else: msg = f"Expected either None or a iterable of str, got {type(data)}" raise TypeError(msg) def parse_storage_transformers(data: object) -> tuple[dict[str, JSON], ...]: """ Parse storage_transformers. Zarr python cannot use storage transformers at this time, so this function doesn't attempt to validate them. """ if data is None: return () if isinstance(data, Iterable): if len(tuple(data)) >= 1: return data # type: ignore[return-value] else: return () raise TypeError( f"Invalid storage_transformers. Expected an iterable of dicts. Got {type(data)} instead." ) class AllowedExtraField(TypedDict): """ This class models allowed extra fields in array metadata. They are ignored by Zarr Python. """ must_understand: Literal[False] def check_allowed_extra_field(data: object) -> TypeGuard[AllowedExtraField]: """ Check if the extra field is allowed according to the Zarr v3 spec. The object must be a mapping with a "must_understand" key set to `False`. """ return isinstance(data, Mapping) and data.get("must_understand") is False def parse_extra_fields( data: Mapping[str, AllowedExtraField] | None, ) -> dict[str, AllowedExtraField]: if data is None: return {} else: conflict_keys = ARRAY_METADATA_KEYS & set(data.keys()) if len(conflict_keys) > 0: msg = ( "Invalid extra fields. " "The following keys: " f"{sorted(conflict_keys)} " "are invalid because they collide with keys reserved for use by the " "array metadata document." ) raise ValueError(msg) return dict(data) class ArrayMetadataJSON_V3(TypedDict): """ A typed dictionary model for zarr v3 metadata. 
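
    A minimal sketch of a conforming document (field values are illustrative and
    abridged, not exhaustive):

    ```python
    {
        "zarr_format": 3,
        "node_type": "array",
        "data_type": "int32",
        "shape": (4, 4),
        "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (2, 2)}},
        "chunk_key_encoding": {"name": "default"},
        "fill_value": 0,
        "codecs": ({"name": "bytes", "configuration": {"endian": "little"}},),
    }
    ```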
""" zarr_format: Literal[3] node_type: Literal["array"] data_type: str | NamedConfig[str, Mapping[str, object]] shape: tuple[int, ...] chunk_grid: NamedConfig[str, Mapping[str, object]] chunk_key_encoding: NamedConfig[str, Mapping[str, object]] fill_value: object codecs: tuple[str | NamedConfig[str, Mapping[str, object]], ...] attributes: NotRequired[Mapping[str, JSON]] storage_transformers: NotRequired[tuple[NamedConfig[str, Mapping[str, object]], ...]] dimension_names: NotRequired[tuple[str | None]] ARRAY_METADATA_KEYS = set(ArrayMetadataJSON_V3.__annotations__.keys()) @dataclass(frozen=True, kw_only=True) class ArrayV3Metadata(Metadata): shape: tuple[int, ...] data_type: ZDType[TBaseDType, TBaseScalar] chunk_grid: ChunkGrid chunk_key_encoding: ChunkKeyEncoding fill_value: Any codecs: tuple[Codec, ...] attributes: dict[str, Any] = field(default_factory=dict) dimension_names: tuple[str | None, ...] | None = None zarr_format: Literal[3] = field(default=3, init=False) node_type: Literal["array"] = field(default="array", init=False) storage_transformers: tuple[dict[str, JSON], ...] extra_fields: dict[str, AllowedExtraField] def __init__( self, *, shape: Iterable[int], data_type: ZDType[TBaseDType, TBaseScalar], chunk_grid: dict[str, JSON] | ChunkGrid | NamedConfig[str, Any], chunk_key_encoding: ChunkKeyEncodingLike, fill_value: object, codecs: Iterable[Codec | dict[str, JSON] | NamedConfig[str, Any] | str], attributes: dict[str, JSON] | None, dimension_names: DimensionNames, storage_transformers: Iterable[dict[str, JSON]] | None = None, extra_fields: Mapping[str, AllowedExtraField] | None = None, ) -> None: """ Because the class is a frozen dataclass, we set attributes using object.__setattr__ """ shape_parsed = parse_shapelike(shape) chunk_grid_parsed = ChunkGrid.from_dict(chunk_grid) chunk_key_encoding_parsed = parse_chunk_key_encoding(chunk_key_encoding) dimension_names_parsed = parse_dimension_names(dimension_names) # Note: relying on a type method is numpy-specific fill_value_parsed = data_type.cast_scalar(fill_value) attributes_parsed = parse_attributes(attributes) codecs_parsed_partial = parse_codecs(codecs) storage_transformers_parsed = parse_storage_transformers(storage_transformers) extra_fields_parsed = parse_extra_fields(extra_fields) array_spec = ArraySpec( shape=shape_parsed, dtype=data_type, fill_value=fill_value_parsed, config=ArrayConfig.from_dict({}), # TODO: config is not needed here. prototype=default_buffer_prototype(), # TODO: prototype is not needed here. ) codecs_parsed = tuple(c.evolve_from_array_spec(array_spec) for c in codecs_parsed_partial) validate_codecs(codecs_parsed_partial, data_type) object.__setattr__(self, "shape", shape_parsed) object.__setattr__(self, "data_type", data_type) object.__setattr__(self, "chunk_grid", chunk_grid_parsed) object.__setattr__(self, "chunk_key_encoding", chunk_key_encoding_parsed) object.__setattr__(self, "codecs", codecs_parsed) object.__setattr__(self, "dimension_names", dimension_names_parsed) object.__setattr__(self, "fill_value", fill_value_parsed) object.__setattr__(self, "attributes", attributes_parsed) object.__setattr__(self, "storage_transformers", storage_transformers_parsed) object.__setattr__(self, "extra_fields", extra_fields_parsed) self._validate_metadata() def _validate_metadata(self) -> None: if isinstance(self.chunk_grid, RegularChunkGrid) and len(self.shape) != len( self.chunk_grid.chunk_shape ): raise ValueError( "`chunk_shape` and `shape` need to have the same number of dimensions." 
) if self.dimension_names is not None and len(self.shape) != len(self.dimension_names): raise ValueError( "`dimension_names` and `shape` need to have the same number of dimensions." ) if self.fill_value is None: raise ValueError("`fill_value` is required.") for codec in self.codecs: codec.validate(shape=self.shape, dtype=self.data_type, chunk_grid=self.chunk_grid) @property def ndim(self) -> int: return len(self.shape) @property def dtype(self) -> ZDType[TBaseDType, TBaseScalar]: return self.data_type @property def chunks(self) -> tuple[int, ...]: if isinstance(self.chunk_grid, RegularChunkGrid): from zarr.codecs.sharding import ShardingCodec if len(self.codecs) == 1 and isinstance(self.codecs[0], ShardingCodec): sharding_codec = self.codecs[0] assert isinstance(sharding_codec, ShardingCodec) # for mypy return sharding_codec.chunk_shape else: return self.chunk_grid.chunk_shape msg = ( f"The `chunks` attribute is only defined for arrays using `RegularChunkGrid`." f"This array has a {self.chunk_grid} instead." ) raise NotImplementedError(msg) @property def shards(self) -> tuple[int, ...] | None: if isinstance(self.chunk_grid, RegularChunkGrid): from zarr.codecs.sharding import ShardingCodec if len(self.codecs) == 1 and isinstance(self.codecs[0], ShardingCodec): return self.chunk_grid.chunk_shape else: return None msg = ( f"The `shards` attribute is only defined for arrays using `RegularChunkGrid`." f"This array has a {self.chunk_grid} instead." ) raise NotImplementedError(msg) @property def inner_codecs(self) -> tuple[Codec, ...]: if isinstance(self.chunk_grid, RegularChunkGrid): from zarr.codecs.sharding import ShardingCodec if len(self.codecs) == 1 and isinstance(self.codecs[0], ShardingCodec): return self.codecs[0].codecs return self.codecs def get_chunk_spec( self, _chunk_coords: tuple[int, ...], array_config: ArrayConfig, prototype: BufferPrototype ) -> ArraySpec: assert isinstance(self.chunk_grid, RegularChunkGrid), ( "Currently, only regular chunk grid is supported" ) return ArraySpec( shape=self.chunk_grid.chunk_shape, dtype=self.dtype, fill_value=self.fill_value, config=array_config, prototype=prototype, ) def encode_chunk_key(self, chunk_coords: tuple[int, ...]) -> str: return self.chunk_key_encoding.encode_chunk_key(chunk_coords) def to_buffer_dict(self, prototype: BufferPrototype) -> dict[str, Buffer]: json_indent = config.get("json_indent") d = self.to_dict() return { ZARR_JSON: prototype.buffer.from_bytes( json.dumps(d, allow_nan=True, indent=json_indent).encode() ) } @classmethod def from_dict(cls, data: dict[str, JSON]) -> Self: # make a copy because we are modifying the dict _data = data.copy() # check that the zarr_format attribute is correct _ = parse_zarr_format(_data.pop("zarr_format")) # check that the node_type attribute is correct _ = parse_node_type_array(_data.pop("node_type")) data_type_json = _data.pop("data_type") if not check_dtype_spec_v3(data_type_json): raise ValueError(f"Invalid data_type: {data_type_json!r}") data_type = get_data_type_from_json(data_type_json, zarr_format=3) # check that the fill value is consistent with the data type try: fill = _data.pop("fill_value") fill_value_parsed = data_type.from_json_scalar(fill, zarr_format=3) except ValueError as e: raise TypeError(f"Invalid fill_value: {fill!r}") from e # check if there are extra keys extra_keys = set(_data.keys()) - ARRAY_METADATA_KEYS allowed_extra_fields: dict[str, AllowedExtraField] = {} invalid_extra_fields = {} for key in extra_keys: val = _data[key] if check_allowed_extra_field(val): 
allowed_extra_fields[key] = val else: invalid_extra_fields[key] = val if len(invalid_extra_fields) > 0: msg = ( "Got a Zarr V3 metadata document with the following disallowed extra fields:" f"{sorted(invalid_extra_fields.keys())}." 'Extra fields are not allowed unless they are a dict with a "must_understand" key' "which is assigned the value `False`." ) raise MetadataValidationError(msg) # TODO: replace this with a real type check! _data_typed = cast(ArrayMetadataJSON_V3, _data) return cls( shape=_data_typed["shape"], chunk_grid=_data_typed["chunk_grid"], chunk_key_encoding=_data_typed["chunk_key_encoding"], codecs=_data_typed["codecs"], attributes=_data_typed.get("attributes", {}), # type: ignore[arg-type] dimension_names=_data_typed.get("dimension_names", None), fill_value=fill_value_parsed, data_type=data_type, extra_fields=allowed_extra_fields, storage_transformers=_data_typed.get("storage_transformers", ()), # type: ignore[arg-type] ) def to_dict(self) -> dict[str, JSON]: out_dict = super().to_dict() extra_fields = out_dict.pop("extra_fields") out_dict = out_dict | extra_fields # type: ignore[operator] out_dict["fill_value"] = self.data_type.to_json_scalar( self.fill_value, zarr_format=self.zarr_format ) if not isinstance(out_dict, dict): raise TypeError(f"Expected dict. Got {type(out_dict)}.") # if `dimension_names` is `None`, we do not include it in # the metadata document if out_dict["dimension_names"] is None: out_dict.pop("dimension_names") # TODO: replace the `to_dict` / `from_dict` on the `Metadata`` class with # to_json, from_json, and have ZDType inherit from `Metadata` # until then, we have this hack here, which relies on the fact that to_dict will pass through # any non-`Metadata` fields as-is. dtype_meta = out_dict["data_type"] if isinstance(dtype_meta, ZDType): out_dict["data_type"] = dtype_meta.to_json(zarr_format=3) # type: ignore[unreachable] return out_dict def update_shape(self, shape: tuple[int, ...]) -> Self: return replace(self, shape=shape) def update_attributes(self, attributes: dict[str, JSON]) -> Self: return replace(self, attributes=attributes) zarr-python-3.1.5/src/zarr/core/sync.py000066400000000000000000000162271511007055700200510ustar00rootroot00000000000000from __future__ import annotations import asyncio import atexit import logging import os import threading from concurrent.futures import ThreadPoolExecutor, wait from typing import TYPE_CHECKING, TypeVar from typing_extensions import ParamSpec from zarr.core.config import config if TYPE_CHECKING: from collections.abc import AsyncIterator, Awaitable, Callable, Coroutine from typing import Any logger = logging.getLogger(__name__) P = ParamSpec("P") T = TypeVar("T") # From https://github.com/fsspec/filesystem_spec/blob/master/fsspec/asyn.py iothread: list[threading.Thread | None] = [None] # dedicated IO thread loop: list[asyncio.AbstractEventLoop | None] = [ None ] # global event loop for any non-async instance _lock: threading.Lock | None = None # global lock placeholder _executor: ThreadPoolExecutor | None = None # global executor placeholder class SyncError(Exception): pass def _get_lock() -> threading.Lock: """Allocate or return a threading lock. The lock is allocated on first use to allow setting one lock per forked process. """ global _lock if not _lock: _lock = threading.Lock() return _lock def _get_executor() -> ThreadPoolExecutor: """Return Zarr Thread Pool Executor The executor is allocated on first use. 
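
    The worker count comes from the ``threading.max_workers`` configuration key; as a
    hedged sketch, ``zarr.config.set({"threading.max_workers": 4})`` would cap the
    pool at four threads for executors created afterwards.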
""" global _executor if not _executor: max_workers = config.get("threading.max_workers", None) logger.debug("Creating Zarr ThreadPoolExecutor with max_workers=%s", max_workers) _executor = ThreadPoolExecutor(max_workers=max_workers, thread_name_prefix="zarr_pool") _get_loop().set_default_executor(_executor) return _executor def cleanup_resources() -> None: global _executor if _executor: _executor.shutdown(wait=True, cancel_futures=True) _executor = None if loop[0] is not None: with _get_lock(): # Stop the event loop safely loop[0].call_soon_threadsafe(loop[0].stop) # Stop loop from another thread if iothread[0] is not None: iothread[0].join(timeout=0.2) # Add a timeout to avoid hanging if iothread[0].is_alive(): logger.warning( "Thread did not finish cleanly; forcefully closing the event loop." ) # Forcefully close the event loop to release resources loop[0].close() # dereference the loop and iothread loop[0] = None iothread[0] = None atexit.register(cleanup_resources) def reset_resources_after_fork() -> None: """ Ensure that global resources are reset after a fork. Without this function, forked processes will retain invalid references to the parent process's resources. """ global loop, iothread, _executor # These lines are excluded from coverage because this function only runs in a child process, # which is not observed by the test coverage instrumentation. Despite the apparent lack of # test coverage, this function should be adequately tested by any test that uses Zarr IO with # multiprocessing. loop[0] = None # pragma: no cover iothread[0] = None # pragma: no cover _executor = None # pragma: no cover # this is only available on certain operating systems if hasattr(os, "register_at_fork"): os.register_at_fork(after_in_child=reset_resources_after_fork) async def _runner(coro: Coroutine[Any, Any, T]) -> T | BaseException: """ Await a coroutine and return the result of running it. If awaiting the coroutine raises an exception, the exception will be returned. """ try: return await coro except Exception as ex: return ex def sync( coro: Coroutine[Any, Any, T], loop: asyncio.AbstractEventLoop | None = None, timeout: float | None = None, ) -> T: """ Make loop run coroutine until it returns. Runs in other thread """ if loop is None: # NB: if the loop is not running *yet*, it is OK to submit work # and we will wait for it loop = _get_loop() if _executor is None and config.get("threading.max_workers", None) is not None: # trigger executor creation and attach to loop _ = _get_executor() if not isinstance(loop, asyncio.AbstractEventLoop): raise TypeError(f"loop cannot be of type {type(loop)}") if loop.is_closed(): raise RuntimeError("Loop is not running") try: loop0 = asyncio.events.get_running_loop() if loop0 is loop: raise SyncError("Calling sync() from within a running loop") except RuntimeError: pass future = asyncio.run_coroutine_threadsafe(_runner(coro), loop) finished, unfinished = wait([future], return_when=asyncio.ALL_COMPLETED, timeout=timeout) if len(unfinished) > 0: raise TimeoutError(f"Coroutine {coro} failed to finish within {timeout} s") assert len(finished) == 1 return_result = next(iter(finished)).result() if isinstance(return_result, BaseException): raise return_result else: return return_result def _get_loop() -> asyncio.AbstractEventLoop: """Create or return the default fsspec IO loop The loop will be running on a separate thread. 
""" if loop[0] is None: with _get_lock(): # repeat the check just in case the loop got filled between the # previous two calls from another thread if loop[0] is None: logger.debug("Creating Zarr event loop") new_loop = asyncio.new_event_loop() loop[0] = new_loop iothread[0] = threading.Thread(target=new_loop.run_forever, name="zarr_io") assert iothread[0] is not None iothread[0].daemon = True iothread[0].start() assert loop[0] is not None return loop[0] async def _collect_aiterator(data: AsyncIterator[T]) -> tuple[T, ...]: """ Collect an entire async iterator into a tuple """ result = [x async for x in data] return tuple(result) def collect_aiterator(data: AsyncIterator[T]) -> tuple[T, ...]: """ Synchronously collect an entire async iterator into a tuple. """ return sync(_collect_aiterator(data)) class SyncMixin: def _sync(self, coroutine: Coroutine[Any, Any, T]) -> T: # TODO: refactor this to to take *args and **kwargs and pass those to the method # this should allow us to better type the sync wrapper return sync( coroutine, timeout=config.get("async.timeout"), ) def _sync_iter(self, async_iterator: AsyncIterator[T]) -> list[T]: async def iter_to_list() -> list[T]: return [item async for item in async_iterator] return self._sync(iter_to_list()) async def _with_semaphore( func: Callable[[], Awaitable[T]], semaphore: asyncio.Semaphore | None = None ) -> T: """ Await the result of invoking the no-argument-callable ``func`` within the context manager provided by a Semaphore, if one is provided. Otherwise, just await the result of invoking ``func``. """ if semaphore is None: return await func() async with semaphore: return await func() zarr-python-3.1.5/src/zarr/core/sync_group.py000066400000000000000000000136571511007055700212710ustar00rootroot00000000000000from __future__ import annotations from typing import TYPE_CHECKING from zarr.core.group import Group, GroupMetadata, _parse_async_node from zarr.core.group import create_hierarchy as create_hierarchy_async from zarr.core.group import create_nodes as create_nodes_async from zarr.core.group import create_rooted_hierarchy as create_rooted_hierarchy_async from zarr.core.group import get_node as get_node_async from zarr.core.sync import _collect_aiterator, sync if TYPE_CHECKING: from collections.abc import Iterator from zarr.abc.store import Store from zarr.core.common import ZarrFormat from zarr.core.metadata import ArrayV2Metadata, ArrayV3Metadata from zarr.types import AnyArray def create_nodes( *, store: Store, nodes: dict[str, GroupMetadata | ArrayV2Metadata | ArrayV3Metadata] ) -> Iterator[tuple[str, Group | AnyArray]]: """Create a collection of arrays and / or groups concurrently. Note: no attempt is made to validate that these arrays and / or groups collectively form a valid Zarr hierarchy. It is the responsibility of the caller of this function to ensure that the ``nodes`` parameter satisfies any correctness constraints. Parameters ---------- store : Store The storage backend to use. nodes : dict[str, GroupMetadata | ArrayV3Metadata | ArrayV2Metadata] A dictionary defining the hierarchy. The keys are the paths of the nodes in the hierarchy, and the values are the metadata of the nodes. The metadata must be either an instance of GroupMetadata, ArrayV3Metadata or ArrayV2Metadata. Yields ------ Group | Array The created nodes. 
""" coro = create_nodes_async(store=store, nodes=nodes) for key, value in sync(_collect_aiterator(coro)): yield key, _parse_async_node(value) def create_hierarchy( *, store: Store, nodes: dict[str, GroupMetadata | ArrayV2Metadata | ArrayV3Metadata], overwrite: bool = False, ) -> Iterator[tuple[str, Group | AnyArray]]: """ Create a complete zarr hierarchy from a collection of metadata objects. This function will parse its input to ensure that the hierarchy is complete. Any implicit groups will be inserted as needed. For example, an input like ```{'a/b': GroupMetadata}``` will be parsed to ```{'': GroupMetadata, 'a': GroupMetadata, 'b': Groupmetadata}``` After input parsing, this function then creates all the nodes in the hierarchy concurrently. Arrays and Groups are yielded in the order they are created. This order is not stable and should not be relied on. Parameters ---------- store : Store The storage backend to use. nodes : dict[str, GroupMetadata | ArrayV3Metadata | ArrayV2Metadata] A dictionary defining the hierarchy. The keys are the paths of the nodes in the hierarchy, relative to the root of the ``Store``. The root of the store can be specified with the empty string ``''``. The values are instances of ``GroupMetadata`` or ``ArrayMetadata``. Note that all values must have the same ``zarr_format`` -- it is an error to mix zarr versions in the same hierarchy. Leading "/" characters from keys will be removed. overwrite : bool Whether to overwrite existing nodes. Defaults to ``False``, in which case an error is raised instead of overwriting an existing array or group. This function will not erase an existing group unless that group is explicitly named in ``nodes``. If ``nodes`` defines implicit groups, e.g. ``{`'a/b/c': GroupMetadata}``, and a group already exists at path ``a``, then this function will leave the group at ``a`` as-is. Yields ------ tuple[str, Group | Array] This function yields (path, node) pairs, in the order the nodes were created. Examples -------- ```python from zarr import create_hierarchy from zarr.storage import MemoryStore from zarr.core.group import GroupMetadata store = MemoryStore() nodes = {'a': GroupMetadata(attributes={'name': 'leaf'})} nodes_created = dict(create_hierarchy(store=store, nodes=nodes)) print(nodes) # {'a': GroupMetadata(attributes={'name': 'leaf'}, zarr_format=3, consolidated_metadata=None, node_type='group')} ``` """ coro = create_hierarchy_async(store=store, nodes=nodes, overwrite=overwrite) for key, value in sync(_collect_aiterator(coro)): yield key, _parse_async_node(value) def create_rooted_hierarchy( *, store: Store, nodes: dict[str, GroupMetadata | ArrayV2Metadata | ArrayV3Metadata], overwrite: bool = False, ) -> Group | AnyArray: """ Create a Zarr hierarchy with a root, and return the root node, which could be a ``Group`` or ``Array`` instance. Parameters ---------- store : Store The storage backend to use. nodes : dict[str, GroupMetadata | ArrayV3Metadata | ArrayV2Metadata] A dictionary defining the hierarchy. The keys are the paths of the nodes in the hierarchy, and the values are the metadata of the nodes. The metadata must be either an instance of GroupMetadata, ArrayV3Metadata or ArrayV2Metadata. overwrite : bool Whether to overwrite existing nodes. Default is ``False``. 
Returns ------- Group | Array """ async_node = sync(create_rooted_hierarchy_async(store=store, nodes=nodes, overwrite=overwrite)) return _parse_async_node(async_node) def get_node(store: Store, path: str, zarr_format: ZarrFormat) -> AnyArray | Group: """ Get an Array or Group from a path in a Store. Parameters ---------- store : Store The store-like object to read from. path : str The path to the node to read. zarr_format : {2, 3} The zarr format of the node to read. Returns ------- Array | Group """ return _parse_async_node(sync(get_node_async(store=store, path=path, zarr_format=zarr_format))) zarr-python-3.1.5/src/zarr/creation.py000066400000000000000000000014421511007055700177420ustar00rootroot00000000000000""" Helpers for creating arrays. !!! warning "Deprecated" This sub-module is deprecated. All functions here are defined in the top level zarr namespace instead. """ import warnings from zarr.api.synchronous import ( array, create, empty, empty_like, full, full_like, ones, ones_like, open_array, open_like, zeros, zeros_like, ) from zarr.errors import ZarrDeprecationWarning __all__ = [ "array", "create", "empty", "empty_like", "full", "full_like", "ones", "ones_like", "open_array", "open_like", "zeros", "zeros_like", ] warnings.warn( "zarr.creation is deprecated. " "Import these functions from the top level zarr. namespace instead.", ZarrDeprecationWarning, stacklevel=2, ) zarr-python-3.1.5/src/zarr/dtype.py000066400000000000000000000035211511007055700172630ustar00rootroot00000000000000from zarr.core.dtype import ( Bool, Complex64, Complex128, DataTypeValidationError, DateTime64, DateTime64JSON_V2, DateTime64JSON_V3, FixedLengthUTF32, FixedLengthUTF32JSON_V2, FixedLengthUTF32JSON_V3, Float16, Float32, Float64, Int8, Int16, Int32, Int64, NullTerminatedBytes, NullterminatedBytesJSON_V2, NullTerminatedBytesJSON_V3, RawBytes, RawBytesJSON_V2, RawBytesJSON_V3, Structured, StructuredJSON_V2, StructuredJSON_V3, TimeDelta64, TimeDelta64JSON_V2, TimeDelta64JSON_V3, UInt8, UInt16, UInt32, UInt64, VariableLengthBytes, VariableLengthBytesJSON_V2, VariableLengthUTF8, VariableLengthUTF8JSON_V2, ZDType, data_type_registry, # Import for backwards compatibility, but not included in __all__ # so it doesn't show up in the docs parse_data_type, # noqa: F401 parse_dtype, ) __all__ = [ "Bool", "Complex64", "Complex128", "DataTypeValidationError", "DateTime64", "DateTime64JSON_V2", "DateTime64JSON_V3", "FixedLengthUTF32", "FixedLengthUTF32JSON_V2", "FixedLengthUTF32JSON_V3", "Float16", "Float32", "Float64", "Int8", "Int16", "Int32", "Int64", "NullTerminatedBytes", "NullTerminatedBytesJSON_V3", "NullterminatedBytesJSON_V2", "RawBytes", "RawBytesJSON_V2", "RawBytesJSON_V3", "Structured", "StructuredJSON_V2", "StructuredJSON_V3", "TimeDelta64", "TimeDelta64", "TimeDelta64JSON_V2", "TimeDelta64JSON_V3", "UInt8", "UInt16", "UInt32", "UInt64", "VariableLengthBytes", "VariableLengthBytesJSON_V2", "VariableLengthUTF8", "VariableLengthUTF8JSON_V2", "ZDType", "data_type_registry", "data_type_registry", "parse_dtype", ] zarr-python-3.1.5/src/zarr/errors.py000066400000000000000000000071371511007055700174610ustar00rootroot00000000000000__all__ = [ "ArrayIndexError", "ArrayNotFoundError", "BaseZarrError", "BoundsCheckError", "ContainsArrayAndGroupError", "ContainsArrayError", "ContainsGroupError", "GroupNotFoundError", "MetadataValidationError", "NegativeStepError", "NodeTypeValidationError", "UnstableSpecificationWarning", "VindexInvalidSelectionError", "ZarrDeprecationWarning", "ZarrFutureWarning", "ZarrRuntimeWarning", 
] class BaseZarrError(ValueError): """ Base error which all zarr errors are sub-classed from. """ _msg: str = "{}" def __init__(self, *args: object) -> None: """ If a single argument is passed, treat it as a pre-formatted message. If multiple arguments are passed, they are used as arguments for a template string class variable. This behavior is deprecated. """ if len(args) == 1: super().__init__(args[0]) else: super().__init__(self._msg.format(*args)) class NodeNotFoundError(BaseZarrError, FileNotFoundError): """ Raised when a node (array or group) is not found at a certain path. """ class ArrayNotFoundError(NodeNotFoundError): """ Raised when an array isn't found at a certain path. """ _msg = "No array found in store {!r} at path {!r}" class GroupNotFoundError(NodeNotFoundError): """ Raised when a group isn't found at a certain path. """ _msg = "No group found in store {!r} at path {!r}" class ContainsGroupError(BaseZarrError): """Raised when a group already exists at a certain path.""" _msg = "A group exists in store {!r} at path {!r}." class ContainsArrayError(BaseZarrError): """Raised when an array already exists at a certain path.""" _msg = "An array exists in store {!r} at path {!r}." class ContainsArrayAndGroupError(BaseZarrError): """Raised when both array and group metadata are found at the same path.""" _msg = ( "Array and group metadata documents (.zarray and .zgroup) were both found in store " "{!r} at path {!r}. " "Only one of these files may be present in a given directory / prefix. " "Remove the .zarray file, or the .zgroup file, or both." ) class MetadataValidationError(BaseZarrError): """Raised when the Zarr metadata is invalid in some way""" _msg = "Invalid value for '{}'. Expected '{}'. Got '{}'." class UnknownCodecError(BaseZarrError): """ Raised when an unknown codec was used. """ class NodeTypeValidationError(MetadataValidationError): """ Specialized exception when the node_type of the metadata document is incorrect. This can be raised when the value is invalid or unexpected given the context, for example an 'array' node when we expected a 'group'. """ class ZarrFutureWarning(FutureWarning): """ A warning intended for end users raised to indicate deprecated features. """ class UnstableSpecificationWarning(ZarrFutureWarning): """ A warning raised to indicate that a feature is outside the Zarr specification. """ class ZarrDeprecationWarning(DeprecationWarning): """ A warning raised to indicate that a feature will be removed in a future release. """ class ZarrUserWarning(UserWarning): """ A warning raised to report problems with user code. """ class ZarrRuntimeWarning(RuntimeWarning): """ A warning for dubious runtime behavior. """ class VindexInvalidSelectionError(IndexError): ... class NegativeStepError(IndexError): ... class BoundsCheckError(IndexError): ... class ArrayIndexError(IndexError): ... 
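# Illustrative usage sketch (not part of the original module; the store URL and path
# below are hypothetical). A single argument is passed through verbatim, while the
# multi-argument form fills the class-level ``_msg`` template (a behaviour the
# ``BaseZarrError.__init__`` docstring notes is deprecated):
#
#     err = ContainsArrayError("memory://demo", "foo/bar")
#     str(err)  # "An array exists in store 'memory://demo' at path 'foo/bar'."
#
#     err = GroupNotFoundError("no group found at the requested path")
#     str(err)  # the pre-formatted message is returned unchanged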
zarr-python-3.1.5/src/zarr/experimental/000077500000000000000000000000001511007055700202605ustar00rootroot00000000000000zarr-python-3.1.5/src/zarr/experimental/__init__.py000066400000000000000000000001311511007055700223640ustar00rootroot00000000000000"""The experimental module is a site for exporting new or experimental Zarr features.""" zarr-python-3.1.5/src/zarr/experimental/cache_store.py000066400000000000000000000334571511007055700231250ustar00rootroot00000000000000from __future__ import annotations import asyncio import logging import time from collections import OrderedDict from typing import TYPE_CHECKING, Any, Literal from zarr.abc.store import ByteRequest, Store from zarr.storage._wrapper import WrapperStore logger = logging.getLogger(__name__) if TYPE_CHECKING: from zarr.core.buffer.core import Buffer, BufferPrototype class CacheStore(WrapperStore[Store]): """ A dual-store caching implementation for Zarr stores. This cache wraps any Store implementation and uses a separate Store instance as the cache backend. This provides persistent caching capabilities with time-based expiration, size-based eviction, and flexible cache storage options. Parameters ---------- store : Store The underlying store to wrap with caching cache_store : Store The store to use for caching (can be any Store implementation) max_age_seconds : int | None, optional Maximum age of cached entries in seconds. None means no expiration. Default is None. max_size : int | None, optional Maximum size of the cache in bytes. When exceeded, least recently used items are evicted. None means unlimited size. Default is None. Note: Individual values larger than max_size will not be cached. key_insert_times : dict[str, float] | None, optional Dictionary to track insertion times (using monotonic time). Primarily for internal use. Default is None (creates new dict). cache_set_data : bool, optional Whether to cache data when it's written to the store. Default is True. Examples -------- ```python import zarr from zarr.storage import MemoryStore from zarr.experimental.cache_store import CacheStore # Create a cached store source_store = MemoryStore() cache_store = MemoryStore() cached_store = CacheStore( store=source_store, cache_store=cache_store, max_age_seconds=60, max_size=1024*1024 ) # Use it like any other store array = zarr.create(shape=(100,), store=cached_store) array[:] = 42 ``` """ _cache: Store max_age_seconds: int | Literal["infinity"] max_size: int | None key_insert_times: dict[str, float] cache_set_data: bool _cache_order: OrderedDict[str, None] # Track access order for LRU _current_size: int # Track current cache size _key_sizes: dict[str, int] # Track size of each cached key _lock: asyncio.Lock _hits: int # Cache hit counter _misses: int # Cache miss counter _evictions: int # Cache eviction counter def __init__( self, store: Store, *, cache_store: Store, max_age_seconds: int | str = "infinity", max_size: int | None = None, key_insert_times: dict[str, float] | None = None, cache_set_data: bool = True, ) -> None: super().__init__(store) if not cache_store.supports_deletes: msg = ( f"The provided cache store {cache_store} does not support deletes. " "The cache_store must support deletes for CacheStore to function properly." 
) raise ValueError(msg) self._cache = cache_store # Validate and set max_age_seconds if isinstance(max_age_seconds, str): if max_age_seconds != "infinity": raise ValueError("max_age_seconds string value must be 'infinity'") self.max_age_seconds = "infinity" else: self.max_age_seconds = max_age_seconds self.max_size = max_size if key_insert_times is None: self.key_insert_times = {} else: self.key_insert_times = key_insert_times self.cache_set_data = cache_set_data self._cache_order = OrderedDict() self._current_size = 0 self._key_sizes = {} self._lock = asyncio.Lock() self._hits = 0 self._misses = 0 self._evictions = 0 def _is_key_fresh(self, key: str) -> bool: """Check if a cached key is still fresh based on max_age_seconds. Uses monotonic time for accurate elapsed time measurement. """ if self.max_age_seconds == "infinity": return True now = time.monotonic() elapsed = now - self.key_insert_times.get(key, 0) return elapsed < self.max_age_seconds async def _accommodate_value(self, value_size: int) -> None: """Ensure there is enough space in the cache for a new value. Must be called while holding self._lock. """ if self.max_size is None: return # Remove least recently used items until we have enough space while self._current_size + value_size > self.max_size and self._cache_order: # Get the least recently used key (first in OrderedDict) lru_key = next(iter(self._cache_order)) await self._evict_key(lru_key) async def _evict_key(self, key: str) -> None: """Evict a key from the cache. Must be called while holding self._lock. Updates size tracking atomically with deletion. """ try: key_size = self._key_sizes.get(key, 0) # Delete from cache store await self._cache.delete(key) # Update tracking after successful deletion self._remove_from_tracking(key) self._current_size = max(0, self._current_size - key_size) self._evictions += 1 logger.debug("_evict_key: evicted key %s, freed %d bytes", key, key_size) except Exception: logger.exception("_evict_key: failed to evict key %s", key) raise # Re-raise to signal eviction failure async def _cache_value(self, key: str, value: Buffer) -> None: """Cache a value with size tracking. This method holds the lock for the entire operation to ensure atomicity. """ value_size = len(value) # Check if value exceeds max size if self.max_size is not None and value_size > self.max_size: logger.warning( "_cache_value: value size %d exceeds max_size %d, skipping cache", value_size, self.max_size, ) return async with self._lock: # If key already exists, subtract old size first if key in self._key_sizes: old_size = self._key_sizes[key] self._current_size -= old_size logger.debug("_cache_value: updating existing key %s, old size %d", key, old_size) # Make room for the new value (this calls _evict_key_locked internally) await self._accommodate_value(value_size) # Update tracking atomically self._cache_order[key] = None # OrderedDict to track access order self._current_size += value_size self._key_sizes[key] = value_size self.key_insert_times[key] = time.monotonic() logger.debug("_cache_value: cached key %s with size %d bytes", key, value_size) async def _update_access_order(self, key: str) -> None: """Update the access order for LRU tracking.""" if key in self._cache_order: async with self._lock: # Move to end (most recently used) self._cache_order.move_to_end(key) def _remove_from_tracking(self, key: str) -> None: """Remove a key from all tracking structures. Must be called while holding self._lock. 
""" self._cache_order.pop(key, None) self.key_insert_times.pop(key, None) self._key_sizes.pop(key, None) async def _get_try_cache( self, key: str, prototype: BufferPrototype, byte_range: ByteRequest | None = None ) -> Buffer | None: """Try to get data from cache first, falling back to source store.""" maybe_cached_result = await self._cache.get(key, prototype, byte_range) if maybe_cached_result is not None: logger.debug("_get_try_cache: key %s found in cache (HIT)", key) self._hits += 1 # Update access order for LRU await self._update_access_order(key) return maybe_cached_result else: logger.debug( "_get_try_cache: key %s not found in cache (MISS), fetching from store", key ) self._misses += 1 maybe_fresh_result = await super().get(key, prototype, byte_range) if maybe_fresh_result is None: # Key doesn't exist in source store await self._cache.delete(key) async with self._lock: self._remove_from_tracking(key) else: # Cache the newly fetched value await self._cache.set(key, maybe_fresh_result) await self._cache_value(key, maybe_fresh_result) return maybe_fresh_result async def _get_no_cache( self, key: str, prototype: BufferPrototype, byte_range: ByteRequest | None = None ) -> Buffer | None: """Get data directly from source store and update cache.""" self._misses += 1 maybe_fresh_result = await super().get(key, prototype, byte_range) if maybe_fresh_result is None: # Key doesn't exist in source, remove from cache and tracking await self._cache.delete(key) async with self._lock: self._remove_from_tracking(key) else: logger.debug("_get_no_cache: key %s found in store, setting in cache", key) await self._cache.set(key, maybe_fresh_result) await self._cache_value(key, maybe_fresh_result) return maybe_fresh_result async def get( self, key: str, prototype: BufferPrototype, byte_range: ByteRequest | None = None, ) -> Buffer | None: """ Retrieve data from the store, using cache when appropriate. Parameters ---------- key : str The key to retrieve prototype : BufferPrototype Buffer prototype for creating the result buffer byte_range : ByteRequest, optional Byte range to retrieve Returns ------- Buffer | None The retrieved data, or None if not found """ if not self._is_key_fresh(key): logger.debug("get: key %s is not fresh, fetching from store", key) return await self._get_no_cache(key, prototype, byte_range) else: logger.debug("get: key %s is fresh, trying cache", key) return await self._get_try_cache(key, prototype, byte_range) async def set(self, key: str, value: Buffer) -> None: """ Store data in the underlying store and optionally in cache. Parameters ---------- key : str The key to store under value : Buffer The data to store """ logger.debug("set: setting key %s in store", key) await super().set(key, value) if self.cache_set_data: logger.debug("set: setting key %s in cache", key) await self._cache.set(key, value) await self._cache_value(key, value) else: logger.debug("set: deleting key %s from cache", key) await self._cache.delete(key) async with self._lock: self._remove_from_tracking(key) async def delete(self, key: str) -> None: """ Delete data from both the underlying store and cache. 
Parameters ---------- key : str The key to delete """ logger.debug("delete: deleting key %s from store", key) await super().delete(key) logger.debug("delete: deleting key %s from cache", key) await self._cache.delete(key) async with self._lock: self._remove_from_tracking(key) def cache_info(self) -> dict[str, Any]: """Return information about the cache state.""" return { "cache_store_type": type(self._cache).__name__, "max_age_seconds": "infinity" if self.max_age_seconds == "infinity" else self.max_age_seconds, "max_size": self.max_size, "current_size": self._current_size, "cache_set_data": self.cache_set_data, "tracked_keys": len(self.key_insert_times), "cached_keys": len(self._cache_order), } def cache_stats(self) -> dict[str, Any]: """Return cache performance statistics.""" total_requests = self._hits + self._misses hit_rate = self._hits / total_requests if total_requests > 0 else 0.0 return { "hits": self._hits, "misses": self._misses, "evictions": self._evictions, "total_requests": total_requests, "hit_rate": hit_rate, } async def clear_cache(self) -> None: """Clear all cached data and tracking information.""" # Clear the cache store if it supports clear if hasattr(self._cache, "clear"): await self._cache.clear() # Reset tracking async with self._lock: self.key_insert_times.clear() self._cache_order.clear() self._key_sizes.clear() self._current_size = 0 logger.debug("clear_cache: cleared all cache data") def __repr__(self) -> str: """Return string representation of the cache store.""" return ( f"{self.__class__.__name__}(" f"store={self._store!r}, " f"cache_store={self._cache!r}, " f"max_age_seconds={self.max_age_seconds}, " f"max_size={self.max_size}, " f"current_size={self._current_size}, " f"cached_keys={len(self._cache_order)})" ) zarr-python-3.1.5/src/zarr/metadata/000077500000000000000000000000001511007055700173435ustar00rootroot00000000000000zarr-python-3.1.5/src/zarr/metadata/__init__.py000066400000000000000000000000001511007055700214420ustar00rootroot00000000000000zarr-python-3.1.5/src/zarr/metadata/migrate_v3.py000066400000000000000000000243411511007055700217610ustar00rootroot00000000000000import asyncio import logging from typing import Literal, cast import numcodecs.abc import zarr from zarr import Group from zarr.abc.codec import ArrayArrayCodec, BytesBytesCodec, Codec from zarr.abc.store import Store from zarr.codecs.blosc import BloscCodec, BloscShuffle from zarr.codecs.bytes import BytesCodec from zarr.codecs.gzip import GzipCodec from zarr.codecs.transpose import TransposeCodec from zarr.codecs.zstd import ZstdCodec from zarr.core.buffer.core import default_buffer_prototype from zarr.core.chunk_key_encodings import V2ChunkKeyEncoding from zarr.core.common import ( ZARR_JSON, ZARRAY_JSON, ZATTRS_JSON, ZGROUP_JSON, ZMETADATA_V2_JSON, ZarrFormat, ) from zarr.core.dtype.common import HasEndianness from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType from zarr.core.group import GroupMetadata from zarr.core.metadata.v2 import ArrayV2Metadata from zarr.core.metadata.v3 import ArrayV3Metadata from zarr.core.sync import sync from zarr.registry import get_codec_class from zarr.storage import StorePath from zarr.types import AnyArray _logger = logging.getLogger(__name__) def migrate_v2_to_v3( *, input_store: Store, output_store: Store | None = None, dry_run: bool = False, ) -> None: """Migrate all v2 metadata in a Zarr store to v3. This will create a zarr.json file at each level of a Zarr hierarchy (for every group / array). v2 files (.zarray, .zattrs etc.) 
will be left as-is. Parameters ---------- input_store : Store Input Zarr to migrate. output_store : Store, optional Output location to write v3 metadata (no array data will be copied). If not provided, v3 metadata will be written to input_store. dry_run : bool, optional Enable a 'dry run' - files that would be created are logged, but no files are created or changed. """ zarr_v2 = zarr.open(store=input_store, mode="r+") if output_store is not None: # w- access to not allow overwrite of existing data output_path = sync(StorePath.open(output_store, path="", mode="w-")) else: output_path = zarr_v2.store_path migrate_to_v3(zarr_v2, output_path, dry_run=dry_run) def migrate_to_v3(zarr_v2: AnyArray | Group, output_path: StorePath, dry_run: bool = False) -> None: """Migrate all v2 metadata in a Zarr array/group to v3. Note - if a group is provided, then all arrays / groups within this group will also be converted. A zarr.json file will be created for each level and written to output_path, with any v2 files (.zarray, .zattrs etc.) left as-is. Parameters ---------- zarr_v2 : Array | Group An array or group with zarr_format = 2 output_path : StorePath The store path to write generated v3 metadata to. dry_run : bool, optional Enable a 'dry run' - files that would be created are logged, but no files are created or changed. """ if not zarr_v2.metadata.zarr_format == 2: raise TypeError("Only arrays / groups with zarr v2 metadata can be converted") if isinstance(zarr_v2.metadata, GroupMetadata): _convert_group(zarr_v2, output_path, dry_run) else: _convert_array(zarr_v2, output_path, dry_run) async def remove_metadata( store: Store, zarr_format: ZarrFormat, force: bool = False, dry_run: bool = False, ) -> None: """Remove all v2 (.zarray, .zattrs, .zgroup, .zmetadata) or v3 (zarr.json) metadata files from the given Zarr. Note - this will remove metadata files at all levels of the hierarchy (every group and array). Parameters ---------- store : Store Zarr to remove metadata from. zarr_format : ZarrFormat Which format's metadata to remove - 2 or 3. force : bool, optional When False, metadata can only be removed if a valid alternative exists e.g. deletion of v2 metadata will only be allowed when v3 metadata is also present. When True, metadata can be removed when there is no alternative. dry_run : bool, optional Enable a 'dry run' - files that would be deleted are logged, but no files are removed or changed. """ if not store.supports_deletes: raise ValueError("Store must support deletes to remove metadata") store_path = await StorePath.open(store, path="", mode="r+") metadata_files_all = { 2: [ZARRAY_JSON, ZATTRS_JSON, ZGROUP_JSON, ZMETADATA_V2_JSON], 3: [ZARR_JSON], } if zarr_format == 2: alternative_metadata = 3 else: alternative_metadata = 2 awaitables = [] async for file_path in store.list(): parent_path, _, file_name = file_path.rpartition("/") if file_name not in metadata_files_all[zarr_format]: continue if force or await _metadata_exists( cast(Literal[2, 3], alternative_metadata), store_path / parent_path ): _logger.info("Deleting metadata at %s", store_path / file_path) if not dry_run: awaitables.append((store_path / file_path).delete()) else: raise ValueError( f"Cannot remove v{zarr_format} metadata at {store_path / file_path} - no v{alternative_metadata} " "metadata exists. To delete anyway, use the 'force' option." 
) await asyncio.gather(*awaitables) def _convert_group(zarr_v2: Group, output_path: StorePath, dry_run: bool) -> None: if zarr_v2.metadata.consolidated_metadata is not None: raise NotImplementedError("Migration of consolidated metadata isn't supported.") # process members of the group for key in zarr_v2: migrate_to_v3(zarr_v2[key], output_path=output_path / key, dry_run=dry_run) # write group's converted metadata group_metadata_v3 = GroupMetadata( attributes=zarr_v2.metadata.attributes, zarr_format=3, consolidated_metadata=None ) sync(_save_v3_metadata(group_metadata_v3, output_path, dry_run=dry_run)) def _convert_array(zarr_v2: AnyArray, output_path: StorePath, dry_run: bool) -> None: array_metadata_v3 = _convert_array_metadata(cast(ArrayV2Metadata, zarr_v2.metadata)) sync(_save_v3_metadata(array_metadata_v3, output_path, dry_run=dry_run)) async def _metadata_exists(zarr_format: ZarrFormat, store_path: StorePath) -> bool: metadata_files_required = {2: [ZARRAY_JSON, ZGROUP_JSON], 3: [ZARR_JSON]} for metadata_file in metadata_files_required[zarr_format]: if await (store_path / metadata_file).exists(): return True return False def _convert_array_metadata(metadata_v2: ArrayV2Metadata) -> ArrayV3Metadata: chunk_key_encoding = V2ChunkKeyEncoding(separator=metadata_v2.dimension_separator) codecs: list[Codec] = [] # array-array codecs if metadata_v2.order == "F": # F is equivalent to order: n-1, ... 1, 0 codecs.append(TransposeCodec(order=list(range(len(metadata_v2.shape) - 1, -1, -1)))) if metadata_v2.filters is not None: codecs.extend(_convert_filters(metadata_v2.filters)) # array-bytes codecs if not isinstance(metadata_v2.dtype, HasEndianness): codecs.append(BytesCodec(endian=None)) else: codecs.append(BytesCodec(endian=metadata_v2.dtype.endianness)) # bytes-bytes codecs if metadata_v2.compressor is not None: bytes_bytes_codec = _convert_compressor(metadata_v2.compressor, metadata_v2.dtype) codecs.append(bytes_bytes_codec) return ArrayV3Metadata( shape=metadata_v2.shape, data_type=metadata_v2.dtype, chunk_grid=metadata_v2.chunk_grid, chunk_key_encoding=chunk_key_encoding, fill_value=metadata_v2.fill_value, codecs=codecs, attributes=metadata_v2.attributes, dimension_names=None, storage_transformers=None, ) def _convert_filters(filters: tuple[numcodecs.abc.Codec, ...]) -> list[ArrayArrayCodec]: filters_codecs = [_find_numcodecs_zarr3(filter) for filter in filters] for codec in filters_codecs: if not isinstance(codec, ArrayArrayCodec): raise TypeError(f"Filter {type(codec)} is not an ArrayArrayCodec") return cast(list[ArrayArrayCodec], filters_codecs) def _convert_compressor( compressor: numcodecs.abc.Codec, dtype: ZDType[TBaseDType, TBaseScalar] ) -> BytesBytesCodec: match compressor.codec_id: case "blosc": return BloscCodec( typesize=dtype.to_native_dtype().itemsize, cname=compressor.cname, clevel=compressor.clevel, shuffle=BloscShuffle.from_int(compressor.shuffle), blocksize=compressor.blocksize, ) case "zstd": return ZstdCodec( level=compressor.level, checksum=compressor.checksum, ) case "gzip": return GzipCodec(level=compressor.level) case _: # If possible, find matching zarr.codecs.numcodecs codec compressor_codec = _find_numcodecs_zarr3(compressor) if not isinstance(compressor_codec, BytesBytesCodec): raise TypeError(f"Compressor {type(compressor_codec)} is not a BytesBytesCodec") return compressor_codec def _find_numcodecs_zarr3(numcodecs_codec: numcodecs.abc.Codec) -> Codec: """Find matching zarr.codecs.numcodecs codec (if it exists)""" numcodec_name = 
f"numcodecs.{numcodecs_codec.codec_id}" numcodec_dict = { "name": numcodec_name, "configuration": numcodecs_codec.get_config(), } try: codec_v3 = get_codec_class(numcodec_name) except KeyError as exc: raise ValueError( f"Couldn't find corresponding zarr.codecs.numcodecs codec for {numcodecs_codec.codec_id}" ) from exc return codec_v3.from_dict(numcodec_dict) async def _save_v3_metadata( metadata_v3: ArrayV3Metadata | GroupMetadata, output_path: StorePath, dry_run: bool = False ) -> None: zarr_json_path = output_path / ZARR_JSON if await zarr_json_path.exists(): raise ValueError(f"{ZARR_JSON} already exists at {zarr_json_path}") _logger.info("Saving metadata to %s", zarr_json_path) to_save = metadata_v3.to_buffer_dict(default_buffer_prototype()) if not dry_run: await zarr_json_path.set_if_not_exists(to_save[ZARR_JSON]) zarr-python-3.1.5/src/zarr/py.typed000066400000000000000000000000001511007055700172500ustar00rootroot00000000000000zarr-python-3.1.5/src/zarr/registry.py000066400000000000000000000263421511007055700200140ustar00rootroot00000000000000from __future__ import annotations import warnings from collections import defaultdict from importlib.metadata import entry_points as get_entry_points from typing import TYPE_CHECKING, Any, Generic, TypeVar from zarr.core.config import BadConfigError, config from zarr.core.dtype import data_type_registry from zarr.errors import ZarrUserWarning if TYPE_CHECKING: from importlib.metadata import EntryPoint from zarr.abc.codec import ( ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec, Codec, CodecJSON_V2, CodecPipeline, ) from zarr.abc.numcodec import Numcodec from zarr.core.buffer import Buffer, NDBuffer from zarr.core.chunk_key_encodings import ChunkKeyEncoding from zarr.core.common import JSON __all__ = [ "Registry", "get_buffer_class", "get_chunk_key_encoding_class", "get_codec_class", "get_ndbuffer_class", "get_pipeline_class", "register_buffer", "register_chunk_key_encoding", "register_codec", "register_ndbuffer", "register_pipeline", ] T = TypeVar("T") class Registry(dict[str, type[T]], Generic[T]): def __init__(self) -> None: super().__init__() self.lazy_load_list: list[EntryPoint] = [] def lazy_load(self, use_entrypoint_name: bool = False) -> None: for e in self.lazy_load_list: self.register(e.load(), qualname=e.name if use_entrypoint_name else None) self.lazy_load_list.clear() def register(self, cls: type[T], qualname: str | None = None) -> None: if qualname is None: qualname = fully_qualified_name(cls) self[qualname] = cls __codec_registries: dict[str, Registry[Codec]] = defaultdict(Registry) __pipeline_registry: Registry[CodecPipeline] = Registry() __buffer_registry: Registry[Buffer] = Registry() __ndbuffer_registry: Registry[NDBuffer] = Registry() __chunk_key_encoding_registry: Registry[ChunkKeyEncoding] = Registry() """ The registry module is responsible for managing implementations of codecs, pipelines, buffers, ndbuffers, and chunk key encodings and collecting them from entrypoints. The implementation used is determined by the config. The registry module is also responsible for managing dtypes. """ def _collect_entrypoints() -> list[Registry[Any]]: """ Collects codecs, pipelines, dtypes, buffers and ndbuffers from entrypoints. Entry points can either be single items or groups of items. Allowed syntax for entry_points.txt is e.g. 
[zarr.codecs] gzip = package:EntrypointGzipCodec1 [zarr.codecs.gzip] some_name = package:EntrypointGzipCodec2 another = package:EntrypointGzipCodec3 [zarr] buffer = package:TestBuffer1 [zarr.buffer] xyz = package:TestBuffer2 abc = package:TestBuffer3 ... """ entry_points = get_entry_points() __buffer_registry.lazy_load_list.extend(entry_points.select(group="zarr.buffer")) __buffer_registry.lazy_load_list.extend(entry_points.select(group="zarr", name="buffer")) __ndbuffer_registry.lazy_load_list.extend(entry_points.select(group="zarr.ndbuffer")) __ndbuffer_registry.lazy_load_list.extend(entry_points.select(group="zarr", name="ndbuffer")) data_type_registry._lazy_load_list.extend(entry_points.select(group="zarr.data_type")) data_type_registry._lazy_load_list.extend(entry_points.select(group="zarr", name="data_type")) __chunk_key_encoding_registry.lazy_load_list.extend( entry_points.select(group="zarr.chunk_key_encoding") ) __chunk_key_encoding_registry.lazy_load_list.extend( entry_points.select(group="zarr", name="chunk_key_encoding") ) __pipeline_registry.lazy_load_list.extend(entry_points.select(group="zarr.codec_pipeline")) __pipeline_registry.lazy_load_list.extend( entry_points.select(group="zarr", name="codec_pipeline") ) for e in entry_points.select(group="zarr.codecs"): __codec_registries[e.name].lazy_load_list.append(e) for group in entry_points.groups: if group.startswith("zarr.codecs."): codec_name = group.split(".")[2] __codec_registries[codec_name].lazy_load_list.extend(entry_points.select(group=group)) return [ *__codec_registries.values(), __pipeline_registry, __buffer_registry, __ndbuffer_registry, __chunk_key_encoding_registry, ] def _reload_config() -> None: config.refresh() def fully_qualified_name(cls: type) -> str: module = cls.__module__ return module + "." + cls.__qualname__ def register_codec(key: str, codec_cls: type[Codec], *, qualname: str | None = None) -> None: if key not in __codec_registries: __codec_registries[key] = Registry() __codec_registries[key].register(codec_cls, qualname=qualname) def register_pipeline(pipe_cls: type[CodecPipeline]) -> None: __pipeline_registry.register(pipe_cls) def register_ndbuffer(cls: type[NDBuffer], qualname: str | None = None) -> None: __ndbuffer_registry.register(cls, qualname) def register_buffer(cls: type[Buffer], qualname: str | None = None) -> None: __buffer_registry.register(cls, qualname) def register_chunk_key_encoding(key: str, cls: type) -> None: __chunk_key_encoding_registry.register(cls, key) def get_codec_class(key: str, reload_config: bool = False) -> type[Codec]: if reload_config: _reload_config() if key in __codec_registries: # logger.debug("Auto loading codec '%s' from entrypoint", codec_id) __codec_registries[key].lazy_load() codec_classes = __codec_registries[key] if not codec_classes: raise KeyError(key) config_entry = config.get("codecs", {}).get(key) if config_entry is None: if len(codec_classes) == 1: return next(iter(codec_classes.values())) warnings.warn( f"Codec '{key}' not configured in config. Selecting any implementation.", stacklevel=2, category=ZarrUserWarning, ) return list(codec_classes.values())[-1] selected_codec_cls = codec_classes[config_entry] if selected_codec_cls: return selected_codec_cls raise KeyError(key) def _resolve_codec(data: dict[str, JSON]) -> Codec: """ Get a codec instance from a dict representation of that codec. """ # TODO: narrow the type of the input to only those dicts that map on to codec class instances. 
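    # Hedged illustration (codec name and level are only examples): a v3-style codec
    # document such as {"name": "gzip", "configuration": {"level": 5}} is looked up by
    # its "name" in the codec registry and rebuilt through that class's ``from_dict``,
    # yielding e.g. ``GzipCodec(level=5)``.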
return get_codec_class(data["name"]).from_dict(data) # type: ignore[arg-type] def _parse_bytes_bytes_codec(data: dict[str, JSON] | Codec) -> BytesBytesCodec: """ Normalize the input to a ``BytesBytesCodec`` instance. If the input is already a ``BytesBytesCodec``, it is returned as is. If the input is a dict, it is converted to a ``BytesBytesCodec`` instance via the ``_resolve_codec`` function. """ from zarr.abc.codec import BytesBytesCodec if isinstance(data, dict): result = _resolve_codec(data) if not isinstance(result, BytesBytesCodec): msg = f"Expected a dict representation of a BytesBytesCodec; got a dict representation of a {type(result)} instead." raise TypeError(msg) else: if not isinstance(data, BytesBytesCodec): raise TypeError(f"Expected a BytesBytesCodec. Got {type(data)} instead.") result = data return result def _parse_array_bytes_codec(data: dict[str, JSON] | Codec) -> ArrayBytesCodec: """ Normalize the input to a ``ArrayBytesCodec`` instance. If the input is already a ``ArrayBytesCodec``, it is returned as is. If the input is a dict, it is converted to a ``ArrayBytesCodec`` instance via the ``_resolve_codec`` function. """ from zarr.abc.codec import ArrayBytesCodec if isinstance(data, dict): result = _resolve_codec(data) if not isinstance(result, ArrayBytesCodec): msg = f"Expected a dict representation of an ArrayBytesCodec; got a dict representation of a {type(result)} instead." raise TypeError(msg) else: if not isinstance(data, ArrayBytesCodec): raise TypeError(f"Expected an ArrayBytesCodec. Got {type(data)} instead.") result = data return result def _parse_array_array_codec(data: dict[str, JSON] | Codec) -> ArrayArrayCodec: """ Normalize the input to a ``ArrayArrayCodec`` instance. If the input is already a ``ArrayArrayCodec``, it is returned as is. If the input is a dict, it is converted to a ``ArrayArrayCodec`` instance via the ``_resolve_codec`` function. """ from zarr.abc.codec import ArrayArrayCodec if isinstance(data, dict): result = _resolve_codec(data) if not isinstance(result, ArrayArrayCodec): msg = f"Expected a dict representation of an ArrayArrayCodec; got a dict representation of a {type(result)} instead." raise TypeError(msg) else: if not isinstance(data, ArrayArrayCodec): raise TypeError(f"Expected an ArrayArrayCodec. Got {type(data)} instead.") result = data return result def get_pipeline_class(reload_config: bool = False) -> type[CodecPipeline]: if reload_config: _reload_config() __pipeline_registry.lazy_load() path = config.get("codec_pipeline.path") pipeline_class = __pipeline_registry.get(path) if pipeline_class: return pipeline_class raise BadConfigError( f"Pipeline class '{path}' not found in registered pipelines: {list(__pipeline_registry)}." ) def get_buffer_class(reload_config: bool = False) -> type[Buffer]: if reload_config: _reload_config() __buffer_registry.lazy_load() path = config.get("buffer") buffer_class = __buffer_registry.get(path) if buffer_class: return buffer_class raise BadConfigError( f"Buffer class '{path}' not found in registered buffers: {list(__buffer_registry)}." ) def get_ndbuffer_class(reload_config: bool = False) -> type[NDBuffer]: if reload_config: _reload_config() __ndbuffer_registry.lazy_load() path = config.get("ndbuffer") ndbuffer_class = __ndbuffer_registry.get(path) if ndbuffer_class: return ndbuffer_class raise BadConfigError( f"NDBuffer class '{path}' not found in registered buffers: {list(__ndbuffer_registry)}." 
) def get_chunk_key_encoding_class(key: str) -> type[ChunkKeyEncoding]: __chunk_key_encoding_registry.lazy_load(use_entrypoint_name=True) if key not in __chunk_key_encoding_registry: raise KeyError( f"Chunk key encoding '{key}' not found in registered chunk key encodings: {list(__chunk_key_encoding_registry)}." ) return __chunk_key_encoding_registry[key] _collect_entrypoints() def get_numcodec(data: CodecJSON_V2[str]) -> Numcodec: """ Resolve a numcodec codec from the numcodecs registry. This requires the Numcodecs package to be installed. Parameters ---------- data : CodecJSON_V2 The JSON metadata for the codec. Returns ------- codec : Numcodec Examples -------- ```python from zarr.registry import get_numcodec codec = get_numcodec({'id': 'zlib', 'level': 1}) codec # Zlib(level=1) ``` """ from numcodecs.registry import get_codec return get_codec(data) # type: ignore[no-any-return] zarr-python-3.1.5/src/zarr/storage/000077500000000000000000000000001511007055700172275ustar00rootroot00000000000000zarr-python-3.1.5/src/zarr/storage/__init__.py000066400000000000000000000024631511007055700213450ustar00rootroot00000000000000import sys import warnings from types import ModuleType from typing import Any from zarr.errors import ZarrDeprecationWarning from zarr.storage._common import StoreLike, StorePath from zarr.storage._fsspec import FsspecStore from zarr.storage._local import LocalStore from zarr.storage._logging import LoggingStore from zarr.storage._memory import GpuMemoryStore, MemoryStore from zarr.storage._obstore import ObjectStore from zarr.storage._wrapper import WrapperStore from zarr.storage._zip import ZipStore __all__ = [ "FsspecStore", "GpuMemoryStore", "LocalStore", "LoggingStore", "MemoryStore", "ObjectStore", "StoreLike", "StorePath", "WrapperStore", "ZipStore", ] class VerboseModule(ModuleType): def __setattr__(self, attr: str, value: Any) -> None: if attr == "default_compressor": warnings.warn( "setting zarr.storage.default_compressor is deprecated, use " "zarr.config to configure array.v2_default_compressor " "e.g. 
config.set({'codecs.zstd':'numcodecs.Zstd', 'array.v2_default_compressor.numeric': 'zstd'})", ZarrDeprecationWarning, stacklevel=1, ) else: super().__setattr__(attr, value) sys.modules[__name__].__class__ = VerboseModule zarr-python-3.1.5/src/zarr/storage/_common.py000066400000000000000000000511161511007055700212340ustar00rootroot00000000000000from __future__ import annotations import importlib.util import json from pathlib import Path from typing import TYPE_CHECKING, Any, Literal, Self, TypeAlias from zarr.abc.store import ByteRequest, Store from zarr.core.buffer import Buffer, default_buffer_prototype from zarr.core.common import ( ANY_ACCESS_MODE, ZARR_JSON, ZARRAY_JSON, ZGROUP_JSON, AccessModeLiteral, ZarrFormat, ) from zarr.errors import ContainsArrayAndGroupError, ContainsArrayError, ContainsGroupError from zarr.storage._local import LocalStore from zarr.storage._memory import MemoryStore from zarr.storage._utils import normalize_path _has_fsspec = importlib.util.find_spec("fsspec") if _has_fsspec: from fsspec.mapping import FSMap else: FSMap = None if TYPE_CHECKING: from zarr.core.buffer import BufferPrototype def _dereference_path(root: str, path: str) -> str: if not isinstance(root, str): msg = f"{root=} is not a string ({type(root)=})" # type: ignore[unreachable] raise TypeError(msg) if not isinstance(path, str): msg = f"{path=} is not a string ({type(path)=})" # type: ignore[unreachable] raise TypeError(msg) root = root.rstrip("/") path = f"{root}/{path}" if root else path return path.rstrip("/") class StorePath: """ Path-like interface for a Store. Parameters ---------- store : Store The store to use. path : str The path within the store. """ store: Store path: str def __init__(self, store: Store, path: str = "") -> None: self.store = store self.path = normalize_path(path) @property def read_only(self) -> bool: return self.store.read_only @classmethod async def _create_open_instance(cls, store: Store, path: str) -> Self: """Helper to create and return a StorePath instance.""" await store._ensure_open() return cls(store, path) @classmethod async def open(cls, store: Store, path: str, mode: AccessModeLiteral | None = None) -> Self: """ Open StorePath based on the provided mode. * If the mode is None, return an opened version of the store with no changes. * If the mode is 'r+', 'w-', 'w', or 'a' and the store is read-only, raise a ValueError. * If the mode is 'r' and the store is not read-only, return a copy of the store with read_only set to True. * If the mode is 'w-' and the store is not read-only and the StorePath contains keys, raise a FileExistsError. * If the mode is 'w' and the store is not read-only, delete all keys nested within the StorePath. Parameters ---------- mode : AccessModeLiteral The mode to use when initializing the store path. The accepted values are: - ``'r'``: read only (must exist) - ``'r+'``: read/write (must exist) - ``'a'``: read/write (create if doesn't exist) - ``'w'``: read/write (overwrite if exists) - ``'w-'``: read/write (create if doesn't exist). Raises ------ FileExistsError If the mode is 'w-' and the store path already exists. ValueError If the mode is not "r" and the store is read-only, or """ # fastpath if mode is None if mode is None: return await cls._create_open_instance(store, path) if mode not in ANY_ACCESS_MODE: raise ValueError(f"Invalid mode: {mode}, expected one of {ANY_ACCESS_MODE}") if store.read_only: # Don't allow write operations on a read-only store if mode != "r": raise ValueError( f"Store is read-only but mode is {mode!r}. 
Create a writable store or use 'r' mode." ) self = await cls._create_open_instance(store, path) elif mode == "r": # Create read-only copy for read mode on writable store try: read_only_store = store.with_read_only(True) except NotImplementedError as e: raise ValueError( "Store is not read-only but mode is 'r'. Unable to create a read-only copy of the store. " "Please use a read-only store or a storage class that implements .with_read_only()." ) from e self = await cls._create_open_instance(read_only_store, path) else: # writable store and writable mode self = await cls._create_open_instance(store, path) # Handle mode-specific operations match mode: case "w-": if not await self.is_empty(): raise FileExistsError( f"Cannot create '{path}' with mode 'w-' because it already contains data. " f"Use mode 'w' to overwrite or 'a' to append." ) case "w": await self.delete_dir() return self async def get( self, prototype: BufferPrototype | None = None, byte_range: ByteRequest | None = None, ) -> Buffer | None: """ Read bytes from the store. Parameters ---------- prototype : BufferPrototype, optional The buffer prototype to use when reading the bytes. byte_range : ByteRequest, optional The range of bytes to read. Returns ------- buffer : Buffer or None The read bytes, or None if the key does not exist. """ if prototype is None: prototype = default_buffer_prototype() return await self.store.get(self.path, prototype=prototype, byte_range=byte_range) async def set(self, value: Buffer) -> None: """ Write bytes to the store. Parameters ---------- value : Buffer The buffer to write. """ await self.store.set(self.path, value) async def delete(self) -> None: """ Delete the key from the store. Raises ------ NotImplementedError If the store does not support deletion. """ await self.store.delete(self.path) async def delete_dir(self) -> None: """ Delete all keys with the given prefix from the store. """ await self.store.delete_dir(self.path) async def set_if_not_exists(self, default: Buffer) -> None: """ Store a key to ``value`` if the key is not already present. Parameters ---------- default : Buffer The buffer to store if the key is not already present. """ await self.store.set_if_not_exists(self.path, default) async def exists(self) -> bool: """ Check if the key exists in the store. Returns ------- bool True if the key exists in the store, False otherwise. """ return await self.store.exists(self.path) async def is_empty(self) -> bool: """ Check if any keys exist in the store with the given prefix. Returns ------- bool True if no keys exist in the store with the given prefix, False otherwise. """ return await self.store.is_empty(self.path) def __truediv__(self, other: str) -> StorePath: """Combine this store path with another path""" return self.__class__(self.store, _dereference_path(self.path, other)) def __str__(self) -> str: return _dereference_path(str(self.store), self.path) def __repr__(self) -> str: return f"StorePath({self.store.__class__.__name__}, '{self}')" def __eq__(self, other: object) -> bool: """ Check if two StorePath objects are equal. Returns ------- bool True if the two objects are equal, False otherwise. Notes ----- Two StorePath objects are considered equal if their stores are equal and their paths are equal. 
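For example, ``StorePath(store, "a") / "b"`` compares equal to ``StorePath(store, "a/b")`` whenever the two wrapped stores compare equal.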
""" try: return self.store == other.store and self.path == other.path # type: ignore[attr-defined, no-any-return] except Exception: pass return False StoreLike: TypeAlias = Store | StorePath | FSMap | Path | str | dict[str, Buffer] async def make_store( store_like: StoreLike | None, *, mode: AccessModeLiteral | None = None, storage_options: dict[str, Any] | None = None, ) -> Store: """ Convert a `StoreLike` object into a Store object. `StoreLike` objects are converted to `Store` as follows: - `Store` or `StorePath` = `Store` object. - `Path` or `str` = `LocalStore` object. - `str` that starts with a protocol = `FsspecStore` object. - `dict[str, Buffer]` = `MemoryStore` object. - `None` = `MemoryStore` object. - `FSMap` = `FsspecStore` object. Parameters ---------- store_like : StoreLike | None The `StoreLike` object to convert to a `Store` object. See the [storage documentation in the user guide][user-guide-store-like] for a description of all valid StoreLike values. mode : StoreAccessMode | None, optional The mode to use when creating the `Store` object. If None, the default mode is 'r'. storage_options : dict[str, Any] | None, optional The storage options to use when creating the `RemoteStore` object. If None, the default storage options are used. Returns ------- Store The converted Store object. Raises ------ TypeError If the StoreLike object is not one of the supported types, or if storage_options is provided but not used. """ from zarr.storage._fsspec import FsspecStore # circular import if ( not (isinstance(store_like, str) and _is_fsspec_uri(store_like)) and storage_options is not None ): raise TypeError( "'storage_options' was provided but unused. " "'storage_options' is only used when the store is passed as a FSSpec URI string.", ) assert mode in (None, "r", "r+", "a", "w", "w-") _read_only = mode == "r" if isinstance(store_like, StorePath): # Get underlying store return store_like.store elif isinstance(store_like, Store): # Already a Store return store_like elif isinstance(store_like, dict): # Already a dictionary that can be a MemoryStore # # We deliberate only consider dict[str, Buffer] here, and not arbitrary mutable mappings. # By only allowing dictionaries, which are in-memory, we know that MemoryStore appropriate. return await MemoryStore.open(store_dict=store_like, read_only=_read_only) elif store_like is None: # Create a new in-memory store return await make_store({}, mode=mode, storage_options=storage_options) elif isinstance(store_like, Path): # Create a new LocalStore return await LocalStore.open(root=store_like, mode=mode, read_only=_read_only) elif isinstance(store_like, str): # Either a FSSpec URI or a local filesystem path if _is_fsspec_uri(store_like): return FsspecStore.from_url( store_like, storage_options=storage_options, read_only=_read_only ) else: # Assume a filesystem path return await make_store(Path(store_like), mode=mode, storage_options=storage_options) elif _has_fsspec and isinstance(store_like, FSMap): return FsspecStore.from_mapper(store_like, read_only=_read_only) else: raise TypeError(f"Unsupported type for store_like: '{type(store_like).__name__}'") async def make_store_path( store_like: StoreLike | None, *, path: str | None = "", mode: AccessModeLiteral | None = None, storage_options: dict[str, Any] | None = None, ) -> StorePath: """ Convert a `StoreLike` object into a StorePath object. This function takes a `StoreLike` object and returns a `StorePath` object. 
See `make_store` for details of which `Store` is used for each type of `store_like` object. Parameters ---------- store_like : StoreLike or None, default=None The `StoreLike` object to convert to a `StorePath` object. See the [storage documentation in the user guide][user-guide-store-like] for a description of all valid StoreLike values. path : str | None, optional The path to use when creating the `StorePath` object. If None, the default path is the empty string. mode : StoreAccessMode | None, optional The mode to use when creating the `StorePath` object. If None, the default mode is 'r'. storage_options : dict[str, Any] | None, optional The storage options to use when creating the `RemoteStore` object. If None, the default storage options are used. Returns ------- StorePath The converted StorePath object. Raises ------ TypeError If the StoreLike object is not one of the supported types, or if storage_options is provided but not used. ValueError If path is provided for a store that does not support it. See Also -------- make_store """ path_normalized = normalize_path(path) if isinstance(store_like, StorePath): # Already a StorePath if storage_options: raise TypeError( "'storage_options' was provided but unused. " "'storage_options' is only used when the store is passed as a FSSpec URI string.", ) return store_like / path_normalized elif _has_fsspec and isinstance(store_like, FSMap) and path: raise ValueError( "'path' was provided but is not used for FSMap store_like objects. Specify the path when creating the FSMap instance instead." ) else: store = await make_store(store_like, mode=mode, storage_options=storage_options) return await StorePath.open(store, path=path_normalized, mode=mode) def _is_fsspec_uri(uri: str) -> bool: """ Check if a URI looks like a non-local fsspec URI. Examples -------- ```python from zarr.storage._common import _is_fsspec_uri _is_fsspec_uri("s3://bucket") # True _is_fsspec_uri("my-directory") # False _is_fsspec_uri("local://my-directory") # False ``` """ return "://" in uri or ("::" in uri and "local://" not in uri) async def ensure_no_existing_node( store_path: StorePath, zarr_format: ZarrFormat, node_type: Literal["array", "group"] | None = None, ) -> None: """ Check if a store_path is safe for array / group creation. Returns `None` or raises an exception. Parameters ---------- store_path : StorePath The storage location to check. zarr_format : ZarrFormat The Zarr format to check. node_type : str | None, optional Raise an error if an "array", or "group" exists. By default (when None), raises an error for either. Raises ------ ContainsArrayError, ContainsGroupError, ContainsArrayAndGroupError """ if zarr_format == 2: extant_node = await _contains_node_v2(store_path) elif zarr_format == 3: extant_node = await _contains_node_v3(store_path) match extant_node: case "array": if node_type != "group": msg = f"An array exists in store {store_path.store!r} at path {store_path.path!r}." raise ContainsArrayError(msg) case "group": if node_type != "array": msg = f"A group exists in store {store_path.store!r} at path {store_path.path!r}." raise ContainsGroupError(msg) case "nothing": return case _: msg = f"Invalid value for extant_node: {extant_node}" # type: ignore[unreachable] raise ValueError(msg) async def _contains_node_v3(store_path: StorePath) -> Literal["array", "group", "nothing"]: """ Check if a store_path contains nothing, an array, or a group. This function returns the string "array", "group", or "nothing" to denote containing an array, a group, or nothing. 
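For example, if the ``zarr.json`` document at ``store_path`` declares ``"node_type": "array"``, the return value is ``"array"``; a missing or unparseable document yields ``"nothing"``.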
Parameters ---------- store_path : StorePath The location in storage to check. Returns ------- Literal["array", "group", "nothing"] A string representing the zarr node found at store_path. """ result: Literal["array", "group", "nothing"] = "nothing" extant_meta_bytes = await (store_path / ZARR_JSON).get() # if no metadata document could be loaded, then we just return "nothing" if extant_meta_bytes is not None: try: extant_meta_json = json.loads(extant_meta_bytes.to_bytes()) # avoid constructing a full metadata document here in the name of speed. if extant_meta_json["node_type"] == "array": result = "array" elif extant_meta_json["node_type"] == "group": result = "group" except (KeyError, json.JSONDecodeError): # either of these errors is consistent with no array or group present. pass return result async def _contains_node_v2(store_path: StorePath) -> Literal["array", "group", "nothing"]: """ Check if a store_path contains nothing, an array, a group, or both. If both an array and a group are detected, a `ContainsArrayAndGroup` exception is raised. Otherwise, this function returns the string "array", "group", or "nothing" to denote containing an array, a group, or nothing. Parameters ---------- store_path : StorePath The location in storage to check. Returns ------- Literal["array", "group", "nothing"] A string representing the zarr node found at store_path. """ _array = await contains_array(store_path=store_path, zarr_format=2) _group = await contains_group(store_path=store_path, zarr_format=2) if _array and _group: msg = ( "Array and group metadata documents (.zarray and .zgroup) were both found in store " f"{store_path.store!r} at path {store_path.path!r}. " "Only one of these files may be present in a given directory / prefix. " "Remove the .zarray file, or the .zgroup file, or both." ) raise ContainsArrayAndGroupError(msg) elif _array: return "array" elif _group: return "group" else: return "nothing" async def contains_array(store_path: StorePath, zarr_format: ZarrFormat) -> bool: """ Check if an array exists at a given StorePath. Parameters ---------- store_path : StorePath The StorePath to check for an existing group. zarr_format : The zarr format to check for. Returns ------- bool True if the StorePath contains a group, False otherwise. """ if zarr_format == 3: extant_meta_bytes = await (store_path / ZARR_JSON).get() if extant_meta_bytes is None: return False else: try: extant_meta_json = json.loads(extant_meta_bytes.to_bytes()) # we avoid constructing a full metadata document here in the name of speed. if extant_meta_json["node_type"] == "array": return True except (ValueError, KeyError): return False elif zarr_format == 2: return await (store_path / ZARRAY_JSON).exists() msg = f"Invalid zarr_format provided. Got {zarr_format}, expected 2 or 3" raise ValueError(msg) async def contains_group(store_path: StorePath, zarr_format: ZarrFormat) -> bool: """ Check if a group exists at a given StorePath. Parameters ---------- store_path : StorePath The StorePath to check for an existing group. zarr_format : The zarr format to check for. Returns ------- bool True if the StorePath contains a group, False otherwise """ if zarr_format == 3: extant_meta_bytes = await (store_path / ZARR_JSON).get() if extant_meta_bytes is None: return False else: try: extant_meta_json = json.loads(extant_meta_bytes.to_bytes()) # we avoid constructing a full metadata document here in the name of speed. 
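                # Hedged illustration: a v3 group document looks roughly like
                # {"zarr_format": 3, "node_type": "group", ...}, so the comparison
                # below is True exactly when "node_type" is "group".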
result: bool = extant_meta_json["node_type"] == "group" except (ValueError, KeyError): return False else: return result elif zarr_format == 2: return await (store_path / ZGROUP_JSON).exists() msg = f"Invalid zarr_format provided. Got {zarr_format}, expected 2 or 3" # type: ignore[unreachable] raise ValueError(msg) zarr-python-3.1.5/src/zarr/storage/_fsspec.py000066400000000000000000000345531511007055700212350ustar00rootroot00000000000000from __future__ import annotations import json import warnings from contextlib import suppress from typing import TYPE_CHECKING, Any from packaging.version import parse as parse_version from zarr.abc.store import ( ByteRequest, OffsetByteRequest, RangeByteRequest, Store, SuffixByteRequest, ) from zarr.core.buffer import Buffer from zarr.errors import ZarrUserWarning from zarr.storage._common import _dereference_path if TYPE_CHECKING: from collections.abc import AsyncIterator, Iterable from fsspec import AbstractFileSystem from fsspec.asyn import AsyncFileSystem from fsspec.mapping import FSMap from zarr.core.buffer import BufferPrototype ALLOWED_EXCEPTIONS: tuple[type[Exception], ...] = ( FileNotFoundError, IsADirectoryError, NotADirectoryError, ) def _make_async(fs: AbstractFileSystem) -> AsyncFileSystem: """Convert a sync FSSpec filesystem to an async FFSpec filesystem If the filesystem class supports async operations, a new async instance is created from the existing instance. If the filesystem class does not support async operations, the existing instance is wrapped with AsyncFileSystemWrapper. """ import fsspec fsspec_version = parse_version(fsspec.__version__) if fs.async_impl and fs.asynchronous: # Already an async instance of an async filesystem, nothing to do return fs if fs.async_impl: # Convert sync instance of an async fs to an async instance fs_dict = json.loads(fs.to_json()) fs_dict["asynchronous"] = True return fsspec.AbstractFileSystem.from_json(json.dumps(fs_dict)) if fsspec_version < parse_version("2024.12.0"): raise ImportError( f"The filesystem '{fs}' is synchronous, and the required " "AsyncFileSystemWrapper is not available. Upgrade fsspec to version " "2024.12.0 or later to enable this functionality." ) from fsspec.implementations.asyn_wrapper import AsyncFileSystemWrapper return AsyncFileSystemWrapper(fs, asynchronous=True) class FsspecStore(Store): """ Store for remote data based on FSSpec. Parameters ---------- fs : AsyncFileSystem The Async FSSpec filesystem to use with this store. read_only : bool Whether the store is read-only path : str The root path of the store. This should be a relative path and must not include the filesystem scheme. allowed_exceptions : tuple[type[Exception], ...] When fetching data, these cases will be deemed to correspond to missing keys. Attributes ---------- fs allowed_exceptions supports_writes supports_deletes supports_listing Raises ------ TypeError If the Filesystem does not support async operations. ValueError If the path argument includes a scheme. Warns ----- ZarrUserWarning If the file system (fs) was not created with `asynchronous=True`. See Also -------- FsspecStore.from_upath FsspecStore.from_url """ # based on FSSpec supports_writes: bool = True supports_deletes: bool = True supports_listing: bool = True fs: AsyncFileSystem allowed_exceptions: tuple[type[Exception], ...] path: str def __init__( self, fs: AsyncFileSystem, read_only: bool = False, path: str = "/", allowed_exceptions: tuple[type[Exception], ...] 
= ALLOWED_EXCEPTIONS, ) -> None: super().__init__(read_only=read_only) self.fs = fs self.path = path self.allowed_exceptions = allowed_exceptions if not self.fs.async_impl: raise TypeError("Filesystem needs to support async operations.") if not self.fs.asynchronous: warnings.warn( f"fs ({fs}) was not created with `asynchronous=True`, this may lead to surprising behavior", category=ZarrUserWarning, stacklevel=2, ) @classmethod def from_upath( cls, upath: Any, read_only: bool = False, allowed_exceptions: tuple[type[Exception], ...] = ALLOWED_EXCEPTIONS, ) -> FsspecStore: """ Create a FsspecStore from an upath object. Parameters ---------- upath : UPath The upath to the root of the store. read_only : bool Whether the store is read-only, defaults to False. allowed_exceptions : tuple, optional The exceptions that are allowed to be raised when accessing the store. Defaults to ALLOWED_EXCEPTIONS. Returns ------- FsspecStore """ return cls( fs=upath.fs, path=upath.path.rstrip("/"), read_only=read_only, allowed_exceptions=allowed_exceptions, ) @classmethod def from_mapper( cls, fs_map: FSMap, read_only: bool = False, allowed_exceptions: tuple[type[Exception], ...] = ALLOWED_EXCEPTIONS, ) -> FsspecStore: """ Create a FsspecStore from a FSMap object. Parameters ---------- fs_map : FSMap Fsspec mutable mapping object. read_only : bool Whether the store is read-only, defaults to False. allowed_exceptions : tuple, optional The exceptions that are allowed to be raised when accessing the store. Defaults to ALLOWED_EXCEPTIONS. Returns ------- FsspecStore """ fs = _make_async(fs_map.fs) return cls( fs=fs, path=fs_map.root, read_only=read_only, allowed_exceptions=allowed_exceptions, ) @classmethod def from_url( cls, url: str, storage_options: dict[str, Any] | None = None, read_only: bool = False, allowed_exceptions: tuple[type[Exception], ...] = ALLOWED_EXCEPTIONS, ) -> FsspecStore: """ Create a FsspecStore from a URL. The type of store is determined from the URL scheme. Parameters ---------- url : str The URL to the root of the store. storage_options : dict, optional The options to pass to fsspec when creating the filesystem. read_only : bool Whether the store is read-only, defaults to False. allowed_exceptions : tuple, optional The exceptions that are allowed to be raised when accessing the store. Defaults to ALLOWED_EXCEPTIONS. 
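As a hypothetical example, ``FsspecStore.from_url("s3://example-bucket/data.zarr", storage_options={"anon": True})`` builds an asynchronous S3 filesystem (assuming ``s3fs`` is installed) and roots the store at ``example-bucket/data.zarr``.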
Returns ------- FsspecStore """ try: from fsspec import url_to_fs except ImportError: # before fsspec==2024.3.1 from fsspec.core import url_to_fs opts = storage_options or {} opts = {"asynchronous": True, **opts} fs, path = url_to_fs(url, **opts) if not fs.async_impl: fs = _make_async(fs) return cls(fs=fs, path=path, read_only=read_only, allowed_exceptions=allowed_exceptions) def with_read_only(self, read_only: bool = False) -> FsspecStore: # docstring inherited return type(self)( fs=self.fs, path=self.path, allowed_exceptions=self.allowed_exceptions, read_only=read_only, ) async def clear(self) -> None: # docstring inherited try: for subpath in await self.fs._find(self.path, withdirs=True): if subpath != self.path: await self.fs._rm(subpath, recursive=True) except FileNotFoundError: pass def __repr__(self) -> str: return f"" def __eq__(self, other: object) -> bool: return ( isinstance(other, type(self)) and self.path == other.path and self.read_only == other.read_only and self.fs == other.fs ) async def get( self, key: str, prototype: BufferPrototype, byte_range: ByteRequest | None = None, ) -> Buffer | None: # docstring inherited if not self._is_open: await self._open() path = _dereference_path(self.path, key) try: if byte_range is None: value = prototype.buffer.from_bytes(await self.fs._cat_file(path)) elif isinstance(byte_range, RangeByteRequest): value = prototype.buffer.from_bytes( await self.fs._cat_file( path, start=byte_range.start, end=byte_range.end, ) ) elif isinstance(byte_range, OffsetByteRequest): value = prototype.buffer.from_bytes( await self.fs._cat_file(path, start=byte_range.offset, end=None) ) elif isinstance(byte_range, SuffixByteRequest): value = prototype.buffer.from_bytes( await self.fs._cat_file(path, start=-byte_range.suffix, end=None) ) else: raise ValueError(f"Unexpected byte_range, got {byte_range}.") except self.allowed_exceptions: return None except OSError as e: if "not satisfiable" in str(e): # this is an s3-specific condition we probably don't want to leak return prototype.buffer.from_bytes(b"") raise else: return value async def set( self, key: str, value: Buffer, byte_range: tuple[int, int] | None = None, ) -> None: # docstring inherited if not self._is_open: await self._open() self._check_writable() if not isinstance(value, Buffer): raise TypeError( f"FsspecStore.set(): `value` must be a Buffer instance. Got an instance of {type(value)} instead." ) path = _dereference_path(self.path, key) # write data if byte_range: raise NotImplementedError await self.fs._pipe_file(path, value.to_bytes()) async def delete(self, key: str) -> None: # docstring inherited self._check_writable() path = _dereference_path(self.path, key) try: await self.fs._rm(path) except FileNotFoundError: pass except self.allowed_exceptions: pass async def delete_dir(self, prefix: str) -> None: # docstring inherited if not self.supports_deletes: raise NotImplementedError( "This method is only available for stores that support deletes." 
) self._check_writable() path_to_delete = _dereference_path(self.path, prefix) with suppress(*self.allowed_exceptions): await self.fs._rm(path_to_delete, recursive=True) async def exists(self, key: str) -> bool: # docstring inherited path = _dereference_path(self.path, key) exists: bool = await self.fs._exists(path) return exists async def get_partial_values( self, prototype: BufferPrototype, key_ranges: Iterable[tuple[str, ByteRequest | None]], ) -> list[Buffer | None]: # docstring inherited if key_ranges: # _cat_ranges expects a list of paths, start, and end ranges, so we need to reformat each ByteRequest. key_ranges = list(key_ranges) paths: list[str] = [] starts: list[int | None] = [] stops: list[int | None] = [] for key, byte_range in key_ranges: paths.append(_dereference_path(self.path, key)) if byte_range is None: starts.append(None) stops.append(None) elif isinstance(byte_range, RangeByteRequest): starts.append(byte_range.start) stops.append(byte_range.end) elif isinstance(byte_range, OffsetByteRequest): starts.append(byte_range.offset) stops.append(None) elif isinstance(byte_range, SuffixByteRequest): starts.append(-byte_range.suffix) stops.append(None) else: raise ValueError(f"Unexpected byte_range, got {byte_range}.") else: return [] # TODO: expectations for exceptions or missing keys? res = await self.fs._cat_ranges(paths, starts, stops, on_error="return") # the following is an s3-specific condition we probably don't want to leak res = [b"" if (isinstance(r, OSError) and "not satisfiable" in str(r)) else r for r in res] for r in res: if isinstance(r, Exception) and not isinstance(r, self.allowed_exceptions): raise r return [None if isinstance(r, Exception) else prototype.buffer.from_bytes(r) for r in res] async def list(self) -> AsyncIterator[str]: # docstring inherited allfiles = await self.fs._find(self.path, detail=False, withdirs=False) for onefile in (a.removeprefix(self.path + "/") for a in allfiles): yield onefile async def list_dir(self, prefix: str) -> AsyncIterator[str]: # docstring inherited prefix = f"{self.path}/{prefix.rstrip('/')}" try: allfiles = await self.fs._ls(prefix, detail=False) except FileNotFoundError: return for onefile in (a.replace(prefix + "/", "") for a in allfiles): yield onefile.removeprefix(self.path).removeprefix("/") async def list_prefix(self, prefix: str) -> AsyncIterator[str]: # docstring inherited for onefile in await self.fs._find( f"{self.path}/{prefix}", detail=False, maxdepth=None, withdirs=False ): yield onefile.removeprefix(f"{self.path}/") async def getsize(self, key: str) -> int: path = _dereference_path(self.path, key) info = await self.fs._info(path) size = info.get("size") if size is None: # Not all filesystems support size. Fall back to reading the entire object return await super().getsize(key) else: # fsspec doesn't have typing. 
We'll need to assume or verify this is true return int(size) zarr-python-3.1.5/src/zarr/storage/_local.py000066400000000000000000000241521511007055700210360ustar00rootroot00000000000000from __future__ import annotations import asyncio import contextlib import io import os import shutil import sys import uuid from pathlib import Path from typing import TYPE_CHECKING, BinaryIO, Literal, Self from zarr.abc.store import ( ByteRequest, OffsetByteRequest, RangeByteRequest, Store, SuffixByteRequest, ) from zarr.core.buffer import Buffer from zarr.core.buffer.core import default_buffer_prototype from zarr.core.common import AccessModeLiteral, concurrent_map if TYPE_CHECKING: from collections.abc import AsyncIterator, Iterable, Iterator from zarr.core.buffer import BufferPrototype def _get(path: Path, prototype: BufferPrototype, byte_range: ByteRequest | None) -> Buffer: if byte_range is None: return prototype.buffer.from_bytes(path.read_bytes()) with path.open("rb") as f: size = f.seek(0, io.SEEK_END) if isinstance(byte_range, RangeByteRequest): f.seek(byte_range.start) return prototype.buffer.from_bytes(f.read(byte_range.end - f.tell())) elif isinstance(byte_range, OffsetByteRequest): f.seek(byte_range.offset) elif isinstance(byte_range, SuffixByteRequest): f.seek(max(0, size - byte_range.suffix)) else: raise TypeError(f"Unexpected byte_range, got {byte_range}.") return prototype.buffer.from_bytes(f.read()) if sys.platform == "win32": # Per the os.rename docs: # On Windows, if dst exists a FileExistsError is always raised. _safe_move = os.rename else: # On Unix, os.rename silently replace files, so instead we use os.link like # atomicwrites: # https://github.com/untitaker/python-atomicwrites/blob/1.4.1/atomicwrites/__init__.py#L59-L60 # This also raises FileExistsError if dst exists. def _safe_move(src: Path, dst: Path) -> None: os.link(src, dst) os.unlink(src) @contextlib.contextmanager def _atomic_write( path: Path, mode: Literal["r+b", "wb"], exclusive: bool = False, ) -> Iterator[BinaryIO]: tmp_path = path.with_suffix(f".{uuid.uuid4().hex}.partial") try: with tmp_path.open(mode) as f: yield f if exclusive: _safe_move(tmp_path, path) else: tmp_path.replace(path) except Exception: tmp_path.unlink(missing_ok=True) raise def _put(path: Path, value: Buffer, exclusive: bool = False) -> int: path.parent.mkdir(parents=True, exist_ok=True) # write takes any object supporting the buffer protocol view = value.as_buffer_like() with _atomic_write(path, "wb", exclusive=exclusive) as f: return f.write(view) class LocalStore(Store): """ Store for the local file system. Parameters ---------- root : str or Path Directory to use as root of store. read_only : bool Whether the store is read-only Attributes ---------- supports_writes supports_deletes supports_listing root """ supports_writes: bool = True supports_deletes: bool = True supports_listing: bool = True root: Path def __init__(self, root: Path | str, *, read_only: bool = False) -> None: super().__init__(read_only=read_only) if isinstance(root, str): root = Path(root) if not isinstance(root, Path): raise TypeError( f"'root' must be a string or Path instance. Got an instance of {type(root)} instead." ) self.root = root def with_read_only(self, read_only: bool = False) -> Self: # docstring inherited return type(self)( root=self.root, read_only=read_only, ) @classmethod async def open( cls, root: Path | str, *, read_only: bool = False, mode: AccessModeLiteral | None = None ) -> Self: """ Create and open the store. 
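        Opening a store that is not read-only also creates the root directory if it
        does not already exist. A minimal sketch (the path below is hypothetical):

        ```python
        store = await LocalStore.open("data/example.zarr", mode="w")
        ```
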
Parameters ---------- root : str or Path Directory to use as root of store. read_only : bool Whether the store is read-only mode : Mode in which to create the store. This only affects opening the store, and the final read-only state of the store is controlled through the read_only parameter. Returns ------- Store The opened store instance. """ # If mode = 'r+', want to open in read only mode (fail if exists), # but return a writeable store if mode is not None: read_only_creation = mode in ["r", "r+"] else: read_only_creation = read_only store = cls(root, read_only=read_only_creation) await store._open() # Set read_only state store = store.with_read_only(read_only) await store._open() return store async def _open(self, *, mode: AccessModeLiteral | None = None) -> None: if not self.read_only: self.root.mkdir(parents=True, exist_ok=True) if not self.root.exists(): raise FileNotFoundError(f"{self.root} does not exist") return await super()._open() async def clear(self) -> None: # docstring inherited self._check_writable() shutil.rmtree(self.root) self.root.mkdir() def __str__(self) -> str: return f"file://{self.root.as_posix()}" def __repr__(self) -> str: return f"LocalStore('{self}')" def __eq__(self, other: object) -> bool: return isinstance(other, type(self)) and self.root == other.root async def get( self, key: str, prototype: BufferPrototype | None = None, byte_range: ByteRequest | None = None, ) -> Buffer | None: # docstring inherited if prototype is None: prototype = default_buffer_prototype() if not self._is_open: await self._open() assert isinstance(key, str) path = self.root / key try: return await asyncio.to_thread(_get, path, prototype, byte_range) except (FileNotFoundError, IsADirectoryError, NotADirectoryError): return None async def get_partial_values( self, prototype: BufferPrototype, key_ranges: Iterable[tuple[str, ByteRequest | None]], ) -> list[Buffer | None]: # docstring inherited args = [] for key, byte_range in key_ranges: assert isinstance(key, str) path = self.root / key args.append((_get, path, prototype, byte_range)) return await concurrent_map(args, asyncio.to_thread, limit=None) # TODO: fix limit async def set(self, key: str, value: Buffer) -> None: # docstring inherited return await self._set(key, value) async def set_if_not_exists(self, key: str, value: Buffer) -> None: # docstring inherited try: return await self._set(key, value, exclusive=True) except FileExistsError: pass async def _set(self, key: str, value: Buffer, exclusive: bool = False) -> None: if not self._is_open: await self._open() self._check_writable() assert isinstance(key, str) if not isinstance(value, Buffer): raise TypeError( f"LocalStore.set(): `value` must be a Buffer instance. Got an instance of {type(value)} instead." ) path = self.root / key await asyncio.to_thread(_put, path, value, exclusive=exclusive) async def delete(self, key: str) -> None: """ Remove a key from the store. Parameters ---------- key : str Notes ----- If ``key`` is a directory within this store, the entire directory at ``store.root / key`` is deleted. """ # docstring inherited self._check_writable() path = self.root / key if path.is_dir(): # TODO: support deleting directories? shutil.rmtree? 
shutil.rmtree(path) else: await asyncio.to_thread(path.unlink, True) # Q: we may want to raise if path is missing async def delete_dir(self, prefix: str) -> None: # docstring inherited self._check_writable() path = self.root / prefix if path.is_dir(): shutil.rmtree(path) elif path.is_file(): raise ValueError(f"delete_dir was passed a {prefix=!r} that is a file") else: # Non-existent directory # This path is tested by test_group:test_create_creates_parents for one pass async def exists(self, key: str) -> bool: # docstring inherited path = self.root / key return await asyncio.to_thread(path.is_file) async def list(self) -> AsyncIterator[str]: # docstring inherited to_strip = self.root.as_posix() + "/" for p in list(self.root.rglob("*")): if p.is_file(): yield p.as_posix().replace(to_strip, "") async def list_prefix(self, prefix: str) -> AsyncIterator[str]: # docstring inherited to_strip = self.root.as_posix() + "/" prefix = prefix.rstrip("/") for p in (self.root / prefix).rglob("*"): if p.is_file(): yield p.as_posix().replace(to_strip, "") async def list_dir(self, prefix: str) -> AsyncIterator[str]: # docstring inherited base = self.root / prefix try: key_iter = base.iterdir() for key in key_iter: yield key.relative_to(base).as_posix() except (FileNotFoundError, NotADirectoryError): pass async def move(self, dest_root: Path | str) -> None: """ Move the store to another path. The old root directory is deleted. """ if isinstance(dest_root, str): dest_root = Path(dest_root) os.makedirs(dest_root.parent, exist_ok=True) if os.path.exists(dest_root): raise FileExistsError(f"Destination root {dest_root} already exists.") shutil.move(self.root, dest_root) self.root = dest_root async def getsize(self, key: str) -> int: return os.path.getsize(self.root / key) zarr-python-3.1.5/src/zarr/storage/_logging.py000066400000000000000000000163471511007055700214010ustar00rootroot00000000000000from __future__ import annotations import inspect import logging import sys import time from collections import defaultdict from contextlib import contextmanager from typing import TYPE_CHECKING, Any, Self, TypeVar from zarr.abc.store import Store from zarr.storage._wrapper import WrapperStore if TYPE_CHECKING: from collections.abc import AsyncGenerator, Generator, Iterable from zarr.abc.store import ByteRequest from zarr.core.buffer import Buffer, BufferPrototype counter: defaultdict[str, int] T_Store = TypeVar("T_Store", bound=Store) class LoggingStore(WrapperStore[T_Store]): """ Store that logs all calls to another wrapped store. 
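    Every call is forwarded to the wrapped store and logged, together with the time
    each call took. A minimal sketch (wrapping an in-memory store purely for
    illustration):

    ```python
    from zarr.storage import LoggingStore, MemoryStore

    store = LoggingStore(MemoryStore(), log_level="INFO")
    ```
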
Parameters ---------- store : Store Store to wrap log_level : str Log level log_handler : logging.Handler Log handler Attributes ---------- counter : dict Counter of number of times each method has been called """ counter: defaultdict[str, int] def __init__( self, store: T_Store, log_level: str = "DEBUG", log_handler: logging.Handler | None = None, ) -> None: super().__init__(store) self.counter = defaultdict(int) self.log_level = log_level self.log_handler = log_handler self._configure_logger(log_level, log_handler) def _configure_logger( self, log_level: str = "DEBUG", log_handler: logging.Handler | None = None ) -> None: self.log_level = log_level self.logger = logging.getLogger(f"LoggingStore({self._store})") self.logger.setLevel(log_level) if not self.logger.hasHandlers(): if not log_handler: log_handler = self._default_handler() # Add handler to logger self.logger.addHandler(log_handler) def _default_handler(self) -> logging.Handler: """Define a default log handler""" handler = logging.StreamHandler(stream=sys.stdout) handler.setLevel(self.log_level) handler.setFormatter( logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") ) return handler @contextmanager def log(self, hint: Any = "") -> Generator[None, None, None]: """Context manager to log method calls Each call to the wrapped store is logged to the configured logger and added to the counter dict. """ method = inspect.stack()[2].function op = f"{type(self._store).__name__}.{method}" if hint: op = f"{op}({hint})" self.logger.info(" Calling %s", op) start_time = time.time() try: self.counter[method] += 1 yield finally: end_time = time.time() self.logger.info("Finished %s [%.2f s]", op, end_time - start_time) @classmethod async def open(cls: type[Self], store_cls: type[T_Store], *args: Any, **kwargs: Any) -> Self: log_level = kwargs.pop("log_level", "DEBUG") log_handler = kwargs.pop("log_handler", None) store = store_cls(*args, **kwargs) await store._open() return cls(store=store, log_level=log_level, log_handler=log_handler) @property def supports_writes(self) -> bool: with self.log(): return self._store.supports_writes @property def supports_deletes(self) -> bool: with self.log(): return self._store.supports_deletes @property def supports_listing(self) -> bool: with self.log(): return self._store.supports_listing @property def read_only(self) -> bool: with self.log(): return self._store.read_only @property def _is_open(self) -> bool: with self.log(): return self._store._is_open @_is_open.setter def _is_open(self, value: bool) -> None: raise NotImplementedError("LoggingStore must be opened via the `_open` method") async def _open(self) -> None: with self.log(): return await self._store._open() async def _ensure_open(self) -> None: with self.log(): return await self._store._ensure_open() async def is_empty(self, prefix: str = "") -> bool: # docstring inherited with self.log(): return await self._store.is_empty(prefix=prefix) async def clear(self) -> None: # docstring inherited with self.log(): return await self._store.clear() def __str__(self) -> str: return f"logging-{self._store}" def __repr__(self) -> str: return f"LoggingStore({self._store.__class__.__name__}, '{self._store}')" def __eq__(self, other: object) -> bool: with self.log(other): return type(self) is type(other) and self._store.__eq__(other._store) # type: ignore[attr-defined] async def get( self, key: str, prototype: BufferPrototype, byte_range: ByteRequest | None = None, ) -> Buffer | None: # docstring inherited with self.log(key): return await 
self._store.get(key=key, prototype=prototype, byte_range=byte_range) async def get_partial_values( self, prototype: BufferPrototype, key_ranges: Iterable[tuple[str, ByteRequest | None]], ) -> list[Buffer | None]: # docstring inherited keys = ",".join([k[0] for k in key_ranges]) with self.log(keys): return await self._store.get_partial_values(prototype=prototype, key_ranges=key_ranges) async def exists(self, key: str) -> bool: # docstring inherited with self.log(key): return await self._store.exists(key) async def set(self, key: str, value: Buffer) -> None: # docstring inherited with self.log(key): return await self._store.set(key=key, value=value) async def set_if_not_exists(self, key: str, value: Buffer) -> None: # docstring inherited with self.log(key): return await self._store.set_if_not_exists(key=key, value=value) async def delete(self, key: str) -> None: # docstring inherited with self.log(key): return await self._store.delete(key=key) async def list(self) -> AsyncGenerator[str, None]: # docstring inherited with self.log(): async for key in self._store.list(): yield key async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: # docstring inherited with self.log(prefix): async for key in self._store.list_prefix(prefix=prefix): yield key async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: # docstring inherited with self.log(prefix): async for key in self._store.list_dir(prefix=prefix): yield key async def delete_dir(self, prefix: str) -> None: # docstring inherited with self.log(prefix): await self._store.delete_dir(prefix=prefix) async def getsize(self, key: str) -> int: with self.log(key): return await self._store.getsize(key) async def getsize_prefix(self, prefix: str) -> int: with self.log(prefix): return await self._store.getsize_prefix(prefix) zarr-python-3.1.5/src/zarr/storage/_memory.py000066400000000000000000000173161511007055700212600ustar00rootroot00000000000000from __future__ import annotations from logging import getLogger from typing import TYPE_CHECKING, Self from zarr.abc.store import ByteRequest, Store from zarr.core.buffer import Buffer, gpu from zarr.core.buffer.core import default_buffer_prototype from zarr.core.common import concurrent_map from zarr.storage._utils import _normalize_byte_range_index if TYPE_CHECKING: from collections.abc import AsyncIterator, Iterable, MutableMapping from zarr.core.buffer import BufferPrototype logger = getLogger(__name__) class MemoryStore(Store): """ Store for local memory. 
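    Keys are held in a plain in-memory mapping of strings to Buffers, so nothing is
    persisted beyond the lifetime of the process. A minimal sketch (the group created
    below is purely illustrative):

    ```python
    import zarr
    from zarr.storage import MemoryStore

    store = MemoryStore()
    root = zarr.group(store=store)
    ```
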
Parameters ---------- store_dict : dict Initial data read_only : bool Whether the store is read-only Attributes ---------- supports_writes supports_deletes supports_listing """ supports_writes: bool = True supports_deletes: bool = True supports_listing: bool = True _store_dict: MutableMapping[str, Buffer] def __init__( self, store_dict: MutableMapping[str, Buffer] | None = None, *, read_only: bool = False, ) -> None: super().__init__(read_only=read_only) if store_dict is None: store_dict = {} self._store_dict = store_dict def with_read_only(self, read_only: bool = False) -> MemoryStore: # docstring inherited return type(self)( store_dict=self._store_dict, read_only=read_only, ) async def clear(self) -> None: # docstring inherited self._store_dict.clear() def __str__(self) -> str: return f"memory://{id(self._store_dict)}" def __repr__(self) -> str: return f"MemoryStore('{self}')" def __eq__(self, other: object) -> bool: return ( isinstance(other, type(self)) and self._store_dict == other._store_dict and self.read_only == other.read_only ) async def get( self, key: str, prototype: BufferPrototype | None = None, byte_range: ByteRequest | None = None, ) -> Buffer | None: # docstring inherited if prototype is None: prototype = default_buffer_prototype() if not self._is_open: await self._open() assert isinstance(key, str) try: value = self._store_dict[key] start, stop = _normalize_byte_range_index(value, byte_range) return prototype.buffer.from_buffer(value[start:stop]) except KeyError: return None async def get_partial_values( self, prototype: BufferPrototype, key_ranges: Iterable[tuple[str, ByteRequest | None]], ) -> list[Buffer | None]: # docstring inherited # All the key-ranges arguments goes with the same prototype async def _get(key: str, byte_range: ByteRequest | None) -> Buffer | None: return await self.get(key, prototype=prototype, byte_range=byte_range) return await concurrent_map(key_ranges, _get, limit=None) async def exists(self, key: str) -> bool: # docstring inherited return key in self._store_dict async def set(self, key: str, value: Buffer, byte_range: tuple[int, int] | None = None) -> None: # docstring inherited self._check_writable() await self._ensure_open() assert isinstance(key, str) if not isinstance(value, Buffer): raise TypeError( f"MemoryStore.set(): `value` must be a Buffer instance. Got an instance of {type(value)} instead." ) if byte_range is not None: buf = self._store_dict[key] buf[byte_range[0] : byte_range[1]] = value self._store_dict[key] = buf else: self._store_dict[key] = value async def set_if_not_exists(self, key: str, value: Buffer) -> None: # docstring inherited self._check_writable() await self._ensure_open() self._store_dict.setdefault(key, value) async def delete(self, key: str) -> None: # docstring inherited self._check_writable() try: del self._store_dict[key] except KeyError: logger.debug("Key %s does not exist.", key) async def list(self) -> AsyncIterator[str]: # docstring inherited for key in self._store_dict: yield key async def list_prefix(self, prefix: str) -> AsyncIterator[str]: # docstring inherited # note: we materialize all dict keys into a list here so we can mutate the dict in-place (e.g. 
in delete_prefix) for key in list(self._store_dict): if key.startswith(prefix): yield key async def list_dir(self, prefix: str) -> AsyncIterator[str]: # docstring inherited prefix = prefix.rstrip("/") if prefix == "": keys_unique = {k.split("/")[0] for k in self._store_dict} else: # Our dictionary doesn't contain directory markers, but we want to include # a pseudo directory when there's a nested item and we're listing an # intermediate level. keys_unique = { key.removeprefix(prefix + "/").split("/")[0] for key in self._store_dict if key.startswith(prefix + "/") and key != prefix } for key in keys_unique: yield key class GpuMemoryStore(MemoryStore): """ Store for GPU memory. Stores every chunk in GPU memory irrespective of the original location. The dictionary of buffers to initialize this memory store with *must* be GPU Buffers. Writing data to this store through ``.set`` will move the buffer to the GPU if necessary. Parameters ---------- store_dict : MutableMapping, optional A mutable mapping with string keys and [zarr.core.buffer.gpu.Buffer][] values. read_only : bool Whether to open the store in read-only mode. """ _store_dict: MutableMapping[str, gpu.Buffer] # type: ignore[assignment] def __init__( self, store_dict: MutableMapping[str, gpu.Buffer] | None = None, *, read_only: bool = False, ) -> None: super().__init__(store_dict=store_dict, read_only=read_only) # type: ignore[arg-type] def __str__(self) -> str: return f"gpumemory://{id(self._store_dict)}" def __repr__(self) -> str: return f"GpuMemoryStore('{self}')" @classmethod def from_dict(cls, store_dict: MutableMapping[str, Buffer]) -> Self: """ Create a GpuMemoryStore from a dictionary of buffers at any location. The dictionary backing the newly created ``GpuMemoryStore`` will not be the same as ``store_dict``. Parameters ---------- store_dict : mapping A mapping of strings keys to arbitrary Buffers. The buffer data will be moved into a [`gpu.Buffer`][zarr.core.buffer.gpu.Buffer]. Returns ------- GpuMemoryStore """ gpu_store_dict = {k: gpu.Buffer.from_buffer(v) for k, v in store_dict.items()} return cls(gpu_store_dict) async def set(self, key: str, value: Buffer, byte_range: tuple[int, int] | None = None) -> None: # docstring inherited self._check_writable() assert isinstance(key, str) if not isinstance(value, Buffer): raise TypeError( f"GpuMemoryStore.set(): `value` must be a Buffer instance. Got an instance of {type(value)} instead." ) # Convert to gpu.Buffer gpu_value = value if isinstance(value, gpu.Buffer) else gpu.Buffer.from_buffer(value) await super().set(key, gpu_value, byte_range=byte_range) zarr-python-3.1.5/src/zarr/storage/_obstore.py000066400000000000000000000420251511007055700214200ustar00rootroot00000000000000from __future__ import annotations import asyncio import contextlib import pickle from collections import defaultdict from typing import TYPE_CHECKING, Generic, Self, TypedDict, TypeVar from zarr.abc.store import ( ByteRequest, OffsetByteRequest, RangeByteRequest, Store, SuffixByteRequest, ) from zarr.core.common import concurrent_map from zarr.core.config import config if TYPE_CHECKING: from collections.abc import AsyncGenerator, Coroutine, Iterable, Sequence from typing import Any from obstore import ListResult, ListStream, ObjectMeta, OffsetRange, SuffixRange from obstore.store import ObjectStore as _UpstreamObjectStore from zarr.core.buffer import Buffer, BufferPrototype __all__ = ["ObjectStore"] _ALLOWED_EXCEPTIONS: tuple[type[Exception], ...] 
= ( FileNotFoundError, IsADirectoryError, NotADirectoryError, ) T_Store = TypeVar("T_Store", bound="_UpstreamObjectStore") class ObjectStore(Store, Generic[T_Store]): """ Store that uses obstore for fast read/write from AWS, GCP, Azure. Parameters ---------- store : obstore.store.ObjectStore An obstore store instance that is set up with the proper credentials. read_only : bool Whether to open the store in read-only mode. Warnings -------- ObjectStore is experimental and subject to API changes without notice. Please raise an issue with any comments/concerns about the store. """ store: T_Store """The underlying obstore instance.""" def __eq__(self, value: object) -> bool: if not isinstance(value, ObjectStore): return False if not self.read_only == value.read_only: return False return self.store == value.store # type: ignore[no-any-return] def __init__(self, store: T_Store, *, read_only: bool = False) -> None: if not store.__class__.__module__.startswith("obstore"): raise TypeError(f"expected ObjectStore class, got {store!r}") super().__init__(read_only=read_only) self.store = store def with_read_only(self, read_only: bool = False) -> Self: # docstring inherited return type(self)( store=self.store, read_only=read_only, ) def __str__(self) -> str: return f"object_store://{self.store}" def __repr__(self) -> str: return f"{type(self).__name__}({self})" def __getstate__(self) -> dict[Any, Any]: state = self.__dict__.copy() state["store"] = pickle.dumps(self.store) return state def __setstate__(self, state: dict[Any, Any]) -> None: state["store"] = pickle.loads(state["store"]) self.__dict__.update(state) async def get( self, key: str, prototype: BufferPrototype, byte_range: ByteRequest | None = None ) -> Buffer | None: # docstring inherited import obstore as obs try: if byte_range is None: resp = await obs.get_async(self.store, key) return prototype.buffer.from_bytes(await resp.bytes_async()) # type: ignore[arg-type] elif isinstance(byte_range, RangeByteRequest): bytes = await obs.get_range_async( self.store, key, start=byte_range.start, end=byte_range.end ) return prototype.buffer.from_bytes(bytes) # type: ignore[arg-type] elif isinstance(byte_range, OffsetByteRequest): resp = await obs.get_async( self.store, key, options={"range": {"offset": byte_range.offset}} ) return prototype.buffer.from_bytes(await resp.bytes_async()) # type: ignore[arg-type] elif isinstance(byte_range, SuffixByteRequest): # some object stores (Azure) don't support suffix requests. In this # case, our workaround is to first get the length of the object and then # manually request the byte range at the end. 
try: resp = await obs.get_async( self.store, key, options={"range": {"suffix": byte_range.suffix}} ) return prototype.buffer.from_bytes(await resp.bytes_async()) # type: ignore[arg-type] except obs.exceptions.NotSupportedError: head_resp = await obs.head_async(self.store, key) file_size = head_resp["size"] suffix_len = byte_range.suffix buffer = await obs.get_range_async( self.store, key, start=file_size - suffix_len, length=suffix_len, ) return prototype.buffer.from_bytes(buffer) # type: ignore[arg-type] else: raise ValueError(f"Unexpected byte_range, got {byte_range}") except _ALLOWED_EXCEPTIONS: return None async def get_partial_values( self, prototype: BufferPrototype, key_ranges: Iterable[tuple[str, ByteRequest | None]], ) -> list[Buffer | None]: # docstring inherited return await _get_partial_values(self.store, prototype=prototype, key_ranges=key_ranges) async def exists(self, key: str) -> bool: # docstring inherited import obstore as obs try: await obs.head_async(self.store, key) except FileNotFoundError: return False else: return True @property def supports_writes(self) -> bool: # docstring inherited return True async def set(self, key: str, value: Buffer) -> None: # docstring inherited import obstore as obs self._check_writable() buf = value.as_buffer_like() await obs.put_async(self.store, key, buf) async def set_if_not_exists(self, key: str, value: Buffer) -> None: # docstring inherited import obstore as obs self._check_writable() buf = value.as_buffer_like() with contextlib.suppress(obs.exceptions.AlreadyExistsError): await obs.put_async(self.store, key, buf, mode="create") @property def supports_deletes(self) -> bool: # docstring inherited return True async def delete(self, key: str) -> None: # docstring inherited import obstore as obs self._check_writable() # Some obstore stores such as local filesystems, GCP and Azure raise an error # when deleting a non-existent key, while others such as S3 and in-memory do # not. We suppress the error to make the behavior consistent across all obstore # stores. This is also in line with the behavior of the other Zarr store adapters. 
with contextlib.suppress(FileNotFoundError): await obs.delete_async(self.store, key) async def delete_dir(self, prefix: str) -> None: # docstring inherited import obstore as obs self._check_writable() if prefix != "" and not prefix.endswith("/"): prefix += "/" metas = await obs.list(self.store, prefix).collect_async() keys = [(m["path"],) for m in metas] await concurrent_map(keys, self.delete, limit=config.get("async.concurrency")) @property def supports_listing(self) -> bool: # docstring inherited return True async def _list(self, prefix: str | None = None) -> AsyncGenerator[ObjectMeta, None]: import obstore as obs objects: ListStream[Sequence[ObjectMeta]] = obs.list(self.store, prefix=prefix) async for batch in objects: for item in batch: yield item def list(self) -> AsyncGenerator[str, None]: # docstring inherited return (obj["path"] async for obj in self._list()) def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: # docstring inherited return (obj["path"] async for obj in self._list(prefix)) def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: # docstring inherited import obstore as obs coroutine = obs.list_with_delimiter_async(self.store, prefix=prefix) return _transform_list_dir(coroutine, prefix) async def getsize(self, key: str) -> int: # docstring inherited import obstore as obs resp = await obs.head_async(self.store, key) return resp["size"] async def getsize_prefix(self, prefix: str) -> int: # docstring inherited sizes = [obj["size"] async for obj in self._list(prefix=prefix)] return sum(sizes) async def _transform_list_dir( list_result_coroutine: Coroutine[Any, Any, ListResult[Sequence[ObjectMeta]]], prefix: str ) -> AsyncGenerator[str, None]: """ Transform the result of list_with_delimiter into an async generator of paths. """ list_result = await list_result_coroutine # We assume that the underlying object-store implementation correctly handles the # prefix, so we don't double-check that the returned results actually start with the # given prefix. prefixes = [obj.lstrip(prefix).lstrip("/") for obj in list_result["common_prefixes"]] objects = [obj["path"].removeprefix(prefix).lstrip("/") for obj in list_result["objects"]] for item in prefixes + objects: yield item class _BoundedRequest(TypedDict): """Range request with a known start and end byte. These requests can be multiplexed natively on the Rust side with `obstore.get_ranges_async`. """ original_request_index: int """The positional index in the original key_ranges input""" start: int """Start byte offset.""" end: int """End byte offset.""" class _OtherRequest(TypedDict): """Offset or suffix range requests. These requests cannot be concurrent on the Rust side, and each need their own call to `obstore.get_async`, passing in the `range` parameter. """ original_request_index: int """The positional index in the original key_ranges input""" path: str """The path to request from.""" range: OffsetRange | None # Note: suffix requests are handled separately because some object stores (Azure) # don't support them """The range request type.""" class _SuffixRequest(TypedDict): """Offset or suffix range requests. These requests cannot be concurrent on the Rust side, and each need their own call to `obstore.get_async`, passing in the `range` parameter. 
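    For illustration, a request for the final 100 bytes of an object stored under the
    (hypothetical) key "c/0" would be represented as:

    ```python
    {"original_request_index": 0, "path": "c/0", "range": {"suffix": 100}}
    ```
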
""" original_request_index: int """The positional index in the original key_ranges input""" path: str """The path to request from.""" range: SuffixRange """The suffix range.""" class _Response(TypedDict): """A response buffer associated with the original index that it should be restored to.""" original_request_index: int """The positional index in the original key_ranges input""" buffer: Buffer """The buffer returned from obstore's range request.""" async def _make_bounded_requests( store: _UpstreamObjectStore, path: str, requests: list[_BoundedRequest], prototype: BufferPrototype, semaphore: asyncio.Semaphore, ) -> list[_Response]: """Make all bounded requests for a specific file. `obstore.get_ranges_async` allows for making concurrent requests for multiple ranges within a single file, and will e.g. merge concurrent requests. This only uses one single Python coroutine. """ import obstore as obs starts = [r["start"] for r in requests] ends = [r["end"] for r in requests] async with semaphore: responses = await obs.get_ranges_async(store, path=path, starts=starts, ends=ends) buffer_responses: list[_Response] = [] for request, response in zip(requests, responses, strict=True): buffer_responses.append( { "original_request_index": request["original_request_index"], "buffer": prototype.buffer.from_bytes(response), # type: ignore[arg-type] } ) return buffer_responses async def _make_other_request( store: _UpstreamObjectStore, request: _OtherRequest, prototype: BufferPrototype, semaphore: asyncio.Semaphore, ) -> list[_Response]: """Make offset or full-file requests. We return a `list[_Response]` for symmetry with `_make_bounded_requests` so that all futures can be gathered together. """ import obstore as obs async with semaphore: if request["range"] is None: resp = await obs.get_async(store, request["path"]) else: resp = await obs.get_async(store, request["path"], options={"range": request["range"]}) buffer = await resp.bytes_async() return [ { "original_request_index": request["original_request_index"], "buffer": prototype.buffer.from_bytes(buffer), # type: ignore[arg-type] } ] async def _make_suffix_request( store: _UpstreamObjectStore, request: _SuffixRequest, prototype: BufferPrototype, semaphore: asyncio.Semaphore, ) -> list[_Response]: """Make suffix requests. This is separated out from `_make_other_request` because some object stores (Azure) don't support suffix requests. In this case, our workaround is to first get the length of the object and then manually request the byte range at the end. We return a `list[_Response]` for symmetry with `_make_bounded_requests` so that all futures can be gathered together. """ import obstore as obs async with semaphore: try: resp = await obs.get_async(store, request["path"], options={"range": request["range"]}) buffer = await resp.bytes_async() except obs.exceptions.NotSupportedError: head_resp = await obs.head_async(store, request["path"]) file_size = head_resp["size"] suffix_len = request["range"]["suffix"] buffer = await obs.get_range_async( store, request["path"], start=file_size - suffix_len, length=suffix_len, ) return [ { "original_request_index": request["original_request_index"], "buffer": prototype.buffer.from_bytes(buffer), # type: ignore[arg-type] } ] async def _get_partial_values( store: _UpstreamObjectStore, prototype: BufferPrototype, key_ranges: Iterable[tuple[str, ByteRequest | None]], ) -> list[Buffer | None]: """Make multiple range requests. 
ObjectStore has a `get_ranges` method that will additionally merge nearby ranges, but it's _per_ file. So we need to split these key_ranges into **per-file** key ranges, and then reassemble the results in the original order. We separate into different requests: - One call to `obstore.get_ranges_async` **per target file** - One call to `obstore.get_async` for each other request. """ key_ranges = list(key_ranges) per_file_bounded_requests: dict[str, list[_BoundedRequest]] = defaultdict(list) other_requests: list[_OtherRequest] = [] suffix_requests: list[_SuffixRequest] = [] for idx, (path, byte_range) in enumerate(key_ranges): if byte_range is None: other_requests.append( { "original_request_index": idx, "path": path, "range": None, } ) elif isinstance(byte_range, RangeByteRequest): per_file_bounded_requests[path].append( {"original_request_index": idx, "start": byte_range.start, "end": byte_range.end} ) elif isinstance(byte_range, OffsetByteRequest): other_requests.append( { "original_request_index": idx, "path": path, "range": {"offset": byte_range.offset}, } ) elif isinstance(byte_range, SuffixByteRequest): suffix_requests.append( { "original_request_index": idx, "path": path, "range": {"suffix": byte_range.suffix}, } ) else: raise ValueError(f"Unsupported range input: {byte_range}") semaphore = asyncio.Semaphore(config.get("async.concurrency")) futs: list[Coroutine[Any, Any, list[_Response]]] = [] for path, bounded_ranges in per_file_bounded_requests.items(): futs.append( _make_bounded_requests(store, path, bounded_ranges, prototype, semaphore=semaphore) ) for request in other_requests: futs.append(_make_other_request(store, request, prototype, semaphore=semaphore)) # noqa: PERF401 for suffix_request in suffix_requests: futs.append(_make_suffix_request(store, suffix_request, prototype, semaphore=semaphore)) # noqa: PERF401 buffers: list[Buffer | None] = [None] * len(key_ranges) for responses in await asyncio.gather(*futs): for resp in responses: buffers[resp["original_request_index"]] = resp["buffer"] return buffers zarr-python-3.1.5/src/zarr/storage/_utils.py000066400000000000000000000115521511007055700211040ustar00rootroot00000000000000from __future__ import annotations import re from pathlib import Path from typing import TYPE_CHECKING, TypeVar from zarr.abc.store import OffsetByteRequest, RangeByteRequest, SuffixByteRequest if TYPE_CHECKING: from collections.abc import Iterable, Mapping from zarr.abc.store import ByteRequest from zarr.core.buffer import Buffer def normalize_path(path: str | bytes | Path | None) -> str: if path is None: result = "" elif isinstance(path, bytes): result = str(path, "ascii") # handle pathlib.Path elif isinstance(path, Path): result = str(path) elif isinstance(path, str): result = path else: raise TypeError(f'Object {path} has an invalid type for "path": {type(path).__name__}') # convert backslash to forward slash result = result.replace("\\", "/") # remove leading and trailing slashes result = result.strip("/") # collapse any repeated slashes pat = re.compile(r"//+") result = pat.sub("/", result) # disallow path segments with just '.' or '..' segments = result.split("/") if any(s in {".", ".."} for s in segments): raise ValueError( f"The path {path!r} is invalid because its string representation contains '.' or '..' segments." 
) return result def _normalize_byte_range_index(data: Buffer, byte_range: ByteRequest | None) -> tuple[int, int]: """ Convert an ByteRequest into an explicit start and stop """ if byte_range is None: start = 0 stop = len(data) + 1 elif isinstance(byte_range, RangeByteRequest): start = byte_range.start stop = byte_range.end elif isinstance(byte_range, OffsetByteRequest): start = byte_range.offset stop = len(data) + 1 elif isinstance(byte_range, SuffixByteRequest): start = len(data) - byte_range.suffix stop = len(data) + 1 else: raise ValueError(f"Unexpected byte_range, got {byte_range}.") return (start, stop) def _join_paths(paths: Iterable[str]) -> str: """ Filter out instances of '' and join the remaining strings with '/'. Parameters ---------- paths : Iterable[str] Returns ------- str Examples -------- ```python from zarr.storage._utils import _join_paths _join_paths(["", "a", "b"]) # 'a/b' _join_paths(["a", "b", "c"]) # 'a/b/c' ``` """ return "/".join(filter(lambda v: v != "", paths)) def _relativize_path(*, path: str, prefix: str) -> str: """ Make a "/"-delimited path relative to some prefix. If the prefix is '', then the path is returned as-is. Otherwise, the prefix is removed from the path as well as the separator string "/". If ``prefix`` is not the empty string and ``path`` does not start with ``prefix`` followed by a "/" character, then an error is raised. This function assumes that the prefix does not end with "/". Parameters ---------- path : str The path to make relative to the prefix. prefix : str The prefix to make the path relative to. Returns ------- str Examples -------- ```python from zarr.storage._utils import _relativize_path _relativize_path(path="a/b", prefix="") # 'a/b' _relativize_path(path="a/b/c", prefix="a/b") # 'c' ``` """ if prefix == "": return path else: _prefix = prefix + "/" if not path.startswith(_prefix): raise ValueError(f"The first component of {path} does not start with {prefix}.") return path.removeprefix(f"{prefix}/") def _normalize_paths(paths: Iterable[str]) -> tuple[str, ...]: """ Normalize the input paths according to the normalization scheme used for zarr node paths. If any two paths normalize to the same value, raise a ValueError. """ path_map: dict[str, str] = {} for path in paths: parsed = normalize_path(path) if parsed in path_map: msg = ( f"After normalization, the value '{path}' collides with '{path_map[parsed]}'. " f"Both '{path}' and '{path_map[parsed]}' normalize to the same value: '{parsed}'. " f"You should use either '{path}' or '{path_map[parsed]}', but not both." ) raise ValueError(msg) path_map[parsed] = path return tuple(path_map.keys()) T = TypeVar("T") def _normalize_path_keys(data: Mapping[str, T]) -> dict[str, T]: """ Normalize the keys of the input dict according to the normalization scheme used for zarr node paths. If any two keys in the input normalize to the same value, raise a ValueError. Returns a dict where the keys are the elements of the input and the values are the normalized form of each key. 
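    Examples
    --------
    A small sketch of the intended behaviour (the keys below are arbitrary):

    ```python
    from zarr.storage._utils import _normalize_path_keys

    _normalize_path_keys({"a//b/": 1, "c": 2})
    # {'a/b': 1, 'c': 2}
    ```
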
""" parsed_keys = _normalize_paths(data.keys()) return dict(zip(parsed_keys, data.values(), strict=True)) zarr-python-3.1.5/src/zarr/storage/_wrapper.py000066400000000000000000000106311511007055700214210ustar00rootroot00000000000000from __future__ import annotations from typing import TYPE_CHECKING, Generic, TypeVar if TYPE_CHECKING: from collections.abc import AsyncGenerator, AsyncIterator, Iterable from types import TracebackType from typing import Any, Self from zarr.abc.buffer import Buffer from zarr.abc.store import ByteRequest from zarr.core.buffer import BufferPrototype from zarr.abc.store import Store T_Store = TypeVar("T_Store", bound=Store) class WrapperStore(Store, Generic[T_Store]): """ Store that wraps an existing Store. By default all of the store methods are delegated to the wrapped store instance, which is accessible via the ``._store`` attribute of this class. Use this class to modify or extend the behavior of the other store classes. """ _store: T_Store def __init__(self, store: T_Store) -> None: self._store = store @classmethod async def open(cls: type[Self], store_cls: type[T_Store], *args: Any, **kwargs: Any) -> Self: store = store_cls(*args, **kwargs) await store._open() return cls(store=store) def __enter__(self) -> Self: return type(self)(self._store.__enter__()) def __exit__( self, exc_type: type[BaseException] | None, exc_value: BaseException | None, traceback: TracebackType | None, ) -> None: return self._store.__exit__(exc_type, exc_value, traceback) async def _open(self) -> None: await self._store._open() async def _ensure_open(self) -> None: await self._store._ensure_open() async def is_empty(self, prefix: str) -> bool: return await self._store.is_empty(prefix) @property def _is_open(self) -> bool: return self._store._is_open @_is_open.setter def _is_open(self, value: bool) -> None: raise NotImplementedError("WrapperStore must be opened via the `_open` method") async def clear(self) -> None: return await self._store.clear() @property def read_only(self) -> bool: return self._store.read_only def _check_writable(self) -> None: return self._store._check_writable() def __eq__(self, value: object) -> bool: return type(self) is type(value) and self._store.__eq__(value._store) # type: ignore[attr-defined] def __str__(self) -> str: return f"wrapping-{self._store}" def __repr__(self) -> str: return f"WrapperStore({self._store.__class__.__name__}, '{self._store}')" async def get( self, key: str, prototype: BufferPrototype, byte_range: ByteRequest | None = None ) -> Buffer | None: return await self._store.get(key, prototype, byte_range) async def get_partial_values( self, prototype: BufferPrototype, key_ranges: Iterable[tuple[str, ByteRequest | None]], ) -> list[Buffer | None]: return await self._store.get_partial_values(prototype, key_ranges) async def exists(self, key: str) -> bool: return await self._store.exists(key) async def set(self, key: str, value: Buffer) -> None: await self._store.set(key, value) async def set_if_not_exists(self, key: str, value: Buffer) -> None: return await self._store.set_if_not_exists(key, value) async def _set_many(self, values: Iterable[tuple[str, Buffer]]) -> None: await self._store._set_many(values) @property def supports_writes(self) -> bool: return self._store.supports_writes @property def supports_deletes(self) -> bool: return self._store.supports_deletes async def delete(self, key: str) -> None: await self._store.delete(key) @property def supports_listing(self) -> bool: return self._store.supports_listing def list(self) -> 
AsyncIterator[str]: return self._store.list() def list_prefix(self, prefix: str) -> AsyncIterator[str]: return self._store.list_prefix(prefix) def list_dir(self, prefix: str) -> AsyncIterator[str]: return self._store.list_dir(prefix) async def delete_dir(self, prefix: str) -> None: return await self._store.delete_dir(prefix) def close(self) -> None: self._store.close() async def _get_many( self, requests: Iterable[tuple[str, BufferPrototype, ByteRequest | None]] ) -> AsyncGenerator[tuple[str, Buffer | None], None]: async for req in self._store._get_many(requests): yield req zarr-python-3.1.5/src/zarr/storage/_zip.py000066400000000000000000000220201511007055700205360ustar00rootroot00000000000000from __future__ import annotations import os import shutil import threading import time import zipfile from pathlib import Path from typing import TYPE_CHECKING, Any, Literal from zarr.abc.store import ( ByteRequest, OffsetByteRequest, RangeByteRequest, Store, SuffixByteRequest, ) from zarr.core.buffer import Buffer, BufferPrototype if TYPE_CHECKING: from collections.abc import AsyncIterator, Iterable ZipStoreAccessModeLiteral = Literal["r", "w", "a"] class ZipStore(Store): """ Store using a ZIP file. Parameters ---------- path : str Location of file. mode : str, optional One of 'r' to read an existing file, 'w' to truncate and write a new file, 'a' to append to an existing file, or 'x' to exclusively create and write a new file. compression : int, optional Compression method to use when writing to the archive. allowZip64 : bool, optional If True (the default) will create ZIP files that use the ZIP64 extensions when the zipfile is larger than 2 GiB. If False will raise an exception when the ZIP file would require ZIP64 extensions. Attributes ---------- allowed_exceptions supports_writes supports_deletes supports_listing path compression allowZip64 """ supports_writes: bool = True supports_deletes: bool = False supports_listing: bool = True path: Path compression: int allowZip64: bool _zf: zipfile.ZipFile _lock: threading.RLock def __init__( self, path: Path | str, *, mode: ZipStoreAccessModeLiteral = "r", read_only: bool | None = None, compression: int = zipfile.ZIP_STORED, allowZip64: bool = True, ) -> None: if read_only is None: read_only = mode == "r" super().__init__(read_only=read_only) if isinstance(path, str): path = Path(path) assert isinstance(path, Path) self.path = path # root? 
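        # Note: ``self.path`` is the location of the ZIP archive on the local
        # filesystem, not a key prefix inside the archive; it is handed directly to
        # ``zipfile.ZipFile`` when the store is opened.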
self._zmode = mode self.compression = compression self.allowZip64 = allowZip64 def _sync_open(self) -> None: if self._is_open: raise ValueError("store is already open") self._lock = threading.RLock() self._zf = zipfile.ZipFile( self.path, mode=self._zmode, compression=self.compression, allowZip64=self.allowZip64, ) self._is_open = True async def _open(self) -> None: self._sync_open() def __getstate__(self) -> dict[str, Any]: # We need a copy to not modify the state of the original store state = self.__dict__.copy() for attr in ["_zf", "_lock"]: state.pop(attr, None) return state def __setstate__(self, state: dict[str, Any]) -> None: self.__dict__ = state self._is_open = False self._sync_open() def close(self) -> None: # docstring inherited super().close() with self._lock: self._zf.close() async def clear(self) -> None: # docstring inherited with self._lock: self._check_writable() self._zf.close() os.remove(self.path) self._zf = zipfile.ZipFile( self.path, mode="w", compression=self.compression, allowZip64=self.allowZip64 ) def __str__(self) -> str: return f"zip://{self.path}" def __repr__(self) -> str: return f"ZipStore('{self}')" def __eq__(self, other: object) -> bool: return isinstance(other, type(self)) and self.path == other.path def _get( self, key: str, prototype: BufferPrototype, byte_range: ByteRequest | None = None, ) -> Buffer | None: if not self._is_open: self._sync_open() # docstring inherited try: with self._zf.open(key) as f: # will raise KeyError if byte_range is None: return prototype.buffer.from_bytes(f.read()) elif isinstance(byte_range, RangeByteRequest): f.seek(byte_range.start) return prototype.buffer.from_bytes(f.read(byte_range.end - f.tell())) size = f.seek(0, os.SEEK_END) if isinstance(byte_range, OffsetByteRequest): f.seek(byte_range.offset) elif isinstance(byte_range, SuffixByteRequest): f.seek(max(0, size - byte_range.suffix)) else: raise TypeError(f"Unexpected byte_range, got {byte_range}.") return prototype.buffer.from_bytes(f.read()) except KeyError: return None async def get( self, key: str, prototype: BufferPrototype, byte_range: ByteRequest | None = None, ) -> Buffer | None: # docstring inherited assert isinstance(key, str) with self._lock: return self._get(key, prototype=prototype, byte_range=byte_range) async def get_partial_values( self, prototype: BufferPrototype, key_ranges: Iterable[tuple[str, ByteRequest | None]], ) -> list[Buffer | None]: # docstring inherited out = [] with self._lock: for key, byte_range in key_ranges: out.append(self._get(key, prototype=prototype, byte_range=byte_range)) return out def _set(self, key: str, value: Buffer) -> None: if not self._is_open: self._sync_open() # generally, this should be called inside a lock keyinfo = zipfile.ZipInfo(filename=key, date_time=time.localtime(time.time())[:6]) keyinfo.compress_type = self.compression if keyinfo.filename[-1] == os.sep: keyinfo.external_attr = 0o40775 << 16 # drwxrwxr-x keyinfo.external_attr |= 0x10 # MS-DOS directory flag else: keyinfo.external_attr = 0o644 << 16 # ?rw-r--r-- self._zf.writestr(keyinfo, value.to_bytes()) async def set(self, key: str, value: Buffer) -> None: # docstring inherited self._check_writable() if not self._is_open: self._sync_open() assert isinstance(key, str) if not isinstance(value, Buffer): raise TypeError( f"ZipStore.set(): `value` must be a Buffer instance. Got an instance of {type(value)} instead." 
) with self._lock: self._set(key, value) async def set_if_not_exists(self, key: str, value: Buffer) -> None: self._check_writable() with self._lock: members = self._zf.namelist() if key not in members: self._set(key, value) async def delete_dir(self, prefix: str) -> None: # only raise NotImplementedError if any keys are found self._check_writable() if prefix != "" and not prefix.endswith("/"): prefix += "/" async for _ in self.list_prefix(prefix): raise NotImplementedError async def delete(self, key: str) -> None: # docstring inherited # we choose to only raise NotImplementedError here if the key exists # this allows the array/group APIs to avoid the overhead of existence checks self._check_writable() if await self.exists(key): raise NotImplementedError async def exists(self, key: str) -> bool: # docstring inherited with self._lock: try: self._zf.getinfo(key) except KeyError: return False else: return True async def list(self) -> AsyncIterator[str]: # docstring inherited with self._lock: for key in self._zf.namelist(): yield key async def list_prefix(self, prefix: str) -> AsyncIterator[str]: # docstring inherited async for key in self.list(): if key.startswith(prefix): yield key async def list_dir(self, prefix: str) -> AsyncIterator[str]: # docstring inherited prefix = prefix.rstrip("/") keys = self._zf.namelist() seen = set() if prefix == "": keys_unique = {k.split("/")[0] for k in keys} for key in keys_unique: if key not in seen: seen.add(key) yield key else: for key in keys: if key.startswith(prefix + "/") and key.strip("/") != prefix: k = key.removeprefix(prefix + "/").split("/")[0] if k not in seen: seen.add(k) yield k async def move(self, path: Path | str) -> None: """ Move the store to another path. """ if isinstance(path, str): path = Path(path) self.close() os.makedirs(path.parent, exist_ok=True) shutil.move(self.path, path) self.path = path await self._open() zarr-python-3.1.5/src/zarr/testing/000077500000000000000000000000001511007055700172405ustar00rootroot00000000000000zarr-python-3.1.5/src/zarr/testing/__init__.py000066400000000000000000000006641511007055700213570ustar00rootroot00000000000000import importlib.util import warnings from zarr.errors import ZarrUserWarning if importlib.util.find_spec("pytest") is not None: from zarr.testing.store import StoreTests else: warnings.warn( "pytest not installed, skipping test suite", category=ZarrUserWarning, stacklevel=2 ) from zarr.testing.utils import assert_bytes_equal # TODO: import public buffer tests? 
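# Illustrative sketch (not an official template): downstream store authors
# typically reuse this suite by subclassing ``StoreTests`` in their own
# pytest module, e.g.
#
#     from zarr.testing import StoreTests
#
#     class TestMyStore(StoreTests):  # ``MyStore`` is a hypothetical store under test
#         ...
#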
__all__ = ["StoreTests", "assert_bytes_equal"] zarr-python-3.1.5/src/zarr/testing/buffer.py000066400000000000000000000044311511007055700210650ustar00rootroot00000000000000# mypy: ignore-errors from __future__ import annotations from typing import TYPE_CHECKING, Any, Literal import numpy as np import numpy.typing as npt from zarr.core.buffer import Buffer, BufferPrototype, cpu from zarr.storage import MemoryStore if TYPE_CHECKING: from collections.abc import Iterable from typing import Self __all__ = [ "NDBufferUsingTestNDArrayLike", "StoreExpectingTestBuffer", "TestBuffer", ] class TestNDArrayLike(np.ndarray): """An example of a ndarray-like class""" __test__ = False class TestBuffer(cpu.Buffer): """Example of a custom Buffer that handles ArrayLike""" __test__ = False class NDBufferUsingTestNDArrayLike(cpu.NDBuffer): """Example of a custom NDBuffer that handles MyNDArrayLike""" @classmethod def create( cls, *, shape: Iterable[int], dtype: npt.DTypeLike, order: Literal["C", "F"] = "C", fill_value: Any | None = None, ) -> Self: """Overwrite `NDBuffer.create` to create a TestNDArrayLike instance""" ret = cls(TestNDArrayLike(shape=shape, dtype=dtype, order=order)) if fill_value is not None: ret.fill(fill_value) return ret @classmethod def empty( cls, shape: tuple[int, ...], dtype: npt.DTypeLike, order: Literal["C", "F"] = "C", ) -> Self: return super(cpu.NDBuffer, cls).empty(shape=shape, dtype=dtype, order=order) class StoreExpectingTestBuffer(MemoryStore): """Example of a custom Store that expect MyBuffer for all its non-metadata We assume that keys containing "json" is metadata """ async def set(self, key: str, value: Buffer, byte_range: tuple[int, int] | None = None) -> None: if "json" not in key: assert isinstance(value, TestBuffer) await super().set(key, value, byte_range) async def get( self, key: str, prototype: BufferPrototype, byte_range: tuple[int, int | None] | None = None, ) -> Buffer | None: if "json" not in key: assert prototype.buffer is TestBuffer ret = await super().get(key=key, prototype=prototype, byte_range=byte_range) if ret is not None: assert isinstance(ret, prototype.buffer) return ret zarr-python-3.1.5/src/zarr/testing/conftest.py000066400000000000000000000007101511007055700214350ustar00rootroot00000000000000import pytest def pytest_configure(config: pytest.Config) -> None: # The tests in zarr.testing are intended to be run by downstream projects. 
# To allow those downstream projects to run with `--strict-markers`, we need # to register an entry point with pytest11 and register our "plugin" with it, # which just registers the markers used in zarr.testing config.addinivalue_line("markers", "gpu: mark a test as requiring CuPy and GPU") zarr-python-3.1.5/src/zarr/testing/stateful.py000066400000000000000000000605371511007055700214540ustar00rootroot00000000000000import builtins import functools from collections.abc import Callable from typing import Any, TypeVar, cast import hypothesis.extra.numpy as npst import hypothesis.strategies as st import numpy as np from hypothesis import assume, note from hypothesis.stateful import ( RuleBasedStateMachine, initialize, invariant, precondition, rule, ) from hypothesis.strategies import DataObject import zarr from zarr import Array from zarr.abc.store import Store from zarr.codecs.bytes import BytesCodec from zarr.core.buffer import Buffer, BufferPrototype, cpu, default_buffer_prototype from zarr.core.sync import SyncMixin from zarr.storage import LocalStore, MemoryStore from zarr.testing.strategies import ( basic_indices, chunk_paths, dimension_names, key_ranges, node_names, np_array_and_chunks, orthogonal_indices, ) from zarr.testing.strategies import keys as zarr_keys MAX_BINARY_SIZE = 100 F = TypeVar("F", bound=Callable[..., Any]) def with_frequency(frequency: float) -> Callable[[F], F]: """This needs to be deterministic for hypothesis replaying""" def decorator(func: F) -> F: counter_attr = f"__{func.__name__}_counter" @functools.wraps(func) def wrapper(*args: Any, **kwargs: Any) -> Any: return func(*args, **kwargs) @precondition def frequency_check(f: Any) -> Any: if not hasattr(f, counter_attr): setattr(f, counter_attr, 0) current_count = getattr(f, counter_attr) + 1 setattr(f, counter_attr, current_count) return (current_count * frequency) % 1.0 >= (1.0 - frequency) return cast(F, frequency_check(wrapper)) return decorator def split_prefix_name(path: str) -> tuple[str, str]: split = path.rsplit("/", maxsplit=1) if len(split) > 1: prefix, name = split else: prefix = "" (name,) = split return prefix, name class ZarrHierarchyStateMachine(SyncMixin, RuleBasedStateMachine): """ This state machine models operations that modify a zarr store's hierarchy. That is, user actions that modify arrays/groups as well as list operations. It is intended to be used by external stores, and compares their results to a MemoryStore that is assumed to be perfect. """ def __init__(self, store: Store) -> None: super().__init__() self.store = store self.model = MemoryStore() zarr.group(store=self.model) # Track state of the hierarchy, these should contain fully qualified paths self.all_groups: set[str] = set() self.all_arrays: set[str] = set() @initialize() def init_store(self) -> None: # This lets us reuse the fixture provided store. self._sync(self.store.clear()) zarr.group(store=self.store) def can_add(self, path: str) -> bool: return path not in self.all_groups and path not in self.all_arrays # -------------------- store operations ----------------------- @rule(name=node_names, data=st.data()) def add_group(self, name: str, data: DataObject) -> None: # Handle possible case-insensitive file systems (e.g. 
MacOS) if isinstance(self.store, LocalStore): name = name.lower() if self.all_groups: parent = data.draw(st.sampled_from(sorted(self.all_groups)), label="Group parent") else: parent = "" path = f"{parent}/{name}".lstrip("/") assume(self.can_add(path)) note(f"Adding group: path='{path}'") self.all_groups.add(path) zarr.group(store=self.store, path=path) zarr.group(store=self.model, path=path) @rule(data=st.data(), name=node_names, array_and_chunks=np_array_and_chunks()) def add_array( self, data: DataObject, name: str, array_and_chunks: tuple[np.ndarray[Any, Any], tuple[int, ...]], ) -> None: # Handle possible case-insensitive file systems (e.g. MacOS) if isinstance(self.store, LocalStore): name = name.lower() array, chunks = array_and_chunks fill_value = data.draw(npst.from_dtype(array.dtype)) if self.all_groups: parent = data.draw(st.sampled_from(sorted(self.all_groups)), label="Array parent") else: parent = "" # TODO: support creating deeper paths # TODO: support overwriting potentially by just skipping `self.can_add` path = f"{parent}/{name}".lstrip("/") assume(self.can_add(path)) note(f"Adding array: path='{path}' shape={array.shape} chunks={chunks}") for store in [self.store, self.model]: zarr.array( array, chunks=chunks, path=path, store=store, fill_value=fill_value, zarr_format=3, dimension_names=data.draw( dimension_names(ndim=array.ndim), label="dimension names" ), # Chose bytes codec to avoid wasting time compressing the data being written codecs=[BytesCodec()], ) self.all_arrays.add(path) @rule() @with_frequency(0.25) def clear(self) -> None: note("clearing") import zarr self._sync(self.store.clear()) self._sync(self.model.clear()) assert self._sync(self.store.is_empty("/")) assert self._sync(self.model.is_empty("/")) self.all_groups.clear() self.all_arrays.clear() zarr.group(store=self.store) zarr.group(store=self.model) # TODO: MemoryStore is broken? # assert not self._sync(self.store.is_empty("/")) # assert not self._sync(self.model.is_empty("/")) def draw_directory(self, data: DataObject) -> str: group_st = st.sampled_from(sorted(self.all_groups)) if self.all_groups else st.nothing() array_st = st.sampled_from(sorted(self.all_arrays)) if self.all_arrays else st.nothing() array_or_group = data.draw(st.one_of(group_st, array_st)) if data.draw(st.booleans()) and array_or_group in self.all_arrays: arr = zarr.open_array(path=array_or_group, store=self.model) path = data.draw( st.one_of( st.sampled_from([array_or_group]), chunk_paths(ndim=arr.ndim, numblocks=arr.cdata_shape).map( lambda x: f"{array_or_group}/c/" ), ) ) else: path = array_or_group return path @precondition(lambda self: bool(self.all_groups)) @rule(data=st.data()) def check_list_dir(self, data: DataObject) -> None: path = self.draw_directory(data) note(f"list_dir for {path=!r}") # Consider .list_dir("path/to/array") for an array with a single chunk. # The MemoryStore model will return `"c", "zarr.json"` only if the chunk exists # If that chunk was deleted, then `"c"` is not returned. 
# LocalStore will not have this behaviour :/ # There are similar consistency issues with delete_dir("/path/to/array/c/0/0") assume(not isinstance(self.store, LocalStore)) model_ls = sorted(self._sync_iter(self.model.list_dir(path))) store_ls = sorted(self._sync_iter(self.store.list_dir(path))) assert model_ls == store_ls, (model_ls, store_ls) @precondition(lambda self: bool(self.all_arrays)) @rule(data=st.data()) def delete_chunk(self, data: DataObject) -> None: array = data.draw(st.sampled_from(sorted(self.all_arrays))) arr = zarr.open_array(path=array, store=self.model) chunk_path = data.draw(chunk_paths(ndim=arr.ndim, numblocks=arr.cdata_shape, subset=False)) path = f"{array}/c/{chunk_path}" note(f"deleting chunk {path=!r}") self._sync(self.model.delete(path)) self._sync(self.store.delete(path)) @precondition(lambda self: bool(self.all_arrays)) @rule(data=st.data()) def check_array(self, data: DataObject) -> None: path = data.draw(st.sampled_from(sorted(self.all_arrays))) actual = zarr.open_array(self.store, path=path)[:] expected = zarr.open_array(self.model, path=path)[:] np.testing.assert_equal(actual, expected) @precondition(lambda self: bool(self.all_arrays)) @rule(data=st.data()) def overwrite_array_basic_indexing(self, data: DataObject) -> None: array = data.draw(st.sampled_from(sorted(self.all_arrays))) model_array = zarr.open_array(path=array, store=self.model) store_array = zarr.open_array(path=array, store=self.store) slicer = data.draw(basic_indices(shape=model_array.shape)) note(f"overwriting array with basic indexer: {slicer=}") new_data = data.draw( npst.arrays(shape=np.shape(model_array[slicer]), dtype=model_array.dtype) ) model_array[slicer] = new_data store_array[slicer] = new_data @precondition(lambda self: bool(self.all_arrays)) @rule(data=st.data()) def overwrite_array_orthogonal_indexing(self, data: DataObject) -> None: array = data.draw(st.sampled_from(sorted(self.all_arrays))) model_array = zarr.open_array(path=array, store=self.model) store_array = zarr.open_array(path=array, store=self.store) indexer, _ = data.draw(orthogonal_indices(shape=model_array.shape)) note(f"overwriting array orthogonal {indexer=}") new_data = data.draw( npst.arrays(shape=model_array.oindex[indexer].shape, dtype=model_array.dtype) # type: ignore[union-attr] ) model_array.oindex[indexer] = new_data store_array.oindex[indexer] = new_data @precondition(lambda self: bool(self.all_arrays)) @rule(data=st.data()) def resize_array(self, data: DataObject) -> None: array = data.draw(st.sampled_from(sorted(self.all_arrays))) model_array = zarr.open_array(path=array, store=self.model) store_array = zarr.open_array(path=array, store=self.store) ndim = model_array.ndim new_shape = tuple( 0 if oldsize == 0 else newsize for newsize, oldsize in zip( data.draw(npst.array_shapes(max_dims=ndim, min_dims=ndim, min_side=0)), model_array.shape, strict=True, ) ) note(f"resizing array from {model_array.shape} to {new_shape}") model_array.resize(new_shape) store_array.resize(new_shape) @precondition(lambda self: bool(self.all_arrays) or bool(self.all_groups)) @rule(data=st.data()) def delete_dir(self, data: DataObject) -> None: path = self.draw_directory(data) note(f"delete_dir with {path=!r}") self._sync(self.model.delete_dir(path)) self._sync(self.store.delete_dir(path)) matches = set() for node in self.all_groups | self.all_arrays: if node.startswith(path): matches.add(node) self.all_groups = self.all_groups - matches self.all_arrays = self.all_arrays - matches # @precondition(lambda self: 
bool(self.all_groups)) # @precondition(lambda self: bool(self.all_arrays)) # @rule(data=st.data()) # def move_array(self, data): # array_path = data.draw(st.sampled_from(self.all_arrays), label="Array move source") # to_group = data.draw(st.sampled_from(self.all_groups), label="Array move destination") # # fixme renaming to self? # array_name = os.path.basename(array_path) # assume(self.model.can_add(to_group, array_name)) # new_path = f"{to_group}/{array_name}".lstrip("/") # note(f"moving array '{array_path}' -> '{new_path}'") # self.model.rename(array_path, new_path) # self.repo.store.rename(array_path, new_path) # @precondition(lambda self: len(self.all_groups) >= 2) # @rule(data=st.data()) # def move_group(self, data): # from_group = data.draw(st.sampled_from(self.all_groups), label="Group move source") # to_group = data.draw(st.sampled_from(self.all_groups), label="Group move destination") # assume(not to_group.startswith(from_group)) # from_group_name = os.path.basename(from_group) # assume(self.model.can_add(to_group, from_group_name)) # # fixme renaming to self? # new_path = f"{to_group}/{from_group_name}".lstrip("/") # note(f"moving group '{from_group}' -> '{new_path}'") # self.model.rename(from_group, new_path) # self.repo.store.rename(from_group, new_path) @precondition(lambda self: self.store.supports_deletes) @precondition(lambda self: len(self.all_arrays) >= 1) @rule(data=st.data()) def delete_array_using_del(self, data: DataObject) -> None: array_path = data.draw( st.sampled_from(sorted(self.all_arrays)), label="Array deletion target" ) prefix, array_name = split_prefix_name(array_path) note(f"Deleting array '{array_path}' ({prefix=!r}, {array_name=!r}) using del") for store in [self.model, self.store]: group = zarr.open_group(path=prefix, store=store) group[array_name] # check that it exists del group[array_name] self.all_arrays.remove(array_path) @precondition(lambda self: self.store.supports_deletes) @precondition(lambda self: len(self.all_groups) >= 2) # fixme don't delete root @rule(data=st.data()) def delete_group_using_del(self, data: DataObject) -> None: # ensure that we don't include the root group in the list of member names that we try # to delete member_names = tuple(filter(lambda v: "/" in v, sorted(self.all_groups))) group_path = data.draw(st.sampled_from(member_names), label="Group deletion target") prefix, group_name = split_prefix_name(group_path) note(f"Deleting group '{group_path=!r}', {prefix=!r}, {group_name=!r} using delete") members = zarr.open_group(store=self.model, path=group_path).members(max_depth=None) for _, obj in members: if isinstance(obj, Array): self.all_arrays.remove(obj.path) else: self.all_groups.remove(obj.path) for store in [self.store, self.model]: group = zarr.open_group(store=store, path=prefix) group[group_name] # check that it exists del group[group_name] if group_path != "/": # The root group is always present self.all_groups.remove(group_path) # # --------------- assertions ----------------- # def check_group_arrays(self, group): # # note(f"Checking arrays of '{group}'") # g1 = self.model.get_group(group) # g2 = zarr.open_group(path=group, mode="r", store=self.repo.store) # model_arrays = sorted(g1.arrays(), key=itemgetter(0)) # our_arrays = sorted(g2.arrays(), key=itemgetter(0)) # for (n1, a1), (n2, a2) in zip_longest(model_arrays, our_arrays): # assert n1 == n2 # assert_array_equal(a1, a2) # def check_subgroups(self, group_path): # g1 = self.model.get_group(group_path) # g2 = zarr.open_group(path=group_path, mode="r", 
store=self.repo.store) # g1_children = [name for (name, _) in g1.groups()] # g2_children = [name for (name, _) in g2.groups()] # # note(f"Checking {len(g1_children)} subgroups of group '{group_path}'") # assert g1_children == g2_children # def check_list_prefix_from_group(self, group): # prefix = f"meta/root/{group}" # model_list = sorted(self.model.list_prefix(prefix)) # al_list = sorted(self.repo.store.list_prefix(prefix)) # # note(f"Checking {len(model_list)} keys under '{prefix}'") # assert model_list == al_list # prefix = f"data/root/{group}" # model_list = sorted(self.model.list_prefix(prefix)) # al_list = sorted(self.repo.store.list_prefix(prefix)) # # note(f"Checking {len(model_list)} keys under '{prefix}'") # assert model_list == al_list # @precondition(lambda self: self.model.is_persistent_session()) # @rule(data=st.data()) # def check_group_path(self, data): # t0 = time.time() # group = data.draw(st.sampled_from(self.all_groups)) # self.check_list_prefix_from_group(group) # self.check_subgroups(group) # self.check_group_arrays(group) # t1 = time.time() # note(f"Checks took {t1 - t0} sec.") @invariant() def check_list_prefix_from_root(self) -> None: model_list = self._sync_iter(self.model.list_prefix("")) store_list = self._sync_iter(self.store.list_prefix("")) note(f"Checking {len(model_list)} expected keys vs {len(store_list)} actual keys") assert sorted(model_list) == sorted(store_list), ( sorted(model_list), sorted(store_list), ) # check that our internal state matches that of the store and model assert all(f"{path}/zarr.json" in model_list for path in self.all_groups | self.all_arrays) assert all(f"{path}/zarr.json" in store_list for path in self.all_groups | self.all_arrays) class SyncStoreWrapper(zarr.core.sync.SyncMixin): def __init__(self, store: Store) -> None: """Synchronous Store wrapper This class holds synchronous methods that map to async methods of Store classes. The synchronous wrapper is needed because hypothesis' stateful testing infra does not support asyncio so we redefine sync versions of the Store API. 
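        A minimal usage sketch (illustrative; assumes an in-memory store):

            wrapper = SyncStoreWrapper(MemoryStore())
            wrapper.set("key", cpu.Buffer.from_bytes(b"data"))
            assert wrapper.exists("key")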
https://github.com/HypothesisWorks/hypothesis/issues/3712#issuecomment-1668999041 """ self.store = store @property def read_only(self) -> bool: return self.store.read_only def set(self, key: str, data_buffer: Buffer) -> None: return self._sync(self.store.set(key, data_buffer)) def list(self) -> builtins.list[str]: return self._sync_iter(self.store.list()) def get(self, key: str, prototype: BufferPrototype) -> Buffer | None: return self._sync(self.store.get(key, prototype=prototype)) def get_partial_values( self, key_ranges: builtins.list[Any], prototype: BufferPrototype ) -> builtins.list[Buffer | None]: return self._sync(self.store.get_partial_values(prototype=prototype, key_ranges=key_ranges)) def delete(self, path: str) -> None: return self._sync(self.store.delete(path)) def is_empty(self, prefix: str) -> bool: return self._sync(self.store.is_empty(prefix=prefix)) def clear(self) -> None: return self._sync(self.store.clear()) def exists(self, key: str) -> bool: return self._sync(self.store.exists(key)) def list_dir(self, prefix: str) -> None: raise NotImplementedError def list_prefix(self, prefix: str) -> None: raise NotImplementedError @property def supports_listing(self) -> bool: return self.store.supports_listing @property def supports_writes(self) -> bool: return self.store.supports_writes @property def supports_deletes(self) -> bool: return self.store.supports_deletes class ZarrStoreStateMachine(RuleBasedStateMachine): """ " Zarr store state machine This is a subclass of a Hypothesis RuleBasedStateMachine. It is testing a framework to ensure that the state of a Zarr store matches an expected state after a set of random operations. It contains a store (currently, a Zarr MemoryStore) and a model, a simplified version of a zarr store (in this case, a dict). It also contains rules which represent actions that can be applied to a zarr store. Rules apply an action to both the store and the model, and invariants assert that the state of the model is equal to the state of the store. Hypothesis then generates sequences of rules, running invariants after each rule. It raises an error if a sequence produces discontinuity between state of the model and state of the store (ie. an invariant is violated). 
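    A hedged sketch of how such a machine is typically driven (the store passed to the
    constructor is illustrative; any Store implementation can be used):

        from hypothesis.stateful import run_state_machine_as_test

        run_state_machine_as_test(lambda: ZarrStoreStateMachine(MemoryStore()))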
https://hypothesis.readthedocs.io/en/latest/stateful.html """ def __init__(self, store: Store) -> None: super().__init__() self.model: dict[str, Buffer] = {} self.store = SyncStoreWrapper(store) self.prototype = default_buffer_prototype() @initialize() def init_store(self) -> None: self.store.clear() @rule(key=zarr_keys(), data=st.binary(min_size=0, max_size=MAX_BINARY_SIZE)) def set(self, key: str, data: bytes) -> None: note(f"(set) Setting {key!r} with {data!r}") assert not self.store.read_only data_buf = cpu.Buffer.from_bytes(data) self.store.set(key, data_buf) self.model[key] = data_buf @precondition(lambda self: len(self.model.keys()) > 0) @rule(key=zarr_keys(), data=st.data()) def get(self, key: str, data: DataObject) -> None: key = data.draw( st.sampled_from(sorted(self.model.keys())) ) # hypothesis wants to sample from sorted list note("(get)") store_value = self.store.get(key, self.prototype) # to bytes here necessary because data_buf set to model in set() assert self.model[key] == store_value @rule(key=zarr_keys(), data=st.data()) def get_invalid_zarr_keys(self, key: str, data: DataObject) -> None: note("(get_invalid)") assume(key not in self.model) assert self.store.get(key, self.prototype) is None @precondition(lambda self: len(self.model.keys()) > 0) @rule(data=st.data()) def get_partial_values(self, data: DataObject) -> None: key_range = data.draw( key_ranges(keys=st.sampled_from(sorted(self.model.keys())), max_size=MAX_BINARY_SIZE) ) note(f"(get partial) {key_range=}") obs_maybe = self.store.get_partial_values(key_range, self.prototype) observed = [] for obs in obs_maybe: assert obs is not None observed.append(obs.to_bytes()) model_vals_ls = [] for key, byte_range in key_range: start = byte_range.start stop = byte_range.end model_vals_ls.append(self.model[key][start:stop]) assert all( obs == exp.to_bytes() for obs, exp in zip(observed, model_vals_ls, strict=True) ), ( observed, model_vals_ls, ) @precondition(lambda self: self.store.supports_deletes) @precondition(lambda self: len(self.model.keys()) > 0) @rule(data=st.data()) def delete(self, data: DataObject) -> None: key = data.draw(st.sampled_from(sorted(self.model.keys()))) note(f"(delete) Deleting {key=}") self.store.delete(key) del self.model[key] @rule() def clear(self) -> None: assert not self.store.read_only note("(clear)") self.store.clear() self.model.clear() assert self.store.is_empty("") assert len(self.model.keys()) == len(list(self.store.list())) == 0 @rule() # Local store can be non-empty when there are subdirectories but no files @precondition(lambda self: not isinstance(self.store.store, LocalStore)) def is_empty(self) -> None: note("(is_empty)") # make sure they either both are or both aren't empty (same state) assert self.store.is_empty("") == (not self.model) @rule(key=zarr_keys()) def exists(self, key: str) -> None: note("(exists)") assert self.store.exists(key) == (key in self.model) @invariant() def check_paths_equal(self) -> None: note("Checking that paths are equal") paths = sorted(self.store.list()) assert sorted(self.model.keys()) == paths @invariant() def check_vals_equal(self) -> None: note("Checking values equal") for key, val in self.model.items(): store_item = self.store.get(key, self.prototype) assert val == store_item @invariant() def check_num_zarr_keys_equal(self) -> None: note("check num zarr_keys equal") assert len(self.model) == len(list(self.store.list())) @invariant() def check_zarr_keys(self) -> None: keys = list(self.store.list()) if not keys: assert self.store.is_empty("") is True 
else: assert self.store.is_empty("") is False for key in keys: assert self.store.exists(key) is True note("checking keys / exists / empty") zarr-python-3.1.5/src/zarr/testing/store.py000066400000000000000000000536661511007055700207660ustar00rootroot00000000000000from __future__ import annotations import asyncio import pickle from abc import abstractmethod from typing import TYPE_CHECKING, Generic, TypeVar from zarr.storage import WrapperStore if TYPE_CHECKING: from typing import Any from zarr.abc.store import ByteRequest from zarr.core.buffer.core import BufferPrototype import pytest from zarr.abc.store import ( ByteRequest, OffsetByteRequest, RangeByteRequest, Store, SuffixByteRequest, ) from zarr.core.buffer import Buffer, default_buffer_prototype from zarr.core.sync import _collect_aiterator from zarr.storage._utils import _normalize_byte_range_index from zarr.testing.utils import assert_bytes_equal __all__ = ["StoreTests"] S = TypeVar("S", bound=Store) B = TypeVar("B", bound=Buffer) class StoreTests(Generic[S, B]): store_cls: type[S] buffer_cls: type[B] @abstractmethod async def set(self, store: S, key: str, value: Buffer) -> None: """ Insert a value into a storage backend, with a specific key. This should not use any store methods. Bypassing the store methods allows them to be tested. """ ... @abstractmethod async def get(self, store: S, key: str) -> Buffer: """ Retrieve a value from a storage backend, by key. This should not use any store methods. Bypassing the store methods allows them to be tested. """ ... @abstractmethod @pytest.fixture def store_kwargs(self, *args: Any, **kwargs: Any) -> dict[str, Any]: """Kwargs for instantiating a store""" ... @abstractmethod def test_store_repr(self, store: S) -> None: ... @abstractmethod def test_store_supports_writes(self, store: S) -> None: ... def test_store_supports_partial_writes(self, store: S) -> None: assert not store.supports_partial_writes @abstractmethod def test_store_supports_listing(self, store: S) -> None: ... 
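    # Hedged subclassing sketch (the class and store names below are illustrative, not part
    # of zarr-python): downstream stores reuse this suite by pinning ``store_cls`` and
    # ``buffer_cls`` and implementing the abstract helpers and fixtures, e.g.
    #
    #     class TestMyStore(StoreTests[MyStore, cpu.Buffer]):
    #         store_cls = MyStore
    #         buffer_cls = cpu.Buffer
    #
    #         async def get(self, store: MyStore, key: str) -> Buffer: ...
    #         async def set(self, store: MyStore, key: str, value: Buffer) -> None: ...
    #
    #         @pytest.fixture
    #         def store_kwargs(self) -> dict[str, Any]:
    #             return {}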
@pytest.fixture def open_kwargs(self, store_kwargs: dict[str, Any]) -> dict[str, Any]: return store_kwargs @pytest.fixture async def store(self, open_kwargs: dict[str, Any]) -> Store: return await self.store_cls.open(**open_kwargs) @pytest.fixture async def store_not_open(self, store_kwargs: dict[str, Any]) -> Store: return self.store_cls(**store_kwargs) def test_store_type(self, store: S) -> None: assert isinstance(store, Store) assert isinstance(store, self.store_cls) def test_store_eq(self, store: S, store_kwargs: dict[str, Any]) -> None: # check self equality assert store == store # check store equality with same inputs # asserting this is important for being able to compare (de)serialized stores store2 = self.store_cls(**store_kwargs) assert store == store2 async def test_serializable_store(self, store: S) -> None: new_store: S = pickle.loads(pickle.dumps(store)) assert new_store == store assert new_store.read_only == store.read_only # quickly roundtrip data to a key to test that new store works data_buf = self.buffer_cls.from_bytes(b"\x01\x02\x03\x04") key = "foo" await store.set(key, data_buf) observed = await store.get(key, prototype=default_buffer_prototype()) assert_bytes_equal(observed, data_buf) def test_store_read_only(self, store: S) -> None: assert not store.read_only with pytest.raises(AttributeError): store.read_only = False # type: ignore[misc] @pytest.mark.parametrize("read_only", [True, False]) async def test_store_open_read_only(self, open_kwargs: dict[str, Any], read_only: bool) -> None: open_kwargs["read_only"] = read_only store = await self.store_cls.open(**open_kwargs) assert store._is_open assert store.read_only == read_only async def test_store_context_manager(self, open_kwargs: dict[str, Any]) -> None: # Test that the context manager closes the store with await self.store_cls.open(**open_kwargs) as store: assert store._is_open # Test trying to open an already open store with pytest.raises(ValueError, match="store is already open"): await store._open() assert not store._is_open async def test_read_only_store_raises(self, open_kwargs: dict[str, Any]) -> None: kwargs = {**open_kwargs, "read_only": True} store = await self.store_cls.open(**kwargs) assert store.read_only # set with pytest.raises( ValueError, match="store was opened in read-only mode and does not support writing" ): await store.set("foo", self.buffer_cls.from_bytes(b"bar")) # delete with pytest.raises( ValueError, match="store was opened in read-only mode and does not support writing" ): await store.delete("foo") async def test_with_read_only_store(self, open_kwargs: dict[str, Any]) -> None: kwargs = {**open_kwargs, "read_only": True} store = await self.store_cls.open(**kwargs) assert store.read_only # Test that you cannot write to a read-only store with pytest.raises( ValueError, match="store was opened in read-only mode and does not support writing" ): await store.set("foo", self.buffer_cls.from_bytes(b"bar")) # Check if the store implements with_read_only try: writer = store.with_read_only(read_only=False) except NotImplementedError: # Test that stores that do not implement with_read_only raise NotImplementedError with the correct message with pytest.raises( NotImplementedError, match=f"with_read_only is not implemented for the {type(store)} store type.", ): store.with_read_only(read_only=False) return # Test that you can write to a new store copy assert not writer._is_open assert not writer.read_only await writer.set("foo", self.buffer_cls.from_bytes(b"bar")) await writer.delete("foo") # Test that 
you cannot write to the original store assert store.read_only with pytest.raises( ValueError, match="store was opened in read-only mode and does not support writing" ): await store.set("foo", self.buffer_cls.from_bytes(b"bar")) with pytest.raises( ValueError, match="store was opened in read-only mode and does not support writing" ): await store.delete("foo") # Test that you cannot write to a read-only store copy reader = store.with_read_only(read_only=True) assert reader.read_only with pytest.raises( ValueError, match="store was opened in read-only mode and does not support writing" ): await reader.set("foo", self.buffer_cls.from_bytes(b"bar")) with pytest.raises( ValueError, match="store was opened in read-only mode and does not support writing" ): await reader.delete("foo") @pytest.mark.parametrize("key", ["c/0", "foo/c/0.0", "foo/0/0"]) @pytest.mark.parametrize( ("data", "byte_range"), [ (b"\x01\x02\x03\x04", None), (b"\x01\x02\x03\x04", RangeByteRequest(1, 4)), (b"\x01\x02\x03\x04", OffsetByteRequest(1)), (b"\x01\x02\x03\x04", SuffixByteRequest(1)), (b"", None), ], ) async def test_get(self, store: S, key: str, data: bytes, byte_range: ByteRequest) -> None: """ Ensure that data can be read from the store using the store.get method. """ data_buf = self.buffer_cls.from_bytes(data) await self.set(store, key, data_buf) observed = await store.get(key, prototype=default_buffer_prototype(), byte_range=byte_range) start, stop = _normalize_byte_range_index(data_buf, byte_range=byte_range) expected = data_buf[start:stop] assert_bytes_equal(observed, expected) async def test_get_not_open(self, store_not_open: S) -> None: """ Ensure that data can be read from the store that isn't yet open using the store.get method. """ assert not store_not_open._is_open data_buf = self.buffer_cls.from_bytes(b"\x01\x02\x03\x04") key = "c/0" await self.set(store_not_open, key, data_buf) observed = await store_not_open.get(key, prototype=default_buffer_prototype()) assert_bytes_equal(observed, data_buf) async def test_get_raises(self, store: S) -> None: """ Ensure that a ValueError is raise for invalid byte range syntax """ data_buf = self.buffer_cls.from_bytes(b"\x01\x02\x03\x04") await self.set(store, "c/0", data_buf) with pytest.raises((ValueError, TypeError), match=r"Unexpected byte_range, got.*"): await store.get("c/0", prototype=default_buffer_prototype(), byte_range=(0, 2)) # type: ignore[arg-type] async def test_get_many(self, store: S) -> None: """ Ensure that multiple keys can be retrieved at once with the _get_many method. """ keys = tuple(map(str, range(10))) values = tuple(f"{k}".encode() for k in keys) for k, v in zip(keys, values, strict=False): await self.set(store, k, self.buffer_cls.from_bytes(v)) observed_buffers = await _collect_aiterator( store._get_many( zip( keys, (default_buffer_prototype(),) * len(keys), (None,) * len(keys), strict=False, ) ) ) observed_kvs = sorted(((k, b.to_bytes()) for k, b in observed_buffers)) # type: ignore[union-attr] expected_kvs = sorted(((k, b) for k, b in zip(keys, values, strict=False))) assert observed_kvs == expected_kvs @pytest.mark.parametrize("key", ["c/0", "foo/c/0.0", "foo/0/0"]) @pytest.mark.parametrize("data", [b"\x01\x02\x03\x04", b""]) async def test_getsize(self, store: S, key: str, data: bytes) -> None: """ Test the result of store.getsize(). 
""" data_buf = self.buffer_cls.from_bytes(data) expected = len(data_buf) await self.set(store, key, data_buf) observed = await store.getsize(key) assert observed == expected async def test_getsize_prefix(self, store: S) -> None: """ Test the result of store.getsize_prefix(). """ data_buf = self.buffer_cls.from_bytes(b"\x01\x02\x03\x04") keys = ["c/0/0", "c/0/1", "c/1/0", "c/1/1"] keys_values = [(k, data_buf) for k in keys] await store._set_many(keys_values) expected = len(data_buf) * len(keys) observed = await store.getsize_prefix("c") assert observed == expected async def test_getsize_raises(self, store: S) -> None: """ Test that getsize() raise a FileNotFoundError if the key doesn't exist. """ with pytest.raises(FileNotFoundError): await store.getsize("c/1000") @pytest.mark.parametrize("key", ["zarr.json", "c/0", "foo/c/0.0", "foo/0/0"]) @pytest.mark.parametrize("data", [b"\x01\x02\x03\x04", b""]) async def test_set(self, store: S, key: str, data: bytes) -> None: """ Ensure that data can be written to the store using the store.set method. """ assert not store.read_only data_buf = self.buffer_cls.from_bytes(data) await store.set(key, data_buf) observed = await self.get(store, key) assert_bytes_equal(observed, data_buf) async def test_set_not_open(self, store_not_open: S) -> None: """ Ensure that data can be written to the store that's not yet open using the store.set method. """ assert not store_not_open._is_open data_buf = self.buffer_cls.from_bytes(b"\x01\x02\x03\x04") key = "c/0" await store_not_open.set(key, data_buf) observed = await self.get(store_not_open, key) assert_bytes_equal(observed, data_buf) async def test_set_many(self, store: S) -> None: """ Test that a dict of key : value pairs can be inserted into the store via the `_set_many` method. 
""" keys = ["zarr.json", "c/0", "foo/c/0.0", "foo/0/0"] data_buf = [self.buffer_cls.from_bytes(k.encode()) for k in keys] store_dict = dict(zip(keys, data_buf, strict=True)) await store._set_many(store_dict.items()) for k, v in store_dict.items(): assert (await self.get(store, k)).to_bytes() == v.to_bytes() @pytest.mark.parametrize( "key_ranges", [ [], [("zarr.json", RangeByteRequest(0, 2))], [("c/0", RangeByteRequest(0, 2)), ("zarr.json", None)], [ ("c/0/0", RangeByteRequest(0, 2)), ("c/0/1", SuffixByteRequest(2)), ("c/0/2", OffsetByteRequest(2)), ], ], ) async def test_get_partial_values( self, store: S, key_ranges: list[tuple[str, ByteRequest]] ) -> None: # put all of the data for key, _ in key_ranges: await self.set(store, key, self.buffer_cls.from_bytes(bytes(key, encoding="utf-8"))) # read back just part of it observed_maybe = await store.get_partial_values( prototype=default_buffer_prototype(), key_ranges=key_ranges ) observed: list[Buffer] = [] expected: list[Buffer] = [] for obs in observed_maybe: assert obs is not None observed.append(obs) for idx in range(len(observed)): key, byte_range = key_ranges[idx] result = await store.get( key, prototype=default_buffer_prototype(), byte_range=byte_range ) assert result is not None expected.append(result) assert all( obs.to_bytes() == exp.to_bytes() for obs, exp in zip(observed, expected, strict=True) ) async def test_exists(self, store: S) -> None: assert not await store.exists("foo") await store.set("foo/zarr.json", self.buffer_cls.from_bytes(b"bar")) assert await store.exists("foo/zarr.json") async def test_delete(self, store: S) -> None: if not store.supports_deletes: pytest.skip("store does not support deletes") await store.set("foo/zarr.json", self.buffer_cls.from_bytes(b"bar")) assert await store.exists("foo/zarr.json") await store.delete("foo/zarr.json") assert not await store.exists("foo/zarr.json") async def test_delete_dir(self, store: S) -> None: if not store.supports_deletes: pytest.skip("store does not support deletes") await store.set("zarr.json", self.buffer_cls.from_bytes(b"root")) await store.set("foo-bar/zarr.json", self.buffer_cls.from_bytes(b"root")) await store.set("foo/zarr.json", self.buffer_cls.from_bytes(b"bar")) await store.set("foo/c/0", self.buffer_cls.from_bytes(b"chunk")) await store.delete_dir("foo") assert await store.exists("zarr.json") assert await store.exists("foo-bar/zarr.json") assert not await store.exists("foo/zarr.json") assert not await store.exists("foo/c/0") async def test_delete_nonexistent_key_does_not_raise(self, store: S) -> None: if not store.supports_deletes: pytest.skip("store does not support deletes") await store.delete("nonexistent_key") async def test_is_empty(self, store: S) -> None: assert await store.is_empty("") await self.set( store, "foo/bar", self.buffer_cls.from_bytes(bytes("something", encoding="utf-8")) ) assert not await store.is_empty("") assert await store.is_empty("fo") assert not await store.is_empty("foo/") assert not await store.is_empty("foo") assert await store.is_empty("spam/") async def test_clear(self, store: S) -> None: await self.set( store, "key", self.buffer_cls.from_bytes(bytes("something", encoding="utf-8")) ) await store.clear() assert await store.is_empty("") async def test_list(self, store: S) -> None: assert await _collect_aiterator(store.list()) == () prefix = "foo" data = self.buffer_cls.from_bytes(b"") store_dict = { prefix + "/zarr.json": data, **{prefix + f"/c/{idx}": data for idx in range(10)}, } await store._set_many(store_dict.items()) 
expected_sorted = sorted(store_dict.keys()) observed = await _collect_aiterator(store.list()) observed_sorted = sorted(observed) assert observed_sorted == expected_sorted async def test_list_prefix(self, store: S) -> None: """ Test that the `list_prefix` method works as intended. Given a prefix, it should return all the keys in storage that start with this prefix. """ prefixes = ("", "a/", "a/b/", "a/b/c/") data = self.buffer_cls.from_bytes(b"") fname = "zarr.json" store_dict = {p + fname: data for p in prefixes} await store._set_many(store_dict.items()) for prefix in prefixes: observed = tuple(sorted(await _collect_aiterator(store.list_prefix(prefix)))) expected: tuple[str, ...] = () for key in store_dict: if key.startswith(prefix): expected += (key,) expected = tuple(sorted(expected)) assert observed == expected async def test_list_empty_path(self, store: S) -> None: """ Verify that list and list_prefix work correctly when path is an empty string, i.e. no unwanted replacement occurs. """ data = self.buffer_cls.from_bytes(b"") store_dict = { "foo/bar/zarr.json": data, "foo/bar/c/1": data, "foo/baz/c/0": data, } await store._set_many(store_dict.items()) # Test list() observed_list = await _collect_aiterator(store.list()) observed_list_sorted = sorted(observed_list) expected_list_sorted = sorted(store_dict.keys()) assert observed_list_sorted == expected_list_sorted # Test list_prefix() with an empty prefix observed_prefix_empty = await _collect_aiterator(store.list_prefix("")) observed_prefix_empty_sorted = sorted(observed_prefix_empty) expected_prefix_empty_sorted = sorted(store_dict.keys()) assert observed_prefix_empty_sorted == expected_prefix_empty_sorted # Test list_prefix() with a non-empty prefix observed_prefix = await _collect_aiterator(store.list_prefix("foo/bar/")) observed_prefix_sorted = sorted(observed_prefix) expected_prefix_sorted = sorted(k for k in store_dict if k.startswith("foo/bar/")) assert observed_prefix_sorted == expected_prefix_sorted async def test_list_dir(self, store: S) -> None: root = "foo" store_dict = { root + "/zarr.json": self.buffer_cls.from_bytes(b"bar"), root + "/c/1": self.buffer_cls.from_bytes(b"\x01"), } assert await _collect_aiterator(store.list_dir("")) == () assert await _collect_aiterator(store.list_dir(root)) == () await store._set_many(store_dict.items()) keys_observed = await _collect_aiterator(store.list_dir(root)) keys_expected = {k.removeprefix(root + "/").split("/")[0] for k in store_dict} assert sorted(keys_observed) == sorted(keys_expected) keys_observed = await _collect_aiterator(store.list_dir(root + "/")) assert sorted(keys_expected) == sorted(keys_observed) async def test_set_if_not_exists(self, store: S) -> None: key = "k" data_buf = self.buffer_cls.from_bytes(b"0000") await self.set(store, key, data_buf) new = self.buffer_cls.from_bytes(b"1111") await store.set_if_not_exists("k", new) # no error result = await store.get(key, default_buffer_prototype()) assert result == data_buf await store.set_if_not_exists("k2", new) # no error result = await store.get("k2", default_buffer_prototype()) assert result == new class LatencyStore(WrapperStore[Store]): """ A wrapper class that takes any store class in its constructor and adds latency to the `set` and `get` methods. This can be used for performance testing. 
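    A minimal usage sketch (to be awaited inside an async test; the wrapped MemoryStore and
    the latency values are illustrative):

        slow_store = LatencyStore(MemoryStore(), get_latency=0.01, set_latency=0.05)
        await slow_store.set("key", default_buffer_prototype().buffer.from_bytes(b"data"))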
""" get_latency: float set_latency: float def __init__(self, cls: Store, *, get_latency: float = 0, set_latency: float = 0) -> None: self.get_latency = float(get_latency) self.set_latency = float(set_latency) self._store = cls async def set(self, key: str, value: Buffer) -> None: """ Add latency to the ``set`` method. Calls ``asyncio.sleep(self.set_latency)`` before invoking the wrapped ``set`` method. Parameters ---------- key : str The key to set value : Buffer The value to set Returns ------- None """ await asyncio.sleep(self.set_latency) await self._store.set(key, value) async def get( self, key: str, prototype: BufferPrototype, byte_range: ByteRequest | None = None ) -> Buffer | None: """ Add latency to the ``get`` method. Calls ``asyncio.sleep(self.get_latency)`` before invoking the wrapped ``get`` method. Parameters ---------- key : str The key to get prototype : BufferPrototype The BufferPrototype to use. byte_range : ByteRequest, optional An optional byte range. Returns ------- buffer : Buffer or None """ await asyncio.sleep(self.get_latency) return await self._store.get(key, prototype=prototype, byte_range=byte_range) zarr-python-3.1.5/src/zarr/testing/strategies.py000066400000000000000000000362061511007055700217730ustar00rootroot00000000000000import math import sys from collections.abc import Callable, Mapping from typing import Any, Literal import hypothesis.extra.numpy as npst import hypothesis.strategies as st import numpy as np import numpy.typing as npt from hypothesis import event from hypothesis.strategies import SearchStrategy import zarr from zarr.abc.store import RangeByteRequest, Store from zarr.codecs.bytes import BytesCodec from zarr.core.array import Array from zarr.core.chunk_grids import RegularChunkGrid from zarr.core.chunk_key_encodings import DefaultChunkKeyEncoding from zarr.core.common import JSON, ZarrFormat from zarr.core.dtype import get_data_type_from_native_dtype from zarr.core.metadata import ArrayV2Metadata, ArrayV3Metadata from zarr.core.sync import sync from zarr.storage import MemoryStore, StoreLike from zarr.storage._common import _dereference_path from zarr.storage._utils import normalize_path from zarr.types import AnyArray # Copied from Xarray _attr_keys = st.text(st.characters(), min_size=1) _attr_values = st.recursive( st.none() | st.booleans() | st.text(st.characters(), max_size=5), lambda children: st.lists(children) | st.dictionaries(_attr_keys, children), max_leaves=3, ) @st.composite def keys(draw: st.DrawFn, *, max_num_nodes: int | None = None) -> str: return draw(st.lists(node_names, min_size=1, max_size=max_num_nodes).map("/".join)) @st.composite def paths(draw: st.DrawFn, *, max_num_nodes: int | None = None) -> str: return draw(st.just("/") | keys(max_num_nodes=max_num_nodes)) def dtypes() -> st.SearchStrategy[np.dtype[Any]]: return ( npst.boolean_dtypes() | npst.integer_dtypes(endianness="=") | npst.unsigned_integer_dtypes(endianness="=") | npst.floating_dtypes(endianness="=") | npst.complex_number_dtypes(endianness="=") | npst.byte_string_dtypes(endianness="=") | npst.unicode_string_dtypes(endianness="=") | npst.datetime64_dtypes(endianness="=") | npst.timedelta64_dtypes(endianness="=") ) def v3_dtypes() -> st.SearchStrategy[np.dtype[Any]]: return dtypes() def v2_dtypes() -> st.SearchStrategy[np.dtype[Any]]: return dtypes() def safe_unicode_for_dtype(dtype: np.dtype[np.str_]) -> st.SearchStrategy[str]: """Generate UTF-8-safe text constrained to max_len of dtype.""" # account for utf-32 encoding (i.e. 
4 bytes/character) max_len = max(1, dtype.itemsize // 4) return st.text( alphabet=st.characters( exclude_categories=["Cs"], # Avoid *technically allowed* surrogates min_codepoint=32, ), min_size=1, max_size=max_len, ) def clear_store(x: Store) -> Store: sync(x.clear()) return x # From https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html#node-names # 1. must not be the empty string ("") # 2. must not include the character "/" # 3. must not be a string composed only of period characters, e.g. "." or ".." # 4. must not start with the reserved prefix "__" zarr_key_chars = st.sampled_from( ".-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz" ) node_names = ( st.text(zarr_key_chars, min_size=1) .filter(lambda t: t not in (".", "..") and not t.startswith("__")) .filter(lambda name: name.lower() != "zarr.json") ) short_node_names = ( st.text(zarr_key_chars, max_size=3, min_size=1) .filter(lambda t: t not in (".", "..") and not t.startswith("__")) .filter(lambda name: name.lower() != "zarr.json") ) array_names = node_names attrs: st.SearchStrategy[Mapping[str, JSON] | None] = st.none() | st.dictionaries( _attr_keys, _attr_values ) # st.builds will only call a new store constructor for different keyword arguments # i.e. stores.examples() will always return the same object per Store class. # So we map a clear to reset the store. stores = st.builds(MemoryStore, st.just({})).map(clear_store) compressors = st.sampled_from([None, "default"]) zarr_formats: st.SearchStrategy[ZarrFormat] = st.sampled_from([3, 2]) # We de-prioritize arrays having dim sizes 0, 1, 2 array_shapes = npst.array_shapes(max_dims=4, min_side=3, max_side=5) | npst.array_shapes( max_dims=4, min_side=0 ) @st.composite def dimension_names(draw: st.DrawFn, *, ndim: int | None = None) -> list[None | str] | None: simple_text = st.text(zarr_key_chars, min_size=0) return draw(st.none() | st.lists(st.none() | simple_text, min_size=ndim, max_size=ndim)) # type: ignore[arg-type] @st.composite def array_metadata( draw: st.DrawFn, *, array_shapes: Callable[..., st.SearchStrategy[tuple[int, ...]]] = npst.array_shapes, zarr_formats: st.SearchStrategy[Literal[2, 3]] = zarr_formats, attributes: SearchStrategy[Mapping[str, JSON] | None] = attrs, ) -> ArrayV2Metadata | ArrayV3Metadata: zarr_format = draw(zarr_formats) # separator = draw(st.sampled_from(['/', '\\'])) shape = draw(array_shapes()) ndim = len(shape) chunk_shape = draw(array_shapes(min_dims=ndim, max_dims=ndim)) np_dtype = draw(dtypes()) dtype = get_data_type_from_native_dtype(np_dtype) fill_value = draw(npst.from_dtype(np_dtype)) if zarr_format == 2: return ArrayV2Metadata( shape=shape, chunks=chunk_shape, dtype=dtype, fill_value=fill_value, order=draw(st.sampled_from(["C", "F"])), attributes=draw(attributes), # type: ignore[arg-type] dimension_separator=draw(st.sampled_from([".", "/"])), filters=None, compressor=None, ) else: return ArrayV3Metadata( shape=shape, data_type=dtype, chunk_grid=RegularChunkGrid(chunk_shape=chunk_shape), fill_value=fill_value, attributes=draw(attributes), # type: ignore[arg-type] dimension_names=draw(dimension_names(ndim=ndim)), chunk_key_encoding=DefaultChunkKeyEncoding(separator="/"), # FIXME codecs=[BytesCodec()], storage_transformers=(), ) @st.composite def numpy_arrays( draw: st.DrawFn, *, shapes: st.SearchStrategy[tuple[int, ...]] = array_shapes, dtype: np.dtype[Any] | None = None, ) -> npt.NDArray[Any]: """ Generate numpy arrays that can be saved in the provided Zarr format. 
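    A hedged usage sketch (drawn from inside another @st.composite strategy; the shape
    strategy shown is illustrative):

        arr = draw(numpy_arrays(shapes=npst.array_shapes(max_dims=3)))

    The dtype is drawn from dtypes() unless an explicit dtype argument is passed.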
""" if dtype is None: dtype = draw(dtypes()) if np.issubdtype(dtype, np.str_): safe_unicode_strings = safe_unicode_for_dtype(dtype) return draw(npst.arrays(dtype=dtype, shape=shapes, elements=safe_unicode_strings)) return draw(npst.arrays(dtype=dtype, shape=shapes)) @st.composite def chunk_shapes(draw: st.DrawFn, *, shape: tuple[int, ...]) -> tuple[int, ...]: # We want this strategy to shrink towards arrays with smaller number of chunks # 1. st.integers() shrinks towards smaller values. So we use that to generate number of chunks numchunks = draw( st.tuples(*[st.integers(min_value=0 if size == 0 else 1, max_value=size) for size in shape]) ) # 2. and now generate the chunks tuple chunks = tuple( size // nchunks if nchunks > 0 else 0 for size, nchunks in zip(shape, numchunks, strict=True) ) for c in chunks: event("chunk size", c) if any((c != 0 and s % c != 0) for s, c in zip(shape, chunks, strict=True)): event("smaller last chunk") return chunks @st.composite def shard_shapes( draw: st.DrawFn, *, shape: tuple[int, ...], chunk_shape: tuple[int, ...] ) -> tuple[int, ...]: # We want this strategy to shrink towards arrays with smaller number of shards # shards must be an integral number of chunks assert all(c != 0 for c in chunk_shape) numchunks = tuple(s // c for s, c in zip(shape, chunk_shape, strict=True)) multiples = tuple(draw(st.integers(min_value=1, max_value=nc)) for nc in numchunks) return tuple(m * c for m, c in zip(multiples, chunk_shape, strict=True)) @st.composite def np_array_and_chunks( draw: st.DrawFn, *, arrays: st.SearchStrategy[npt.NDArray[Any]] = numpy_arrays(), # noqa: B008 ) -> tuple[np.ndarray, tuple[int, ...]]: # type: ignore[type-arg] """A hypothesis strategy to generate small sized random arrays. Returns: a tuple of the array and a suitable random chunking for it. """ array = draw(arrays) return (array, draw(chunk_shapes(shape=array.shape))) @st.composite def arrays( draw: st.DrawFn, *, shapes: st.SearchStrategy[tuple[int, ...]] = array_shapes, compressors: st.SearchStrategy = compressors, stores: st.SearchStrategy[StoreLike] = stores, paths: st.SearchStrategy[str] = paths(), # noqa: B008 array_names: st.SearchStrategy = array_names, arrays: st.SearchStrategy | None = None, attrs: st.SearchStrategy = attrs, zarr_formats: st.SearchStrategy = zarr_formats, ) -> AnyArray: store = draw(stores, label="store") path = draw(paths, label="array parent") name = draw(array_names, label="array name") attributes = draw(attrs, label="attributes") zarr_format = draw(zarr_formats, label="zarr format") if arrays is None: arrays = numpy_arrays(shapes=shapes) nparray = draw(arrays, label="array data") chunk_shape = draw(chunk_shapes(shape=nparray.shape), label="chunk shape") dim_names: None | list[str | None] = None if zarr_format == 3 and all(c > 0 for c in chunk_shape): shard_shape = draw( st.none() | shard_shapes(shape=nparray.shape, chunk_shape=chunk_shape), label="shard shape", ) dim_names = draw(dimension_names(ndim=nparray.ndim), label="dimension names") else: shard_shape = None # test that None works too. 
fill_value = draw(st.one_of([st.none(), npst.from_dtype(nparray.dtype)])) # compressor = draw(compressors) expected_attrs = {} if attributes is None else attributes array_path = _dereference_path(path, name) root = zarr.open_group(store, mode="w", zarr_format=zarr_format) a = root.create_array( array_path, shape=nparray.shape, chunks=chunk_shape, shards=shard_shape, dtype=nparray.dtype, attributes=attributes, # compressor=compressor, # FIXME fill_value=fill_value, dimension_names=dim_names, ) assert isinstance(a, Array) if a.metadata.zarr_format == 3: assert a.fill_value is not None assert a.name is not None assert a.path == normalize_path(array_path) assert a.name == "/" + a.path assert isinstance(root[array_path], Array) assert nparray.shape == a.shape assert chunk_shape == a.chunks assert shard_shape == a.shards assert a.basename == name, (a.basename, name) assert dict(a.attrs) == expected_attrs a[:] = nparray return a @st.composite def simple_arrays( draw: st.DrawFn, *, shapes: st.SearchStrategy[tuple[int, ...]] = array_shapes, ) -> Any: return draw( arrays( shapes=shapes, paths=paths(max_num_nodes=2), array_names=short_node_names, attrs=st.none(), compressors=st.sampled_from([None, "default"]), ) ) def is_negative_slice(idx: Any) -> bool: return isinstance(idx, slice) and idx.step is not None and idx.step < 0 @st.composite def end_slices(draw: st.DrawFn, *, shape: tuple[int, ...]) -> Any: """ A strategy that slices ranges that include the last chunk. This is intended to stress-test handling of a possibly smaller last chunk. """ slicers = [] for size in shape: start = draw(st.integers(min_value=size // 2, max_value=size - 1)) length = draw(st.integers(min_value=0, max_value=size - start)) slicers.append(slice(start, start + length)) event("drawing end slice") return tuple(slicers) @st.composite def basic_indices( draw: st.DrawFn, *, shape: tuple[int, ...], min_dims: int = 0, max_dims: int | None = None, allow_newaxis: bool = False, allow_ellipsis: bool = True, ) -> Any: """Basic indices without unsupported negative slices.""" strategy = npst.basic_indices( shape=shape, min_dims=min_dims, max_dims=max_dims, allow_newaxis=allow_newaxis, allow_ellipsis=allow_ellipsis, ).filter( lambda idxr: ( not ( is_negative_slice(idxr) or (isinstance(idxr, tuple) and any(is_negative_slice(idx) for idx in idxr)) # type: ignore[redundant-expr] ) ) ) if math.prod(shape) >= 3: strategy = end_slices(shape=shape) | strategy return draw(strategy) @st.composite def orthogonal_indices( draw: st.DrawFn, *, shape: tuple[int, ...] ) -> tuple[tuple[np.ndarray[Any, Any], ...], tuple[np.ndarray[Any, Any], ...]]: """ Strategy that returns (1) a tuple of integer arrays used for orthogonal indexing of Zarr arrays. 
(2) a tuple of integer arrays that can be used for equivalent indexing of numpy arrays """ zindexer = [] npindexer = [] ndim = len(shape) for axis, size in enumerate(shape): if size != 0: strategy = npst.integer_array_indices( shape=(size,), result_shape=npst.array_shapes(min_side=1, max_side=size, max_dims=1) ) | basic_indices(min_dims=1, shape=(size,), allow_ellipsis=False) else: strategy = basic_indices(min_dims=1, shape=(size,), allow_ellipsis=False) val = draw( strategy # bare ints, slices .map(lambda x: (x,) if not isinstance(x, tuple) else x) # skip empty tuple .filter(bool) ) (idxr,) = val if isinstance(idxr, int): idxr = np.array([idxr]) zindexer.append(idxr) if isinstance(idxr, slice): idxr = np.arange(*idxr.indices(size)) elif isinstance(idxr, (tuple, int)): idxr = np.array(idxr) newshape = [1] * ndim newshape[axis] = idxr.size npindexer.append(idxr.reshape(newshape)) # casting the output of broadcast_arrays is needed for numpy < 2 return tuple(zindexer), tuple(np.broadcast_arrays(*npindexer)) def key_ranges( keys: SearchStrategy[str] = node_names, max_size: int = sys.maxsize ) -> SearchStrategy[list[tuple[str, RangeByteRequest]]]: """ Function to generate key_ranges strategy for get_partial_values() returns list strategy w/ form:: [(key, (range_start, range_end)), (key, (range_start, range_end)),...] """ def make_request(start: int, length: int) -> RangeByteRequest: return RangeByteRequest(start, end=min(start + length, max_size)) byte_ranges = st.builds( make_request, start=st.integers(min_value=0, max_value=max_size), length=st.integers(min_value=0, max_value=max_size), ) key_tuple = st.tuples(keys, byte_ranges) return st.lists(key_tuple, min_size=1, max_size=10) @st.composite def chunk_paths(draw: st.DrawFn, ndim: int, numblocks: tuple[int, ...], subset: bool = True) -> str: blockidx = draw( st.tuples(*tuple(st.integers(min_value=0, max_value=max(0, b - 1)) for b in numblocks)) ) subset_slicer = slice(draw(st.integers(min_value=0, max_value=ndim))) if subset else slice(None) return "/".join(map(str, blockidx[subset_slicer])) zarr-python-3.1.5/src/zarr/testing/utils.py000066400000000000000000000021411511007055700207500ustar00rootroot00000000000000from __future__ import annotations from typing import TYPE_CHECKING, TypeVar, cast import pytest from zarr.core.buffer import Buffer if TYPE_CHECKING: from zarr.core.common import BytesLike __all__ = ["assert_bytes_equal"] def assert_bytes_equal(b1: Buffer | BytesLike | None, b2: Buffer | BytesLike | None) -> None: """Help function to assert if two bytes-like or Buffers are equal Warnings -------- Always copies data, only use for testing and debugging """ if isinstance(b1, Buffer): b1 = b1.to_bytes() if isinstance(b2, Buffer): b2 = b2.to_bytes() assert b1 == b2 def has_cupy() -> bool: try: import cupy return cast("bool", cupy.cuda.runtime.getDeviceCount() > 0) except ImportError: return False except cupy.cuda.runtime.CUDARuntimeError: return False T = TypeVar("T") gpu_mark = pytest.mark.gpu skip_if_no_gpu = pytest.mark.skipif(not has_cupy(), reason="CuPy not installed or no GPU available") # Decorator for GPU tests def gpu_test(func: T) -> T: return cast(T, gpu_mark(skip_if_no_gpu(func))) zarr-python-3.1.5/src/zarr/types.py000066400000000000000000000012301511007055700172750ustar00rootroot00000000000000from typing import Any, TypeAlias from zarr.core.array import Array, AsyncArray from zarr.core.metadata.v2 import ArrayV2Metadata from zarr.core.metadata.v3 import ArrayV3Metadata AnyAsyncArray: TypeAlias = AsyncArray[Any] """A Zarr 
format 2 or 3 `AsyncArray`""" AsyncArrayV2: TypeAlias = AsyncArray[ArrayV2Metadata] """A Zarr format 2 `AsyncArray`""" AsyncArrayV3: TypeAlias = AsyncArray[ArrayV3Metadata] """A Zarr format 3 `AsyncArray`""" AnyArray: TypeAlias = Array[Any] """A Zarr format 2 or 3 `Array`""" ArrayV2: TypeAlias = Array[ArrayV2Metadata] """A Zarr format 2 `Array`""" ArrayV3: TypeAlias = Array[ArrayV3Metadata] """A Zarr format 3 `Array`""" zarr-python-3.1.5/tests/000077500000000000000000000000001511007055700151605ustar00rootroot00000000000000zarr-python-3.1.5/tests/__init__.py000066400000000000000000000000001511007055700172570ustar00rootroot00000000000000zarr-python-3.1.5/tests/conftest.py000066400000000000000000000352731511007055700173710ustar00rootroot00000000000000from __future__ import annotations import math import os import pathlib import sys from collections.abc import Mapping, Sequence from dataclasses import dataclass, field from typing import TYPE_CHECKING, cast import numpy as np import numpy.typing as npt import pytest from hypothesis import HealthCheck, Verbosity, settings import zarr.registry from zarr import AsyncGroup, config from zarr.abc.store import Store from zarr.codecs.sharding import ShardingCodec, ShardingCodecIndexLocation from zarr.core.array import ( _parse_chunk_encoding_v2, _parse_chunk_encoding_v3, _parse_chunk_key_encoding, ) from zarr.core.chunk_grids import RegularChunkGrid, _auto_partition from zarr.core.common import ( JSON, DimensionNames, MemoryOrder, ShapeLike, ZarrFormat, parse_shapelike, ) from zarr.core.config import config as zarr_config from zarr.core.dtype import ( get_data_type_from_native_dtype, ) from zarr.core.dtype.common import HasItemSize from zarr.core.metadata.v2 import ArrayV2Metadata from zarr.core.metadata.v3 import ArrayV3Metadata from zarr.core.sync import sync from zarr.storage import FsspecStore, LocalStore, MemoryStore, StorePath, ZipStore if TYPE_CHECKING: from collections.abc import Generator from typing import Any, Literal from _pytest.compat import LEGACY_PATH from zarr.abc.codec import Codec from zarr.core.array import CompressorsLike, FiltersLike, SerializerLike, ShardsLike from zarr.core.chunk_key_encodings import ( ChunkKeyEncoding, ChunkKeyEncodingLike, V2ChunkKeyEncoding, ) from zarr.core.dtype.wrapper import ZDType async def parse_store( store: Literal["local", "memory", "fsspec", "zip"], path: str ) -> LocalStore | MemoryStore | FsspecStore | ZipStore: if store == "local": return await LocalStore.open(path) if store == "memory": return await MemoryStore.open() if store == "fsspec": return await FsspecStore.open(url=path) if store == "zip": return await ZipStore.open(path + "/zarr.zip", mode="w") raise AssertionError @pytest.fixture(params=[str, pathlib.Path]) def path_type(request: pytest.FixtureRequest) -> Any: return request.param # todo: harmonize this with local_store fixture @pytest.fixture async def store_path(tmpdir: LEGACY_PATH) -> StorePath: store = await LocalStore.open(str(tmpdir)) return StorePath(store) @pytest.fixture async def local_store(tmpdir: LEGACY_PATH) -> LocalStore: return await LocalStore.open(str(tmpdir)) @pytest.fixture async def remote_store(url: str) -> FsspecStore: return await FsspecStore.open(url) @pytest.fixture async def memory_store() -> MemoryStore: return await MemoryStore.open() @pytest.fixture async def zip_store(tmpdir: LEGACY_PATH) -> ZipStore: return await ZipStore.open(str(tmpdir / "zarr.zip"), mode="w") @pytest.fixture async def store(request: pytest.FixtureRequest, tmpdir: LEGACY_PATH) -> 
Store: param = request.param return await parse_store(param, str(tmpdir)) @pytest.fixture async def store2(request: pytest.FixtureRequest, tmpdir: LEGACY_PATH) -> Store: """Fixture to create a second store for testing copy operations between stores""" param = request.param store2_path = tmpdir.mkdir("store2") return await parse_store(param, str(store2_path)) @pytest.fixture(params=["local", "memory", "zip"]) def sync_store(request: pytest.FixtureRequest, tmp_path: LEGACY_PATH) -> Store: result = sync(parse_store(request.param, str(tmp_path))) if not isinstance(result, Store): raise TypeError(f"Wrong store class returned by test fixture! got {result!r} instead") return result @dataclass class AsyncGroupRequest: zarr_format: ZarrFormat store: Literal["local", "fsspec", "memory", "zip"] attributes: dict[str, Any] = field(default_factory=dict) @pytest.fixture async def async_group(request: pytest.FixtureRequest, tmpdir: LEGACY_PATH) -> AsyncGroup: param: AsyncGroupRequest = request.param store = await parse_store(param.store, str(tmpdir)) return await AsyncGroup.from_store( store, attributes=param.attributes, zarr_format=param.zarr_format, overwrite=False, ) @pytest.fixture(params=["numpy", "cupy"]) def xp(request: pytest.FixtureRequest) -> Any: """Fixture to parametrize over numpy-like libraries""" if request.param == "cupy": request.node.add_marker(pytest.mark.gpu) return pytest.importorskip(request.param) @pytest.fixture(autouse=True) def reset_config() -> Generator[None, None, None]: config.reset() yield config.reset() @dataclass class ArrayRequest: shape: tuple[int, ...] dtype: str order: MemoryOrder @pytest.fixture def array_fixture(request: pytest.FixtureRequest) -> npt.NDArray[Any]: array_request: ArrayRequest = request.param return ( np.arange(np.prod(array_request.shape)) .reshape(array_request.shape, order=array_request.order) .astype(array_request.dtype) ) @pytest.fixture(params=(2, 3), ids=["zarr2", "zarr3"]) def zarr_format(request: pytest.FixtureRequest) -> ZarrFormat: if request.param == 2: return 2 elif request.param == 3: return 3 msg = f"Invalid zarr format requested. Got {request.param}, expected one of (2,3)."
raise ValueError(msg) def _clear_registries() -> None: registries = zarr.registry._collect_entrypoints() for registry in registries: registry.lazy_load_list.clear() @pytest.fixture def set_path() -> Generator[None, None, None]: tests_dir = str(pathlib.Path(__file__).parent.absolute()) sys.path.append(tests_dir) _clear_registries() zarr.registry._collect_entrypoints() yield sys.path.remove(tests_dir) _clear_registries() zarr.registry._collect_entrypoints() config.reset() def pytest_addoption(parser: Any) -> None: parser.addoption( "--run-slow-hypothesis", action="store_true", default=False, help="run slow hypothesis tests", ) def pytest_collection_modifyitems(config: Any, items: Any) -> None: if config.getoption("--run-slow-hypothesis"): return skip_slow_hyp = pytest.mark.skip(reason="need --run-slow-hypothesis option to run") for item in items: if "slow_hypothesis" in item.keywords: item.add_marker(skip_slow_hyp) settings.register_profile( "default", parent=settings.get_profile("default"), max_examples=300, suppress_health_check=[HealthCheck.filter_too_much, HealthCheck.too_slow], deadline=None, verbosity=Verbosity.verbose, ) settings.register_profile( "ci", parent=settings.get_profile("ci"), max_examples=300, derandomize=True, # more like regression testing deadline=None, suppress_health_check=[HealthCheck.filter_too_much, HealthCheck.too_slow], ) settings.register_profile( "nightly", max_examples=500, parent=settings.get_profile("ci"), derandomize=False, stateful_step_count=100, ) settings.load_profile(os.getenv("HYPOTHESIS_PROFILE", "default")) # TODO: uncomment these overrides when we can get mypy to accept them """ @overload def create_array_metadata( *, shape: ShapeLike, dtype: npt.DTypeLike, chunks: tuple[int, ...] | Literal["auto"], shards: None, filters: FiltersLike, compressors: CompressorsLike, serializer: SerializerLike, fill_value: Any | None, order: MemoryOrder | None, zarr_format: Literal[2], attributes: dict[str, JSON] | None, chunk_key_encoding: ChunkKeyEncoding | ChunkKeyEncodingLike | None, dimension_names: None, ) -> ArrayV2Metadata: ... @overload def create_array_metadata( *, shape: ShapeLike, dtype: npt.DTypeLike, chunks: tuple[int, ...] | Literal["auto"], shards: ShardsLike | None, filters: FiltersLike, compressors: CompressorsLike, serializer: SerializerLike, fill_value: Any | None, order: None, zarr_format: Literal[3], attributes: dict[str, JSON] | None, chunk_key_encoding: ChunkKeyEncoding | ChunkKeyEncodingLike | None, dimension_names: Iterable[str] | None, ) -> ArrayV3Metadata: ... """ def create_array_metadata( *, shape: ShapeLike, dtype: npt.DTypeLike, chunks: tuple[int, ...] 
| Literal["auto"] = "auto", shards: ShardsLike | None = None, filters: FiltersLike = "auto", compressors: CompressorsLike = "auto", serializer: SerializerLike = "auto", fill_value: Any = 0, order: MemoryOrder | None = None, zarr_format: ZarrFormat, attributes: dict[str, JSON] | None = None, chunk_key_encoding: ChunkKeyEncoding | ChunkKeyEncodingLike | None = None, dimension_names: DimensionNames = None, ) -> ArrayV2Metadata | ArrayV3Metadata: """ Create array metadata """ dtype_parsed = get_data_type_from_native_dtype(dtype) shape_parsed = parse_shapelike(shape) chunk_key_encoding_parsed = _parse_chunk_key_encoding( chunk_key_encoding, zarr_format=zarr_format ) item_size = 1 if isinstance(dtype_parsed, HasItemSize): item_size = dtype_parsed.item_size shard_shape_parsed, chunk_shape_parsed = _auto_partition( array_shape=shape_parsed, shard_shape=shards, chunk_shape=chunks, item_size=item_size, ) if order is None: order_parsed = zarr_config.get("array.order") else: order_parsed = order chunks_out: tuple[int, ...] if zarr_format == 2: filters_parsed, compressor_parsed = _parse_chunk_encoding_v2( compressor=compressors, filters=filters, dtype=dtype_parsed ) chunk_key_encoding_parsed = cast("V2ChunkKeyEncoding", chunk_key_encoding_parsed) return ArrayV2Metadata( shape=shape_parsed, dtype=dtype_parsed, chunks=chunk_shape_parsed, order=order_parsed, dimension_separator=chunk_key_encoding_parsed.separator, fill_value=fill_value, compressor=compressor_parsed, filters=filters_parsed, attributes=attributes, ) elif zarr_format == 3: array_array, array_bytes, bytes_bytes = _parse_chunk_encoding_v3( compressors=compressors, filters=filters, serializer=serializer, dtype=dtype_parsed, ) sub_codecs: tuple[Codec, ...] = (*array_array, array_bytes, *bytes_bytes) codecs_out: tuple[Codec, ...] if shard_shape_parsed is not None: index_location = None if isinstance(shards, dict): index_location = ShardingCodecIndexLocation(shards.get("index_location", None)) if index_location is None: index_location = ShardingCodecIndexLocation.end sharding_codec = ShardingCodec( chunk_shape=chunk_shape_parsed, codecs=sub_codecs, index_location=index_location, ) sharding_codec.validate( shape=chunk_shape_parsed, dtype=dtype_parsed, chunk_grid=RegularChunkGrid(chunk_shape=shard_shape_parsed), ) codecs_out = (sharding_codec,) chunks_out = shard_shape_parsed else: chunks_out = chunk_shape_parsed codecs_out = sub_codecs return ArrayV3Metadata( shape=shape_parsed, data_type=dtype_parsed, chunk_grid=RegularChunkGrid(chunk_shape=chunks_out), chunk_key_encoding=chunk_key_encoding_parsed, fill_value=fill_value, codecs=codecs_out, attributes=attributes, dimension_names=dimension_names, ) raise ValueError(f"Invalid Zarr format: {zarr_format}") # TODO: uncomment these overrides when we can get mypy to accept them """ @overload def meta_from_array( array: np.ndarray[Any, Any], chunks: tuple[int, ...] | Literal["auto"], shards: None, filters: FiltersLike, compressors: CompressorsLike, serializer: SerializerLike, fill_value: Any | None, order: MemoryOrder | None, zarr_format: Literal[2], attributes: dict[str, JSON] | None, chunk_key_encoding: ChunkKeyEncoding | ChunkKeyEncodingLike | None, dimension_names: Iterable[str] | None, ) -> ArrayV2Metadata: ... @overload def meta_from_array( array: np.ndarray[Any, Any], chunks: tuple[int, ...] 
| Literal["auto"], shards: ShardsLike | None, filters: FiltersLike, compressors: CompressorsLike, serializer: SerializerLike, fill_value: Any | None, order: None, zarr_format: Literal[3], attributes: dict[str, JSON] | None, chunk_key_encoding: ChunkKeyEncoding | ChunkKeyEncodingLike | None, dimension_names: Iterable[str] | None, ) -> ArrayV3Metadata: ... """ def meta_from_array( array: np.ndarray[Any, Any], *, chunks: tuple[int, ...] | Literal["auto"] = "auto", shards: ShardsLike | None = None, filters: FiltersLike = "auto", compressors: CompressorsLike = "auto", serializer: SerializerLike = "auto", fill_value: Any = 0, order: MemoryOrder | None = None, zarr_format: ZarrFormat = 3, attributes: dict[str, JSON] | None = None, chunk_key_encoding: ChunkKeyEncoding | ChunkKeyEncodingLike | None = None, dimension_names: DimensionNames = None, ) -> ArrayV3Metadata | ArrayV2Metadata: """ Create array metadata from an array """ return create_array_metadata( shape=array.shape, dtype=array.dtype, chunks=chunks, shards=shards, filters=filters, compressors=compressors, serializer=serializer, fill_value=fill_value, order=order, zarr_format=zarr_format, attributes=attributes, chunk_key_encoding=chunk_key_encoding, dimension_names=dimension_names, ) def skip_object_dtype(dtype: ZDType[Any, Any]) -> None: if dtype.dtype_cls is type(np.dtype("O")): msg = ( f"{dtype} uses the numpy object data type, which is not a valid target for data " "type resolution" ) pytest.skip(msg) def nan_equal(a: object, b: object) -> bool: """ Convenience function for equality comparison between two values ``a`` and ``b``, that might both be NaN. Returns True if both ``a`` and ``b`` are NaN, otherwise returns a == b """ if math.isnan(a) and math.isnan(b): # type: ignore[arg-type] return True return a == b def deep_nan_equal(a: object, b: object) -> bool: if isinstance(a, Mapping) and isinstance(b, Mapping): return all(deep_nan_equal(a[k], b[k]) for k in a) if isinstance(a, Sequence) and isinstance(b, Sequence): return all(deep_nan_equal(a[i], b[i]) for i in range(len(a))) return nan_equal(a, b) zarr-python-3.1.5/tests/package_with_entrypoint-0.1.dist-info/000077500000000000000000000000001511007055700242705ustar00rootroot00000000000000zarr-python-3.1.5/tests/package_with_entrypoint-0.1.dist-info/entry_points.txt000066400000000000000000000012371511007055700275710ustar00rootroot00000000000000[zarr.codecs] test = package_with_entrypoint:TestEntrypointCodec [zarr.codecs.test] another_codec = package_with_entrypoint:TestEntrypointGroup.Codec [zarr] codec_pipeline = package_with_entrypoint:TestEntrypointCodecPipeline ndbuffer = package_with_entrypoint:TestEntrypointNDBuffer buffer = package_with_entrypoint:TestEntrypointBuffer [zarr.buffer] another_buffer = package_with_entrypoint:TestEntrypointGroup.Buffer [zarr.ndbuffer] another_ndbuffer = package_with_entrypoint:TestEntrypointGroup.NDBuffer [zarr.codec_pipeline] another_pipeline = package_with_entrypoint:TestEntrypointGroup.Pipeline [zarr.data_type] new_data_type = package_with_entrypoint:TestDataTypezarr-python-3.1.5/tests/package_with_entrypoint/000077500000000000000000000000001511007055700221015ustar00rootroot00000000000000zarr-python-3.1.5/tests/package_with_entrypoint/__init__.py000066400000000000000000000054371511007055700242230ustar00rootroot00000000000000from __future__ import annotations from typing import TYPE_CHECKING import numpy as np import numpy.typing as npt import zarr.core.buffer from zarr.abc.codec import ArrayBytesCodec, CodecInput, CodecPipeline from zarr.codecs 
import BytesCodec from zarr.core.buffer import Buffer, NDBuffer from zarr.core.dtype.common import DataTypeValidationError, DTypeJSON, DTypeSpec_V2 from zarr.core.dtype.npy.bool import Bool if TYPE_CHECKING: from collections.abc import Iterable from typing import Any, ClassVar, Literal, Self from zarr.core.array_spec import ArraySpec from zarr.core.common import ZarrFormat class TestEntrypointCodec(ArrayBytesCodec): is_fixed_size = True async def encode( self, chunks_and_specs: Iterable[tuple[CodecInput | None, ArraySpec]], ) -> Iterable[Buffer | None]: return [None] async def decode( self, chunks_and_specs: Iterable[tuple[CodecInput | None, ArraySpec]], ) -> npt.NDArray[Any]: return np.array(1) def compute_encoded_size(self, input_byte_length: int, chunk_spec: ArraySpec) -> int: return input_byte_length class TestEntrypointCodecPipeline(CodecPipeline): def __init__(self, batch_size: int = 1) -> None: pass async def encode( self, chunks_and_specs: Iterable[tuple[CodecInput | None, ArraySpec]] ) -> Iterable[Buffer | None]: return [None] async def decode( self, chunks_and_specs: Iterable[tuple[CodecInput | None, ArraySpec]] ) -> Iterable[NDBuffer | None]: return np.array(1) class TestEntrypointBuffer(Buffer): pass class TestEntrypointNDBuffer(NDBuffer): pass class TestEntrypointGroup: class Codec(BytesCodec): pass class Buffer(zarr.core.buffer.Buffer): pass class NDBuffer(zarr.core.buffer.NDBuffer): pass class Pipeline(CodecPipeline): pass class TestDataType(Bool): """ This is a "data type" that serializes to "test" """ _zarr_v3_name: ClassVar[Literal["test"]] = "test" # type: ignore[assignment] @classmethod def from_json(cls, data: DTypeJSON, *, zarr_format: Literal[2, 3]) -> Self: if zarr_format == 2 and data == {"name": cls._zarr_v3_name, "object_codec_id": None}: return cls() if zarr_format == 3 and data == cls._zarr_v3_name: return cls() raise DataTypeValidationError( f"Invalid JSON representation of {cls.__name__}. 
Got {data!r}" ) def to_json(self, zarr_format: ZarrFormat) -> str | DTypeSpec_V2: # type: ignore[override] if zarr_format == 2: return {"name": self._zarr_v3_name, "object_codec_id": None} if zarr_format == 3: return self._zarr_v3_name raise ValueError("zarr_format must be 2 or 3") zarr-python-3.1.5/tests/test_abc/000077500000000000000000000000001511007055700167445ustar00rootroot00000000000000zarr-python-3.1.5/tests/test_abc/__init__.py000066400000000000000000000000001511007055700210430ustar00rootroot00000000000000zarr-python-3.1.5/tests/test_abc/test_codec.py000066400000000000000000000005221511007055700214310ustar00rootroot00000000000000from __future__ import annotations from zarr.abc.codec import _check_codecjson_v2 def test_check_codecjson_v2_valid() -> None: """ Test that the _check_codecjson_v2 function works """ assert _check_codecjson_v2({"id": "gzip"}) assert not _check_codecjson_v2({"id": 10}) assert not _check_codecjson_v2([10, 11]) zarr-python-3.1.5/tests/test_api.py000066400000000000000000001513461511007055700173540ustar00rootroot00000000000000from __future__ import annotations import inspect import re from typing import TYPE_CHECKING, Any import zarr.codecs import zarr.storage from zarr.core.array import AsyncArray, init_array from zarr.storage import LocalStore, ZipStore from zarr.storage._common import StorePath if TYPE_CHECKING: from collections.abc import Callable from pathlib import Path from zarr.abc.store import Store from zarr.core.common import JSON, MemoryOrder, ZarrFormat from zarr.types import AnyArray import contextlib from typing import Literal import numpy as np import pytest from numpy.testing import assert_array_equal import zarr import zarr.api.asynchronous import zarr.api.synchronous import zarr.core.group from zarr import Array, Group from zarr.api.synchronous import ( create, create_array, create_group, from_array, group, load, open_group, save, save_array, save_group, ) from zarr.core.buffer import NDArrayLike from zarr.errors import ( ArrayNotFoundError, MetadataValidationError, ZarrDeprecationWarning, ZarrUserWarning, ) from zarr.storage import MemoryStore from zarr.storage._utils import normalize_path from zarr.testing.utils import gpu_test def test_create(memory_store: Store) -> None: store = memory_store # create array z = create(shape=100, store=store) assert isinstance(z, Array) assert z.shape == (100,) # create array, overwrite, specify chunk shape z = create(shape=200, chunk_shape=20, store=store, overwrite=True) assert isinstance(z, Array) assert z.shape == (200,) assert z.chunks == (20,) # create array, overwrite, specify chunk shape via chunks param z = create(shape=400, chunks=40, store=store, overwrite=True) assert isinstance(z, Array) assert z.shape == (400,) assert z.chunks == (40,) # create array with float shape with pytest.raises(TypeError): z = create(shape=(400.5, 100), store=store, overwrite=True) # type: ignore[arg-type] # create array with float chunk shape with pytest.raises(TypeError): z = create(shape=(400, 100), chunks=(16, 16.5), store=store, overwrite=True) # type: ignore[arg-type] @pytest.mark.parametrize( "func", [ zarr.api.asynchronous.zeros_like, zarr.api.asynchronous.ones_like, zarr.api.asynchronous.empty_like, zarr.api.asynchronous.full_like, zarr.api.asynchronous.open_like, ], ) @pytest.mark.parametrize("out_shape", ["keep", (10, 10)]) @pytest.mark.parametrize("out_chunks", ["keep", (10, 10)]) @pytest.mark.parametrize("out_dtype", ["keep", "int8"]) @pytest.mark.parametrize("out_fill", ["keep", 4]) async def 
test_array_like_creation( zarr_format: ZarrFormat, func: Callable[[Any], Any], out_shape: Literal["keep"] | tuple[int, ...], out_chunks: Literal["keep"] | tuple[int, ...], out_dtype: str, out_fill: Literal["keep"] | int, ) -> None: """ Test zeros_like, ones_like, empty_like, full_like, ensuring that we can override the shape, chunks, dtype and fill_value of the array-like object provided to these functions with appropriate keyword arguments """ ref_fill = 100 ref_arr = zarr.create_array( store={}, shape=(11, 12), dtype="uint8", chunks=(11, 12), zarr_format=zarr_format, fill_value=ref_fill, ) kwargs: dict[str, object] = {} if func is zarr.api.asynchronous.full_like: if out_fill == "keep": expect_fill = ref_fill else: expect_fill = out_fill kwargs["fill_value"] = expect_fill elif func is zarr.api.asynchronous.zeros_like: expect_fill = 0 elif func is zarr.api.asynchronous.ones_like: expect_fill = 1 elif func is zarr.api.asynchronous.empty_like: if out_fill == "keep": expect_fill = ref_fill else: kwargs["fill_value"] = out_fill expect_fill = out_fill elif func is zarr.api.asynchronous.open_like: # type: ignore[comparison-overlap] if out_fill == "keep": expect_fill = ref_fill else: kwargs["fill_value"] = out_fill expect_fill = out_fill kwargs["mode"] = "w" else: raise AssertionError if out_shape != "keep": kwargs["shape"] = out_shape expect_shape = out_shape else: expect_shape = ref_arr.shape if out_chunks != "keep": kwargs["chunks"] = out_chunks expect_chunks = out_chunks else: expect_chunks = ref_arr.chunks if out_dtype != "keep": kwargs["dtype"] = out_dtype expect_dtype = out_dtype else: expect_dtype = ref_arr.dtype # type: ignore[assignment] new_arr = await func(ref_arr, path="foo", zarr_format=zarr_format, **kwargs) # type: ignore[call-arg] assert new_arr.shape == expect_shape assert new_arr.chunks == expect_chunks assert new_arr.dtype == expect_dtype assert np.all(Array(new_arr)[:] == expect_fill) # TODO: parametrize over everything this function takes @pytest.mark.parametrize("store", ["memory"], indirect=True) def test_create_array(store: Store, zarr_format: ZarrFormat) -> None: attrs: dict[str, JSON] = {"foo": 100} # explicit type annotation to avoid mypy error shape = (10, 10) path = "foo" data_val = 1 array_w = create_array( store, name=path, shape=shape, attributes=attrs, chunks=shape, dtype="uint8", zarr_format=zarr_format, ) array_w[:] = data_val assert array_w.shape == shape assert array_w.attrs == attrs assert np.array_equal(array_w[:], np.zeros(shape, dtype=array_w.dtype) + data_val) @pytest.mark.parametrize("write_empty_chunks", [True, False]) def test_write_empty_chunks_warns(write_empty_chunks: bool, zarr_format: ZarrFormat) -> None: """ Test that using the `write_empty_chunks` kwarg on array access will raise a warning. 
""" match = "The `write_empty_chunks` keyword argument .*" with pytest.warns(RuntimeWarning, match=match): _ = zarr.array( data=np.arange(10), shape=(10,), dtype="uint8", write_empty_chunks=write_empty_chunks, zarr_format=zarr_format, ) with pytest.warns(RuntimeWarning, match=match): _ = zarr.create( shape=(10,), dtype="uint8", write_empty_chunks=write_empty_chunks, zarr_format=zarr_format, ) @pytest.mark.parametrize("zarr_format", [2, 3]) def test_open_array_respects_write_empty_chunks_config(zarr_format: ZarrFormat) -> None: """Test that zarr.open() respects write_empty_chunks config.""" store = MemoryStore() _ = zarr.create( store=store, path="test_array", shape=(10,), chunks=(5,), dtype="f8", fill_value=0.0, zarr_format=zarr_format, ) arr2 = zarr.open(store=store, path="test_array", config={"write_empty_chunks": True}) assert isinstance(arr2, zarr.Array) assert arr2.async_array._config.write_empty_chunks is True arr2[0:5] = np.zeros(5) assert arr2.nchunks_initialized == 1 @pytest.mark.parametrize("path", ["foo", "/", "/foo", "///foo/bar"]) @pytest.mark.parametrize("node_type", ["array", "group"]) def test_open_normalized_path( memory_store: MemoryStore, path: str, node_type: Literal["array", "group"] ) -> None: node: Group | AnyArray if node_type == "group": node = group(store=memory_store, path=path) elif node_type == "array": node = create(store=memory_store, path=path, shape=(2,)) assert node.path == normalize_path(path) async def test_open_array(memory_store: MemoryStore, zarr_format: ZarrFormat) -> None: store = memory_store # open array, create if doesn't exist z = zarr.api.synchronous.open(store=store, shape=100, zarr_format=zarr_format) assert isinstance(z, Array) assert z.shape == (100,) # open array, overwrite # store._store_dict = {} store = MemoryStore() z = zarr.api.synchronous.open(store=store, shape=200, zarr_format=zarr_format) assert isinstance(z, Array) assert z.shape == (200,) # open array, read-only store_cls = type(store) ro_store = await store_cls.open(store_dict=store._store_dict, read_only=True) z = zarr.api.synchronous.open(store=ro_store, mode="r") assert isinstance(z, Array) assert z.shape == (200,) assert z.read_only # path not found with pytest.raises(FileNotFoundError): zarr.api.synchronous.open(store="doesnotexist", mode="r", zarr_format=zarr_format) @pytest.mark.asyncio async def test_async_array_open_array_not_found() -> None: """Test that AsyncArray.open raises ArrayNotFoundError when array doesn't exist""" store = MemoryStore() # Try to open an array that does not exist with pytest.raises(ArrayNotFoundError): await AsyncArray.open(store, zarr_format=2) def test_array_open_array_not_found_sync() -> None: """Test that Array.open raises ArrayNotFoundError when array doesn't exist""" store = MemoryStore() # Try to open an array that does not exist with pytest.raises(ArrayNotFoundError): Array.open(store) @pytest.mark.parametrize("store", ["memory", "local", "zip"], indirect=True) def test_v2_and_v3_exist_at_same_path(store: Store) -> None: zarr.create_array(store, shape=(10,), dtype="uint8", zarr_format=3) zarr.create_array(store, shape=(10,), dtype="uint8", zarr_format=2) msg = f"Both zarr.json (Zarr format 3) and .zarray (Zarr format 2) metadata objects exist at {store}. Zarr v3 will be used." 
with pytest.warns(ZarrUserWarning, match=re.escape(msg)): zarr.open(store=store) @pytest.mark.parametrize("store", ["memory"], indirect=True) async def test_create_group(store: Store, zarr_format: ZarrFormat) -> None: attrs = {"foo": 100} path = "node" node = create_group(store, path=path, attributes=attrs, zarr_format=zarr_format) assert isinstance(node, Group) assert node.attrs == attrs assert node.metadata.zarr_format == zarr_format async def test_open_group(memory_store: MemoryStore) -> None: store = memory_store # open group, create if doesn't exist g = open_group(store=store) g.create_group("foo") assert isinstance(g, Group) assert "foo" in g # open group, overwrite g = open_group(store=store, mode="w") assert isinstance(g, Group) assert "foo" not in g # open group, read-only store_cls = type(store) ro_store = await store_cls.open(store_dict=store._store_dict, read_only=True) g = open_group(store=ro_store, mode="r") assert isinstance(g, Group) assert g.read_only @pytest.mark.parametrize("zarr_format", [None, 2, 3]) async def test_open_group_unspecified_version(tmpdir: Path, zarr_format: ZarrFormat) -> None: """Regression test for https://github.com/zarr-developers/zarr-python/issues/2175""" # create a group with specified zarr format (could be 2, 3, or None) _ = await zarr.api.asynchronous.open_group( store=str(tmpdir), mode="w", zarr_format=zarr_format, attributes={"foo": "bar"} ) # now open that group without specifying the format g2 = await zarr.api.asynchronous.open_group(store=str(tmpdir), mode="r") assert g2.attrs == {"foo": "bar"} if zarr_format is not None: assert g2.metadata.zarr_format == zarr_format @pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) @pytest.mark.parametrize("n_args", [10, 1, 0]) @pytest.mark.parametrize("n_kwargs", [10, 1, 0]) @pytest.mark.parametrize("path", [None, "some_path"]) def test_save(store: Store, n_args: int, n_kwargs: int, path: None | str) -> None: data = np.arange(10) args = [np.arange(10) for _ in range(n_args)] kwargs = {f"arg_{i}": data for i in range(n_kwargs)} if n_kwargs == 0 and n_args == 0: with pytest.raises(ValueError): save(store, path=path) elif n_args == 1 and n_kwargs == 0: save(store, *args, path=path) array = zarr.api.synchronous.open(store, path=path) assert isinstance(array, Array) assert_array_equal(array[:], data) else: save(store, *args, path=path, **kwargs) # type: ignore[arg-type] group = zarr.api.synchronous.open(store, path=path) assert isinstance(group, Group) for array in group.array_values(): assert_array_equal(array[:], data) for k in kwargs: assert k in group assert group.nmembers() == n_args + n_kwargs def test_save_errors() -> None: with pytest.raises(ValueError): # no arrays provided save_group("data/group.zarr") with pytest.raises(TypeError): # no array provided save_array("data/group.zarr") # type: ignore[call-arg] with pytest.raises(ValueError): # no arrays provided save("data/group.zarr") a = np.arange(10) with pytest.raises(TypeError): # mode is no valid argument and would get handled as an array zarr.save("data/example.zarr", a, mode="w") def test_open_with_mode_r(tmp_path: Path) -> None: # 'r' means read only (must exist) with pytest.raises(FileNotFoundError): zarr.open(store=tmp_path, mode="r") z1 = zarr.ones(store=tmp_path, shape=(3, 3)) assert z1.fill_value == 1 z2 = zarr.open(store=tmp_path, mode="r") assert isinstance(z2, Array) assert z2.fill_value == 1 result = z2[:] assert isinstance(result, NDArrayLike) assert (result == 1).all() with pytest.raises(ValueError): 
z2[:] = 3 def test_open_with_mode_r_plus(tmp_path: Path) -> None: # 'r+' means read/write (must exist) new_store_path = tmp_path / "new_store.zarr" assert not new_store_path.exists(), "Test should operate on non-existent directory" with pytest.raises(FileNotFoundError): zarr.open(store=new_store_path, mode="r+") assert not new_store_path.exists(), "mode='r+' should not create directory" zarr.ones(store=tmp_path, shape=(3, 3)) z2 = zarr.open(store=tmp_path, mode="r+") assert isinstance(z2, Array) result = z2[:] assert isinstance(result, NDArrayLike) assert (result == 1).all() z2[:] = 3 async def test_open_with_mode_a(tmp_path: Path) -> None: # Open without shape argument should default to group g = zarr.open(store=tmp_path, mode="a") assert isinstance(g, Group) await g.store_path.delete() # 'a' means read/write (create if doesn't exist) arr = zarr.open(store=tmp_path, mode="a", shape=(3, 3)) assert isinstance(arr, Array) arr[...] = 1 z2 = zarr.open(store=tmp_path, mode="a") assert isinstance(z2, Array) result = z2[:] assert isinstance(result, NDArrayLike) assert (result == 1).all() z2[:] = 3 def test_open_with_mode_w(tmp_path: Path) -> None: # 'w' means create (overwrite if exists); arr = zarr.open(store=tmp_path, mode="w", shape=(3, 3)) assert isinstance(arr, Array) arr[...] = 3 z2 = zarr.open(store=tmp_path, mode="w", shape=(3, 3)) assert isinstance(z2, Array) result = z2[:] assert isinstance(result, NDArrayLike) assert not (result == 3).all() z2[:] = 3 def test_open_with_mode_w_minus(tmp_path: Path) -> None: # 'w-' means create (fail if exists) arr = zarr.open(store=tmp_path, mode="w-", shape=(3, 3)) assert isinstance(arr, Array) arr[...] = 1 with pytest.raises(FileExistsError): zarr.open(store=tmp_path, mode="w-") @pytest.mark.parametrize("order", ["C", "F", None]) @pytest.mark.parametrize("config", [{"order": "C"}, {"order": "F"}, {}], ids=["C", "F", "None"]) def test_array_order( order: MemoryOrder | None, config: dict[str, MemoryOrder | None], zarr_format: ZarrFormat ) -> None: """ Check that: - For v2, memory order is taken from the `order` keyword argument. 
- For v3, memory order is taken from `config`, and when order is passed a warning is raised - The numpy array returned has the expected order - For v2, the order metadata is set correctly """ default_order = zarr.config.get("array.order") ctx: contextlib.AbstractContextManager # type: ignore[type-arg] if zarr_format == 3: if order is None: ctx = contextlib.nullcontext() else: ctx = pytest.warns( RuntimeWarning, match="The `order` keyword argument has no effect for Zarr format 3 arrays", ) expected_order = config.get("order", default_order) if zarr_format == 2: ctx = contextlib.nullcontext() expected_order = order or config.get("order", default_order) with ctx: arr = zarr.ones(shape=(2, 2), order=order, zarr_format=zarr_format, config=config) assert arr.order == expected_order vals = np.asarray(arr) if expected_order == "C": assert vals.flags.c_contiguous elif expected_order == "F": assert vals.flags.f_contiguous else: raise AssertionError if zarr_format == 2: assert arr.metadata.zarr_format == 2 assert arr.metadata.order == expected_order async def test_init_order_warns() -> None: with pytest.warns( RuntimeWarning, match="The `order` keyword argument has no effect for Zarr format 3 arrays" ): await init_array( store_path=StorePath(store=MemoryStore()), shape=(1,), dtype="uint8", zarr_format=3, order="F", ) # def test_lazy_loader(): # foo = np.arange(100) # bar = np.arange(100, 0, -1) # store = "data/group.zarr" # save(store, foo=foo, bar=bar) # loader = load(store) # assert "foo" in loader # assert "bar" in loader # assert "baz" not in loader # assert len(loader) == 2 # assert sorted(loader) == ["bar", "foo"] # assert_array_equal(foo, loader["foo"]) # assert_array_equal(bar, loader["bar"]) # assert "LazyLoader: " in repr(loader) def test_load_array(sync_store: Store) -> None: store = sync_store foo = np.arange(100) bar = np.arange(100, 0, -1) save(store, foo=foo, bar=bar) # can also load arrays directly into a numpy array for array_name in ["foo", "bar"]: array = load(store, path=array_name) assert isinstance(array, np.ndarray) if array_name == "foo": assert_array_equal(foo, array) else: assert_array_equal(bar, array) @pytest.mark.parametrize("path", ["data", None]) @pytest.mark.parametrize("load_read_only", [True, False, None]) def test_load_zip(tmp_path: Path, path: str | None, load_read_only: bool | None) -> None: file = tmp_path / "test.zip" data = np.arange(100).reshape(10, 10) with ZipStore(file, mode="w", read_only=False) as zs: save(zs, data, path=path) with ZipStore(file, mode="r", read_only=load_read_only) as zs: result = zarr.load(store=zs, path=path) assert isinstance(result, np.ndarray) assert np.array_equal(result, data) with ZipStore(file, read_only=load_read_only) as zs: result = zarr.load(store=zs, path=path) assert isinstance(result, np.ndarray) assert np.array_equal(result, data) @pytest.mark.parametrize("path", ["data", None]) @pytest.mark.parametrize("load_read_only", [True, False]) def test_load_local(tmp_path: Path, path: str | None, load_read_only: bool) -> None: file = tmp_path / "test.zip" data = np.arange(100).reshape(10, 10) with LocalStore(file, read_only=False) as zs: save(zs, data, path=path) with LocalStore(file, read_only=load_read_only) as zs: result = zarr.load(store=zs, path=path) assert isinstance(result, np.ndarray) assert np.array_equal(result, data) def test_tree() -> None: pytest.importorskip("rich") g1 = zarr.group() g1.create_group("foo") g3 = g1.create_group("bar") g3.create_group("baz") g5 = g3.create_group("qux") g5.create_array("baz", 
shape=(100,), chunks=(10,), dtype="float64") with pytest.warns(ZarrDeprecationWarning, match=r"Group\.tree instead\."): # noqa: PT031 assert repr(zarr.tree(g1)) == repr(g1.tree()) assert str(zarr.tree(g1)) == str(g1.tree()) # @pytest.mark.parametrize("stores_from_path", [False, True]) # @pytest.mark.parametrize( # "with_chunk_store,listable", # [(False, True), (True, True), (False, False)], # ids=["default-listable", "with_chunk_store-listable", "default-unlistable"], # ) # def test_consolidate_metadata(with_chunk_store, listable, monkeypatch, stores_from_path): # # setup initial data # if stores_from_path: # store = tempfile.mkdtemp() # atexit.register(atexit_rmtree, store) # if with_chunk_store: # chunk_store = tempfile.mkdtemp() # atexit.register(atexit_rmtree, chunk_store) # else: # chunk_store = None # else: # store = MemoryStore() # chunk_store = MemoryStore() if with_chunk_store else None # path = None # z = group(store, chunk_store=chunk_store, path=path) # # Reload the actual store implementation in case str # store_to_copy = z.store # z.create_group("g1") # g2 = z.create_group("g2") # g2.attrs["hello"] = "world" # arr = g2.create_array("arr", shape=(20, 20), chunks=(5, 5), dtype="f8") # assert 16 == arr.nchunks # assert 0 == arr.nchunks_initialized # arr.attrs["data"] = 1 # arr[:] = 1.0 # assert 16 == arr.nchunks_initialized # if stores_from_path: # # get the actual store class for use with consolidate_metadata # store_class = z._store # else: # store_class = store # # perform consolidation # out = consolidate_metadata(store_class, path=path) # assert isinstance(out, Group) # assert ["g1", "g2"] == list(out) # if not stores_from_path: # assert isinstance(out._store, ConsolidatedMetadataStore) # assert ".zmetadata" in store # meta_keys = [ # ".zgroup", # "g1/.zgroup", # "g2/.zgroup", # "g2/.zattrs", # "g2/arr/.zarray", # "g2/arr/.zattrs", # ] # for key in meta_keys: # del store[key] # # https://github.com/zarr-developers/zarr-python/issues/993 # # Make sure we can still open consolidated on an unlistable store: # if not listable: # fs_memory = pytest.importorskip("fsspec.implementations.memory") # monkeypatch.setattr(fs_memory.MemoryFileSystem, "isdir", lambda x, y: False) # monkeypatch.delattr(fs_memory.MemoryFileSystem, "ls") # fs = fs_memory.MemoryFileSystem() # store_to_open = FSStore("", fs=fs) # # copy original store to new unlistable store # store_to_open.update(store_to_copy) # else: # store_to_open = store # # open consolidated # z2 = open_consolidated(store_to_open, chunk_store=chunk_store, path=path) # assert ["g1", "g2"] == list(z2) # assert "world" == z2.g2.attrs["hello"] # assert 1 == z2.g2.arr.attrs["data"] # assert (z2.g2.arr[:] == 1.0).all() # assert 16 == z2.g2.arr.nchunks # if listable: # assert 16 == z2.g2.arr.nchunks_initialized # else: # with pytest.raises(NotImplementedError): # _ = z2.g2.arr.nchunks_initialized # if stores_from_path: # # path string is note a BaseStore subclass so cannot be used to # # initialize a ConsolidatedMetadataStore. 
# with pytest.raises(ValueError): # cmd = ConsolidatedMetadataStore(store) # else: # # tests del/write on the store # cmd = ConsolidatedMetadataStore(store) # with pytest.raises(PermissionError): # del cmd[".zgroup"] # with pytest.raises(PermissionError): # cmd[".zgroup"] = None # # test getsize on the store # assert isinstance(getsize(cmd), Integral) # # test new metadata are not writeable # with pytest.raises(PermissionError): # z2.create_group("g3") # with pytest.raises(PermissionError): # z2.create_dataset("spam", shape=42, chunks=7, dtype="i4") # with pytest.raises(PermissionError): # del z2["g2"] # # test consolidated metadata are not writeable # with pytest.raises(PermissionError): # z2.g2.attrs["hello"] = "universe" # with pytest.raises(PermissionError): # z2.g2.arr.attrs["foo"] = "bar" # # test the data are writeable # z2.g2.arr[:] = 2 # assert (z2.g2.arr[:] == 2).all() # # test invalid modes # with pytest.raises(ValueError): # open_consolidated(store, chunk_store=chunk_store, mode="a", path=path) # with pytest.raises(ValueError): # open_consolidated(store, chunk_store=chunk_store, mode="w", path=path) # with pytest.raises(ValueError): # open_consolidated(store, chunk_store=chunk_store, mode="w-", path=path) # # make sure keyword arguments are passed through without error # open_consolidated( # store, # chunk_store=chunk_store, # path=path, # cache_attrs=True, # synchronizer=None, # ) # @pytest.mark.parametrize( # "options", # ( # {"dimension_separator": "/"}, # {"dimension_separator": "."}, # {"dimension_separator": None}, # ), # ) # def test_save_array_separator(tmpdir, options): # data = np.arange(6).reshape((3, 2)) # url = tmpdir.join("test.zarr") # save_array(url, data, **options) # class TestCopyStore(unittest.TestCase): # _version = 2 # def setUp(self): # source = dict() # source["foo"] = b"xxx" # source["bar/baz"] = b"yyy" # source["bar/qux"] = b"zzz" # self.source = source # def _get_dest_store(self): # return dict() # def test_no_paths(self): # source = self.source # dest = self._get_dest_store() # copy_store(source, dest) # assert len(source) == len(dest) # for key in source: # assert source[key] == dest[key] # def test_source_path(self): # source = self.source # # paths should be normalized # for source_path in "bar", "bar/", "/bar", "/bar/": # dest = self._get_dest_store() # copy_store(source, dest, source_path=source_path) # assert 2 == len(dest) # for key in source: # if key.startswith("bar/"): # dest_key = key.split("bar/")[1] # assert source[key] == dest[dest_key] # else: # assert key not in dest # def test_dest_path(self): # source = self.source # # paths should be normalized # for dest_path in "new", "new/", "/new", "/new/": # dest = self._get_dest_store() # copy_store(source, dest, dest_path=dest_path) # assert len(source) == len(dest) # for key in source: # if self._version == 3: # dest_key = key[:10] + "new/" + key[10:] # else: # dest_key = "new/" + key # assert source[key] == dest[dest_key] # def test_source_dest_path(self): # source = self.source # # paths should be normalized # for source_path in "bar", "bar/", "/bar", "/bar/": # for dest_path in "new", "new/", "/new", "/new/": # dest = self._get_dest_store() # copy_store(source, dest, source_path=source_path, dest_path=dest_path) # assert 2 == len(dest) # for key in source: # if key.startswith("bar/"): # dest_key = "new/" + key.split("bar/")[1] # assert source[key] == dest[dest_key] # else: # assert key not in dest # assert ("new/" + key) not in dest # def test_excludes_includes(self): # source = 
self.source # # single excludes # dest = self._get_dest_store() # excludes = "f.*" # copy_store(source, dest, excludes=excludes) # assert len(dest) == 2 # root = "" # assert root + "foo" not in dest # # multiple excludes # dest = self._get_dest_store() # excludes = "b.z", ".*x" # copy_store(source, dest, excludes=excludes) # assert len(dest) == 1 # assert root + "foo" in dest # assert root + "bar/baz" not in dest # assert root + "bar/qux" not in dest # # excludes and includes # dest = self._get_dest_store() # excludes = "b.*" # includes = ".*x" # copy_store(source, dest, excludes=excludes, includes=includes) # assert len(dest) == 2 # assert root + "foo" in dest # assert root + "bar/baz" not in dest # assert root + "bar/qux" in dest # def test_dry_run(self): # source = self.source # dest = self._get_dest_store() # copy_store(source, dest, dry_run=True) # assert 0 == len(dest) # def test_if_exists(self): # source = self.source # dest = self._get_dest_store() # root = "" # dest[root + "bar/baz"] = b"mmm" # # default ('raise') # with pytest.raises(CopyError): # copy_store(source, dest) # # explicit 'raise' # with pytest.raises(CopyError): # copy_store(source, dest, if_exists="raise") # # skip # copy_store(source, dest, if_exists="skip") # assert 3 == len(dest) # assert dest[root + "foo"] == b"xxx" # assert dest[root + "bar/baz"] == b"mmm" # assert dest[root + "bar/qux"] == b"zzz" # # replace # copy_store(source, dest, if_exists="replace") # assert 3 == len(dest) # assert dest[root + "foo"] == b"xxx" # assert dest[root + "bar/baz"] == b"yyy" # assert dest[root + "bar/qux"] == b"zzz" # # invalid option # with pytest.raises(ValueError): # copy_store(source, dest, if_exists="foobar") # def check_copied_array(original, copied, without_attrs=False, expect_props=None): # # setup # source_h5py = original.__module__.startswith("h5py.") # dest_h5py = copied.__module__.startswith("h5py.") # zarr_to_zarr = not (source_h5py or dest_h5py) # h5py_to_h5py = source_h5py and dest_h5py # zarr_to_h5py = not source_h5py and dest_h5py # h5py_to_zarr = source_h5py and not dest_h5py # if expect_props is None: # expect_props = dict() # else: # expect_props = expect_props.copy() # # common properties in zarr and h5py # for p in "dtype", "shape", "chunks": # expect_props.setdefault(p, getattr(original, p)) # # zarr-specific properties # if zarr_to_zarr: # for p in "compressor", "filters", "order", "fill_value": # expect_props.setdefault(p, getattr(original, p)) # # h5py-specific properties # if h5py_to_h5py: # for p in ( # "maxshape", # "compression", # "compression_opts", # "shuffle", # "scaleoffset", # "fletcher32", # "fillvalue", # ): # expect_props.setdefault(p, getattr(original, p)) # # common properties with some name differences # if h5py_to_zarr: # expect_props.setdefault("fill_value", original.fillvalue) # if zarr_to_h5py: # expect_props.setdefault("fillvalue", original.fill_value) # # compare properties # for k, v in expect_props.items(): # assert v == getattr(copied, k) # # compare data # assert_array_equal(original[:], copied[:]) # # compare attrs # if without_attrs: # for k in original.attrs.keys(): # assert k not in copied.attrs # else: # if dest_h5py and "filters" in original.attrs: # # special case in v3 (storing filters metadata under attributes) # # we explicitly do not copy this info over to HDF5 # original_attrs = original.attrs.asdict().copy() # original_attrs.pop("filters") # else: # original_attrs = original.attrs # assert sorted(original_attrs.items()) == sorted(copied.attrs.items()) # def 
check_copied_group(original, copied, without_attrs=False, expect_props=None, shallow=False): # # setup # if expect_props is None: # expect_props = dict() # else: # expect_props = expect_props.copy() # # compare children # for k, v in original.items(): # if hasattr(v, "shape"): # assert k in copied # check_copied_array(v, copied[k], without_attrs=without_attrs, expect_props=expect_props) # elif shallow: # assert k not in copied # else: # assert k in copied # check_copied_group( # v, # copied[k], # without_attrs=without_attrs, # shallow=shallow, # expect_props=expect_props, # ) # # compare attrs # if without_attrs: # for k in original.attrs.keys(): # assert k not in copied.attrs # else: # assert sorted(original.attrs.items()) == sorted(copied.attrs.items()) # def test_copy_all(): # """ # https://github.com/zarr-developers/zarr-python/issues/269 # copy_all used to not copy attributes as `.keys()` does not return hidden `.zattrs`. # """ # original_group = zarr.group(store=MemoryStore(), overwrite=True) # original_group.attrs["info"] = "group attrs" # original_subgroup = original_group.create_group("subgroup") # original_subgroup.attrs["info"] = "sub attrs" # destination_group = zarr.group(store=MemoryStore(), overwrite=True) # # copy from memory to directory store # copy_all( # original_group, # destination_group, # dry_run=False, # ) # assert "subgroup" in destination_group # assert destination_group.attrs["info"] == "group attrs" # assert destination_group.subgroup.attrs["info"] == "sub attrs" # class TestCopy: # @pytest.fixture(params=[False, True], ids=["zarr", "hdf5"]) # def source(self, request, tmpdir): # def prep_source(source): # foo = source.create_group("foo") # foo.attrs["experiment"] = "weird science" # baz = foo.create_dataset("bar/baz", data=np.arange(100), chunks=(50,)) # baz.attrs["units"] = "metres" # if request.param: # extra_kws = dict( # compression="gzip", # compression_opts=3, # fillvalue=84, # shuffle=True, # fletcher32=True, # ) # else: # extra_kws = dict(compressor=Zlib(3), order="F", fill_value=42, filters=[Adler32()]) # source.create_dataset( # "spam", # data=np.arange(100, 200).reshape(20, 5), # chunks=(10, 2), # dtype="i2", # **extra_kws, # ) # return source # if request.param: # h5py = pytest.importorskip("h5py") # fn = tmpdir.join("source.h5") # with h5py.File(str(fn), mode="w") as h5f: # yield prep_source(h5f) # else: # yield prep_source(group()) # @pytest.fixture(params=[False, True], ids=["zarr", "hdf5"]) # def dest(self, request, tmpdir): # if request.param: # h5py = pytest.importorskip("h5py") # fn = tmpdir.join("dest.h5") # with h5py.File(str(fn), mode="w") as h5f: # yield h5f # else: # yield group() # def test_copy_array(self, source, dest): # # copy array with default options # copy(source["foo/bar/baz"], dest) # check_copied_array(source["foo/bar/baz"], dest["baz"]) # copy(source["spam"], dest) # check_copied_array(source["spam"], dest["spam"]) # def test_copy_bad_dest(self, source, dest): # # try to copy to an array, dest must be a group # dest = dest.create_dataset("eggs", shape=(100,)) # with pytest.raises(ValueError): # copy(source["foo/bar/baz"], dest) # def test_copy_array_name(self, source, dest): # # copy array with name # copy(source["foo/bar/baz"], dest, name="qux") # assert "baz" not in dest # check_copied_array(source["foo/bar/baz"], dest["qux"]) # def test_copy_array_create_options(self, source, dest): # dest_h5py = dest.__module__.startswith("h5py.") # # copy array, provide creation options # compressor = Zlib(9) # create_kws = 
dict(chunks=(10,)) # if dest_h5py: # create_kws.update( # compression="gzip", compression_opts=9, shuffle=True, fletcher32=True, fillvalue=42 # ) # else: # create_kws.update(compressor=compressor, fill_value=42, order="F", filters=[Adler32()]) # copy(source["foo/bar/baz"], dest, without_attrs=True, **create_kws) # check_copied_array( # source["foo/bar/baz"], dest["baz"], without_attrs=True, expect_props=create_kws # ) # def test_copy_array_exists_array(self, source, dest): # # copy array, dest array in the way # dest.create_dataset("baz", shape=(10,)) # # raise # with pytest.raises(CopyError): # # should raise by default # copy(source["foo/bar/baz"], dest) # assert (10,) == dest["baz"].shape # with pytest.raises(CopyError): # copy(source["foo/bar/baz"], dest, if_exists="raise") # assert (10,) == dest["baz"].shape # # skip # copy(source["foo/bar/baz"], dest, if_exists="skip") # assert (10,) == dest["baz"].shape # # replace # copy(source["foo/bar/baz"], dest, if_exists="replace") # check_copied_array(source["foo/bar/baz"], dest["baz"]) # # invalid option # with pytest.raises(ValueError): # copy(source["foo/bar/baz"], dest, if_exists="foobar") # def test_copy_array_exists_group(self, source, dest): # # copy array, dest group in the way # dest.create_group("baz") # # raise # with pytest.raises(CopyError): # copy(source["foo/bar/baz"], dest) # assert not hasattr(dest["baz"], "shape") # with pytest.raises(CopyError): # copy(source["foo/bar/baz"], dest, if_exists="raise") # assert not hasattr(dest["baz"], "shape") # # skip # copy(source["foo/bar/baz"], dest, if_exists="skip") # assert not hasattr(dest["baz"], "shape") # # replace # copy(source["foo/bar/baz"], dest, if_exists="replace") # check_copied_array(source["foo/bar/baz"], dest["baz"]) # def test_copy_array_skip_initialized(self, source, dest): # dest_h5py = dest.__module__.startswith("h5py.") # dest.create_dataset("baz", shape=(100,), chunks=(10,), dtype="i8") # assert not np.all(source["foo/bar/baz"][:] == dest["baz"][:]) # if dest_h5py: # with pytest.raises(ValueError): # # not available with copy to h5py # copy(source["foo/bar/baz"], dest, if_exists="skip_initialized") # else: # # copy array, dest array exists but not yet initialized # copy(source["foo/bar/baz"], dest, if_exists="skip_initialized") # check_copied_array(source["foo/bar/baz"], dest["baz"]) # # copy array, dest array exists and initialized, will be skipped # dest["baz"][:] = np.arange(100, 200) # copy(source["foo/bar/baz"], dest, if_exists="skip_initialized") # assert_array_equal(np.arange(100, 200), dest["baz"][:]) # assert not np.all(source["foo/bar/baz"][:] == dest["baz"][:]) # def test_copy_group(self, source, dest): # # copy group, default options # copy(source["foo"], dest) # check_copied_group(source["foo"], dest["foo"]) # def test_copy_group_no_name(self, source, dest): # with pytest.raises(TypeError): # # need a name if copy root # copy(source, dest) # copy(source, dest, name="root") # check_copied_group(source, dest["root"]) # def test_copy_group_options(self, source, dest): # # copy group, non-default options # copy(source["foo"], dest, name="qux", without_attrs=True) # assert "foo" not in dest # check_copied_group(source["foo"], dest["qux"], without_attrs=True) # def test_copy_group_shallow(self, source, dest): # # copy group, shallow # copy(source, dest, name="eggs", shallow=True) # check_copied_group(source, dest["eggs"], shallow=True) # def test_copy_group_exists_group(self, source, dest): # # copy group, dest groups exist # dest.create_group("foo/bar") # 
copy(source["foo"], dest) # check_copied_group(source["foo"], dest["foo"]) # def test_copy_group_exists_array(self, source, dest): # # copy group, dest array in the way # dest.create_dataset("foo/bar", shape=(10,)) # # raise # with pytest.raises(CopyError): # copy(source["foo"], dest) # assert dest["foo/bar"].shape == (10,) # with pytest.raises(CopyError): # copy(source["foo"], dest, if_exists="raise") # assert dest["foo/bar"].shape == (10,) # # skip # copy(source["foo"], dest, if_exists="skip") # assert dest["foo/bar"].shape == (10,) # # replace # copy(source["foo"], dest, if_exists="replace") # check_copied_group(source["foo"], dest["foo"]) # def test_copy_group_dry_run(self, source, dest): # # dry run, empty destination # n_copied, n_skipped, n_bytes_copied = copy( # source["foo"], dest, dry_run=True, return_stats=True # ) # assert 0 == len(dest) # assert 3 == n_copied # assert 0 == n_skipped # assert 0 == n_bytes_copied # # dry run, array exists in destination # baz = np.arange(100, 200) # dest.create_dataset("foo/bar/baz", data=baz) # assert not np.all(source["foo/bar/baz"][:] == dest["foo/bar/baz"][:]) # assert 1 == len(dest) # # raise # with pytest.raises(CopyError): # copy(source["foo"], dest, dry_run=True) # assert 1 == len(dest) # # skip # n_copied, n_skipped, n_bytes_copied = copy( # source["foo"], dest, dry_run=True, if_exists="skip", return_stats=True # ) # assert 1 == len(dest) # assert 2 == n_copied # assert 1 == n_skipped # assert 0 == n_bytes_copied # assert_array_equal(baz, dest["foo/bar/baz"]) # # replace # n_copied, n_skipped, n_bytes_copied = copy( # source["foo"], dest, dry_run=True, if_exists="replace", return_stats=True # ) # assert 1 == len(dest) # assert 3 == n_copied # assert 0 == n_skipped # assert 0 == n_bytes_copied # assert_array_equal(baz, dest["foo/bar/baz"]) # def test_logging(self, source, dest, tmpdir): # # callable log # copy(source["foo"], dest, dry_run=True, log=print) # # file name # fn = str(tmpdir.join("log_name")) # copy(source["foo"], dest, dry_run=True, log=fn) # # file # with tmpdir.join("log_file").open(mode="w") as f: # copy(source["foo"], dest, dry_run=True, log=f) # # bad option # with pytest.raises(TypeError): # copy(source["foo"], dest, dry_run=True, log=True) def test_open_falls_back_to_open_group() -> None: # https://github.com/zarr-developers/zarr-python/issues/2309 store = MemoryStore() zarr.open_group(store, attributes={"key": "value"}) group = zarr.open(store) assert isinstance(group, Group) assert group.attrs == {"key": "value"} async def test_open_falls_back_to_open_group_async(zarr_format: ZarrFormat) -> None: # https://github.com/zarr-developers/zarr-python/issues/2309 store = MemoryStore() await zarr.api.asynchronous.open_group( store, attributes={"key": "value"}, zarr_format=zarr_format ) group = await zarr.api.asynchronous.open(store=store) assert isinstance(group, zarr.core.group.AsyncGroup) assert group.metadata.zarr_format == zarr_format assert group.attrs == {"key": "value"} @pytest.mark.parametrize("mode", ["r", "r+", "w", "a"]) def test_open_modes_creates_group(tmp_path: Path, mode: str) -> None: # https://github.com/zarr-developers/zarr-python/issues/2490 zarr_dir = tmp_path / f"mode-{mode}-test.zarr" if mode in ["r", "r+"]: # Expect FileNotFoundError to be raised if 'r' or 'r+' mode with pytest.raises(FileNotFoundError): zarr.open(store=zarr_dir, mode=mode) # type: ignore[arg-type] else: group = zarr.open(store=zarr_dir, mode=mode) # type: ignore[arg-type] assert isinstance(group, Group) async def 
test_metadata_validation_error() -> None: with pytest.raises( MetadataValidationError, match="Invalid value for 'zarr_format'. Expected 2, 3, or None. Got '3.0'.", ): await zarr.api.asynchronous.open_group(zarr_format="3.0") # type: ignore[arg-type] with pytest.raises( MetadataValidationError, match="Invalid value for 'zarr_format'. Expected 2, 3, or None. Got '3.0'.", ): await zarr.api.asynchronous.open_array(shape=(1,), zarr_format="3.0") # type: ignore[arg-type] @pytest.mark.parametrize( "store", ["local", "memory", "zip"], indirect=True, ) def test_open_array_with_mode_r_plus(store: Store, zarr_format: ZarrFormat) -> None: # 'r+' means read/write (must exist) with pytest.raises(ArrayNotFoundError): zarr.open_array(store=store, mode="r+", zarr_format=zarr_format) zarr.ones(store=store, shape=(3, 3), zarr_format=zarr_format) z2 = zarr.open_array(store=store, mode="r+") assert isinstance(z2, Array) assert z2.metadata.zarr_format == zarr_format result = z2[:] assert isinstance(result, NDArrayLike) assert (result == 1).all() z2[:] = 3 @pytest.mark.parametrize( ("a_func", "b_func"), [ (zarr.api.asynchronous.create_array, zarr.api.synchronous.create_array), (zarr.api.asynchronous.save, zarr.api.synchronous.save), (zarr.api.asynchronous.save_array, zarr.api.synchronous.save_array), (zarr.api.asynchronous.save_group, zarr.api.synchronous.save_group), (zarr.api.asynchronous.open_group, zarr.api.synchronous.open_group), (zarr.api.asynchronous.create, zarr.api.synchronous.create), ], ) def test_consistent_signatures( a_func: Callable[[object], object], b_func: Callable[[object], object] ) -> None: """ Ensure that pairs of functions have the same signature """ base_sig = inspect.signature(a_func) test_sig = inspect.signature(b_func) wrong: dict[str, list[object]] = { "missing_from_test": [], "missing_from_base": [], "wrong_type": [], } for key, value in base_sig.parameters.items(): if key not in test_sig.parameters: wrong["missing_from_test"].append((key, value)) for key, value in test_sig.parameters.items(): if key not in base_sig.parameters: wrong["missing_from_base"].append((key, value)) if base_sig.parameters[key] != value: wrong["wrong_type"].append({key: {"test": value, "base": base_sig.parameters[key]}}) assert wrong["missing_from_base"] == [] assert wrong["missing_from_test"] == [] assert wrong["wrong_type"] == [] def test_api_exports() -> None: """ Test that the sync API and the async API export the same objects """ assert zarr.api.asynchronous.__all__ == zarr.api.synchronous.__all__ @gpu_test @pytest.mark.parametrize( "store", ["local", "memory", "zip"], indirect=True, ) @pytest.mark.parametrize("zarr_format", [None, 2, 3]) def test_gpu_basic(store: Store, zarr_format: ZarrFormat | None) -> None: import cupy as cp if zarr_format == 2: # Without this, the zstd codec attempts to convert the cupy # array to bytes. 
compressors = None else: compressors = "auto" with zarr.config.enable_gpu(): src = cp.random.uniform(size=(100, 100)) # allocate on the device z = zarr.create_array( store, name="a", shape=src.shape, chunks=(10, 10), dtype=src.dtype, overwrite=True, zarr_format=zarr_format, compressors=compressors, # type: ignore[arg-type] ) z[:10, :10] = src[:10, :10] result = z[:10, :10] # assert_array_equal doesn't check the type assert isinstance(result, type(src)) cp.testing.assert_array_equal(result, src[:10, :10]) def test_v2_without_compressor() -> None: # Make sure it's possible to set no compressor for v2 arrays arr = zarr.create(store={}, shape=(1), dtype="uint8", zarr_format=2, compressor=None) assert arr.compressors == () def test_v2_with_v3_compressor() -> None: # Check trying to create a v2 array with a v3 compressor fails with pytest.raises( ValueError, match="Cannot use a BytesBytesCodec as a compressor for zarr v2 arrays. Use a numcodecs codec directly instead.", ): zarr.create( store={}, shape=(1), dtype="uint8", zarr_format=2, compressor=zarr.codecs.BloscCodec() ) def add_empty_file(path: Path) -> Path: fpath = path / "a.txt" fpath.touch() return fpath @pytest.mark.parametrize("create_function", [create_array, from_array]) @pytest.mark.parametrize("overwrite", [True, False]) def test_no_overwrite_array(tmp_path: Path, create_function: Callable, overwrite: bool) -> None: # type:ignore[type-arg] store = zarr.storage.LocalStore(tmp_path) existing_fpath = add_empty_file(tmp_path) assert existing_fpath.exists() create_function(store=store, data=np.ones(shape=(1,)), overwrite=overwrite) if overwrite: assert not existing_fpath.exists() else: assert existing_fpath.exists() @pytest.mark.parametrize("create_function", [create_group, group]) @pytest.mark.parametrize("overwrite", [True, False]) def test_no_overwrite_group(tmp_path: Path, create_function: Callable, overwrite: bool) -> None: # type:ignore[type-arg] store = zarr.storage.LocalStore(tmp_path) existing_fpath = add_empty_file(tmp_path) assert existing_fpath.exists() create_function(store=store, overwrite=overwrite) if overwrite: assert not existing_fpath.exists() else: assert existing_fpath.exists() @pytest.mark.parametrize("open_func", [zarr.open, open_group]) @pytest.mark.parametrize("mode", ["r", "r+", "a", "w", "w-"]) def test_no_overwrite_open(tmp_path: Path, open_func: Callable, mode: str) -> None: # type:ignore[type-arg] store = zarr.storage.LocalStore(tmp_path) existing_fpath = add_empty_file(tmp_path) assert existing_fpath.exists() with contextlib.suppress(FileExistsError, FileNotFoundError, ZarrUserWarning): open_func(store=store, mode=mode) if mode == "w": assert not existing_fpath.exists() else: assert existing_fpath.exists() def test_no_overwrite_load(tmp_path: Path) -> None: store = zarr.storage.LocalStore(tmp_path) existing_fpath = add_empty_file(tmp_path) assert existing_fpath.exists() with contextlib.suppress(NotImplementedError): zarr.load(store) assert existing_fpath.exists() @pytest.mark.parametrize( "f", [ zarr.array, zarr.create, zarr.create_array, zarr.ones, zarr.ones_like, zarr.empty, zarr.empty_like, zarr.full, zarr.full_like, zarr.zeros, zarr.zeros_like, ], ) def test_auto_chunks(f: Callable[..., AnyArray]) -> None: # Make sure chunks are set automatically across the public API # TODO: test shards with this test too shape = (1000, 1000) dtype = np.uint8 kwargs = {"shape": shape, "dtype": dtype} array = np.zeros(shape, dtype=dtype) store = zarr.storage.MemoryStore() if f in [zarr.full, zarr.full_like]: 
kwargs["fill_value"] = 0 if f in [zarr.array]: kwargs["data"] = array if f in [zarr.empty_like, zarr.full_like, zarr.empty_like, zarr.ones_like, zarr.zeros_like]: kwargs["a"] = array if f in [zarr.create_array]: kwargs["store"] = store a = f(**kwargs) assert a.chunks == (500, 500) @pytest.mark.parametrize("kwarg_name", ["synchronizer", "chunk_store", "cache_attrs", "meta_array"]) def test_unimplemented_kwarg_warnings(kwarg_name: str) -> None: kwargs = {kwarg_name: 1} with pytest.warns(RuntimeWarning, match=".* is not yet implemented"): zarr.create(shape=(1,), **kwargs) # type: ignore[arg-type] zarr-python-3.1.5/tests/test_api/000077500000000000000000000000001511007055700167705ustar00rootroot00000000000000zarr-python-3.1.5/tests/test_api/test_asynchronous.py000066400000000000000000000067651511007055700231520ustar00rootroot00000000000000from __future__ import annotations import json from dataclasses import dataclass from typing import TYPE_CHECKING import numpy as np import pytest from zarr import create_array from zarr.api.asynchronous import _get_shape_chunks, _like_args, group, open from zarr.core.buffer.core import default_buffer_prototype from zarr.core.group import AsyncGroup if TYPE_CHECKING: from pathlib import Path from typing import Any import numpy.typing as npt from zarr.core.array import AsyncArray from zarr.core.metadata import ArrayV2Metadata, ArrayV3Metadata from zarr.types import AnyArray @dataclass class WithShape: shape: tuple[int, ...] @dataclass class WithChunks(WithShape): chunks: tuple[int, ...] @dataclass class WithChunkLen(WithShape): chunklen: int @pytest.mark.parametrize( ("observed", "expected"), [ ({}, (None, None)), (WithShape(shape=(1, 2)), ((1, 2), None)), (WithChunks(shape=(1, 2), chunks=(1, 2)), ((1, 2), (1, 2))), (WithChunkLen(shape=(10, 10), chunklen=1), ((10, 10), (1, 10))), ], ) def test_get_shape_chunks( observed: object, expected: tuple[tuple[int, ...] | None, tuple[int, ...] | None] ) -> None: """ Test the _get_shape_chunks function """ assert _get_shape_chunks(observed) == expected @pytest.mark.parametrize( ("observed", "expected"), [ (np.arange(10, dtype=np.dtype("int64")), {"shape": (10,), "dtype": np.dtype("int64")}), (WithChunks(shape=(1, 2), chunks=(1, 2)), {"chunks": (1, 2), "shape": (1, 2)}), ( create_array( {}, chunks=(10,), shape=(100,), dtype="f8", compressors=None, filters=None, zarr_format=2, )._async_array, { "chunks": (10,), "shape": (100,), "dtype": np.dtype("f8"), "compressor": None, "filters": None, "order": "C", }, ), ], ) def test_like_args( observed: AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata] | AnyArray | npt.NDArray[Any], expected: object, ) -> None: """ Test the like_args function """ assert _like_args(observed) == expected async def test_open_no_array() -> None: """ Test that zarr.api.asynchronous.open attempts to open a group when no array is found, but shape was specified in kwargs. This behavior makes no sense but we should still test it. """ store = { "zarr.json": default_buffer_prototype().buffer.from_bytes( json.dumps({"zarr_format": 3, "node_type": "group"}).encode("utf-8") ) } with pytest.raises( TypeError, match=r"open_group\(\) got an unexpected keyword argument 'shape'" ): await open(store=store, shape=(1,)) async def test_open_group_new_path(tmp_path: Path) -> None: """ Test that zarr.api.asynchronous.group properly handles a string representation of a local file path that does not yet exist. 
See https://github.com/zarr-developers/zarr-python/issues/3406 """ # tmp_path exists, but tmp_path / "test.zarr" will not, which is important for this test path = tmp_path / "test.zarr" grp = await group(store=path, attributes={"a": 1}) assert isinstance(grp, AsyncGroup) # Calling group on an existing store should just open that store grp = await group(store=path) assert grp.attrs == {"a": 1} zarr-python-3.1.5/tests/test_api/test_synchronous.py000066400000000000000000000107311511007055700227750ustar00rootroot00000000000000from __future__ import annotations from typing import TYPE_CHECKING, Any, Final import pytest from numpydoc.docscrape import NumpyDocString import zarr from zarr.api import asynchronous, synchronous if TYPE_CHECKING: from collections.abc import Callable MATCHED_EXPORT_NAMES: Final[tuple[str, ...]] = tuple( sorted(set(synchronous.__all__) | set(asynchronous.__all__)) ) """A sorted tuple of names that are exported by both the sync and async APIs.""" MATCHED_CALLABLE_NAMES: Final[tuple[str, ...]] = tuple( x for x in MATCHED_EXPORT_NAMES if callable(getattr(synchronous, x)) ) """A sorted tuple of callable names that are exported by both the sync and async APIs.""" @pytest.mark.parametrize("callable_name", MATCHED_CALLABLE_NAMES) def test_docstrings_match(callable_name: str) -> None: """ Tests that the docstrings for the sync and async define identical parameters. """ callable_a = getattr(synchronous, callable_name) callable_b = getattr(asynchronous, callable_name) if callable_a.__doc__ is None: assert callable_b.__doc__ is None else: params_a = NumpyDocString(callable_a.__doc__)["Parameters"] params_b = NumpyDocString(callable_b.__doc__)["Parameters"] mismatch = [] for idx, (a, b) in enumerate(zip(params_a, params_b, strict=False)): if a != b: mismatch.append((idx, (a, b))) assert mismatch == [] @pytest.mark.parametrize( ("parameter_name", "array_creation_routines"), [ ( ("store", "path"), ( asynchronous.create_array, synchronous.create_array, asynchronous.create_group, synchronous.create_group, zarr.AsyncGroup.create_array, zarr.Group.create_array, ), ), ( ( "store", "path", ), ( asynchronous.create, synchronous.create, zarr.Group.create, zarr.AsyncArray.create, zarr.Array.create, ), ), ( ( ( "filters", "codecs", "compressors", "compressor", "chunks", "shape", "dtype", "shardsfill_value", ) ), ( asynchronous.create, synchronous.create, asynchronous.create_array, synchronous.create_array, zarr.AsyncGroup.create_array, zarr.Group.create_array, zarr.AsyncGroup.create_dataset, zarr.Group.create_dataset, ), ), ], ids=str, ) def test_docstring_consistent_parameters( parameter_name: str, array_creation_routines: tuple[Callable[[Any], Any], ...] ) -> None: """ Tests that array and group creation routines document the same parameters consistently. This test inspects the docstrings of sets of callables and generates two dicts: - a dict where the keys are parameter descriptions and the values are the names of the routines with those descriptions - a dict where the keys are parameter types and the values are the names of the routines with those types If each dict has just 1 value, then the parameter description and type in the docstring must be identical across different routines. But if these dicts have multiple values, then there must be routines that use the same parameter but document it differently, which will trigger a test failure. 
""" descs: dict[tuple[str, ...], tuple[str, ...]] = {} types: dict[str, tuple[str, ...]] = {} for routine in array_creation_routines: key = f"{routine.__module__}.{routine.__qualname__}" docstring = NumpyDocString(routine.__doc__) param_dict = {d.name: d for d in docstring["Parameters"]} if parameter_name in param_dict: val = param_dict[parameter_name] if tuple(val.desc) in descs: descs[tuple(val.desc)] = descs[tuple(val.desc)] + (key,) else: descs[tuple(val.desc)] = (key,) if val.type in types: types[val.type] = types[val.type] + (key,) else: types[val.type] = (key,) assert len(descs) <= 1 assert len(types) <= 1 zarr-python-3.1.5/tests/test_array.py000066400000000000000000002301071511007055700177120ustar00rootroot00000000000000import dataclasses import inspect import json import math import multiprocessing as mp import pickle import re import sys from itertools import accumulate from typing import TYPE_CHECKING, Any, Literal from unittest import mock import numcodecs import numpy as np import numpy.typing as npt import pytest from packaging.version import Version import zarr.api.asynchronous import zarr.api.synchronous as sync_api from tests.conftest import skip_object_dtype from zarr import Array, Group from zarr.abc.store import Store from zarr.codecs import ( BytesCodec, GzipCodec, TransposeCodec, ZstdCodec, ) from zarr.core._info import ArrayInfo from zarr.core.array import ( AsyncArray, CompressorsLike, FiltersLike, _iter_chunk_coords, _iter_chunk_regions, _iter_shard_coords, _iter_shard_keys, _iter_shard_regions, _parse_chunk_encoding_v2, _parse_chunk_encoding_v3, _shards_initialized, create_array, default_filters_v2, default_serializer_v3, ) from zarr.core.buffer import NDArrayLike, NDArrayLikeOrScalar, default_buffer_prototype from zarr.core.chunk_grids import _auto_partition from zarr.core.chunk_key_encodings import ChunkKeyEncodingParams from zarr.core.common import JSON, ZarrFormat, ceildiv from zarr.core.dtype import ( DateTime64, Float32, Float64, Int16, Structured, TimeDelta64, UInt8, VariableLengthBytes, VariableLengthUTF8, ZDType, parse_dtype, ) from zarr.core.dtype.common import ENDIANNESS_STR, EndiannessStr from zarr.core.dtype.npy.common import NUMPY_ENDIANNESS_STR, endianness_from_numpy_str from zarr.core.dtype.npy.string import UTF8Base from zarr.core.group import AsyncGroup from zarr.core.indexing import BasicIndexer, _iter_grid, _iter_regions from zarr.core.metadata.v2 import ArrayV2Metadata from zarr.core.sync import sync from zarr.errors import ( ContainsArrayError, ContainsGroupError, ZarrUserWarning, ) from zarr.storage import LocalStore, MemoryStore, StorePath from zarr.storage._logging import LoggingStore from zarr.types import AnyArray, AnyAsyncArray from .test_dtype.conftest import zdtype_examples if TYPE_CHECKING: from zarr.abc.codec import CodecJSON_V3 @pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) @pytest.mark.parametrize("zarr_format", [2, 3]) @pytest.mark.parametrize("overwrite", [True, False]) @pytest.mark.parametrize("extant_node", ["array", "group"]) def test_array_creation_existing_node( store: LocalStore | MemoryStore, zarr_format: ZarrFormat, overwrite: bool, extant_node: Literal["array", "group"], ) -> None: """ Check that an existing array or group is handled as expected during array creation. 
""" spath = StorePath(store) group = Group.from_store(spath, zarr_format=zarr_format) expected_exception: type[ContainsArrayError | ContainsGroupError] if extant_node == "array": expected_exception = ContainsArrayError _ = group.create_array("extant", shape=(10,), dtype="uint8") elif extant_node == "group": expected_exception = ContainsGroupError _ = group.create_group("extant") else: raise AssertionError new_shape = (2, 2) new_dtype = "float32" if overwrite: if not store.supports_deletes: pytest.skip("store does not support deletes") arr_new = zarr.create_array( spath / "extant", shape=new_shape, dtype=new_dtype, overwrite=overwrite, zarr_format=zarr_format, ) assert arr_new.shape == new_shape assert arr_new.dtype == new_dtype else: with pytest.raises(expected_exception): arr_new = zarr.create_array( spath / "extant", shape=new_shape, dtype=new_dtype, overwrite=overwrite, zarr_format=zarr_format, ) @pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) @pytest.mark.parametrize("zarr_format", [2, 3]) async def test_create_creates_parents( store: LocalStore | MemoryStore, zarr_format: ZarrFormat ) -> None: # prepare a root node, with some data set await zarr.api.asynchronous.open_group( store=store, path="a", zarr_format=zarr_format, attributes={"key": "value"} ) # create a child node with a couple intermediates await zarr.api.asynchronous.create( shape=(2, 2), store=store, path="a/b/c/d", zarr_format=zarr_format ) parts = ["a", "a/b", "a/b/c"] if zarr_format == 2: files = [".zattrs", ".zgroup"] else: files = ["zarr.json"] expected = [f"{part}/{file}" for file in files for part in parts] if zarr_format == 2: expected.extend([".zattrs", ".zgroup", "a/b/c/d/.zarray", "a/b/c/d/.zattrs"]) else: expected.extend(["zarr.json", "a/b/c/d/zarr.json"]) expected = sorted(expected) result = sorted([x async for x in store.list_prefix("")]) assert result == expected paths = ["a", "a/b", "a/b/c"] for path in paths: g = await zarr.api.asynchronous.open_group(store=store, path=path) assert isinstance(g, AsyncGroup) @pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) @pytest.mark.parametrize("zarr_format", [2, 3]) def test_array_name_properties_no_group( store: LocalStore | MemoryStore, zarr_format: ZarrFormat ) -> None: arr = zarr.create_array( store=store, shape=(100,), chunks=(10,), zarr_format=zarr_format, dtype=">i4" ) assert arr.path == "" assert arr.name == "/" assert arr.basename == "" @pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) @pytest.mark.parametrize("zarr_format", [2, 3]) def test_array_name_properties_with_group( store: LocalStore | MemoryStore, zarr_format: ZarrFormat ) -> None: root = Group.from_store(store=store, zarr_format=zarr_format) foo = root.create_array("foo", shape=(100,), chunks=(10,), dtype="i4") assert foo.path == "foo" assert foo.name == "/foo" assert foo.basename == "foo" bar = root.create_group("bar") spam = bar.create_array("spam", shape=(100,), chunks=(10,), dtype="i4") assert spam.path == "bar/spam" assert spam.name == "/bar/spam" assert spam.basename == "spam" @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") @pytest.mark.parametrize("store", ["memory"], indirect=True) @pytest.mark.parametrize("specifiy_fill_value", [True, False]) @pytest.mark.parametrize( "zdtype", zdtype_examples, ids=tuple(str(type(v)) for v in zdtype_examples) ) def test_array_fill_value_default( store: MemoryStore, specifiy_fill_value: bool, zdtype: ZDType[Any, Any] ) 
-> None: """ Test that creating an array with the fill_value parameter set to None, or unspecified, results in the expected fill_value attribute of the array, i.e. the default value of the dtype """ shape = (10,) if specifiy_fill_value: arr = zarr.create_array( store=store, shape=shape, dtype=zdtype, zarr_format=3, chunks=shape, fill_value=None, ) else: arr = zarr.create_array(store=store, shape=shape, dtype=zdtype, zarr_format=3, chunks=shape) expected_fill_value = zdtype.default_scalar() if isinstance(expected_fill_value, np.datetime64 | np.timedelta64): if np.isnat(expected_fill_value): assert np.isnat(arr.fill_value) elif isinstance(expected_fill_value, np.floating | np.complexfloating): if np.isnan(expected_fill_value): assert np.isnan(arr.fill_value) else: assert arr.fill_value == expected_fill_value # A simpler check would be to ensure that arr.fill_value.dtype == arr.dtype # But for some numpy data types (namely, U), scalars might not have length. An empty string # scalar from a `>U4` array would have dtype `>U`, and arr.fill_value.dtype == arr.dtype will fail. assert type(arr.fill_value) is type(np.array([arr.fill_value], dtype=arr.dtype)[0]) @pytest.mark.parametrize("store", ["memory"], indirect=True) @pytest.mark.parametrize( ("dtype_str", "fill_value"), [("bool", True), ("uint8", 99), ("float32", -99.9), ("complex64", 3 + 4j)], ) def test_array_v3_fill_value(store: MemoryStore, fill_value: int, dtype_str: str) -> None: shape = (10,) arr = zarr.create_array( store=store, shape=shape, dtype=dtype_str, zarr_format=3, chunks=shape, fill_value=fill_value, ) assert arr.fill_value == np.dtype(dtype_str).type(fill_value) assert arr.fill_value.dtype == arr.dtype @pytest.mark.parametrize("store", ["memory"], indirect=True) async def test_array_v3_nan_fill_value(store: MemoryStore) -> None: shape = (10,) arr = zarr.create_array( store=store, shape=shape, dtype=np.float64, zarr_format=3, chunks=shape, fill_value=np.nan, ) arr[:] = np.nan assert np.isnan(arr.fill_value) assert arr.fill_value.dtype == arr.dtype # all fill value chunk is an empty chunk, and should not be written assert len([a async for a in store.list_prefix("/")]) == 0 @pytest.mark.parametrize("store", ["local"], indirect=["store"]) @pytest.mark.parametrize("zarr_format", [2, 3]) async def test_serializable_async_array( store: LocalStore | MemoryStore, zarr_format: ZarrFormat ) -> None: expected = await zarr.api.asynchronous.create_array( store=store, shape=(100,), chunks=(10,), zarr_format=zarr_format, dtype="i4" ) # await expected.setitems(list(range(100))) p = pickle.dumps(expected) actual = pickle.loads(p) assert actual == expected # np.testing.assert_array_equal(await actual.getitem(slice(None)), await expected.getitem(slice(None))) # TODO: uncomment the parts of this test that will be impacted by the config/prototype changes in flight @pytest.mark.parametrize("store", ["local"], indirect=["store"]) @pytest.mark.parametrize("zarr_format", [2, 3]) def test_serializable_sync_array(store: LocalStore, zarr_format: ZarrFormat) -> None: expected = zarr.create_array( store=store, shape=(100,), chunks=(10,), zarr_format=zarr_format, dtype="i4" ) expected[:] = list(range(100)) p = pickle.dumps(expected) actual = pickle.loads(p) assert actual == expected np.testing.assert_array_equal(actual[:], expected[:]) @pytest.mark.parametrize("store", ["memory"], indirect=True) @pytest.mark.parametrize("zarr_format", [2, 3, "invalid"]) def test_storage_transformers(store: MemoryStore, zarr_format: ZarrFormat | str) -> None: """ Test that 
providing an actual storage transformer produces a warning and otherwise passes through """ metadata_dict: dict[str, JSON] if zarr_format == 3: metadata_dict = { "zarr_format": 3, "node_type": "array", "shape": (10,), "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}}, "data_type": "uint8", "chunk_key_encoding": {"name": "v2", "configuration": {"separator": "/"}}, "codecs": (BytesCodec().to_dict(),), "fill_value": 0, "storage_transformers": ({"test": "should_raise"}), } else: metadata_dict = { "zarr_format": zarr_format, "shape": (10,), "chunks": (1,), "dtype": "|u1", "dimension_separator": ".", "codecs": (BytesCodec().to_dict(),), "fill_value": 0, "order": "C", "storage_transformers": ({"test": "should_raise"}), } if zarr_format == 3: match = "Arrays with storage transformers are not supported in zarr-python at this time." with pytest.raises(ValueError, match=match): Array.from_dict(StorePath(store), data=metadata_dict) elif zarr_format == 2: # no warning Array.from_dict(StorePath(store), data=metadata_dict) else: match = f"Invalid zarr_format: {zarr_format}. Expected 2 or 3" with pytest.raises(ValueError, match=match): Array.from_dict(StorePath(store), data=metadata_dict) @pytest.mark.parametrize("test_cls", [AnyArray, AnyAsyncArray]) @pytest.mark.parametrize("nchunks", [2, 5, 10]) def test_nchunks(test_cls: type[AnyArray] | type[AnyAsyncArray], nchunks: int) -> None: """ Test that nchunks returns the number of chunks defined for the array. """ store = MemoryStore() shape = 100 arr = zarr.create_array(store, shape=(shape,), chunks=(ceildiv(shape, nchunks),), dtype="i4") expected = nchunks if test_cls == Array: observed = arr.nchunks else: observed = arr.async_array.nchunks assert observed == expected @pytest.mark.parametrize("test_cls", [Array, AsyncArray]) @pytest.mark.parametrize( ("shape", "shard_shape", "chunk_shape"), [((10,), None, (1,)), ((10,), (1,), (1,)), ((40,), (20,), (5,))], ) async def test_nchunks_initialized( test_cls: type[AnyArray] | type[AnyAsyncArray], shape: tuple[int, ...], shard_shape: tuple[int, ...] | None, chunk_shape: tuple[int, ...], ) -> None: """ Test that nchunks_initialized accurately returns the number of stored partitions. 
""" store = MemoryStore() if shard_shape is None: chunks_per_shard = 1 else: chunks_per_shard = np.prod(np.array(shard_shape) // np.array(chunk_shape)) arr = zarr.create_array(store, shape=shape, shards=shard_shape, chunks=chunk_shape, dtype="i1") # write chunks one at a time for idx, region in enumerate(arr._iter_shard_regions()): arr[region] = 1 expected = idx + 1 if test_cls == Array: observed = arr._nshards_initialized assert observed == arr.nchunks_initialized // chunks_per_shard else: observed = await arr.async_array._nshards_initialized() assert observed == await arr.async_array.nchunks_initialized() // chunks_per_shard assert observed == expected # delete chunks for idx, key in enumerate(arr._iter_shard_keys()): sync(arr.store_path.store.delete(key)) if test_cls == Array: observed = arr._nshards_initialized assert observed == arr.nchunks_initialized // chunks_per_shard else: observed = await arr.async_array._nshards_initialized() assert observed == await arr.async_array.nchunks_initialized() // chunks_per_shard expected = arr._nshards - idx - 1 assert observed == expected @pytest.mark.parametrize("path", ["", "foo"]) @pytest.mark.parametrize( ("shape", "shard_shape", "chunk_shape"), [((10,), None, (1,)), ((10,), (1,), (1,)), ((40,), (20,), (5,))], ) async def test_chunks_initialized( path: str, shape: tuple[int, ...], shard_shape: tuple[int, ...], chunk_shape: tuple[int, ...] ) -> None: """ Test that chunks_initialized accurately returns the keys of stored chunks. """ store = MemoryStore() arr = zarr.create_array( store, name=path, shape=shape, shards=shard_shape, chunks=chunk_shape, dtype="i1" ) chunks_accumulated = tuple( accumulate(tuple(tuple(v.split(" ")) for v in arr._iter_shard_keys())) ) for keys, region in zip(chunks_accumulated, arr._iter_shard_regions(), strict=False): arr[region] = 1 observed = sorted(await _shards_initialized(arr.async_array)) expected = sorted(keys) assert observed == expected def test_nbytes_stored() -> None: arr = zarr.create(shape=(100,), chunks=(10,), dtype="i4", codecs=[BytesCodec()]) result = arr.nbytes_stored() assert result == 502 # the size of the metadata document. This is a fragile test. arr[:50] = 1 result = arr.nbytes_stored() assert result == 702 # the size with 5 chunks filled. arr[50:] = 2 result = arr.nbytes_stored() assert result == 902 # the size with all chunks filled. async def test_nbytes_stored_async() -> None: arr = await zarr.api.asynchronous.create( shape=(100,), chunks=(10,), dtype="i4", codecs=[BytesCodec()] ) result = await arr.nbytes_stored() assert result == 502 # the size of the metadata document. This is a fragile test. await arr.setitem(slice(50), 1) result = await arr.nbytes_stored() assert result == 702 # the size with 5 chunks filled. await arr.setitem(slice(50, 100), 2) result = await arr.nbytes_stored() assert result == 902 # the size with all chunks filled. 
@pytest.mark.parametrize("zarr_format", [2, 3]) def test_update_attrs(zarr_format: ZarrFormat) -> None: # regression test for https://github.com/zarr-developers/zarr-python/issues/2328 store = MemoryStore() arr = zarr.create_array( store=store, shape=(5,), chunks=(5,), dtype="f8", zarr_format=zarr_format ) arr.attrs["foo"] = "bar" assert arr.attrs["foo"] == "bar" arr2 = zarr.open_array(store=store, zarr_format=zarr_format) assert arr2.attrs["foo"] == "bar" @pytest.mark.parametrize(("chunks", "shards"), [((2, 2), None), ((2, 2), (4, 4))]) class TestInfo: def test_info_v2(self, chunks: tuple[int, int], shards: tuple[int, int] | None) -> None: arr = zarr.create_array(store={}, shape=(8, 8), dtype="f8", chunks=chunks, zarr_format=2) result = arr.info expected = ArrayInfo( _zarr_format=2, _data_type=arr.async_array._zdtype, _fill_value=arr.fill_value, _shape=(8, 8), _chunk_shape=chunks, _shard_shape=None, _order="C", _read_only=False, _store_type="MemoryStore", _count_bytes=512, _compressors=(numcodecs.Zstd(),), ) assert result == expected def test_info_v3(self, chunks: tuple[int, int], shards: tuple[int, int] | None) -> None: arr = zarr.create_array(store={}, shape=(8, 8), dtype="f8", chunks=chunks, shards=shards) result = arr.info expected = ArrayInfo( _zarr_format=3, _data_type=arr.async_array._zdtype, _fill_value=arr.fill_value, _shape=(8, 8), _chunk_shape=chunks, _shard_shape=shards, _order="C", _read_only=False, _store_type="MemoryStore", _compressors=(ZstdCodec(),), _serializer=BytesCodec(), _count_bytes=512, ) assert result == expected def test_info_complete(self, chunks: tuple[int, int], shards: tuple[int, int] | None) -> None: arr = zarr.create_array( store={}, shape=(8, 8), dtype="f8", chunks=chunks, shards=shards, compressors=(), ) result = arr.info_complete() expected = ArrayInfo( _zarr_format=3, _data_type=arr.async_array._zdtype, _fill_value=arr.fill_value, _shape=(8, 8), _chunk_shape=chunks, _shard_shape=shards, _order="C", _read_only=False, _store_type="MemoryStore", _serializer=BytesCodec(), _count_bytes=512, _count_chunks_initialized=0, _count_bytes_stored=521 if shards is None else 982, # the metadata? 
) assert result == expected arr[:4, :4] = 10 result = arr.info_complete() if shards is None: expected = dataclasses.replace( expected, _count_chunks_initialized=4, _count_bytes_stored=649 ) else: expected = dataclasses.replace( expected, _count_chunks_initialized=1, _count_bytes_stored=1178 ) assert result == expected async def test_info_v2_async( self, chunks: tuple[int, int], shards: tuple[int, int] | None ) -> None: arr = await zarr.api.asynchronous.create_array( store={}, shape=(8, 8), dtype="f8", chunks=chunks, zarr_format=2 ) result = arr.info expected = ArrayInfo( _zarr_format=2, _data_type=Float64(), _fill_value=arr.metadata.fill_value, _shape=(8, 8), _chunk_shape=(2, 2), _shard_shape=None, _order="C", _read_only=False, _store_type="MemoryStore", _count_bytes=512, _compressors=(numcodecs.Zstd(),), ) assert result == expected async def test_info_v3_async( self, chunks: tuple[int, int], shards: tuple[int, int] | None ) -> None: arr = await zarr.api.asynchronous.create_array( store={}, shape=(8, 8), dtype="f8", chunks=chunks, shards=shards, ) result = arr.info expected = ArrayInfo( _zarr_format=3, _data_type=arr._zdtype, _fill_value=arr.metadata.fill_value, _shape=(8, 8), _chunk_shape=chunks, _shard_shape=shards, _order="C", _read_only=False, _store_type="MemoryStore", _compressors=(ZstdCodec(),), _serializer=BytesCodec(), _count_bytes=512, ) assert result == expected async def test_info_complete_async( self, chunks: tuple[int, int], shards: tuple[int, int] | None ) -> None: arr = await zarr.api.asynchronous.create_array( store={}, dtype="f8", shape=(8, 8), chunks=chunks, shards=shards, compressors=None, ) result = await arr.info_complete() expected = ArrayInfo( _zarr_format=3, _data_type=arr._zdtype, _fill_value=arr.metadata.fill_value, _shape=(8, 8), _chunk_shape=chunks, _shard_shape=shards, _order="C", _read_only=False, _store_type="MemoryStore", _serializer=BytesCodec(), _count_bytes=512, _count_chunks_initialized=0, _count_bytes_stored=521 if shards is None else 982, # the metadata? 
) assert result == expected await arr.setitem((slice(4), slice(4)), 10) result = await arr.info_complete() if shards is None: expected = dataclasses.replace( expected, _count_chunks_initialized=4, _count_bytes_stored=553 ) else: expected = dataclasses.replace( expected, _count_chunks_initialized=1, _count_bytes_stored=1178 ) @pytest.mark.parametrize("store", ["memory"], indirect=True) def test_resize_1d(store: MemoryStore, zarr_format: ZarrFormat) -> None: z = zarr.create( shape=105, chunks=10, dtype="i4", fill_value=0, store=store, zarr_format=zarr_format ) a = np.arange(105, dtype="i4") z[:] = a result = z[:] assert isinstance(result, NDArrayLike) assert (105,) == z.shape assert (105,) == result.shape assert np.dtype("i4") == z.dtype assert np.dtype("i4") == result.dtype assert (10,) == z.chunks np.testing.assert_array_equal(a, result) z.resize(205) result = z[:] assert isinstance(result, NDArrayLike) assert (205,) == z.shape assert (205,) == result.shape assert np.dtype("i4") == z.dtype assert np.dtype("i4") == result.dtype assert (10,) == z.chunks np.testing.assert_array_equal(a, z[:105]) np.testing.assert_array_equal(np.zeros(100, dtype="i4"), z[105:]) z.resize(55) result = z[:] assert isinstance(result, NDArrayLike) assert (55,) == z.shape assert (55,) == result.shape assert np.dtype("i4") == z.dtype assert np.dtype("i4") == result.dtype assert (10,) == z.chunks np.testing.assert_array_equal(a[:55], result) # via shape setter new_shape = (105,) z.shape = new_shape result = z[:] assert isinstance(result, NDArrayLike) assert new_shape == z.shape assert new_shape == result.shape @pytest.mark.parametrize("store", ["memory"], indirect=True) def test_resize_2d(store: MemoryStore, zarr_format: ZarrFormat) -> None: z = zarr.create( shape=(105, 105), chunks=(10, 10), dtype="i4", fill_value=0, store=store, zarr_format=zarr_format, ) a = np.arange(105 * 105, dtype="i4").reshape((105, 105)) z[:] = a result = z[:] assert isinstance(result, NDArrayLike) assert (105, 105) == z.shape assert (105, 105) == result.shape assert np.dtype("i4") == z.dtype assert np.dtype("i4") == result.dtype assert (10, 10) == z.chunks np.testing.assert_array_equal(a, result) z.resize((205, 205)) result = z[:] assert isinstance(result, NDArrayLike) assert (205, 205) == z.shape assert (205, 205) == result.shape assert np.dtype("i4") == z.dtype assert np.dtype("i4") == result.dtype assert (10, 10) == z.chunks np.testing.assert_array_equal(a, z[:105, :105]) np.testing.assert_array_equal(np.zeros((100, 205), dtype="i4"), z[105:, :]) np.testing.assert_array_equal(np.zeros((205, 100), dtype="i4"), z[:, 105:]) z.resize((55, 55)) result = z[:] assert isinstance(result, NDArrayLike) assert (55, 55) == z.shape assert (55, 55) == result.shape assert np.dtype("i4") == z.dtype assert np.dtype("i4") == result.dtype assert (10, 10) == z.chunks np.testing.assert_array_equal(a[:55, :55], result) z.resize((55, 1)) result = z[:] assert isinstance(result, NDArrayLike) assert (55, 1) == z.shape assert (55, 1) == result.shape assert np.dtype("i4") == z.dtype assert np.dtype("i4") == result.dtype assert (10, 10) == z.chunks np.testing.assert_array_equal(a[:55, :1], result) z.resize((1, 55)) result = z[:] assert isinstance(result, NDArrayLike) assert (1, 55) == z.shape assert (1, 55) == result.shape assert np.dtype("i4") == z.dtype assert np.dtype("i4") == result.dtype assert (10, 10) == z.chunks np.testing.assert_array_equal(a[:1, :10], z[:, :10]) np.testing.assert_array_equal(np.zeros((1, 55 - 10), dtype="i4"), z[:, 10:55]) # via shape setter 
new_shape = (105, 105) z.shape = new_shape result = z[:] assert isinstance(result, NDArrayLike) assert new_shape == z.shape assert new_shape == result.shape @pytest.mark.parametrize("store", ["memory"], indirect=True) def test_append_1d(store: MemoryStore, zarr_format: ZarrFormat) -> None: a = np.arange(105) z = zarr.create(shape=a.shape, chunks=10, dtype=a.dtype, store=store, zarr_format=zarr_format) z[:] = a assert a.shape == z.shape assert a.dtype == z.dtype assert (10,) == z.chunks np.testing.assert_array_equal(a, z[:]) b = np.arange(105, 205) e = np.append(a, b) assert z.shape == (105,) z.append(b) assert e.shape == z.shape assert e.dtype == z.dtype assert (10,) == z.chunks np.testing.assert_array_equal(e, z[:]) # check append handles array-like c = [1, 2, 3] f = np.append(e, c) z.append(c) assert f.shape == z.shape assert f.dtype == z.dtype assert (10,) == z.chunks np.testing.assert_array_equal(f, z[:]) @pytest.mark.parametrize("store", ["memory"], indirect=True) def test_append_2d(store: MemoryStore, zarr_format: ZarrFormat) -> None: a = np.arange(105 * 105, dtype="i4").reshape((105, 105)) z = zarr.create( shape=a.shape, chunks=(10, 10), dtype=a.dtype, store=store, zarr_format=zarr_format ) z[:] = a assert a.shape == z.shape assert a.dtype == z.dtype assert (10, 10) == z.chunks actual = z[:] np.testing.assert_array_equal(a, actual) b = np.arange(105 * 105, 2 * 105 * 105, dtype="i4").reshape((105, 105)) e = np.append(a, b, axis=0) z.append(b) assert e.shape == z.shape assert e.dtype == z.dtype assert (10, 10) == z.chunks actual = z[:] np.testing.assert_array_equal(e, actual) @pytest.mark.parametrize("store", ["memory"], indirect=True) def test_append_2d_axis(store: MemoryStore, zarr_format: ZarrFormat) -> None: a = np.arange(105 * 105, dtype="i4").reshape((105, 105)) z = zarr.create( shape=a.shape, chunks=(10, 10), dtype=a.dtype, store=store, zarr_format=zarr_format ) z[:] = a assert a.shape == z.shape assert a.dtype == z.dtype assert (10, 10) == z.chunks np.testing.assert_array_equal(a, z[:]) b = np.arange(105 * 105, 2 * 105 * 105, dtype="i4").reshape((105, 105)) e = np.append(a, b, axis=1) z.append(b, axis=1) assert e.shape == z.shape assert e.dtype == z.dtype assert (10, 10) == z.chunks np.testing.assert_array_equal(e, z[:]) @pytest.mark.parametrize("store", ["memory"], indirect=True) def test_append_bad_shape(store: MemoryStore, zarr_format: ZarrFormat) -> None: a = np.arange(100) z = zarr.create(shape=a.shape, chunks=10, dtype=a.dtype, store=store, zarr_format=zarr_format) z[:] = a b = a.reshape(10, 10) with pytest.raises(ValueError): z.append(b) @pytest.mark.parametrize("store", ["memory"], indirect=True) @pytest.mark.parametrize("write_empty_chunks", [True, False]) @pytest.mark.parametrize("fill_value", [0, 5]) def test_write_empty_chunks_behavior( zarr_format: ZarrFormat, store: MemoryStore, write_empty_chunks: bool, fill_value: int ) -> None: """ Check that the write_empty_chunks value of the config is applied correctly. We expect that when write_empty_chunks is True, writing chunks equal to the fill value will result in those chunks appearing in the store. When write_empty_chunks is False, writing chunks that are equal to the fill value will result in those chunks not being present in the store. In particular, they should be deleted if they were already present. 
""" arr = zarr.create_array( store=store, shape=(2,), zarr_format=zarr_format, dtype="i4", fill_value=fill_value, chunks=(1,), config={"write_empty_chunks": write_empty_chunks}, ) assert arr.async_array._config.write_empty_chunks == write_empty_chunks # initialize the store with some non-fill value chunks arr[:] = fill_value + 1 assert arr._nshards_initialized == arr._nshards arr[:] = fill_value if not write_empty_chunks: assert arr._nshards_initialized == 0 else: assert arr._nshards_initialized == arr._nshards @pytest.mark.parametrize("store", ["memory"], indirect=True) @pytest.mark.parametrize("fill_value", [0.0, -0.0]) @pytest.mark.parametrize("dtype", ["f4", "f2"]) def test_write_empty_chunks_negative_zero( zarr_format: ZarrFormat, store: MemoryStore, fill_value: float, dtype: str ) -> None: # regression test for https://github.com/zarr-developers/zarr-python/issues/3144 arr = zarr.create_array( store=store, shape=(2,), zarr_format=zarr_format, dtype=dtype, fill_value=fill_value, chunks=(1,), config={"write_empty_chunks": False}, ) assert arr.nchunks_initialized == 0 # initialize the with the negated fill value (-0.0 for +0.0, +0.0 for -0.0) arr[:] = -fill_value assert arr.nchunks_initialized == arr.nchunks @pytest.mark.parametrize( ("fill_value", "expected"), [ (np.nan * 1j, ["NaN", "NaN"]), (np.nan, ["NaN", 0.0]), (np.inf, ["Infinity", 0.0]), (np.inf * 1j, ["NaN", "Infinity"]), (-np.inf, ["-Infinity", 0.0]), (math.inf, ["Infinity", 0.0]), ], ) async def test_special_complex_fill_values_roundtrip(fill_value: Any, expected: list[Any]) -> None: store = MemoryStore() zarr.create_array(store=store, shape=(1,), dtype=np.complex64, fill_value=fill_value) content = await store.get("zarr.json", prototype=default_buffer_prototype()) assert content is not None actual = json.loads(content.to_bytes()) assert actual["fill_value"] == expected @pytest.mark.parametrize("shape", [(1,), (2, 3), (4, 5, 6)]) @pytest.mark.parametrize("dtype", ["uint8", "float32"]) @pytest.mark.parametrize("array_type", ["async", "sync"]) async def test_nbytes( shape: tuple[int, ...], dtype: str, array_type: Literal["async", "sync"] ) -> None: """ Test that the ``nbytes`` attribute of an Array or AsyncArray correctly reports the capacity of the chunks of that array. 
""" store = MemoryStore() arr = zarr.create_array(store=store, shape=shape, dtype=dtype, fill_value=0) if array_type == "async": assert arr.async_array.nbytes == np.prod(arr.shape) * arr.dtype.itemsize else: assert arr.nbytes == np.prod(arr.shape) * arr.dtype.itemsize @pytest.mark.parametrize( ("array_shape", "chunk_shape", "target_shard_size_bytes", "expected_shards"), [ pytest.param( (256, 256), (32, 32), 129 * 129, (128, 128), id="2d_chunking_max_byes_does_not_evenly_divide", ), pytest.param( (256, 256), (32, 32), 64 * 64, (64, 64), id="2d_chunking_max_byes_evenly_divides" ), pytest.param( (256, 256), (64, 32), 128 * 128, (128, 64), id="2d_non_square_chunking_max_byes_evenly_divides", ), pytest.param((256,), (2,), 255, (254,), id="max_bytes_just_below_array_shape"), pytest.param((256,), (2,), 256, (256,), id="max_bytes_equal_to_array_shape"), pytest.param((256,), (2,), 16, (16,), id="max_bytes_normal_val"), pytest.param((256,), (2,), 2, (2,), id="max_bytes_same_as_chunk"), pytest.param((256,), (2,), 1, (2,), id="max_bytes_less_than_chunk"), pytest.param((256,), (2,), None, (4,), id="use_default_auto_setting"), pytest.param((4,), (2,), None, (2,), id="small_array_shape_does_not_shard"), ], ) def test_auto_partition_auto_shards( array_shape: tuple[int, ...], chunk_shape: tuple[int, ...], target_shard_size_bytes: int | None, expected_shards: tuple[int, ...], ) -> None: """ Test that automatically picking a shard size returns a tuple of 2 * the chunk shape for any axis where there are 8 or more chunks. """ dtype = np.dtype("uint8") with pytest.warns( ZarrUserWarning, match="Automatic shard shape inference is experimental and may change without notice.", ): with zarr.config.set({"array.target_shard_size_bytes": target_shard_size_bytes}): auto_shards, _ = _auto_partition( array_shape=array_shape, chunk_shape=chunk_shape, shard_shape="auto", item_size=dtype.itemsize, ) assert auto_shards == expected_shards def test_chunks_and_shards() -> None: store = StorePath(MemoryStore()) shape = (100, 100) chunks = (5, 5) shards = (10, 10) arr_v3 = zarr.create_array(store=store / "v3", shape=shape, chunks=chunks, dtype="i4") assert arr_v3.chunks == chunks assert arr_v3.shards is None arr_v3_sharding = zarr.create_array( store=store / "v3_sharding", shape=shape, chunks=chunks, shards=shards, dtype="i4", ) assert arr_v3_sharding.chunks == chunks assert arr_v3_sharding.shards == shards arr_v2 = zarr.create_array( store=store / "v2", shape=shape, chunks=chunks, zarr_format=2, dtype="i4" ) assert arr_v2.chunks == chunks assert arr_v2.shards is None @pytest.mark.parametrize("store", ["memory"], indirect=True) @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") @pytest.mark.parametrize( ("dtype", "fill_value_expected"), [(" None: a = zarr.create_array(store, shape=(5,), chunks=(5,), dtype=dtype) assert a.fill_value == fill_value_expected @pytest.mark.parametrize("store", ["memory"], indirect=True) class TestCreateArray: @staticmethod def test_chunks_and_shards(store: Store) -> None: spath = StorePath(store) shape = (100, 100) chunks = (5, 5) shards = (10, 10) arr_v3 = zarr.create_array(store=spath / "v3", shape=shape, chunks=chunks, dtype="i4") assert arr_v3.chunks == chunks assert arr_v3.shards is None arr_v3_sharding = zarr.create_array( store=spath / "v3_sharding", shape=shape, chunks=chunks, shards=shards, dtype="i4", ) assert arr_v3_sharding.chunks == chunks assert arr_v3_sharding.shards == shards arr_v2 = zarr.create_array( store=spath / "v2", shape=shape, 
chunks=chunks, zarr_format=2, dtype="i4" ) assert arr_v2.chunks == chunks assert arr_v2.shards is None @staticmethod @pytest.mark.parametrize("dtype", zdtype_examples) @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") def test_default_fill_value(dtype: ZDType[Any, Any], store: Store) -> None: """ Test that the fill value of an array is set to the default value for the dtype object """ a = zarr.create_array(store, shape=(5,), chunks=(5,), dtype=dtype) if isinstance(dtype, DateTime64 | TimeDelta64) and np.isnat(a.fill_value): assert np.isnat(dtype.default_scalar()) else: assert a.fill_value == dtype.default_scalar() @staticmethod # @pytest.mark.parametrize("zarr_format", [2, 3]) @pytest.mark.parametrize("dtype", zdtype_examples) @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") def test_default_fill_value_None( dtype: ZDType[Any, Any], store: Store, zarr_format: ZarrFormat ) -> None: """ Test that the fill value of an array is set to the default value for an explicit None argument for Zarr Format 3, and to null for Zarr Format 2 """ a = zarr.create_array( store, shape=(5,), chunks=(5,), dtype=dtype, fill_value=None, zarr_format=zarr_format ) if zarr_format == 3: if isinstance(dtype, DateTime64 | TimeDelta64) and np.isnat(a.fill_value): assert np.isnat(dtype.default_scalar()) else: assert a.fill_value == dtype.default_scalar() elif zarr_format == 2: assert a.fill_value is None @staticmethod @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") @pytest.mark.parametrize("dtype", zdtype_examples) def test_dtype_forms(dtype: ZDType[Any, Any], store: Store, zarr_format: ZarrFormat) -> None: """ Test that the same array is produced from a ZDType instance, a numpy dtype, or a numpy string """ skip_object_dtype(dtype) a = zarr.create_array( store, name="a", shape=(5,), chunks=(5,), dtype=dtype, zarr_format=zarr_format ) b = zarr.create_array( store, name="b", shape=(5,), chunks=(5,), dtype=dtype.to_native_dtype(), zarr_format=zarr_format, ) assert a.dtype == b.dtype # Structured dtypes do not have a numpy string representation that uniquely identifies them if not isinstance(dtype, Structured): if isinstance(dtype, VariableLengthUTF8): # in numpy 2.3, StringDType().str becomes the string 'StringDType()' which numpy # does not accept as a string representation of the dtype. c = zarr.create_array( store, name="c", shape=(5,), chunks=(5,), dtype=dtype.to_native_dtype().char, zarr_format=zarr_format, ) else: c = zarr.create_array( store, name="c", shape=(5,), chunks=(5,), dtype=dtype.to_native_dtype().str, zarr_format=zarr_format, ) assert a.dtype == c.dtype @staticmethod @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") @pytest.mark.parametrize("dtype", zdtype_examples) def test_dtype_roundtrip( dtype: ZDType[Any, Any], store: Store, zarr_format: ZarrFormat ) -> None: """ Test that creating an array, then opening it, gets the same array. 
""" skip_object_dtype(dtype) a = zarr.create_array(store, shape=(5,), chunks=(5,), dtype=dtype, zarr_format=zarr_format) b = zarr.open_array(store) assert a.dtype == b.dtype @staticmethod @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") @pytest.mark.parametrize("dtype", ["uint8", "float32", "U3", "S4", "V1"]) @pytest.mark.parametrize( "compressors", [ "auto", None, (), (ZstdCodec(level=3),), (ZstdCodec(level=3), GzipCodec(level=0)), ZstdCodec(level=3), {"name": "zstd", "configuration": {"level": 3}}, ({"name": "zstd", "configuration": {"level": 3}},), ], ) @pytest.mark.parametrize( "filters", [ "auto", None, (), ( TransposeCodec( order=[ 0, ] ), ), ( TransposeCodec( order=[ 0, ] ), TransposeCodec( order=[ 0, ] ), ), TransposeCodec( order=[ 0, ] ), {"name": "transpose", "configuration": {"order": [0]}}, ({"name": "transpose", "configuration": {"order": [0]}},), ], ) @pytest.mark.parametrize(("chunks", "shards"), [((6,), None), ((3,), (6,))]) async def test_v3_chunk_encoding( store: MemoryStore, compressors: CompressorsLike, filters: FiltersLike, dtype: str, chunks: tuple[int, ...], shards: tuple[int, ...] | None, ) -> None: """ Test various possibilities for the compressors and filters parameter to create_array """ arr = await create_array( store=store, dtype=dtype, shape=(12,), chunks=chunks, shards=shards, zarr_format=3, filters=filters, compressors=compressors, ) filters_expected, _, compressors_expected = _parse_chunk_encoding_v3( filters=filters, compressors=compressors, serializer="auto", dtype=arr._zdtype, ) assert arr.filters == filters_expected assert arr.compressors == compressors_expected @staticmethod @pytest.mark.parametrize("name", ["v2", "default", "invalid"]) @pytest.mark.parametrize("separator", [".", "/"]) async def test_chunk_key_encoding( name: str, separator: Literal[".", "/"], zarr_format: ZarrFormat, store: MemoryStore ) -> None: chunk_key_encoding = ChunkKeyEncodingParams(name=name, separator=separator) # type: ignore[typeddict-item] error_msg = "" if name == "invalid": error_msg = r'Unknown chunk key encoding: "Chunk key encoding \'invalid\' not found in registered chunk key encodings: \[.*\]."' if zarr_format == 2 and name == "default": error_msg = "Invalid chunk key encoding. For Zarr format 2 arrays, the `name` field of the chunk key encoding must be 'v2'." 
if error_msg: with pytest.raises(ValueError, match=error_msg): arr = await create_array( store=store, dtype="uint8", shape=(10,), chunks=(1,), zarr_format=zarr_format, chunk_key_encoding=chunk_key_encoding, ) else: arr = await create_array( store=store, dtype="uint8", shape=(10,), chunks=(1,), zarr_format=zarr_format, chunk_key_encoding=chunk_key_encoding, ) if isinstance(arr.metadata, ArrayV2Metadata): assert arr.metadata.dimension_separator == separator @staticmethod @pytest.mark.parametrize( ("kwargs", "error_msg"), [ ({"serializer": "bytes"}, "Zarr format 2 arrays do not support `serializer`."), ({"dimension_names": ["test"]}, "Zarr format 2 arrays do not support dimension names."), ], ) async def test_create_array_invalid_v2_arguments( kwargs: dict[str, Any], error_msg: str, store: MemoryStore ) -> None: with pytest.raises(ValueError, match=re.escape(error_msg)): await zarr.api.asynchronous.create_array( store=store, dtype="uint8", shape=(10,), chunks=(1,), zarr_format=2, **kwargs ) @staticmethod @pytest.mark.parametrize( ("kwargs", "error_msg"), [ ( {"dimension_names": ["test"]}, "dimension_names cannot be used for arrays with zarr_format 2.", ), ( {"chunk_key_encoding": {"name": "default", "separator": "/"}}, "chunk_key_encoding cannot be used for arrays with zarr_format 2. Use dimension_separator instead.", ), ( {"codecs": "bytes"}, "codecs cannot be used for arrays with zarr_format 2. Use filters and compressor instead.", ), ], ) async def test_create_invalid_v2_arguments( kwargs: dict[str, Any], error_msg: str, store: MemoryStore ) -> None: with pytest.raises(ValueError, match=re.escape(error_msg)): await zarr.api.asynchronous.create( store=store, dtype="uint8", shape=(10,), chunks=(1,), zarr_format=2, **kwargs ) @staticmethod @pytest.mark.parametrize( ("kwargs", "error_msg"), [ ( {"chunk_shape": (1,), "chunks": (2,)}, "Only one of chunk_shape or chunks can be provided.", ), ( {"dimension_separator": "/"}, "dimension_separator cannot be used for arrays with zarr_format 3. Use chunk_key_encoding instead.", ), ( {"filters": []}, "filters cannot be used for arrays with zarr_format 3. Use array-to-array codecs instead", ), ( {"compressor": "blosc"}, "compressor cannot be used for arrays with zarr_format 3. 
Use bytes-to-bytes codecs instead", ), ], ) async def test_invalid_v3_arguments( kwargs: dict[str, Any], error_msg: str, store: MemoryStore ) -> None: kwargs.setdefault("chunks", (1,)) with pytest.raises(ValueError, match=re.escape(error_msg)): zarr.create(store=store, dtype="uint8", shape=(10,), zarr_format=3, **kwargs) @staticmethod @pytest.mark.parametrize("dtype", ["uint8", "float32", "str", "U10", "S10", ">M8[10s]"]) @pytest.mark.parametrize( "compressors", [ "auto", None, numcodecs.Zstd(level=3), (), (numcodecs.Zstd(level=3),), ], ) @pytest.mark.parametrize( "filters", ["auto", None, numcodecs.GZip(level=1), (numcodecs.GZip(level=1),)] ) async def test_v2_chunk_encoding( store: MemoryStore, compressors: CompressorsLike, filters: FiltersLike, dtype: str ) -> None: if dtype == "str" and filters != "auto": pytest.skip("Only the auto filters are compatible with str dtype in this test.") arr: AsyncArray[ArrayV2Metadata] = await create_array( store=store, dtype=dtype, shape=(10,), zarr_format=2, compressors=compressors, filters=filters, ) filters_expected, compressor_expected = _parse_chunk_encoding_v2( filters=filters, compressor=compressors, dtype=parse_dtype(dtype, zarr_format=2) ) assert arr.metadata.zarr_format == 2 # guard for mypy assert arr.metadata.compressor == compressor_expected assert arr.metadata.filters == filters_expected # Normalize for property getters arr_compressors_expected = () if compressor_expected is None else (compressor_expected,) arr_filters_expected = () if filters_expected is None else filters_expected assert arr.compressors == arr_compressors_expected assert arr.filters == arr_filters_expected @staticmethod @pytest.mark.parametrize("dtype", [UInt8(), Float32(), VariableLengthUTF8()]) @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") async def test_default_filters_compressors( store: MemoryStore, dtype: UInt8 | Float32 | VariableLengthUTF8, zarr_format: ZarrFormat ) -> None: """ Test that the default ``filters`` and ``compressors`` are used when ``create_array`` is invoked with ``filters`` and ``compressors`` unspecified. """ arr = await create_array( store=store, dtype=dtype, # type: ignore[arg-type] shape=(10,), zarr_format=zarr_format, ) sig = inspect.signature(create_array) if zarr_format == 3: expected_filters, expected_serializer, expected_compressors = _parse_chunk_encoding_v3( compressors=sig.parameters["compressors"].default, filters=sig.parameters["filters"].default, serializer=sig.parameters["serializer"].default, dtype=dtype, # type: ignore[arg-type] ) elif zarr_format == 2: default_filters, default_compressors = _parse_chunk_encoding_v2( compressor=sig.parameters["compressors"].default, filters=sig.parameters["filters"].default, dtype=dtype, # type: ignore[arg-type] ) if default_filters is None: expected_filters = () else: expected_filters = default_filters # type: ignore[assignment] if default_compressors is None: expected_compressors = () else: expected_compressors = (default_compressors,) # type: ignore[assignment] expected_serializer = None else: raise ValueError(f"Invalid zarr_format: {zarr_format}") assert arr.filters == expected_filters assert arr.serializer == expected_serializer assert arr.compressors == expected_compressors @staticmethod async def test_v2_no_shards(store: Store) -> None: """ Test that creating a Zarr v2 array with ``shard_shape`` set to a non-None value raises an error. """ msg = re.escape( "Zarr format 2 arrays can only be created with `shard_shape` set to `None`. 
Got `shard_shape=(5,)` instead." ) with pytest.raises(ValueError, match=msg): _ = await create_array( store=store, dtype="uint8", shape=(10,), shards=(5,), zarr_format=2, ) @staticmethod @pytest.mark.parametrize("impl", ["sync", "async"]) async def test_with_data(impl: Literal["sync", "async"], store: Store) -> None: """ Test that we can invoke ``create_array`` with a ``data`` parameter. """ data = np.arange(10) name = "foo" arr: AnyAsyncArray | AnyArray if impl == "sync": arr = sync_api.create_array(store, name=name, data=data) stored = arr[:] elif impl == "async": arr = await create_array(store, name=name, data=data, zarr_format=3) stored = await arr._get_selection( BasicIndexer(..., shape=arr.shape, chunk_grid=arr.metadata.chunk_grid), prototype=default_buffer_prototype(), ) else: raise ValueError(f"Invalid impl: {impl}") assert np.array_equal(stored, data) @staticmethod async def test_with_data_invalid_params(store: Store) -> None: """ Test that failing to specify data AND shape / dtype results in a ValueError """ with pytest.raises(ValueError, match="shape was not specified"): await create_array(store, data=None, shape=None, dtype=None) # we catch shape=None first, so specifying a dtype should raise the same exception as before with pytest.raises(ValueError, match="shape was not specified"): await create_array(store, data=None, shape=None, dtype="uint8") with pytest.raises(ValueError, match="dtype was not specified"): await create_array(store, data=None, shape=(10, 10)) @staticmethod async def test_data_ignored_params(store: Store) -> None: """ Test that specifying data AND shape AND dtype results in a ValueError """ data = np.arange(10) with pytest.raises( ValueError, match="The data parameter was used, but the shape parameter was also used." ): await create_array(store, data=data, shape=data.shape, dtype=None, overwrite=True) # we catch shape first, so specifying a dtype should raise the same warning as before with pytest.raises( ValueError, match="The data parameter was used, but the shape parameter was also used." ): await create_array(store, data=data, shape=data.shape, dtype=data.dtype, overwrite=True) with pytest.raises( ValueError, match="The data parameter was used, but the dtype parameter was also used." 
): await create_array(store, data=data, shape=None, dtype=data.dtype, overwrite=True) @staticmethod @pytest.mark.parametrize("write_empty_chunks", [True, False]) async def test_write_empty_chunks_config(write_empty_chunks: bool, store: Store) -> None: """ Test that the value of write_empty_chunks is sensitive to the global config when not set explicitly """ with zarr.config.set({"array.write_empty_chunks": write_empty_chunks}): arr = await create_array(store, shape=(2, 2), dtype="i4") assert arr._config.write_empty_chunks == write_empty_chunks @staticmethod @pytest.mark.parametrize("path", [None, "", "/", "/foo", "foo", "foo/bar"]) async def test_name(store: Store, zarr_format: ZarrFormat, path: str | None) -> None: arr = await create_array( store, shape=(2, 2), dtype="i4", name=path, zarr_format=zarr_format ) if path is None: expected_path = "" elif path.startswith("/"): expected_path = path.lstrip("/") else: expected_path = path assert arr.path == expected_path assert arr.name == "/" + expected_path # test that implicit groups were created path_parts = expected_path.split("/") if len(path_parts) > 1: *parents, _ = ["", *accumulate(path_parts, lambda x, y: "/".join([x, y]))] # noqa: FLY002 for parent_path in parents: # this will raise if these groups were not created _ = await zarr.api.asynchronous.open_group( store=store, path=parent_path, zarr_format=zarr_format ) @staticmethod @pytest.mark.parametrize("endianness", ENDIANNESS_STR) def test_default_endianness( store: Store, zarr_format: ZarrFormat, endianness: EndiannessStr ) -> None: """ Test that that endianness is correctly set when creating an array when not specifying a serializer """ dtype = Int16(endianness=endianness) arr = zarr.create_array(store=store, shape=(1,), dtype=dtype, zarr_format=zarr_format) byte_order: str = arr[:].dtype.byteorder # type: ignore[union-attr] assert byte_order in NUMPY_ENDIANNESS_STR assert endianness_from_numpy_str(byte_order) == endianness # type: ignore[arg-type] @pytest.mark.parametrize("value", [1, 1.4, "a", b"a", np.array(1)]) @pytest.mark.parametrize("zarr_format", [2, 3]) @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") def test_scalar_array(value: Any, zarr_format: ZarrFormat) -> None: arr = zarr.array(value, zarr_format=zarr_format) assert arr[...] 
== value assert arr.shape == () assert arr.ndim == 0 assert isinstance(arr[()], NDArrayLikeOrScalar) @pytest.mark.parametrize("store", ["local"], indirect=True) @pytest.mark.parametrize("store2", ["local"], indirect=["store2"]) @pytest.mark.parametrize("src_format", [2, 3]) @pytest.mark.parametrize("new_format", [2, 3, None]) async def test_creation_from_other_zarr_format( store: Store, store2: Store, src_format: ZarrFormat, new_format: ZarrFormat | None, ) -> None: if src_format == 2: src = zarr.create( (50, 50), chunks=(10, 10), store=store, zarr_format=src_format, dimension_separator="/" ) else: src = zarr.create( (50, 50), chunks=(10, 10), store=store, zarr_format=src_format, chunk_key_encoding=("default", "."), ) src[:] = np.arange(50 * 50).reshape((50, 50)) result = zarr.from_array( store=store2, data=src, zarr_format=new_format, ) np.testing.assert_array_equal(result[:], src[:]) assert result.fill_value == src.fill_value assert result.dtype == src.dtype assert result.chunks == src.chunks expected_format = src_format if new_format is None else new_format assert result.metadata.zarr_format == expected_format if src_format == new_format: assert result.metadata == src.metadata result2 = zarr.array( data=src, store=store2, overwrite=True, zarr_format=new_format, ) np.testing.assert_array_equal(result2[:], src[:]) @pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=True) @pytest.mark.parametrize("store2", ["local", "memory", "zip"], indirect=["store2"]) @pytest.mark.parametrize("src_chunks", [(40, 10), (11, 50)]) @pytest.mark.parametrize("new_chunks", [(40, 10), (11, 50)]) async def test_from_array( store: Store, store2: Store, src_chunks: tuple[int, int], new_chunks: tuple[int, int], zarr_format: ZarrFormat, ) -> None: src_fill_value = 2 src_dtype = np.dtype("uint8") src_attributes = None src = zarr.create( (100, 10), chunks=src_chunks, dtype=src_dtype, store=store, fill_value=src_fill_value, attributes=src_attributes, ) src[:] = np.arange(1000).reshape((100, 10)) new_fill_value = 3 new_attributes: dict[str, JSON] = {"foo": "bar"} result = zarr.from_array( data=src, store=store2, chunks=new_chunks, fill_value=new_fill_value, attributes=new_attributes, ) np.testing.assert_array_equal(result[:], src[:]) assert result.fill_value == new_fill_value assert result.dtype == src_dtype assert result.attrs == new_attributes assert result.chunks == new_chunks @pytest.mark.parametrize("store", ["local"], indirect=True) @pytest.mark.parametrize("chunks", ["keep", "auto"]) @pytest.mark.parametrize("write_data", [True, False]) @pytest.mark.parametrize( "src", [ np.arange(1000).reshape(10, 10, 10), zarr.ones((10, 10, 10)), 5, [1, 2, 3], [[1, 2, 3], [4, 5, 6]], ], ) # add other npt.ArrayLike? 
async def test_from_array_arraylike( store: Store, chunks: Literal["auto", "keep"] | tuple[int, int], write_data: bool, src: AnyArray | npt.ArrayLike, ) -> None: fill_value = 42 result = zarr.from_array( store, data=src, chunks=chunks, write_data=write_data, fill_value=fill_value ) if write_data: np.testing.assert_array_equal(result[...], np.array(src)) else: np.testing.assert_array_equal(result[...], np.full_like(src, fill_value)) def test_from_array_F_order() -> None: arr = zarr.create_array(store={}, data=np.array([1]), order="F", zarr_format=2) with pytest.warns( ZarrUserWarning, match="The existing order='F' of the source Zarr format 2 array will be ignored.", ): zarr.from_array(store={}, data=arr, zarr_format=3) async def test_orthogonal_set_total_slice() -> None: """Ensure that a whole chunk overwrite does not read chunks""" store = MemoryStore() array = zarr.create_array(store, shape=(20, 20), chunks=(1, 2), dtype=int, fill_value=-1) with mock.patch("zarr.storage.MemoryStore.get", side_effect=RuntimeError): array[0, slice(4, 10)] = np.arange(6) array = zarr.create_array( store, shape=(20, 21), chunks=(1, 2), dtype=int, fill_value=-1, overwrite=True ) with mock.patch("zarr.storage.MemoryStore.get", side_effect=RuntimeError): array[0, :] = np.arange(21) with mock.patch("zarr.storage.MemoryStore.get", side_effect=RuntimeError): array[:] = 1 @pytest.mark.skipif( Version(numcodecs.__version__) < Version("0.15.1"), reason="codec configuration is overwritten on older versions. GH2800", ) def test_roundtrip_numcodecs() -> None: store = MemoryStore() compressors = [ {"name": "numcodecs.shuffle", "configuration": {"elementsize": 2}}, {"name": "numcodecs.zlib", "configuration": {"level": 4}}, ] filters: list[CodecJSON_V3] = [ { "name": "numcodecs.fixedscaleoffset", "configuration": { "scale": 100.0, "offset": 0.0, "dtype": " Any: return arr[index] @pytest.mark.parametrize( "method", [ pytest.param( "fork", marks=pytest.mark.skipif( sys.platform in ("win32", "darwin"), reason="fork not supported on Windows or OSX" ), ), "spawn", pytest.param( "forkserver", marks=pytest.mark.skipif( sys.platform == "win32", reason="forkserver not supported on Windows" ), ), ], ) @pytest.mark.parametrize("store", ["local"], indirect=True) @pytest.mark.parametrize("shards", [None, (20,)]) def test_multiprocessing( store: Store, method: Literal["fork", "spawn", "forkserver"], shards: tuple[int, ...] | None ) -> None: """ Test that arrays can be pickled and indexed in child processes """ data = np.arange(100) chunks: Literal["auto"] | tuple[int, ...] if shards is None: chunks = "auto" else: chunks = (1,) arr = zarr.create_array(store=store, data=data, shards=shards, chunks=chunks) ctx = mp.get_context(method) with ctx.Pool() as pool: results = pool.starmap(_index_array, [(arr, slice(len(data)))]) assert all(np.array_equal(r, data) for r in results) def test_create_array_method_signature() -> None: """ Test that the signature of the ``AsyncGroup.create_array`` function has nearly the same signature as the ``create_array`` function. ``AsyncGroup.create_array`` should take all of the same keyword arguments as ``create_array`` except ``store``. """ base_sig = inspect.signature(create_array) meth_sig = inspect.signature(AsyncGroup.create_array) # ignore keyword arguments that are either missing or have different semantics when # create_array is invoked as a group method ignore_kwargs = {"zarr_format", "store", "name"} # TODO: make this test stronger. 
right now, it only checks that all the parameters in the # function signature are used in the method signature. we can be more strict and check that # the method signature uses no extra parameters. base_params = dict(filter(lambda kv: kv[0] not in ignore_kwargs, base_sig.parameters.items())) assert (set(base_params.items()) - set(meth_sig.parameters.items())) == set() async def test_sharding_coordinate_selection() -> None: store = MemoryStore() g = zarr.open_group(store, mode="w") arr = g.create_array( name="a", shape=(2, 3, 4), chunks=(1, 2, 2), overwrite=True, dtype=np.float32, shards=(2, 4, 4), ) arr[:] = np.arange(2 * 3 * 4).reshape((2, 3, 4)) result = arr[1, [0, 1]] # type: ignore[index] assert isinstance(result, NDArrayLike) assert (result == np.array([[12, 13, 14, 15], [16, 17, 18, 19]])).all() @pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) def test_array_repr(store: Store) -> None: shape = (2, 3, 4) dtype = "uint8" arr = zarr.create_array(store, shape=shape, dtype=dtype) assert str(arr) == f"" class UnknownObjectDtype(UTF8Base[np.dtypes.ObjectDType]): object_codec_id = "unknown" # type: ignore[assignment] def to_native_dtype(self) -> np.dtypes.ObjectDType: """ Create a NumPy object dtype from this ZDType. Returns ------- np.dtypes.ObjectDType The NumPy object dtype. """ return np.dtype("O") # type: ignore[return-value] @pytest.mark.parametrize( "dtype", [VariableLengthUTF8(), VariableLengthBytes(), UnknownObjectDtype()] ) def test_chunk_encoding_no_object_codec_errors(dtype: ZDType[Any, Any]) -> None: """ Test that a ValueError is raised when checking the chunk encoding for a v2 array with a data type that requires an object codec, but where no object codec is specified """ if isinstance(dtype, VariableLengthUTF8): codec_name = "the numcodecs.VLenUTF8 codec" elif isinstance(dtype, VariableLengthBytes): codec_name = "the numcodecs.VLenBytes codec" else: codec_name = f"an unknown object codec with id {dtype.object_codec_id!r}" # type: ignore[attr-defined] msg = ( f"Data type {dtype} requires {codec_name}, " "but no such codec was specified in the filters or compressor parameters for " "this array. " ) with pytest.raises(ValueError, match=re.escape(msg)): _parse_chunk_encoding_v2(filters=None, compressor=None, dtype=dtype) def test_unknown_object_codec_default_serializer_v3() -> None: """ Test that we get a ValueError when trying to create the default serializer for a data type that requires an unknown object codec """ dtype = UnknownObjectDtype() msg = f"Data type {dtype} requires an unknown object codec: {dtype.object_codec_id!r}." with pytest.raises(ValueError, match=re.escape(msg)): default_serializer_v3(dtype) def test_unknown_object_codec_default_filters_v2() -> None: """ Test that we get a ValueError when trying to create the default filters for a data type that requires an unknown object codec """ dtype = UnknownObjectDtype() msg = f"Data type {dtype} requires an unknown object codec: {dtype.object_codec_id!r}." with pytest.raises(ValueError, match=re.escape(msg)): default_filters_v2(dtype) @pytest.mark.parametrize( ("array_shape", "shard_shape", "chunk_shape"), [ ((10,), None, (1,)), ((10,), (1,), (1,)), ((30, 10), None, (2, 5)), ((30, 10), (4, 10), (2, 5)), ], ) def test_chunk_grid_shape( array_shape: tuple[int, ...], shard_shape: tuple[int, ...]
| None, chunk_shape: tuple[int, ...], zarr_format: ZarrFormat, ) -> None: """ Test that the shape of the chunk grid and the shard grid are correctly indicated """ if zarr_format == 2 and shard_shape is not None: with pytest.raises( ValueError, match="Zarr format 2 arrays can only be created with `shard_shape` set to `None`.", ): arr = zarr.create_array( {}, dtype="uint8", shape=array_shape, chunks=chunk_shape, shards=shard_shape, zarr_format=zarr_format, ) pytest.skip("Zarr format 2 arrays can only be created with `shard_shape` set to `None`.") else: arr = zarr.create_array( {}, dtype="uint8", shape=array_shape, chunks=chunk_shape, shards=shard_shape, zarr_format=zarr_format, ) chunk_grid_shape = tuple(ceildiv(a, b) for a, b in zip(array_shape, chunk_shape, strict=True)) if shard_shape is None: _shard_shape = chunk_shape else: _shard_shape = shard_shape shard_grid_shape = tuple(ceildiv(a, b) for a, b in zip(array_shape, _shard_shape, strict=True)) assert arr._chunk_grid_shape == chunk_grid_shape assert arr.cdata_shape == chunk_grid_shape assert arr.async_array.cdata_shape == chunk_grid_shape assert arr._shard_grid_shape == shard_grid_shape assert arr._nshards == np.prod(shard_grid_shape) @pytest.mark.parametrize( ("array_shape", "shard_shape", "chunk_shape"), [((10,), None, (1,)), ((30, 10), None, (2, 5))] ) def test_iter_chunk_coords( array_shape: tuple[int, ...], shard_shape: tuple[int, ...] | None, chunk_shape: tuple[int, ...], zarr_format: ZarrFormat, ) -> None: """ Test that we can use the various invocations of iter_chunk_coords to iterate over the coordinates of the origin of each chunk. """ arr = zarr.create_array( {}, dtype="uint8", shape=array_shape, chunks=chunk_shape, shards=shard_shape, zarr_format=zarr_format, ) expected = tuple(_iter_grid(arr._shard_grid_shape)) observed = tuple(_iter_chunk_coords(arr)) assert observed == expected assert observed == tuple(arr._iter_chunk_coords()) assert observed == tuple(arr.async_array._iter_chunk_coords()) @pytest.mark.parametrize( ("array_shape", "shard_shape", "chunk_shape"), [((10,), (1,), (1,)), ((10,), None, (1,)), ((30, 10), (10, 5), (2, 5))], ) def test_iter_shard_coords( array_shape: tuple[int, ...], shard_shape: tuple[int, ...] | None, chunk_shape: tuple[int, ...], zarr_format: ZarrFormat, ) -> None: """ Test that we can use the various invocations of iter_shard_coords to iterate over the coordinates of the origin of each shard. """ if zarr_format == 2 and shard_shape is not None: pytest.skip("Zarr format 2 does not support shard shape.") arr = zarr.create_array( {}, dtype="uint8", shape=array_shape, chunks=chunk_shape, shards=shard_shape, zarr_format=zarr_format, ) expected = tuple(_iter_grid(arr._shard_grid_shape)) observed = tuple(_iter_shard_coords(arr)) assert observed == expected assert observed == tuple(arr._iter_shard_coords()) assert observed == tuple(arr.async_array._iter_shard_coords()) @pytest.mark.parametrize( ("array_shape", "shard_shape", "chunk_shape"), [((10,), (1,), (1,)), ((10,), None, (1,)), ((30, 10), (10, 5), (2, 5))], ) def test_iter_shard_keys( array_shape: tuple[int, ...], shard_shape: tuple[int, ...] | None, chunk_shape: tuple[int, ...], zarr_format: ZarrFormat, ) -> None: """ Test that we can use the various invocations of iter_shard_keys to iterate over the stored keys of the shards of an array. 
""" if zarr_format == 2 and shard_shape is not None: pytest.skip("Zarr format 2 does not support shard shape.") arr = zarr.create_array( {}, dtype="uint8", shape=array_shape, chunks=chunk_shape, shards=shard_shape, zarr_format=zarr_format, ) expected = tuple( arr.metadata.encode_chunk_key(key) for key in _iter_grid(arr._shard_grid_shape) ) observed = tuple(_iter_shard_keys(arr)) assert observed == expected assert observed == tuple(arr._iter_shard_keys()) assert observed == tuple(arr.async_array._iter_shard_keys()) @pytest.mark.parametrize( ("array_shape", "shard_shape", "chunk_shape"), [((10,), None, (1,)), ((10,), (1,), (1,)), ((30, 10), (10, 5), (2, 5))], ) def test_iter_shard_regions( array_shape: tuple[int, ...], shard_shape: tuple[int, ...] | None, chunk_shape: tuple[int, ...], zarr_format: ZarrFormat, ) -> None: """ Test that we can use the various invocations of iter_shard_regions to iterate over the regions spanned by the shards of an array. """ if zarr_format == 2 and shard_shape is not None: pytest.skip("Zarr format 2 does not support shard shape.") arr = zarr.create_array( {}, dtype="uint8", shape=array_shape, chunks=chunk_shape, shards=shard_shape, zarr_format=zarr_format, ) if shard_shape is None: _shard_shape = chunk_shape else: _shard_shape = shard_shape expected = tuple(_iter_regions(arr.shape, _shard_shape)) observed = tuple(_iter_shard_regions(arr)) assert observed == expected assert observed == tuple(arr._iter_shard_regions()) assert observed == tuple(arr.async_array._iter_shard_regions()) @pytest.mark.parametrize( ("array_shape", "shard_shape", "chunk_shape"), [((10,), None, (1,)), ((30, 10), None, (2, 5))] ) def test_iter_chunk_regions( array_shape: tuple[int, ...], shard_shape: tuple[int, ...] | None, chunk_shape: tuple[int, ...], zarr_format: ZarrFormat, ) -> None: """ Test that we can use the various invocations of iter_chunk_regions to iterate over the regions spanned by the chunks of an array. """ arr = zarr.create_array( {}, dtype="uint8", shape=array_shape, chunks=chunk_shape, shards=shard_shape, zarr_format=zarr_format, ) expected = tuple(_iter_regions(arr.shape, chunk_shape)) observed = tuple(_iter_chunk_regions(arr)) assert observed == expected assert observed == tuple(arr._iter_chunk_regions()) assert observed == tuple(arr.async_array._iter_chunk_regions()) @pytest.mark.parametrize("num_shards", [1, 3]) @pytest.mark.parametrize("array_type", ["numpy", "zarr"]) def test_create_array_with_data_num_gets( num_shards: int, array_type: Literal["numpy", "zarr"] ) -> None: """ Test that creating an array with data only invokes a single get request per stored object """ store = LoggingStore(store=MemoryStore()) chunk_shape = (1,) shard_shape = (100,) shape = (shard_shape[0] * num_shards,) data: AnyArray | npt.NDArray[np.int64] if array_type == "numpy": data = np.zeros(shape[0], dtype="int64") else: data = zarr.zeros(shape, dtype="int64") zarr.create_array(store, data=data, chunks=chunk_shape, shards=shard_shape, fill_value=-1) # type: ignore[arg-type] # one get for the metadata and one per shard. 
# Note: we don't actually need one get per shard, but this is the current behavior assert store.counter["get"] == 1 + num_shards zarr-python-3.1.5/tests/test_attributes.py000066400000000000000000000053331511007055700207630ustar00rootroot00000000000000import json from typing import TYPE_CHECKING, Any import numpy as np import pytest import zarr.core import zarr.core.attributes import zarr.storage from tests.conftest import deep_nan_equal from zarr.core.common import ZarrFormat if TYPE_CHECKING: from zarr.types import AnyArray @pytest.mark.parametrize("zarr_format", [2, 3]) @pytest.mark.parametrize( "data", [{"inf": np.inf, "-inf": -np.inf, "nan": np.nan}, {"a": 3, "c": 4}] ) def test_put(data: dict[str, Any], zarr_format: ZarrFormat) -> None: store = zarr.storage.MemoryStore() attrs = zarr.core.attributes.Attributes(zarr.Group.from_store(store, zarr_format=zarr_format)) attrs.put(data) expected = json.loads(json.dumps(data, allow_nan=True)) assert deep_nan_equal(dict(attrs), expected) def test_asdict() -> None: store = zarr.storage.MemoryStore() attrs = zarr.core.attributes.Attributes( zarr.Group.from_store(store, attributes={"a": 1, "b": 2}) ) result = attrs.asdict() assert result == {"a": 1, "b": 2} def test_update_attributes_preserves_existing() -> None: """ Test that `update_attributes` only updates the specified attributes and preserves existing ones. """ store = zarr.storage.MemoryStore() z = zarr.create(10, store=store, overwrite=True) z.attrs["a"] = [] z.attrs["b"] = 3 assert dict(z.attrs) == {"a": [], "b": 3} z.update_attributes({"a": [3, 4], "c": 4}) assert dict(z.attrs) == {"a": [3, 4], "b": 3, "c": 4} def test_update_empty_attributes() -> None: """ Ensure updating when initial attributes are empty works. """ store = zarr.storage.MemoryStore() z = zarr.create(10, store=store, overwrite=True) assert dict(z.attrs) == {} z.update_attributes({"a": [3, 4], "c": 4}) assert dict(z.attrs) == {"a": [3, 4], "c": 4} def test_update_no_changes() -> None: """ Ensure updating when no new or modified attributes does not alter existing ones. 
""" store = zarr.storage.MemoryStore() z = zarr.create(10, store=store, overwrite=True) z.attrs["a"] = [] z.attrs["b"] = 3 z.update_attributes({}) assert dict(z.attrs) == {"a": [], "b": 3} @pytest.mark.parametrize("group", [True, False]) def test_del_works(group: bool) -> None: store = zarr.storage.MemoryStore() z: zarr.Group | AnyArray if group: z = zarr.create_group(store) else: z = zarr.create_array(store=store, shape=10, dtype=int) assert dict(z.attrs) == {} z.update_attributes({"a": [3, 4], "c": 4}) del z.attrs["a"] assert dict(z.attrs) == {"c": 4} z2: zarr.Group | AnyArray if group: z2 = zarr.open_group(store) else: z2 = zarr.open_array(store) assert dict(z2.attrs) == {"c": 4} zarr-python-3.1.5/tests/test_buffer.py000066400000000000000000000204201511007055700200400ustar00rootroot00000000000000from __future__ import annotations from typing import TYPE_CHECKING, Literal import numpy as np import pytest import zarr from zarr.abc.buffer import ArrayLike, BufferPrototype, NDArrayLike from zarr.buffer import cpu, gpu from zarr.codecs.blosc import BloscCodec from zarr.codecs.crc32c_ import Crc32cCodec from zarr.codecs.gzip import GzipCodec from zarr.codecs.transpose import TransposeCodec from zarr.codecs.zstd import ZstdCodec from zarr.errors import ZarrUserWarning from zarr.storage import MemoryStore, StorePath from zarr.testing.buffer import ( NDBufferUsingTestNDArrayLike, StoreExpectingTestBuffer, TestBuffer, TestNDArrayLike, ) from zarr.testing.utils import gpu_mark, gpu_test, skip_if_no_gpu if TYPE_CHECKING: import types try: import cupy as cp except ImportError: cp = None import zarr.api.asynchronous if TYPE_CHECKING: import types def test_nd_array_like(xp: types.ModuleType) -> None: ary = xp.arange(10) assert isinstance(ary, ArrayLike) assert isinstance(ary, NDArrayLike) @pytest.mark.asyncio async def test_async_array_prototype() -> None: """Test the use of a custom buffer prototype""" expect = np.zeros((9, 9), dtype="uint16", order="F") a = await zarr.api.asynchronous.create_array( StorePath(StoreExpectingTestBuffer()) / "test_async_array_prototype", shape=expect.shape, chunks=(5, 5), dtype=expect.dtype, fill_value=0, ) expect[1:4, 3:6] = np.ones((3, 3)) my_prototype = BufferPrototype(buffer=TestBuffer, nd_buffer=NDBufferUsingTestNDArrayLike) await a.setitem( selection=(slice(1, 4), slice(3, 6)), value=np.ones((3, 3)), prototype=my_prototype, ) got = await a.getitem(selection=(slice(0, 9), slice(0, 9)), prototype=my_prototype) # ignoring a mypy error here that TestNDArrayLike doesn't meet the NDArrayLike protocol # The test passes, so it clearly does. 
assert isinstance(got, TestNDArrayLike) assert np.array_equal(expect, got) # type: ignore[unreachable] @gpu_test @pytest.mark.asyncio async def test_async_array_gpu_prototype() -> None: """Test the use of the GPU buffer prototype""" expect = cp.zeros((9, 9), dtype="uint16", order="F") a = await zarr.api.asynchronous.create_array( StorePath(MemoryStore()) / "test_async_array_gpu_prototype", shape=expect.shape, chunks=(5, 5), dtype=expect.dtype, fill_value=0, ) expect[1:4, 3:6] = cp.ones((3, 3)) await a.setitem( selection=(slice(1, 4), slice(3, 6)), value=cp.ones((3, 3)), prototype=gpu.buffer_prototype, ) got = await a.getitem(selection=(slice(0, 9), slice(0, 9)), prototype=gpu.buffer_prototype) assert isinstance(got, cp.ndarray) assert cp.array_equal(expect, got) @pytest.mark.asyncio async def test_codecs_use_of_prototype() -> None: expect = np.zeros((10, 10), dtype="uint16", order="F") a = await zarr.api.asynchronous.create_array( StorePath(StoreExpectingTestBuffer()) / "test_codecs_use_of_prototype", shape=expect.shape, chunks=(5, 5), dtype=expect.dtype, fill_value=0, compressors=[BloscCodec(), Crc32cCodec(), GzipCodec(), ZstdCodec()], filters=[TransposeCodec(order=(1, 0))], ) expect[:] = np.arange(100).reshape(10, 10) my_prototype = BufferPrototype(buffer=TestBuffer, nd_buffer=NDBufferUsingTestNDArrayLike) await a.setitem( selection=(slice(0, 10), slice(0, 10)), value=expect[:], prototype=my_prototype, ) got = await a.getitem(selection=(slice(0, 10), slice(0, 10)), prototype=my_prototype) # ignoring a mypy error here that TestNDArrayLike doesn't meet the NDArrayLike protocol # The test passes, so it clearly does. assert isinstance(got, TestNDArrayLike) assert np.array_equal(expect, got) # type: ignore[unreachable] @gpu_test @pytest.mark.asyncio async def test_codecs_use_of_gpu_prototype() -> None: expect = cp.zeros((10, 10), dtype="uint16", order="F") a = await zarr.api.asynchronous.create_array( StorePath(MemoryStore()) / "test_codecs_use_of_gpu_prototype", shape=expect.shape, chunks=(5, 5), dtype=expect.dtype, fill_value=0, compressors=[BloscCodec(), Crc32cCodec(), GzipCodec(), ZstdCodec()], filters=[TransposeCodec(order=(1, 0))], ) expect[:] = cp.arange(100).reshape(10, 10) msg = "Creating a zarr.buffer.gpu.Buffer with an array that does not support the __cuda_array_interface__ for zero-copy transfers, falling back to slow copy based path" with pytest.warns(ZarrUserWarning, match=msg): await a.setitem( selection=(slice(0, 10), slice(0, 10)), value=expect[:], prototype=gpu.buffer_prototype, ) with pytest.warns(ZarrUserWarning, match=msg): got = await a.getitem( selection=(slice(0, 10), slice(0, 10)), prototype=gpu.buffer_prototype ) assert isinstance(got, cp.ndarray) assert cp.array_equal(expect, got) @gpu_test @pytest.mark.asyncio async def test_sharding_use_of_gpu_prototype() -> None: with zarr.config.enable_gpu(): expect = cp.zeros((10, 10), dtype="uint16", order="F") a = await zarr.api.asynchronous.create_array( StorePath(MemoryStore()) / "test_codecs_use_of_gpu_prototype", shape=expect.shape, chunks=(5, 5), shards=(10, 10), dtype=expect.dtype, fill_value=0, ) expect[:] = cp.arange(100).reshape(10, 10) msg = "Creating a zarr.buffer.gpu.Buffer with an array that does not support the __cuda_array_interface__ for zero-copy transfers, falling back to slow copy based path" with pytest.warns(ZarrUserWarning, match=msg): await a.setitem( selection=(slice(0, 10), slice(0, 10)), value=expect[:], prototype=gpu.buffer_prototype, ) with pytest.warns(ZarrUserWarning, match=msg): got = await 
a.getitem( selection=(slice(0, 10), slice(0, 10)), prototype=gpu.buffer_prototype ) assert isinstance(got, cp.ndarray) assert cp.array_equal(expect, got) def test_numpy_buffer_prototype() -> None: buffer = cpu.buffer_prototype.buffer.create_zero_length() ndbuffer = cpu.buffer_prototype.nd_buffer.create(shape=(1, 2), dtype=np.dtype("int64")) assert isinstance(buffer.as_array_like(), np.ndarray) assert isinstance(ndbuffer.as_ndarray_like(), np.ndarray) with pytest.raises(ValueError, match="Buffer does not contain a single scalar value"): ndbuffer.as_scalar() @gpu_test def test_gpu_buffer_prototype() -> None: buffer = gpu.buffer_prototype.buffer.create_zero_length() ndbuffer = gpu.buffer_prototype.nd_buffer.create(shape=(1, 2), dtype=cp.dtype("int64")) assert isinstance(buffer.as_array_like(), cp.ndarray) assert isinstance(ndbuffer.as_ndarray_like(), cp.ndarray) with pytest.raises(ValueError, match="Buffer does not contain a single scalar value"): ndbuffer.as_scalar() # TODO: the same test for other buffer classes def test_cpu_buffer_as_scalar() -> None: buf = cpu.buffer_prototype.nd_buffer.create(shape=(), dtype="int64") assert buf.as_scalar() == buf.as_ndarray_like()[()] # type: ignore[index] @pytest.mark.parametrize( "prototype", [ cpu.buffer_prototype, pytest.param( gpu.buffer_prototype, marks=[gpu_mark, skip_if_no_gpu], ), BufferPrototype( buffer=cpu.Buffer, nd_buffer=NDBufferUsingTestNDArrayLike, ), ], ) @pytest.mark.parametrize( "shape", [ (1, 2), (1, 2, 3), ], ) @pytest.mark.parametrize("dtype", ["int32", "float64"]) @pytest.mark.parametrize("order", ["C", "F"]) def test_empty( prototype: BufferPrototype, shape: tuple[int, ...], dtype: str, order: Literal["C", "F"] ) -> None: buf = prototype.nd_buffer.empty(shape=shape, dtype=dtype, order=order) result = buf.as_ndarray_like() assert result.shape == shape assert result.dtype == dtype if order == "C": assert result.flags.c_contiguous # type: ignore[attr-defined] else: assert result.flags.f_contiguous # type: ignore[attr-defined] zarr-python-3.1.5/tests/test_chunk_grids.py000066400000000000000000000036001511007055700210700ustar00rootroot00000000000000from typing import Any import numpy as np import pytest from zarr.core.chunk_grids import _guess_chunks, normalize_chunks @pytest.mark.parametrize( "shape", [(0,), (0,) * 2, (1, 2, 0, 4, 5), (10, 0), (10,), (100,) * 3, (1000000,), (10000,) * 2] ) @pytest.mark.parametrize("itemsize", [1, 2, 4]) def test_guess_chunks(shape: tuple[int, ...], itemsize: int) -> None: chunks = _guess_chunks(shape, itemsize) chunk_size = np.prod(chunks) * itemsize assert isinstance(chunks, tuple) assert len(chunks) == len(shape) assert chunk_size < (64 * 1024 * 1024) # doesn't make any sense to allow chunks to have zero length dimension assert all(0 < c <= max(s, 1) for c, s in zip(chunks, shape, strict=False)) @pytest.mark.parametrize( ("chunks", "shape", "typesize", "expected"), [ ((10,), (100,), 1, (10,)), ([10], (100,), 1, (10,)), (10, (100,), 1, (10,)), ((10, 10), (100, 10), 1, (10, 10)), (10, (100, 10), 1, (10, 10)), ((10, None), (100, 10), 1, (10, 10)), (30, (100, 20, 10), 1, (30, 30, 30)), ((30,), (100, 20, 10), 1, (30, 20, 10)), ((30, None), (100, 20, 10), 1, (30, 20, 10)), ((30, None, None), (100, 20, 10), 1, (30, 20, 10)), ((30, 20, None), (100, 20, 10), 1, (30, 20, 10)), ((30, 20, 10), (100, 20, 10), 1, (30, 20, 10)), # auto chunking (None, (100,), 1, (100,)), (-1, (100,), 1, (100,)), ((30, -1, None), (100, 20, 10), 1, (30, 20, 10)), ], ) def test_normalize_chunks( chunks: Any, shape: tuple[int, ...], 
typesize: int, expected: tuple[int, ...] ) -> None: assert expected == normalize_chunks(chunks, shape, typesize) def test_normalize_chunks_errors() -> None: with pytest.raises(ValueError): normalize_chunks("foo", (100,), 1) with pytest.raises(ValueError): normalize_chunks((100, 10), (100,), 1) zarr-python-3.1.5/tests/test_cli/000077500000000000000000000000001511007055700167665ustar00rootroot00000000000000zarr-python-3.1.5/tests/test_cli/conftest.py000066400000000000000000000104231511007055700211650ustar00rootroot00000000000000from pathlib import Path from typing import Any, Literal import pytest import zarr from zarr.abc.store import Store from zarr.core.common import ZarrFormat def create_nested_zarr( store: Store, attributes: dict[str, Any] | None = None, separator: Literal[".", "/"] = ".", zarr_format: ZarrFormat = 2, ) -> list[str]: """Create a zarr with nested groups / arrays for testing, returning the paths to all.""" if attributes is None: attributes = {"baz": 42, "qux": [1, 4, 7, 12]} # 3 levels of nested groups group_0 = zarr.create_group(store=store, zarr_format=zarr_format, attributes=attributes) group_1 = group_0.create_group(name="group_1", attributes=attributes) group_2 = group_1.create_group(name="group_2", attributes=attributes) paths = [group_0.path, group_1.path, group_2.path] # 1 array per group for i, group in enumerate([group_0, group_1, group_2]): array = group.create_array( name=f"array_{i}", shape=(10, 10), chunks=(5, 5), dtype="uint16", attributes=attributes, chunk_key_encoding={"name": "v2", "separator": separator}, ) array[:] = 1 paths.append(array.path) return paths @pytest.fixture def expected_paths() -> list[Path]: """Expected paths for create_nested_zarr, with no metadata files or chunks""" return [ Path("array_0"), Path("group_1"), Path("group_1/array_1"), Path("group_1/group_2"), Path("group_1/group_2/array_2"), ] @pytest.fixture def expected_chunks() -> list[Path]: """Expected chunks for create_nested_zarr""" return [ Path("array_0/0.0"), Path("array_0/0.1"), Path("array_0/1.0"), Path("array_0/1.1"), Path("group_1/array_1/0.0"), Path("group_1/array_1/0.1"), Path("group_1/array_1/1.0"), Path("group_1/array_1/1.1"), Path("group_1/group_2/array_2/0.0"), Path("group_1/group_2/array_2/0.1"), Path("group_1/group_2/array_2/1.0"), Path("group_1/group_2/array_2/1.1"), ] @pytest.fixture def expected_v3_metadata() -> list[Path]: """Expected v3 metadata for create_nested_zarr""" return sorted( [ Path("zarr.json"), Path("array_0/zarr.json"), Path("group_1/zarr.json"), Path("group_1/array_1/zarr.json"), Path("group_1/group_2/zarr.json"), Path("group_1/group_2/array_2/zarr.json"), ] ) @pytest.fixture def expected_v2_metadata() -> list[Path]: """Expected v2 metadata for create_nested_zarr""" return sorted( [ Path(".zgroup"), Path(".zattrs"), Path("array_0/.zarray"), Path("array_0/.zattrs"), Path("group_1/.zgroup"), Path("group_1/.zattrs"), Path("group_1/array_1/.zarray"), Path("group_1/array_1/.zattrs"), Path("group_1/group_2/.zgroup"), Path("group_1/group_2/.zattrs"), Path("group_1/group_2/array_2/.zarray"), Path("group_1/group_2/array_2/.zattrs"), ] ) @pytest.fixture def expected_paths_no_metadata( expected_paths: list[Path], expected_chunks: list[Path] ) -> list[Path]: return sorted(expected_paths + expected_chunks) @pytest.fixture def expected_paths_v3_metadata( expected_paths: list[Path], expected_chunks: list[Path], expected_v3_metadata: list[Path] ) -> list[Path]: return sorted(expected_paths + expected_chunks + expected_v3_metadata) @pytest.fixture def 
expected_paths_v3_metadata_no_chunks( expected_paths: list[Path], expected_v3_metadata: list[Path] ) -> list[Path]: return sorted(expected_paths + expected_v3_metadata) @pytest.fixture def expected_paths_v2_metadata( expected_paths: list[Path], expected_chunks: list[Path], expected_v2_metadata: list[Path] ) -> list[Path]: return sorted(expected_paths + expected_chunks + expected_v2_metadata) @pytest.fixture def expected_paths_v2_v3_metadata( expected_paths: list[Path], expected_chunks: list[Path], expected_v2_metadata: list[Path], expected_v3_metadata: list[Path], ) -> list[Path]: return sorted(expected_paths + expected_chunks + expected_v2_metadata + expected_v3_metadata) zarr-python-3.1.5/tests/test_cli/test_migrate_v3.py000066400000000000000000000552241511007055700224470ustar00rootroot00000000000000import lzma from pathlib import Path from typing import Literal, cast import numcodecs import numcodecs.abc import numpy as np import pytest import zarr from tests.test_cli.conftest import create_nested_zarr from zarr.abc.codec import Codec from zarr.codecs.blosc import BloscCodec from zarr.codecs.bytes import BytesCodec from zarr.codecs.gzip import GzipCodec from zarr.codecs.numcodecs import LZMA, Delta from zarr.codecs.transpose import TransposeCodec from zarr.codecs.zstd import ZstdCodec from zarr.core.chunk_grids import RegularChunkGrid from zarr.core.chunk_key_encodings import V2ChunkKeyEncoding from zarr.core.common import JSON, ZarrFormat from zarr.core.dtype.npy.int import UInt8, UInt16 from zarr.core.group import Group, GroupMetadata from zarr.core.metadata.v3 import ArrayV3Metadata from zarr.storage._local import LocalStore from zarr.types import AnyArray typer_testing = pytest.importorskip( "typer.testing", reason="optional cli dependencies aren't installed" ) cli = pytest.importorskip("zarr._cli.cli", reason="optional cli dependencies aren't installed") runner = typer_testing.CliRunner() NUMCODECS_USER_WARNING = "Numcodecs codecs are not in the Zarr version 3 specification and may not be supported by other zarr implementations." 
def test_migrate_array(local_store: LocalStore) -> None: shape = (10, 10) chunks = (10, 10) dtype = "uint16" compressors = numcodecs.Blosc(cname="zstd", clevel=3, shuffle=1) fill_value = 2 attributes = cast(dict[str, JSON], {"baz": 42, "qux": [1, 4, 7, 12]}) zarr.create_array( store=local_store, shape=shape, chunks=chunks, dtype=dtype, compressors=compressors, zarr_format=2, fill_value=fill_value, attributes=attributes, ) result = runner.invoke(cli.app, ["migrate", "v3", str(local_store.root)]) assert result.exit_code == 0 assert (local_store.root / "zarr.json").exists() zarr_array = zarr.open(local_store.root, zarr_format=3) expected_metadata = ArrayV3Metadata( shape=shape, data_type=UInt16(endianness="little"), chunk_grid=RegularChunkGrid(chunk_shape=chunks), chunk_key_encoding=V2ChunkKeyEncoding(separator="."), fill_value=fill_value, codecs=( BytesCodec(endian="little"), BloscCodec(typesize=2, cname="zstd", clevel=3, shuffle="shuffle", blocksize=0), ), attributes=attributes, dimension_names=None, storage_transformers=None, ) assert zarr_array.metadata == expected_metadata def test_migrate_group(local_store: LocalStore) -> None: attributes = {"baz": 42, "qux": [1, 4, 7, 12]} zarr.create_group(store=local_store, zarr_format=2, attributes=attributes) result = runner.invoke(cli.app, ["migrate", "v3", str(local_store.root)]) assert result.exit_code == 0 assert (local_store.root / "zarr.json").exists() zarr_array = zarr.open(local_store.root, zarr_format=3) expected_metadata = GroupMetadata( attributes=attributes, zarr_format=3, consolidated_metadata=None ) assert zarr_array.metadata == expected_metadata @pytest.mark.parametrize("separator", [".", "/"]) def test_migrate_nested_groups_and_arrays_in_place( local_store: LocalStore, separator: str, expected_v3_metadata: list[Path] ) -> None: """Test that zarr.json are made at the correct points in a hierarchy of groups and arrays (including when there are additional dirs due to using a / separator)""" attributes = {"baz": 42, "qux": [1, 4, 7, 12]} paths = create_nested_zarr(local_store, attributes=attributes, separator=separator) result = runner.invoke(cli.app, ["migrate", "v3", str(local_store.root)]) assert result.exit_code == 0 zarr_json_paths = sorted(local_store.root.rglob("zarr.json")) expected_zarr_json_paths = [local_store.root / p for p in expected_v3_metadata] assert zarr_json_paths == expected_zarr_json_paths # Check converted zarr can be opened + metadata accessed at all levels zarr_array = zarr.open(local_store.root, zarr_format=3) for path in paths: zarr_v3 = cast(AnyArray | Group, zarr_array[path]) metadata = zarr_v3.metadata assert metadata.zarr_format == 3 assert metadata.attributes == attributes @pytest.mark.parametrize("separator", [".", "/"]) async def test_migrate_nested_groups_and_arrays_separate_location( tmp_path: Path, separator: str, expected_v2_metadata: list[Path], expected_v3_metadata: list[Path], ) -> None: """Test that zarr.json are made at the correct paths, when saving to a separate output location.""" input_zarr_path = tmp_path / "input.zarr" output_zarr_path = tmp_path / "output.zarr" local_store = await LocalStore.open(str(input_zarr_path)) create_nested_zarr(local_store, separator=separator) result = runner.invoke(cli.app, ["migrate", "v3", str(input_zarr_path), str(output_zarr_path)]) assert result.exit_code == 0 # Files in input zarr should be unchanged i.e. 
still v2 only zarr_json_paths = sorted(input_zarr_path.rglob("zarr.json")) assert len(zarr_json_paths) == 0 paths = [ path for path in input_zarr_path.rglob("*") if path.stem in [".zarray", ".zgroup", ".zattrs"] ] expected_paths = [input_zarr_path / p for p in expected_v2_metadata] assert sorted(paths) == expected_paths # Files in output zarr should only contain v3 metadata zarr_json_paths = sorted(output_zarr_path.rglob("zarr.json")) expected_zarr_json_paths = [output_zarr_path / p for p in expected_v3_metadata] assert zarr_json_paths == expected_zarr_json_paths def test_remove_v2_metadata_option_in_place( local_store: LocalStore, expected_paths_v3_metadata: list[Path] ) -> None: create_nested_zarr(local_store) # convert v2 metadata to v3, then remove v2 metadata result = runner.invoke( cli.app, ["migrate", "v3", str(local_store.root), "--remove-v2-metadata"] ) assert result.exit_code == 0 paths = sorted(local_store.root.rglob("*")) expected_paths = [local_store.root / p for p in expected_paths_v3_metadata] assert paths == expected_paths async def test_remove_v2_metadata_option_separate_location( tmp_path: Path, expected_paths_v2_metadata: list[Path], expected_paths_v3_metadata_no_chunks: list[Path], ) -> None: """Check that when using --remove-v2-metadata with a separate output location, no v2 metadata is removed from the input location.""" input_zarr_path = tmp_path / "input.zarr" output_zarr_path = tmp_path / "output.zarr" local_store = await LocalStore.open(str(input_zarr_path)) create_nested_zarr(local_store) result = runner.invoke( cli.app, ["migrate", "v3", str(input_zarr_path), str(output_zarr_path), "--remove-v2-metadata"], ) assert result.exit_code == 0 # input image should be unchanged paths = sorted(input_zarr_path.rglob("*")) expected_paths = [input_zarr_path / p for p in expected_paths_v2_metadata] assert paths == expected_paths # output image should be only v3 metadata paths = sorted(output_zarr_path.rglob("*")) expected_paths = [output_zarr_path / p for p in expected_paths_v3_metadata_no_chunks] assert paths == expected_paths def test_overwrite_option_in_place( local_store: LocalStore, expected_paths_v2_v3_metadata: list[Path] ) -> None: create_nested_zarr(local_store) # add v3 metadata in place result = runner.invoke(cli.app, ["migrate", "v3", str(local_store.root)]) assert result.exit_code == 0 # check that v3 metadata can be overwritten with --overwrite result = runner.invoke(cli.app, ["migrate", "v3", str(local_store.root), "--overwrite"]) assert result.exit_code == 0 paths = sorted(local_store.root.rglob("*")) expected_paths = [local_store.root / p for p in expected_paths_v2_v3_metadata] assert paths == expected_paths async def test_overwrite_option_separate_location( tmp_path: Path, expected_paths_v2_metadata: list[Path], expected_paths_v3_metadata_no_chunks: list[Path], ) -> None: input_zarr_path = tmp_path / "input.zarr" output_zarr_path = tmp_path / "output.zarr" local_store = await LocalStore.open(str(input_zarr_path)) create_nested_zarr(local_store) # create v3 metadata at output_zarr_path result = runner.invoke( cli.app, ["migrate", "v3", str(input_zarr_path), str(output_zarr_path)], ) assert result.exit_code == 0 # re-run with --overwrite option result = runner.invoke( cli.app, ["migrate", "v3", str(input_zarr_path), str(output_zarr_path), "--overwrite", "--force"], ) assert result.exit_code == 0 # original image should be un-changed paths = sorted(input_zarr_path.rglob("*")) expected_paths = [input_zarr_path / p for p in expected_paths_v2_metadata] assert 
paths == expected_paths # output image is only v3 metadata paths = sorted(output_zarr_path.rglob("*")) expected_paths = [output_zarr_path / p for p in expected_paths_v3_metadata_no_chunks] assert paths == expected_paths @pytest.mark.parametrize("separator", [".", "/"]) def test_migrate_sub_group( local_store: LocalStore, separator: str, expected_v3_metadata: list[Path] ) -> None: """Test that only arrays/groups within group_1 are converted (+ no other files in store)""" create_nested_zarr(local_store, separator=separator) group_path = local_store.root / "group_1" result = runner.invoke(cli.app, ["migrate", "v3", str(group_path)]) assert result.exit_code == 0 zarr_json_paths = sorted(local_store.root.rglob("zarr.json")) expected_zarr_json_paths = [ local_store.root / p for p in expected_v3_metadata if group_path in (local_store.root / p).parents ] assert zarr_json_paths == expected_zarr_json_paths @pytest.mark.parametrize( ("compressor_v2", "compressor_v3"), [ ( numcodecs.Blosc(cname="zstd", clevel=3, shuffle=1), BloscCodec(typesize=2, cname="zstd", clevel=3, shuffle="shuffle", blocksize=0), ), (numcodecs.Zstd(level=3), ZstdCodec(level=3)), (numcodecs.GZip(level=3), GzipCodec(level=3)), ], ids=["blosc", "zstd", "gzip"], ) def test_migrate_compressor( local_store: LocalStore, compressor_v2: numcodecs.abc.Codec, compressor_v3: Codec ) -> None: zarr_array = zarr.create_array( store=local_store, shape=(10, 10), chunks=(10, 10), dtype="uint16", compressors=compressor_v2, zarr_format=2, fill_value=0, ) zarr_array[:] = 1 result = runner.invoke(cli.app, ["migrate", "v3", str(local_store.root)]) assert result.exit_code == 0 assert (local_store.root / "zarr.json").exists() zarr_array = zarr.open_array(local_store.root, zarr_format=3) metadata = zarr_array.metadata assert metadata.zarr_format == 3 assert metadata.codecs == ( BytesCodec(endian="little"), compressor_v3, ) assert np.all(zarr_array[:] == 1) @pytest.mark.filterwarnings(f"ignore:{NUMCODECS_USER_WARNING}:UserWarning") def test_migrate_numcodecs_compressor(local_store: LocalStore) -> None: """Test migration of a numcodecs compressor without a zarr.codecs equivalent.""" lzma_settings = { "format": lzma.FORMAT_RAW, "check": -1, "preset": None, "filters": [ {"id": lzma.FILTER_DELTA, "dist": 4}, {"id": lzma.FILTER_LZMA2, "preset": 1}, ], } zarr_array = zarr.create_array( store=local_store, shape=(10, 10), chunks=(10, 10), dtype="uint16", compressors=numcodecs.LZMA.from_config(lzma_settings), zarr_format=2, fill_value=0, ) zarr_array[:] = 1 result = runner.invoke(cli.app, ["migrate", "v3", str(local_store.root)]) assert result.exit_code == 0 assert (local_store.root / "zarr.json").exists() zarr_array = zarr.open_array(local_store.root, zarr_format=3) metadata = zarr_array.metadata assert metadata.zarr_format == 3 assert metadata.codecs == ( BytesCodec(endian="little"), LZMA( format=lzma_settings["format"], check=lzma_settings["check"], preset=lzma_settings["preset"], filters=lzma_settings["filters"], ), ) assert np.all(zarr_array[:] == 1) @pytest.mark.filterwarnings(f"ignore:{NUMCODECS_USER_WARNING}:UserWarning") def test_migrate_filter(local_store: LocalStore) -> None: filter_v2 = numcodecs.Delta(dtype=" None: zarr_array = zarr.create_array( store=local_store, shape=(10, 10), chunks=(10, 10), dtype="uint16", compressors=None, zarr_format=2, fill_value=0, order=order, ) zarr_array[:] = 1 result = runner.invoke(cli.app, ["migrate", "v3", str(local_store.root)]) assert result.exit_code == 0 assert (local_store.root / "zarr.json").exists() zarr_array 
= zarr.open_array(local_store.root, zarr_format=3) metadata = zarr_array.metadata assert metadata.zarr_format == 3 assert metadata.codecs == expected_codecs assert np.all(zarr_array[:] == 1) @pytest.mark.parametrize( ("dtype", "expected_data_type", "expected_codecs"), [ ("uint8", UInt8(), (BytesCodec(endian=None),)), ("uint16", UInt16(), (BytesCodec(endian="little"),)), ], ids=["single_byte", "multi_byte"], ) def test_migrate_endian( local_store: LocalStore, dtype: str, expected_data_type: UInt8 | UInt16, expected_codecs: tuple[Codec], ) -> None: zarr_array = zarr.create_array( store=local_store, shape=(10, 10), chunks=(10, 10), dtype=dtype, compressors=None, zarr_format=2, fill_value=0, ) zarr_array[:] = 1 result = runner.invoke(cli.app, ["migrate", "v3", str(local_store.root)]) assert result.exit_code == 0 assert (local_store.root / "zarr.json").exists() zarr_array = zarr.open_array(local_store.root, zarr_format=3) metadata = zarr_array.metadata assert metadata.zarr_format == 3 assert metadata.data_type == expected_data_type assert metadata.codecs == expected_codecs assert np.all(zarr_array[:] == 1) @pytest.mark.parametrize("node_type", ["array", "group"]) def test_migrate_v3(local_store: LocalStore, node_type: str) -> None: """Attempting to convert a v3 array/group should always fail""" if node_type == "array": zarr.create_array( store=local_store, shape=(10, 10), chunks=(10, 10), zarr_format=3, dtype="uint16" ) else: zarr.create_group(store=local_store, zarr_format=3) result = runner.invoke(cli.app, ["migrate", "v3", str(local_store.root)]) assert result.exit_code == 1 assert isinstance(result.exception, TypeError) assert str(result.exception) == "Only arrays / groups with zarr v2 metadata can be converted" def test_migrate_consolidated_metadata(local_store: LocalStore) -> None: """Attempting to convert a group with consolidated metadata should always fail""" group = zarr.create_group(store=local_store, zarr_format=2) group.create_array(shape=(1,), name="a", dtype="uint8") zarr.consolidate_metadata(local_store) result = runner.invoke(cli.app, ["migrate", "v3", str(local_store.root)]) assert result.exit_code == 1 assert isinstance(result.exception, NotImplementedError) assert str(result.exception) == "Migration of consolidated metadata isn't supported." 
def test_migrate_unknown_codec(local_store: LocalStore) -> None: """Attempting to convert a codec without a v3 equivalent should always fail""" zarr.create_array( store=local_store, shape=(10, 10), chunks=(10, 10), dtype="uint16", filters=[numcodecs.Categorize(labels=["a", "b"], dtype=object)], zarr_format=2, fill_value=0, ) result = runner.invoke(cli.app, ["migrate", "v3", str(local_store.root)]) assert result.exit_code == 1 assert isinstance(result.exception, ValueError) assert ( str(result.exception) == "Couldn't find corresponding zarr.codecs.numcodecs codec for categorize" ) def test_migrate_incorrect_filter(local_store: LocalStore) -> None: """Attempting to convert a filter (which is the wrong type of codec) should always fail""" zarr.create_array( store=local_store, shape=(10, 10), chunks=(10, 10), dtype="uint16", filters=[numcodecs.Zstd(level=3)], zarr_format=2, fill_value=0, ) with pytest.warns(UserWarning, match=NUMCODECS_USER_WARNING): result = runner.invoke(cli.app, ["migrate", "v3", str(local_store.root)]) assert result.exit_code == 1 assert isinstance(result.exception, TypeError) assert ( str(result.exception) == "Filter is not an ArrayArrayCodec" ) def test_migrate_incorrect_compressor(local_store: LocalStore) -> None: """Attempting to convert a compressor (which is the wrong type of codec) should always fail""" zarr.create_array( store=local_store, shape=(10, 10), chunks=(10, 10), dtype="uint16", compressors=numcodecs.Delta(dtype=" is not a BytesBytesCodec" ) @pytest.mark.parametrize("zarr_format", [2, 3]) def test_remove_metadata_fails_without_force( local_store: LocalStore, zarr_format: ZarrFormat ) -> None: """Test removing metadata (when no alternate metadata is present) fails without --force.""" create_nested_zarr(local_store, zarr_format=zarr_format) result = runner.invoke(cli.app, ["remove-metadata", f"v{zarr_format}", str(local_store.root)]) assert result.exit_code == 1 assert isinstance(result.exception, ValueError) assert str(result.exception).startswith(f"Cannot remove v{zarr_format} metadata at file") @pytest.mark.parametrize("zarr_format", [2, 3]) def test_remove_metadata_succeeds_with_force( local_store: LocalStore, zarr_format: ZarrFormat, expected_paths_no_metadata: list[Path] ) -> None: """Test removing metadata (when no alternate metadata is present) succeeds with --force.""" create_nested_zarr(local_store, zarr_format=zarr_format) result = runner.invoke( cli.app, ["remove-metadata", f"v{zarr_format}", str(local_store.root), "--force"] ) assert result.exit_code == 0 paths = sorted(local_store.root.rglob("*")) expected_paths = [local_store.root / p for p in expected_paths_no_metadata] assert paths == expected_paths def test_remove_metadata_sub_group( local_store: LocalStore, expected_paths_no_metadata: list[Path] ) -> None: """Test only v2 metadata within group_1 is removed and rest remains un-changed.""" create_nested_zarr(local_store) result = runner.invoke( cli.app, ["remove-metadata", "v2", str(local_store.root / "group_1"), "--force"] ) assert result.exit_code == 0 # check all metadata files inside group_1 are removed (.zattrs / .zgroup / .zarray should remain only inside the top # group) paths = sorted(local_store.root.rglob("*")) expected_paths = [local_store.root / p for p in expected_paths_no_metadata] expected_paths.append(local_store.root / ".zattrs") expected_paths.append(local_store.root / ".zgroup") expected_paths.append(local_store.root / "array_0" / ".zarray") expected_paths.append(local_store.root / "array_0" / ".zattrs") assert paths == 
sorted(expected_paths) @pytest.mark.parametrize( ("zarr_format", "expected_output_paths"), [("v2", "expected_paths_v3_metadata"), ("v3", "expected_paths_v2_metadata")], ) def test_remove_metadata_after_conversion( local_store: LocalStore, request: pytest.FixtureRequest, zarr_format: str, expected_output_paths: str, ) -> None: """Test all v2/v3 metadata can be removed after metadata conversion (all groups / arrays / metadata of other versions should remain as-is)""" create_nested_zarr(local_store) # convert v2 metadata to v3 (so now both v2 and v3 metadata present!), then remove either the v2 or v3 metadata result = runner.invoke(cli.app, ["migrate", "v3", str(local_store.root)]) assert result.exit_code == 0 result = runner.invoke(cli.app, ["remove-metadata", zarr_format, str(local_store.root)]) assert result.exit_code == 0 paths = sorted(local_store.root.rglob("*")) expected_paths = request.getfixturevalue(expected_output_paths) expected_paths = [local_store.root / p for p in expected_paths] assert paths == expected_paths @pytest.mark.parametrize("cli_command", ["migrate", "remove-metadata"]) def test_dry_run( local_store: LocalStore, cli_command: str, expected_paths_v2_metadata: list[Path] ) -> None: """Test that all files are un-changed after a dry run""" create_nested_zarr(local_store) if cli_command == "migrate": result = runner.invoke( cli.app, ["migrate", "v3", str(local_store.root), "--overwrite", "--force", "--dry-run"] ) else: result = runner.invoke( cli.app, ["remove-metadata", "v2", str(local_store.root), "--force", "--dry-run"] ) assert result.exit_code == 0 paths = sorted(local_store.root.rglob("*")) expected_paths = [local_store.root / p for p in expected_paths_v2_metadata] assert paths == expected_paths zarr-python-3.1.5/tests/test_codec_entrypoints.py000066400000000000000000000023011511007055700223200ustar00rootroot00000000000000import pytest import zarr.registry from zarr import config @pytest.mark.usefixtures("set_path") @pytest.mark.parametrize("codec_name", ["TestEntrypointCodec", "TestEntrypointGroup.Codec"]) def test_entrypoint_codec(codec_name: str) -> None: config.set({"codecs.test": "package_with_entrypoint." + codec_name}) cls_test = zarr.registry.get_codec_class("test") assert cls_test.__qualname__ == codec_name @pytest.mark.usefixtures("set_path") def test_entrypoint_pipeline() -> None: config.set({"codec_pipeline.path": "package_with_entrypoint.TestEntrypointCodecPipeline"}) cls = zarr.registry.get_pipeline_class() assert cls.__name__ == "TestEntrypointCodecPipeline" @pytest.mark.usefixtures("set_path") @pytest.mark.parametrize("buffer_name", ["TestEntrypointBuffer", "TestEntrypointGroup.Buffer"]) def test_entrypoint_buffer(buffer_name: str) -> None: config.set( { "buffer": "package_with_entrypoint." 
+ buffer_name, "ndbuffer": "package_with_entrypoint.TestEntrypointNDBuffer", } ) assert zarr.registry.get_buffer_class().__qualname__ == buffer_name assert zarr.registry.get_ndbuffer_class().__name__ == "TestEntrypointNDBuffer" zarr-python-3.1.5/tests/test_codecs/000077500000000000000000000000001511007055700174575ustar00rootroot00000000000000zarr-python-3.1.5/tests/test_codecs/__init__.py000066400000000000000000000000001511007055700215560ustar00rootroot00000000000000zarr-python-3.1.5/tests/test_codecs/test_blosc.py000066400000000000000000000075101511007055700221750ustar00rootroot00000000000000import json import numcodecs import numpy as np import pytest from packaging.version import Version import zarr from zarr.codecs import BloscCodec from zarr.codecs.blosc import BloscShuffle, Shuffle from zarr.core.array_spec import ArraySpec from zarr.core.buffer import default_buffer_prototype from zarr.core.dtype import UInt16 from zarr.storage import MemoryStore, StorePath @pytest.mark.parametrize("dtype", ["uint8", "uint16"]) async def test_blosc_evolve(dtype: str) -> None: typesize = np.dtype(dtype).itemsize path = "blosc_evolve" store = MemoryStore() spath = StorePath(store, path) zarr.create_array( spath, shape=(16, 16), chunks=(16, 16), dtype=dtype, fill_value=0, compressors=BloscCodec(), ) buf = await store.get(f"{path}/zarr.json", prototype=default_buffer_prototype()) assert buf is not None zarr_json = json.loads(buf.to_bytes()) blosc_configuration_json = zarr_json["codecs"][1]["configuration"] assert blosc_configuration_json["typesize"] == typesize if typesize == 1: assert blosc_configuration_json["shuffle"] == "bitshuffle" else: assert blosc_configuration_json["shuffle"] == "shuffle" path2 = "blosc_evolve_sharding" spath2 = StorePath(store, path2) zarr.create_array( spath2, shape=(16, 16), chunks=(16, 16), shards=(16, 16), dtype=dtype, fill_value=0, compressors=BloscCodec(), ) buf = await store.get(f"{path2}/zarr.json", prototype=default_buffer_prototype()) assert buf is not None zarr_json = json.loads(buf.to_bytes()) blosc_configuration_json = zarr_json["codecs"][0]["configuration"]["codecs"][1]["configuration"] assert blosc_configuration_json["typesize"] == typesize if typesize == 1: assert blosc_configuration_json["shuffle"] == "bitshuffle" else: assert blosc_configuration_json["shuffle"] == "shuffle" @pytest.mark.parametrize("shuffle", [None, "bitshuffle", BloscShuffle.shuffle]) @pytest.mark.parametrize("typesize", [None, 1, 2]) def test_tunable_attrs_param(shuffle: None | Shuffle | BloscShuffle, typesize: None | int) -> None: """ Test that the tunable_attrs parameter is set as expected when creating a BloscCodec, """ codec = BloscCodec(typesize=typesize, shuffle=shuffle) if shuffle is None: assert codec.shuffle == BloscShuffle.bitshuffle # default shuffle assert "shuffle" in codec._tunable_attrs if typesize is None: assert codec.typesize == 1 # default typesize assert "typesize" in codec._tunable_attrs new_dtype = UInt16() array_spec = ArraySpec( shape=(1,), dtype=new_dtype, fill_value=1, prototype=default_buffer_prototype(), config={}, # type: ignore[arg-type] ) evolved_codec = codec.evolve_from_array_spec(array_spec=array_spec) if typesize is None: assert evolved_codec.typesize == new_dtype.item_size else: assert evolved_codec.typesize == codec.typesize if shuffle is None: assert evolved_codec.shuffle == BloscShuffle.shuffle else: assert evolved_codec.shuffle == codec.shuffle async def test_typesize() -> None: a = np.arange(1000000, dtype=np.uint64) codecs = 
[zarr.codecs.BytesCodec(), zarr.codecs.BloscCodec()] z = zarr.array(a, chunks=(10000), codecs=codecs) data = await z.store.get("c/0", prototype=default_buffer_prototype()) assert data is not None bytes = data.to_bytes() size = len(bytes) msg = f"Blosc size mismatch. First 10 bytes: {bytes[:20]!r} and last 10 bytes: {bytes[-20:]!r}" if Version(numcodecs.__version__) >= Version("0.16.0"): expected_size = 402 assert size == expected_size, msg else: expected_size = 10216 assert size == expected_size, msg zarr-python-3.1.5/tests/test_codecs/test_codecs.py000066400000000000000000000263331511007055700223370ustar00rootroot00000000000000from __future__ import annotations import json from dataclasses import dataclass from typing import TYPE_CHECKING, Any import numpy as np import pytest import zarr import zarr.api import zarr.api.asynchronous from zarr import Array, AsyncArray, config from zarr.codecs import ( BytesCodec, GzipCodec, ShardingCodec, TransposeCodec, ) from zarr.core.buffer import default_buffer_prototype from zarr.core.indexing import BasicSelection, morton_order_iter from zarr.core.metadata.v3 import ArrayV3Metadata from zarr.dtype import UInt8 from zarr.errors import ZarrUserWarning from zarr.storage import StorePath if TYPE_CHECKING: from zarr.abc.codec import Codec from zarr.abc.store import Store from zarr.core.buffer.core import NDArrayLikeOrScalar from zarr.core.common import MemoryOrder from zarr.types import AnyAsyncArray @dataclass(frozen=True) class _AsyncArrayProxy: array: AnyAsyncArray def __getitem__(self, selection: BasicSelection) -> _AsyncArraySelectionProxy: return _AsyncArraySelectionProxy(self.array, selection) @dataclass(frozen=True) class _AsyncArraySelectionProxy: array: AnyAsyncArray selection: BasicSelection async def get(self) -> NDArrayLikeOrScalar: return await self.array.getitem(self.selection) async def set(self, value: np.ndarray[Any, Any]) -> None: return await self.array.setitem(self.selection, value) def order_from_dim(order: MemoryOrder, ndim: int) -> tuple[int, ...]: if order == "F": return tuple(ndim - x - 1 for x in range(ndim)) else: return tuple(range(ndim)) def test_sharding_pickle() -> None: """ Test that sharding codecs can be pickled """ @pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) @pytest.mark.parametrize("input_order", ["F", "C"]) @pytest.mark.parametrize("store_order", ["F", "C"]) @pytest.mark.parametrize("runtime_write_order", ["F", "C"]) @pytest.mark.parametrize("runtime_read_order", ["F", "C"]) @pytest.mark.parametrize("with_sharding", [True, False]) async def test_order( store: Store, input_order: MemoryOrder, store_order: MemoryOrder, runtime_write_order: MemoryOrder, runtime_read_order: MemoryOrder, with_sharding: bool, ) -> None: data = np.arange(0, 256, dtype="uint16").reshape((32, 8), order=input_order) path = "order" spath = StorePath(store, path=path) a = await zarr.api.asynchronous.create_array( spath, shape=data.shape, chunks=(16, 8) if with_sharding else (32, 8), shards=(32, 8) if with_sharding else None, dtype=data.dtype, fill_value=0, chunk_key_encoding={"name": "v2", "separator": "."}, filters=[TransposeCodec(order=order_from_dim(store_order, data.ndim))], config={"order": runtime_write_order}, ) await _AsyncArrayProxy(a)[:, :].set(data) read_data = await _AsyncArrayProxy(a)[:, :].get() assert np.array_equal(data, read_data) with config.set({"array.order": runtime_read_order}): a = await AsyncArray.open( spath, ) read_data = await _AsyncArrayProxy(a)[:, :].get() assert np.array_equal(data, 
read_data) assert isinstance(read_data, np.ndarray) if runtime_read_order == "F": assert read_data.flags["F_CONTIGUOUS"] assert not read_data.flags["C_CONTIGUOUS"] else: assert not read_data.flags["F_CONTIGUOUS"] assert read_data.flags["C_CONTIGUOUS"] @pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) @pytest.mark.parametrize("input_order", ["F", "C"]) @pytest.mark.parametrize("runtime_write_order", ["F", "C"]) @pytest.mark.parametrize("runtime_read_order", ["F", "C"]) @pytest.mark.parametrize("with_sharding", [True, False]) def test_order_implicit( store: Store, input_order: MemoryOrder, runtime_write_order: MemoryOrder, runtime_read_order: MemoryOrder, with_sharding: bool, ) -> None: data = np.arange(0, 256, dtype="uint16").reshape((16, 16), order=input_order) path = "order_implicit" spath = StorePath(store, path) with config.set({"array.order": runtime_write_order}): a = zarr.create_array( spath, shape=data.shape, chunks=(8, 8) if with_sharding else (16, 16), shards=(16, 16) if with_sharding else None, dtype=data.dtype, fill_value=0, ) a[:, :] = data with config.set({"array.order": runtime_read_order}): a = Array.open(spath) read_data = a[:, :] assert np.array_equal(data, read_data) assert isinstance(read_data, np.ndarray) if runtime_read_order == "F": assert read_data.flags["F_CONTIGUOUS"] assert not read_data.flags["C_CONTIGUOUS"] else: assert not read_data.flags["F_CONTIGUOUS"] assert read_data.flags["C_CONTIGUOUS"] @pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) def test_open(store: Store) -> None: spath = StorePath(store) a = zarr.create_array( spath, shape=(16, 16), chunks=(16, 16), dtype="int32", fill_value=0, ) b = Array.open(spath) assert a.metadata == b.metadata def test_morton() -> None: assert list(morton_order_iter((2, 2))) == [(0, 0), (1, 0), (0, 1), (1, 1)] assert list(morton_order_iter((2, 2, 2))) == [ (0, 0, 0), (1, 0, 0), (0, 1, 0), (1, 1, 0), (0, 0, 1), (1, 0, 1), (0, 1, 1), (1, 1, 1), ] assert list(morton_order_iter((2, 2, 2, 2))) == [ (0, 0, 0, 0), (1, 0, 0, 0), (0, 1, 0, 0), (1, 1, 0, 0), (0, 0, 1, 0), (1, 0, 1, 0), (0, 1, 1, 0), (1, 1, 1, 0), (0, 0, 0, 1), (1, 0, 0, 1), (0, 1, 0, 1), (1, 1, 0, 1), (0, 0, 1, 1), (1, 0, 1, 1), (0, 1, 1, 1), (1, 1, 1, 1), ] @pytest.mark.parametrize( "shape", [ [2, 2, 2], [5, 2], [2, 5], [2, 9, 2], [3, 2, 12], [2, 5, 1], [4, 3, 6, 2, 7], [3, 2, 1, 6, 4, 5, 2], ], ) def test_morton2(shape: tuple[int, ...]) -> None: order = list(morton_order_iter(shape)) for i, x in enumerate(order): assert x not in order[:i] # no duplicates assert all(x[j] < shape[j] for j in range(len(shape))) # all indices are within bounds @pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) def test_write_partial_chunks(store: Store) -> None: data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) spath = StorePath(store) a = zarr.create_array( spath, shape=data.shape, chunks=(20, 20), dtype=data.dtype, fill_value=1, ) a[0:16, 0:16] = data assert np.array_equal(a[0:16, 0:16], data) @pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) async def test_delete_empty_chunks(store: Store) -> None: data = np.ones((16, 16)) path = "delete_empty_chunks" spath = StorePath(store, path) a = await zarr.api.asynchronous.create_array( spath, shape=data.shape, chunks=(32, 32), dtype=data.dtype, fill_value=1, ) await _AsyncArrayProxy(a)[:16, :16].set(np.zeros((16, 16))) await _AsyncArrayProxy(a)[:16, :16].set(data) assert np.array_equal(await _AsyncArrayProxy(a)[:16, :16].get(), data) 
assert await store.get(f"{path}/c0/0", prototype=default_buffer_prototype()) is None @pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) async def test_dimension_names(store: Store) -> None: data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) path = "dimension_names" spath = StorePath(store, path) await zarr.api.asynchronous.create_array( spath, shape=data.shape, chunks=(16, 16), dtype=data.dtype, fill_value=0, dimension_names=("x", "y"), ) assert isinstance( meta := (await zarr.api.asynchronous.open_array(store=spath)).metadata, ArrayV3Metadata ) assert meta.dimension_names == ( "x", "y", ) path2 = "dimension_names2" spath2 = StorePath(store, path2) await zarr.api.asynchronous.create_array( spath2, shape=data.shape, chunks=(16, 16), dtype=data.dtype, fill_value=0, ) assert isinstance(meta := (await AsyncArray.open(spath2)).metadata, ArrayV3Metadata) assert meta.dimension_names is None zarr_json_buffer = await store.get(f"{path2}/zarr.json", prototype=default_buffer_prototype()) assert zarr_json_buffer is not None assert "dimension_names" not in json.loads(zarr_json_buffer.to_bytes()) @pytest.mark.parametrize( "codecs", [ (BytesCodec(), TransposeCodec(order=order_from_dim("F", 2))), (TransposeCodec(order=order_from_dim("F", 2)),), ], ) def test_invalid_metadata(codecs: tuple[Codec, ...]) -> None: shape = (16,) chunks = (16,) data_type = UInt8() with pytest.raises(ValueError, match="The `order` tuple must have as many entries"): ArrayV3Metadata( shape=shape, chunk_grid={"name": "regular", "configuration": {"chunk_shape": chunks}}, chunk_key_encoding={"name": "default", "configuration": {"separator": "/"}}, fill_value=0, data_type=data_type, codecs=codecs, attributes={}, dimension_names=None, ) def test_invalid_metadata_create_array() -> None: with pytest.warns( ZarrUserWarning, match="codec disables partial reads and writes, which may lead to inefficient performance", ): zarr.create_array( {}, shape=(16, 16), chunks=(16, 16), dtype=np.dtype("uint8"), fill_value=0, serializer=ShardingCodec(chunk_shape=(8, 8)), compressors=[ GzipCodec(), ], ) @pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) async def test_resize(store: Store) -> None: data = np.zeros((16, 18), dtype="uint16") path = "resize" spath = StorePath(store, path) a = await zarr.api.asynchronous.create_array( spath, shape=data.shape, chunks=(10, 10), dtype=data.dtype, chunk_key_encoding={"name": "v2", "separator": "."}, fill_value=1, ) await _AsyncArrayProxy(a)[:16, :18].set(data) assert await store.get(f"{path}/1.1", prototype=default_buffer_prototype()) is not None assert await store.get(f"{path}/0.0", prototype=default_buffer_prototype()) is not None assert await store.get(f"{path}/0.1", prototype=default_buffer_prototype()) is not None assert await store.get(f"{path}/1.0", prototype=default_buffer_prototype()) is not None await a.resize((10, 12)) assert a.metadata.shape == (10, 12) assert a.shape == (10, 12) assert await store.get(f"{path}/0.0", prototype=default_buffer_prototype()) is not None assert await store.get(f"{path}/0.1", prototype=default_buffer_prototype()) is not None assert await store.get(f"{path}/1.0", prototype=default_buffer_prototype()) is None assert await store.get(f"{path}/1.1", prototype=default_buffer_prototype()) is None zarr-python-3.1.5/tests/test_codecs/test_endian.py000066400000000000000000000040231511007055700223250ustar00rootroot00000000000000from typing import Literal import numpy as np import pytest import zarr from zarr.abc.store import Store 
from zarr.codecs import BytesCodec from zarr.storage import StorePath from .test_codecs import _AsyncArrayProxy @pytest.mark.filterwarnings("ignore:The endianness of the requested serializer") @pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) @pytest.mark.parametrize("endian", ["big", "little"]) async def test_endian(store: Store, endian: Literal["big", "little"]) -> None: data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) path = "endian" spath = StorePath(store, path) a = await zarr.api.asynchronous.create_array( spath, shape=data.shape, chunks=(16, 16), dtype=data.dtype, fill_value=0, chunk_key_encoding={"name": "v2", "separator": "."}, serializer=BytesCodec(endian=endian), ) await _AsyncArrayProxy(a)[:, :].set(data) readback_data = await _AsyncArrayProxy(a)[:, :].get() assert np.array_equal(data, readback_data) @pytest.mark.filterwarnings("ignore:The endianness of the requested serializer") @pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) @pytest.mark.parametrize("dtype_input_endian", [">u2", "u2", " None: data = np.arange(0, 256, dtype=dtype_input_endian).reshape((16, 16)) path = "endian" spath = StorePath(store, path) a = await zarr.api.asynchronous.create_array( spath, shape=data.shape, chunks=(16, 16), dtype="uint16", fill_value=0, chunk_key_encoding={"name": "v2", "separator": "."}, serializer=BytesCodec(endian=dtype_store_endian), ) await _AsyncArrayProxy(a)[:, :].set(data) readback_data = await _AsyncArrayProxy(a)[:, :].get() assert np.array_equal(data, readback_data) zarr-python-3.1.5/tests/test_codecs/test_gzip.py000066400000000000000000000011021511007055700220330ustar00rootroot00000000000000import numpy as np import pytest import zarr from zarr.abc.store import Store from zarr.codecs import GzipCodec from zarr.storage import StorePath @pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) def test_gzip(store: Store) -> None: data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) a = zarr.create_array( StorePath(store), shape=data.shape, chunks=(16, 16), dtype=data.dtype, fill_value=0, compressors=GzipCodec(), ) a[:, :] = data assert np.array_equal(data, a[:, :]) zarr-python-3.1.5/tests/test_codecs/test_numcodecs.py000066400000000000000000000273321511007055700230570ustar00rootroot00000000000000from __future__ import annotations import contextlib import pickle from typing import TYPE_CHECKING, Any import numpy as np import pytest from numcodecs import GZip try: from numcodecs.errors import UnknownCodecError except ImportError: # Older versions of numcodecs don't have a separate errors module UnknownCodecError = ValueError from zarr import config, create_array, open_array from zarr.abc.numcodec import _is_numcodec, _is_numcodec_cls from zarr.codecs import numcodecs as _numcodecs from zarr.errors import ZarrUserWarning from zarr.registry import get_codec_class, get_numcodec if TYPE_CHECKING: from collections.abc import Iterator @contextlib.contextmanager def codec_conf() -> Iterator[Any]: base_conf = config.get("codecs") new_conf = { "numcodecs.bz2": "zarr.codecs.numcodecs.BZ2", "numcodecs.crc32": "zarr.codecs.numcodecs.CRC32", "numcodecs.crc32c": "zarr.codecs.numcodecs.CRC32C", "numcodecs.lz4": "zarr.codecs.numcodecs.LZ4", "numcodecs.lzma": "zarr.codecs.numcodecs.LZMA", "numcodecs.zfpy": "zarr.codecs.numcodecs.ZFPY", "numcodecs.adler32": "zarr.codecs.numcodecs.Adler32", "numcodecs.astype": "zarr.codecs.numcodecs.AsType", "numcodecs.bitround": "zarr.codecs.numcodecs.BitRound", "numcodecs.blosc": 
"zarr.codecs.numcodecs.Blosc", "numcodecs.delta": "zarr.codecs.numcodecs.Delta", "numcodecs.fixedscaleoffset": "zarr.codecs.numcodecs.FixedScaleOffset", "numcodecs.fletcher32": "zarr.codecs.numcodecs.Fletcher32", "numcodecs.gzip": "zarr.codecs.numcodecs.GZip", "numcodecs.jenkinslookup3": "zarr.codecs.numcodecs.JenkinsLookup3", "numcodecs.pcodec": "zarr.codecs.numcodecs.PCodec", "numcodecs.packbits": "zarr.codecs.numcodecs.PackBits", "numcodecs.shuffle": "zarr.codecs.numcodecs.Shuffle", "numcodecs.quantize": "zarr.codecs.numcodecs.Quantize", "numcodecs.zlib": "zarr.codecs.numcodecs.Zlib", "numcodecs.zstd": "zarr.codecs.numcodecs.Zstd", } yield config.set({"codecs": new_conf | base_conf}) if TYPE_CHECKING: from zarr.core.common import JSON def test_get_numcodec() -> None: assert get_numcodec({"id": "gzip", "level": 2}) == GZip(level=2) # type: ignore[typeddict-unknown-key] def test_is_numcodec() -> None: """ Test the _is_numcodec function """ assert _is_numcodec(GZip()) def test_is_numcodec_cls() -> None: """ Test the _is_numcodec_cls function """ assert _is_numcodec_cls(GZip) EXPECTED_WARNING_STR = "Numcodecs codecs are not in the Zarr version 3.*" ALL_CODECS = tuple( filter( lambda v: issubclass(v, _numcodecs._NumcodecsCodec) and hasattr(v, "codec_name"), tuple(getattr(_numcodecs, cls_name) for cls_name in _numcodecs.__all__), ) ) @pytest.mark.parametrize("codec_cls", ALL_CODECS) def test_get_codec_class(codec_cls: type[_numcodecs._NumcodecsCodec]) -> None: assert get_codec_class(codec_cls.codec_name) == codec_cls # type: ignore[comparison-overlap] @pytest.mark.parametrize("codec_class", ALL_CODECS) def test_docstring(codec_class: type[_numcodecs._NumcodecsCodec]) -> None: """ Test that the docstring for the zarr.numcodecs codecs references the wrapped numcodecs class. """ assert "See [numcodecs." 
in codec_class.__doc__ # type: ignore[operator] @pytest.mark.parametrize( "codec_class", [ _numcodecs.Blosc, _numcodecs.LZ4, _numcodecs.Zstd, _numcodecs.Zlib, _numcodecs.GZip, _numcodecs.BZ2, _numcodecs.LZMA, _numcodecs.Shuffle, ], ) def test_generic_compressor(codec_class: type[_numcodecs._NumcodecsBytesBytesCodec]) -> None: data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) with pytest.warns(ZarrUserWarning, match=EXPECTED_WARNING_STR): a = create_array( {}, shape=data.shape, chunks=(16, 16), dtype=data.dtype, fill_value=0, compressors=[codec_class()], ) a[:, :] = data.copy() np.testing.assert_array_equal(data, a[:, :]) @pytest.mark.parametrize( ("codec_class", "codec_config"), [ (_numcodecs.Delta, {"dtype": "float32"}), (_numcodecs.FixedScaleOffset, {"offset": 0, "scale": 25.5}), (_numcodecs.FixedScaleOffset, {"offset": 0, "scale": 51, "astype": "uint16"}), (_numcodecs.AsType, {"encode_dtype": "float32", "decode_dtype": "float32"}), ], ids=[ "delta", "fixedscaleoffset", "fixedscaleoffset2", "astype", ], ) def test_generic_filter( codec_class: type[_numcodecs._NumcodecsArrayArrayCodec], codec_config: dict[str, JSON], ) -> None: data = np.linspace(0, 10, 256, dtype="float32").reshape((16, 16)) with pytest.warns(ZarrUserWarning, match=EXPECTED_WARNING_STR): a = create_array( {}, shape=data.shape, chunks=(16, 16), dtype=data.dtype, fill_value=0, filters=[ codec_class(**codec_config), ], ) a[:, :] = data.copy() with codec_conf(): with pytest.warns(ZarrUserWarning, match=EXPECTED_WARNING_STR): b = open_array(a.store, mode="r") np.testing.assert_array_equal(data, b[:, :]) def test_generic_filter_bitround() -> None: data = np.linspace(0, 1, 256, dtype="float32").reshape((16, 16)) with pytest.warns(ZarrUserWarning, match=EXPECTED_WARNING_STR): a = create_array( {}, shape=data.shape, chunks=(16, 16), dtype=data.dtype, fill_value=0, filters=[_numcodecs.BitRound(keepbits=3)], ) a[:, :] = data.copy() with pytest.warns(ZarrUserWarning, match=EXPECTED_WARNING_STR): b = open_array(a.store, mode="r") assert np.allclose(data, b[:, :], atol=0.1) def test_generic_filter_quantize() -> None: data = np.linspace(0, 10, 256, dtype="float32").reshape((16, 16)) with pytest.warns(ZarrUserWarning, match=EXPECTED_WARNING_STR): a = create_array( {}, shape=data.shape, chunks=(16, 16), dtype=data.dtype, fill_value=0, filters=[_numcodecs.Quantize(digits=3)], ) a[:, :] = data.copy() with pytest.warns(ZarrUserWarning, match=EXPECTED_WARNING_STR): b = open_array(a.store, mode="r") assert np.allclose(data, b[:, :], atol=0.001) def test_generic_filter_packbits() -> None: data = np.zeros((16, 16), dtype="bool") data[0:4, :] = True with pytest.warns(ZarrUserWarning, match=EXPECTED_WARNING_STR): a = create_array( {}, shape=data.shape, chunks=(16, 16), dtype=data.dtype, fill_value=0, filters=[_numcodecs.PackBits()], ) a[:, :] = data.copy() with pytest.warns(ZarrUserWarning, match=EXPECTED_WARNING_STR): b = open_array(a.store, mode="r") np.testing.assert_array_equal(data, b[:, :]) with pytest.warns(ZarrUserWarning, match=EXPECTED_WARNING_STR): with pytest.raises(ValueError, match=".*requires bool dtype.*"): create_array( {}, shape=data.shape, chunks=(16, 16), dtype="uint32", fill_value=0, filters=[_numcodecs.PackBits()], ) @pytest.mark.parametrize( "codec_class", [ _numcodecs.CRC32, _numcodecs.CRC32C, _numcodecs.Adler32, _numcodecs.Fletcher32, _numcodecs.JenkinsLookup3, ], ) def test_generic_checksum(codec_class: type[_numcodecs._NumcodecsBytesBytesCodec]) -> None: # Check if the codec is available in numcodecs try: with 
pytest.warns(ZarrUserWarning, match=EXPECTED_WARNING_STR): codec_class()._codec # noqa: B018 except UnknownCodecError as e: # pragma: no cover pytest.skip(f"{codec_class.codec_name} is not available in numcodecs: {e}") data = np.linspace(0, 10, 256, dtype="float32").reshape((16, 16)) with pytest.warns(ZarrUserWarning, match=EXPECTED_WARNING_STR): a = create_array( {}, shape=data.shape, chunks=(16, 16), dtype=data.dtype, fill_value=0, compressors=[codec_class()], ) a[:, :] = data.copy() with codec_conf(): with pytest.warns(ZarrUserWarning, match=EXPECTED_WARNING_STR): b = open_array(a.store, mode="r") np.testing.assert_array_equal(data, b[:, :]) @pytest.mark.parametrize("codec_class", [_numcodecs.PCodec, _numcodecs.ZFPY]) def test_generic_bytes_codec(codec_class: type[_numcodecs._NumcodecsArrayBytesCodec]) -> None: try: with pytest.warns(ZarrUserWarning, match=EXPECTED_WARNING_STR): codec_class()._codec # noqa: B018 except ValueError as e: # pragma: no cover if "codec not available" in str(e): pytest.xfail(f"{codec_class.codec_name} is not available: {e}") else: raise except ImportError as e: # pragma: no cover pytest.xfail(f"{codec_class.codec_name} is not available: {e}") data = np.arange(0, 256, dtype="float32").reshape((16, 16)) with pytest.warns(ZarrUserWarning, match=EXPECTED_WARNING_STR): a = create_array( {}, shape=data.shape, chunks=(16, 16), dtype=data.dtype, fill_value=0, serializer=codec_class(), ) a[:, :] = data.copy() np.testing.assert_array_equal(data, a[:, :]) def test_delta_astype() -> None: data = np.linspace(0, 10, 256, dtype="i8").reshape((16, 16)) with pytest.warns(ZarrUserWarning, match=EXPECTED_WARNING_STR): a = create_array( {}, shape=data.shape, chunks=(16, 16), dtype=data.dtype, fill_value=0, filters=[ _numcodecs.Delta(dtype="i8", astype="i2"), ], ) a[:, :] = data.copy() with codec_conf(): with pytest.warns(ZarrUserWarning, match=EXPECTED_WARNING_STR): b = open_array(a.store, mode="r") np.testing.assert_array_equal(data, b[:, :]) def test_repr() -> None: with pytest.warns(ZarrUserWarning, match=EXPECTED_WARNING_STR): codec = _numcodecs.LZ4(level=5) assert repr(codec) == "LZ4(codec_name='numcodecs.lz4', codec_config={'level': 5})" def test_to_dict() -> None: with pytest.warns(ZarrUserWarning, match=EXPECTED_WARNING_STR): codec = _numcodecs.LZ4(level=5) assert codec.to_dict() == {"name": "numcodecs.lz4", "configuration": {"level": 5}} @pytest.mark.parametrize( "codec_cls", [ _numcodecs.Blosc, _numcodecs.LZ4, _numcodecs.Zstd, _numcodecs.Zlib, _numcodecs.GZip, _numcodecs.BZ2, _numcodecs.LZMA, _numcodecs.Shuffle, _numcodecs.BitRound, _numcodecs.Delta, _numcodecs.FixedScaleOffset, _numcodecs.Quantize, _numcodecs.PackBits, _numcodecs.AsType, _numcodecs.CRC32, _numcodecs.CRC32C, _numcodecs.Adler32, _numcodecs.Fletcher32, _numcodecs.JenkinsLookup3, _numcodecs.PCodec, _numcodecs.ZFPY, ], ) def test_codecs_pickleable(codec_cls: type[_numcodecs._NumcodecsCodec]) -> None: # Check if the codec is available in numcodecs try: with pytest.warns(ZarrUserWarning, match=EXPECTED_WARNING_STR): codec = codec_cls() except UnknownCodecError as e: # pragma: no cover pytest.skip(f"{codec_cls.codec_name} is not available in numcodecs: {e}") expected = codec p = pickle.dumps(codec) actual = pickle.loads(p) assert actual == expected zarr-python-3.1.5/tests/test_codecs/test_sharding.py000066400000000000000000000367361511007055700227060ustar00rootroot00000000000000import pickle import re from typing import Any import numpy as np import numpy.typing as npt import pytest import zarr import 
zarr.api import zarr.api.asynchronous from zarr import Array from zarr.abc.store import Store from zarr.codecs import ( BloscCodec, ShardingCodec, ShardingCodecIndexLocation, TransposeCodec, ) from zarr.core.buffer import NDArrayLike, default_buffer_prototype from zarr.errors import ZarrUserWarning from zarr.storage import StorePath, ZipStore from ..conftest import ArrayRequest from .test_codecs import _AsyncArrayProxy, order_from_dim @pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) @pytest.mark.parametrize("index_location", ["start", "end"]) @pytest.mark.parametrize( "array_fixture", [ ArrayRequest(shape=(128,) * 1, dtype="uint8", order="C"), ArrayRequest(shape=(128,) * 2, dtype="uint8", order="C"), ArrayRequest(shape=(128,) * 3, dtype="uint16", order="F"), ], indirect=["array_fixture"], ) @pytest.mark.parametrize("offset", [0, 10]) def test_sharding( store: Store, array_fixture: npt.NDArray[Any], index_location: ShardingCodecIndexLocation, offset: int, ) -> None: """ Test that we can create an array with a sharding codec, write data to that array, and get the same data out via indexing. """ data = array_fixture spath = StorePath(store) arr = zarr.create_array( spath, shape=tuple(s + offset for s in data.shape), chunks=(32,) * data.ndim, shards={"shape": (64,) * data.ndim, "index_location": index_location}, dtype=data.dtype, fill_value=6, filters=[TransposeCodec(order=order_from_dim("F", data.ndim))], compressors=BloscCodec(cname="lz4"), ) write_region = tuple(slice(offset, None) for dim in range(data.ndim)) arr[write_region] = data if offset > 0: empty_region = tuple(slice(0, offset) for dim in range(data.ndim)) assert np.all(arr[empty_region] == arr.metadata.fill_value) read_data = arr[write_region] assert isinstance(read_data, NDArrayLike) assert data.shape == read_data.shape assert np.array_equal(data, read_data) @pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) @pytest.mark.parametrize("index_location", ["start", "end"]) @pytest.mark.parametrize("offset", [0, 10]) def test_sharding_scalar( store: Store, index_location: ShardingCodecIndexLocation, offset: int, ) -> None: """ Test that we can create an array with a sharding codec, write data to that array, and get the same data out via indexing. 
""" spath = StorePath(store) arr = zarr.create_array( spath, shape=(128, 128), chunks=(32, 32), shards={"shape": (64, 64), "index_location": index_location}, dtype="uint8", fill_value=6, filters=[TransposeCodec(order=order_from_dim("F", 2))], compressors=BloscCodec(cname="lz4"), ) arr[:16, :16] = 10 # intentionally write partial chunks read_data = arr[:16, :16] np.testing.assert_array_equal(read_data, 10) @pytest.mark.parametrize("index_location", ["start", "end"]) @pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) @pytest.mark.parametrize( "array_fixture", [ ArrayRequest(shape=(128,) * 3, dtype="uint16", order="F"), ], indirect=["array_fixture"], ) def test_sharding_partial( store: Store, array_fixture: npt.NDArray[Any], index_location: ShardingCodecIndexLocation ) -> None: data = array_fixture spath = StorePath(store) a = zarr.create_array( spath, shape=tuple(a + 10 for a in data.shape), chunks=(32, 32, 32), shards={"shape": (64, 64, 64), "index_location": index_location}, compressors=BloscCodec(cname="lz4"), filters=[TransposeCodec(order=order_from_dim("F", data.ndim))], dtype=data.dtype, fill_value=0, ) a[10:, 10:, 10:] = data read_data = a[0:10, 0:10, 0:10] assert np.all(read_data == 0) read_data = a[10:, 10:, 10:] assert isinstance(read_data, NDArrayLike) assert data.shape == read_data.shape assert np.array_equal(data, read_data) @pytest.mark.parametrize("index_location", ["start", "end"]) @pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) @pytest.mark.parametrize( "array_fixture", [ ArrayRequest(shape=(128,) * 3, dtype="uint16", order="F"), ], indirect=["array_fixture"], ) def test_sharding_partial_readwrite( store: Store, array_fixture: npt.NDArray[Any], index_location: ShardingCodecIndexLocation ) -> None: data = array_fixture spath = StorePath(store) a = zarr.create_array( spath, shape=data.shape, chunks=(1, data.shape[1], data.shape[2]), shards={"shape": data.shape, "index_location": index_location}, dtype=data.dtype, fill_value=0, filters=None, compressors=None, ) a[:] = data for x in range(data.shape[0]): read_data = a[x, :, :] assert np.array_equal(data[x], read_data) @pytest.mark.parametrize( "array_fixture", [ ArrayRequest(shape=(128,) * 3, dtype="uint16", order="F"), ], indirect=["array_fixture"], ) @pytest.mark.parametrize("index_location", ["start", "end"]) @pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) def test_sharding_partial_read( store: Store, array_fixture: npt.NDArray[Any], index_location: ShardingCodecIndexLocation ) -> None: data = array_fixture spath = StorePath(store) a = zarr.create_array( spath, shape=tuple(a + 10 for a in data.shape), chunks=(32, 32, 32), shards={"shape": (64, 64, 64), "index_location": index_location}, compressors=BloscCodec(cname="lz4"), filters=[TransposeCodec(order=order_from_dim("F", data.ndim))], dtype=data.dtype, fill_value=1, ) read_data = a[0:10, 0:10, 0:10] assert np.all(read_data == 1) @pytest.mark.parametrize( "array_fixture", [ ArrayRequest(shape=(128,) * 3, dtype="uint16", order="F"), ], indirect=["array_fixture"], ) @pytest.mark.parametrize("index_location", ["start", "end"]) @pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) def test_sharding_partial_overwrite( store: Store, array_fixture: npt.NDArray[Any], index_location: ShardingCodecIndexLocation ) -> None: data = array_fixture[:10, :10, :10] spath = StorePath(store) a = zarr.create_array( spath, shape=tuple(a + 10 for a in data.shape), 
chunks=(32, 32, 32), shards={"shape": (64, 64, 64), "index_location": index_location}, compressors=BloscCodec(cname="lz4"), filters=[TransposeCodec(order=order_from_dim("F", data.ndim))], dtype=data.dtype, fill_value=1, ) a[:10, :10, :10] = data read_data = a[0:10, 0:10, 0:10] assert np.array_equal(data, read_data) data += 10 if isinstance(store, ZipStore): with pytest.warns(UserWarning, match="Duplicate name: "): a[:10, :10, :10] = data else: a[:10, :10, :10] = data read_data = a[0:10, 0:10, 0:10] assert np.array_equal(data, read_data) @pytest.mark.parametrize( "array_fixture", [ ArrayRequest(shape=(128,) * 3, dtype="uint16", order="F"), ], indirect=["array_fixture"], ) @pytest.mark.parametrize( "outer_index_location", ["start", "end"], ) @pytest.mark.parametrize( "inner_index_location", ["start", "end"], ) @pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) def test_nested_sharding( store: Store, array_fixture: npt.NDArray[Any], outer_index_location: ShardingCodecIndexLocation, inner_index_location: ShardingCodecIndexLocation, ) -> None: data = array_fixture spath = StorePath(store) msg = "Combining a `sharding_indexed` codec disables partial reads and writes, which may lead to inefficient performance." with pytest.warns(ZarrUserWarning, match=msg): a = zarr.create_array( spath, shape=data.shape, chunks=(64, 64, 64), dtype=data.dtype, fill_value=0, serializer=ShardingCodec( chunk_shape=(32, 32, 32), codecs=[ ShardingCodec(chunk_shape=(16, 16, 16), index_location=inner_index_location) ], index_location=outer_index_location, ), ) a[:, :, :] = data read_data = a[0 : data.shape[0], 0 : data.shape[1], 0 : data.shape[2]] assert isinstance(read_data, NDArrayLike) assert data.shape == read_data.shape assert np.array_equal(data, read_data) @pytest.mark.parametrize( "array_fixture", [ ArrayRequest(shape=(128,) * 3, dtype="uint16", order="F"), ], indirect=["array_fixture"], ) @pytest.mark.parametrize( "outer_index_location", ["start", "end"], ) @pytest.mark.parametrize( "inner_index_location", ["start", "end"], ) @pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) def test_nested_sharding_create_array( store: Store, array_fixture: npt.NDArray[Any], outer_index_location: ShardingCodecIndexLocation, inner_index_location: ShardingCodecIndexLocation, ) -> None: data = array_fixture spath = StorePath(store) a = zarr.create_array( spath, shape=data.shape, chunks=(32, 32, 32), dtype=data.dtype, fill_value=0, serializer=ShardingCodec( chunk_shape=(32, 32, 32), codecs=[ShardingCodec(chunk_shape=(16, 16, 16), index_location=inner_index_location)], index_location=outer_index_location, ), filters=None, compressors=None, ) print(a.metadata.to_dict()) a[:, :, :] = data read_data = a[0 : data.shape[0], 0 : data.shape[1], 0 : data.shape[2]] assert isinstance(read_data, NDArrayLike) assert data.shape == read_data.shape assert np.array_equal(data, read_data) @pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) def test_open_sharding(store: Store) -> None: path = "open_sharding" spath = StorePath(store, path) a = zarr.create_array( spath, shape=(16, 16), chunks=(8, 8), shards=(16, 16), filters=[TransposeCodec(order=order_from_dim("F", 2))], compressors=BloscCodec(), dtype="int32", fill_value=0, ) b = Array.open(spath) assert a.metadata == b.metadata @pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) def test_write_partial_sharded_chunks(store: Store) -> None: data = np.arange(0, 16 * 16, 
dtype="uint16").reshape((16, 16)) spath = StorePath(store) a = zarr.create_array( spath, shape=(40, 40), chunks=(10, 10), shards=(20, 20), dtype=data.dtype, compressors=BloscCodec(), fill_value=1, ) a[0:16, 0:16] = data assert np.array_equal(a[0:16, 0:16], data) @pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) async def test_delete_empty_shards(store: Store) -> None: if not store.supports_deletes: pytest.skip("store does not support deletes") path = "delete_empty_shards" spath = StorePath(store, path) a = await zarr.api.asynchronous.create_array( spath, shape=(16, 16), chunks=(8, 8), shards=(8, 16), dtype="uint16", compressors=None, fill_value=1, ) print(a.metadata.to_dict()) await _AsyncArrayProxy(a)[:, :].set(np.zeros((16, 16))) await _AsyncArrayProxy(a)[8:, :].set(np.ones((8, 16))) await _AsyncArrayProxy(a)[:, 8:].set(np.ones((16, 8))) # chunk (0, 0) is full # chunks (0, 1), (1, 0), (1, 1) are empty # shard (0, 0) is half-full # shard (1, 0) is empty data = np.ones((16, 16), dtype="uint16") data[:8, :8] = 0 assert np.array_equal(data, await _AsyncArrayProxy(a)[:, :].get()) assert await store.get(f"{path}/c/1/0", prototype=default_buffer_prototype()) is None chunk_bytes = await store.get(f"{path}/c/0/0", prototype=default_buffer_prototype()) assert chunk_bytes is not None assert len(chunk_bytes) == 16 * 2 + 8 * 8 * 2 + 4 def test_pickle() -> None: codec = ShardingCodec(chunk_shape=(8, 8)) assert pickle.loads(pickle.dumps(codec)) == codec @pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) @pytest.mark.parametrize( "index_location", [ShardingCodecIndexLocation.start, ShardingCodecIndexLocation.end] ) async def test_sharding_with_empty_inner_chunk( store: Store, index_location: ShardingCodecIndexLocation ) -> None: data = np.arange(0, 16 * 16, dtype="uint32").reshape((16, 16)) fill_value = 1 path = f"sharding_with_empty_inner_chunk_{index_location}" spath = StorePath(store, path) a = await zarr.api.asynchronous.create_array( spath, shape=(16, 16), chunks=(4, 4), shards={"shape": (8, 8), "index_location": index_location}, dtype="uint32", fill_value=fill_value, ) data[:4, :4] = fill_value await a.setitem(..., data) print("read data") data_read = await a.getitem(...) assert np.array_equal(data_read, data) @pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) @pytest.mark.parametrize( "index_location", [ShardingCodecIndexLocation.start, ShardingCodecIndexLocation.end], ) @pytest.mark.parametrize("chunks_per_shard", [(5, 2), (2, 5), (5, 5)]) async def test_sharding_with_chunks_per_shard( store: Store, index_location: ShardingCodecIndexLocation, chunks_per_shard: tuple[int] ) -> None: chunk_shape = (2, 1) shape = tuple(x * y for x, y in zip(chunks_per_shard, chunk_shape, strict=False)) data = np.ones(np.prod(shape), dtype="int32").reshape(shape) fill_value = 42 path = f"test_sharding_with_chunks_per_shard_{index_location}" spath = StorePath(store, path) a = zarr.create_array( spath, shape=shape, chunks=chunk_shape, shards={"shape": shape, "index_location": index_location}, dtype="int32", fill_value=fill_value, ) a[...] = data data_read = a[...] 
assert np.array_equal(data_read, data) @pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) def test_invalid_metadata(store: Store) -> None: spath1 = StorePath(store, "invalid_inner_chunk_shape") with pytest.raises(ValueError): zarr.create_array( spath1, shape=(16, 16), shards=(16, 16), chunks=(8,), dtype=np.dtype("uint8"), fill_value=0, ) spath2 = StorePath(store, "invalid_inner_chunk_shape") with pytest.raises(ValueError): zarr.create_array( spath2, shape=(16, 16), shards=(16, 16), chunks=(8, 7), dtype=np.dtype("uint8"), fill_value=0, ) def test_invalid_shard_shape() -> None: with pytest.raises( ValueError, match=re.escape( "The array's `chunk_shape` (got (16, 16)) needs to be divisible by the shard's inner `chunk_shape` (got (9,))." ), ): zarr.create_array( {}, shape=(16, 16), shards=(16, 16), chunks=(9,), dtype=np.dtype("uint8"), fill_value=0, ) zarr-python-3.1.5/tests/test_codecs/test_transpose.py000066400000000000000000000065311511007055700231130ustar00rootroot00000000000000import numpy as np import pytest import zarr from zarr import AsyncArray, config from zarr.abc.store import Store from zarr.codecs import TransposeCodec from zarr.core.common import MemoryOrder from zarr.storage import StorePath from .test_codecs import _AsyncArrayProxy @pytest.mark.parametrize("input_order", ["F", "C"]) @pytest.mark.parametrize("runtime_write_order", ["F", "C"]) @pytest.mark.parametrize("runtime_read_order", ["F", "C"]) @pytest.mark.parametrize("with_sharding", [True, False]) @pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) async def test_transpose( store: Store, input_order: MemoryOrder, runtime_write_order: MemoryOrder, runtime_read_order: MemoryOrder, with_sharding: bool, ) -> None: data = np.arange(0, 256, dtype="uint16").reshape((1, 32, 8), order=input_order) spath = StorePath(store, path="transpose") with config.set({"array.order": runtime_write_order}): a = await zarr.api.asynchronous.create_array( spath, shape=data.shape, chunks=(1, 16, 8) if with_sharding else (1, 32, 8), shards=(1, 32, 8) if with_sharding else None, dtype=data.dtype, fill_value=0, chunk_key_encoding={"name": "v2", "separator": "."}, filters=[TransposeCodec(order=(2, 1, 0))], ) await _AsyncArrayProxy(a)[:, :].set(data) read_data = await _AsyncArrayProxy(a)[:, :].get() assert np.array_equal(data, read_data) with config.set({"array.order": runtime_read_order}): a = await AsyncArray.open( spath, ) read_data = await _AsyncArrayProxy(a)[:, :].get() assert np.array_equal(data, read_data) assert isinstance(read_data, np.ndarray) if runtime_read_order == "F": assert read_data.flags["F_CONTIGUOUS"] assert not read_data.flags["C_CONTIGUOUS"] else: assert not read_data.flags["F_CONTIGUOUS"] assert read_data.flags["C_CONTIGUOUS"] @pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) @pytest.mark.parametrize("order", [[1, 2, 0], [1, 2, 3, 0], [3, 2, 4, 0, 1]]) def test_transpose_non_self_inverse(store: Store, order: list[int]) -> None: shape = [i + 3 for i in range(len(order))] data = np.arange(0, np.prod(shape), dtype="uint16").reshape(shape) spath = StorePath(store, "transpose_non_self_inverse") a = zarr.create_array( spath, shape=data.shape, chunks=data.shape, dtype=data.dtype, fill_value=0, filters=[TransposeCodec(order=order)], ) a[:, :] = data read_data = a[:, :] assert np.array_equal(data, read_data) @pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) def test_transpose_invalid( store: Store, ) -> None: data = np.arange(0, 256, 
dtype="uint16").reshape((1, 32, 8)) spath = StorePath(store, "transpose_invalid") for order in [(1, 0), (3, 2, 1), (3, 3, 1), "F", "C"]: with pytest.raises((ValueError, TypeError)): zarr.create_array( spath, shape=data.shape, chunks=(1, 32, 8), dtype=data.dtype, fill_value=0, chunk_key_encoding={"name": "v2", "separator": "."}, filters=[TransposeCodec(order=order)], # type: ignore[arg-type] ) zarr-python-3.1.5/tests/test_codecs/test_vlen.py000066400000000000000000000044231511007055700220370ustar00rootroot00000000000000from typing import Any import numpy as np import pytest import zarr from zarr import Array from zarr.abc.codec import Codec from zarr.abc.store import Store from zarr.codecs import ZstdCodec from zarr.core.dtype import get_data_type_from_native_dtype from zarr.core.dtype.npy.string import _NUMPY_SUPPORTS_VLEN_STRING from zarr.core.metadata.v3 import ArrayV3Metadata from zarr.storage import StorePath numpy_str_dtypes: list[type | str | None] = [None, str, "str", np.dtypes.StrDType, "S", "U"] expected_array_string_dtype: np.dtype[Any] if _NUMPY_SUPPORTS_VLEN_STRING: numpy_str_dtypes.append(np.dtypes.StringDType) expected_array_string_dtype = np.dtypes.StringDType() else: expected_array_string_dtype = np.dtype("O") @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") @pytest.mark.parametrize("store", ["memory", "local"], indirect=["store"]) @pytest.mark.parametrize("dtype", numpy_str_dtypes) @pytest.mark.parametrize("as_object_array", [False, True]) @pytest.mark.parametrize("compressor", [None, ZstdCodec()]) def test_vlen_string( store: Store, dtype: np.dtype[Any] | None, as_object_array: bool, compressor: Codec | None ) -> None: strings = ["hello", "world", "this", "is", "a", "test"] data = np.array(strings, dtype=dtype).reshape((2, 3)) sp = StorePath(store, path="string") a = zarr.create_array( sp, shape=data.shape, chunks=data.shape, dtype=data.dtype, fill_value="", compressors=compressor, # type: ignore[arg-type] ) assert isinstance(a.metadata, ArrayV3Metadata) # needed for mypy # should also work if input array is an object array, provided we explicitly specified # a stringlike dtype when creating the Array if as_object_array: data_obj = data.astype("O") a[:, :] = data_obj else: a[:, :] = data assert np.array_equal(data, a[:, :]) assert a.metadata.data_type == get_data_type_from_native_dtype(data.dtype) assert a.dtype == data.dtype # test round trip b = Array.open(sp) assert isinstance(b.metadata, ArrayV3Metadata) # needed for mypy assert np.array_equal(data, b[:, :]) assert b.metadata.data_type == get_data_type_from_native_dtype(data.dtype) assert a.dtype == data.dtype zarr-python-3.1.5/tests/test_codecs/test_zstd.py000066400000000000000000000012551511007055700220570ustar00rootroot00000000000000import numpy as np import pytest import zarr from zarr.abc.store import Store from zarr.codecs import ZstdCodec from zarr.storage import StorePath @pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) @pytest.mark.parametrize("checksum", [True, False]) def test_zstd(store: Store, checksum: bool) -> None: data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) a = zarr.create_array( StorePath(store, path="zstd"), shape=data.shape, chunks=(16, 16), dtype=data.dtype, fill_value=0, compressors=ZstdCodec(level=0, checksum=checksum), ) a[:, :] = data assert np.array_equal(data, a[:, :]) zarr-python-3.1.5/tests/test_common.py000066400000000000000000000076231511007055700200710ustar00rootroot00000000000000from __future__ import 
annotations from typing import TYPE_CHECKING, get_args import numpy as np import pytest from zarr.core.common import ( ANY_ACCESS_MODE, AccessModeLiteral, parse_name, parse_shapelike, product, ) from zarr.core.config import parse_indexing_order if TYPE_CHECKING: from collections.abc import Iterable from typing import Any, Literal @pytest.mark.parametrize("data", [(0, 0, 0, 0), (1, 3, 4, 5, 6), (2, 4)]) def test_product(data: tuple[int, ...]) -> None: assert product(data) == np.prod(data) def test_access_modes() -> None: """ Test that the access modes type and variable for run-time checking are equivalent. """ assert set(ANY_ACCESS_MODE) == set(get_args(AccessModeLiteral)) # todo: test def test_concurrent_map() -> None: ... # todo: test def test_to_thread() -> None: ... # todo: test def test_enum_names() -> None: ... # todo: test def test_parse_enum() -> None: ... @pytest.mark.parametrize("data", [("foo", "bar"), (10, 11)]) def test_parse_name_invalid(data: tuple[Any, Any]) -> None: observed, expected = data if isinstance(observed, str): with pytest.raises(ValueError, match=f"Expected '{expected}'. Got {observed} instead."): parse_name(observed, expected) else: with pytest.raises( TypeError, match=f"Expected a string, got an instance of {type(observed)}." ): parse_name(observed, expected) @pytest.mark.parametrize("data", [("foo", "foo"), ("10", "10")]) def test_parse_name_valid(data: tuple[Any, Any]) -> None: observed, expected = data assert parse_name(observed, expected) == observed @pytest.mark.parametrize("data", [0, 1, "hello", "f"]) def test_parse_indexing_order_invalid(data: Any) -> None: with pytest.raises(ValueError, match="Expected one of"): parse_indexing_order(data) @pytest.mark.parametrize("data", ["C", "F"]) def parse_indexing_order_valid(data: Literal["C", "F"]) -> None: assert parse_indexing_order(data) == data @pytest.mark.parametrize("data", [lambda v: v, slice(None)]) def test_parse_shapelike_invalid_single_type(data: Any) -> None: """ Test that we get the expected error message when passing in a value that is not an integer or an iterable of integers. """ with pytest.raises(TypeError, match="Expected an integer or an iterable of integers."): parse_shapelike(data) def test_parse_shapelike_invalid_single_value() -> None: """ Test that we get the expected error message when passing in a negative integer. 
""" with pytest.raises(ValueError, match="Expected a non-negative integer."): parse_shapelike(-1) @pytest.mark.parametrize("data", ["shape", ("0", 1, 2, 3), {"0": "0"}, ((1, 2), (2, 2)), (4.0, 2)]) def test_parse_shapelike_invalid_iterable_types(data: Any) -> None: """ Test that we get the expected error message when passing in an iterable containing non-integer elements """ with pytest.raises(TypeError, match="Expected an iterable of integers"): parse_shapelike(data) @pytest.mark.parametrize("data", [(1, 2, 3, -1), (-10,)]) def test_parse_shapelike_invalid_iterable_values(data: Any) -> None: """ Test that we get the expected error message when passing in an iterable containing negative integers """ with pytest.raises(ValueError, match="Expected all values to be non-negative."): parse_shapelike(data) @pytest.mark.parametrize("data", [range(10), [0, 1, 2, 3], (3, 4, 5), ()]) def test_parse_shapelike_valid(data: Iterable[int]) -> None: assert parse_shapelike(data) == tuple(data) # todo: more dtypes @pytest.mark.parametrize("data", [("uint8", np.uint8), ("float64", np.float64)]) def parse_dtype(data: tuple[str, np.dtype[Any]]) -> None: unparsed, parsed = data assert parse_dtype(unparsed) == parsed # todo: figure out what it means to test this def test_parse_fill_value() -> None: ... zarr-python-3.1.5/tests/test_config.py000066400000000000000000000301461511007055700200420ustar00rootroot00000000000000import os from collections.abc import Iterable from typing import Any from unittest import mock from unittest.mock import Mock import numpy as np import pytest import zarr from zarr import zeros from zarr.abc.codec import CodecPipeline from zarr.abc.store import ByteSetter, Store from zarr.codecs import ( BloscCodec, BytesCodec, Crc32cCodec, ShardingCodec, ) from zarr.core.array_spec import ArraySpec from zarr.core.buffer import NDBuffer from zarr.core.buffer.core import Buffer from zarr.core.codec_pipeline import BatchedCodecPipeline from zarr.core.config import BadConfigError, config from zarr.core.indexing import SelectorTuple from zarr.errors import ZarrUserWarning from zarr.registry import ( fully_qualified_name, get_buffer_class, get_codec_class, get_ndbuffer_class, get_pipeline_class, register_buffer, register_codec, register_ndbuffer, register_pipeline, ) from zarr.testing.buffer import ( NDBufferUsingTestNDArrayLike, StoreExpectingTestBuffer, TestBuffer, TestNDArrayLike, ) def test_config_defaults_set() -> None: # regression test for available defaults assert ( config.defaults == [ { "default_zarr_format": 3, "array": { "order": "C", "write_empty_chunks": False, "target_shard_size_bytes": None, }, "async": {"concurrency": 10, "timeout": None}, "threading": {"max_workers": None}, "json_indent": 2, "codec_pipeline": { "path": "zarr.core.codec_pipeline.BatchedCodecPipeline", "batch_size": 1, }, "codecs": { "blosc": "zarr.codecs.blosc.BloscCodec", "gzip": "zarr.codecs.gzip.GzipCodec", "zstd": "zarr.codecs.zstd.ZstdCodec", "bytes": "zarr.codecs.bytes.BytesCodec", "endian": "zarr.codecs.bytes.BytesCodec", # compatibility with earlier versions of ZEP1 "crc32c": "zarr.codecs.crc32c_.Crc32cCodec", "sharding_indexed": "zarr.codecs.sharding.ShardingCodec", "transpose": "zarr.codecs.transpose.TransposeCodec", "vlen-utf8": "zarr.codecs.vlen_utf8.VLenUTF8Codec", "vlen-bytes": "zarr.codecs.vlen_utf8.VLenBytesCodec", "numcodecs.bz2": "zarr.codecs.numcodecs.BZ2", "numcodecs.crc32": "zarr.codecs.numcodecs.CRC32", "numcodecs.crc32c": "zarr.codecs.numcodecs.CRC32C", "numcodecs.lz4": 
"zarr.codecs.numcodecs.LZ4", "numcodecs.lzma": "zarr.codecs.numcodecs.LZMA", "numcodecs.zfpy": "zarr.codecs.numcodecs.ZFPY", "numcodecs.adler32": "zarr.codecs.numcodecs.Adler32", "numcodecs.astype": "zarr.codecs.numcodecs.AsType", "numcodecs.bitround": "zarr.codecs.numcodecs.BitRound", "numcodecs.blosc": "zarr.codecs.numcodecs.Blosc", "numcodecs.delta": "zarr.codecs.numcodecs.Delta", "numcodecs.fixedscaleoffset": "zarr.codecs.numcodecs.FixedScaleOffset", "numcodecs.fletcher32": "zarr.codecs.numcodecs.Fletcher32", "numcodecs.gzip": "zarr.codecs.numcodecs.GZip", "numcodecs.jenkins_lookup3": "zarr.codecs.numcodecs.JenkinsLookup3", "numcodecs.pcodec": "zarr.codecs.numcodecs.PCodec", "numcodecs.packbits": "zarr.codecs.numcodecs.PackBits", "numcodecs.shuffle": "zarr.codecs.numcodecs.Shuffle", "numcodecs.quantize": "zarr.codecs.numcodecs.Quantize", "numcodecs.zlib": "zarr.codecs.numcodecs.Zlib", "numcodecs.zstd": "zarr.codecs.numcodecs.Zstd", }, "buffer": "zarr.buffer.cpu.Buffer", "ndbuffer": "zarr.buffer.cpu.NDBuffer", } ] ) assert config.get("array.order") == "C" assert config.get("async.concurrency") == 10 assert config.get("async.timeout") is None assert config.get("codec_pipeline.batch_size") == 1 assert config.get("json_indent") == 2 @pytest.mark.parametrize( ("key", "old_val", "new_val"), [("array.order", "C", "F"), ("async.concurrency", 10, 128), ("json_indent", 2, 0)], ) def test_config_defaults_can_be_overridden(key: str, old_val: Any, new_val: Any) -> None: assert config.get(key) == old_val with config.set({key: new_val}): assert config.get(key) == new_val def test_fully_qualified_name() -> None: class MockClass: pass assert ( fully_qualified_name(MockClass) == "tests.test_config.test_fully_qualified_name..MockClass" ) @pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) def test_config_codec_pipeline_class(store: Store) -> None: # has default value assert get_pipeline_class().__name__ != "" config.set({"codec_pipeline.name": "zarr.core.codec_pipeline.BatchedCodecPipeline"}) assert get_pipeline_class() == zarr.core.codec_pipeline.BatchedCodecPipeline _mock = Mock() class MockCodecPipeline(BatchedCodecPipeline): async def write( self, batch_info: Iterable[tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple, bool]], value: NDBuffer, drop_axes: tuple[int, ...] 
= (), ) -> None: _mock.call() register_pipeline(MockCodecPipeline) config.set({"codec_pipeline.path": fully_qualified_name(MockCodecPipeline)}) assert get_pipeline_class() == MockCodecPipeline # test if codec is used arr = zarr.create_array( store=store, shape=(100,), chunks=(10,), zarr_format=3, dtype="i4", ) arr[:] = range(100) _mock.call.assert_called() config.set({"codec_pipeline.path": "wrong_name"}) with pytest.raises(BadConfigError): get_pipeline_class() class MockEnvCodecPipeline(CodecPipeline): pass register_pipeline(MockEnvCodecPipeline) # type: ignore[type-abstract] with mock.patch.dict( os.environ, {"ZARR_CODEC_PIPELINE__PATH": fully_qualified_name(MockEnvCodecPipeline)} ): assert get_pipeline_class(reload_config=True) == MockEnvCodecPipeline @pytest.mark.filterwarnings("error") @pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) def test_config_codec_implementation(store: Store) -> None: # has default value assert fully_qualified_name(get_codec_class("blosc")) == config.defaults[0]["codecs"]["blosc"] _mock = Mock() class MockBloscCodec(BloscCodec): async def _encode_single(self, chunk_bytes: Buffer, chunk_spec: ArraySpec) -> Buffer | None: _mock.call() return None register_codec("blosc", MockBloscCodec) with config.set({"codecs.blosc": fully_qualified_name(MockBloscCodec)}): assert get_codec_class("blosc") == MockBloscCodec # test if codec is used arr = zarr.create_array( store=store, shape=(100,), chunks=(10,), zarr_format=3, dtype="i4", compressors=[{"name": "blosc", "configuration": {}}], ) arr[:] = range(100) _mock.call.assert_called() # test set codec with environment variable class NewBloscCodec(BloscCodec): pass register_codec("blosc", NewBloscCodec) with mock.patch.dict(os.environ, {"ZARR_CODECS__BLOSC": fully_qualified_name(NewBloscCodec)}): assert get_codec_class("blosc", reload_config=True) == NewBloscCodec @pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) def test_config_ndbuffer_implementation(store: Store) -> None: # set custom ndbuffer with TestNDArrayLike implementation register_ndbuffer(NDBufferUsingTestNDArrayLike) with config.set({"ndbuffer": fully_qualified_name(NDBufferUsingTestNDArrayLike)}): assert get_ndbuffer_class() == NDBufferUsingTestNDArrayLike arr = zarr.create_array( store=store, shape=(100,), chunks=(10,), zarr_format=3, dtype="i4", ) got = arr[:] assert isinstance(got, TestNDArrayLike) def test_config_buffer_implementation() -> None: # has default value assert config.defaults[0]["buffer"] == "zarr.buffer.cpu.Buffer" arr = zeros(shape=(100,), store=StoreExpectingTestBuffer()) # AssertionError of StoreExpectingTestBuffer when not using my buffer with pytest.raises(AssertionError): arr[:] = np.arange(100) register_buffer(TestBuffer) with config.set({"buffer": fully_qualified_name(TestBuffer)}): assert get_buffer_class() == TestBuffer # no error using TestBuffer data = np.arange(100) arr[:] = np.arange(100) assert np.array_equal(arr[:], data) data2d = np.arange(1000).reshape(100, 10) arr_sharding = zeros( shape=(100, 10), store=StoreExpectingTestBuffer(), codecs=[ShardingCodec(chunk_shape=(10, 10))], ) arr_sharding[:] = data2d assert np.array_equal(arr_sharding[:], data2d) arr_Crc32c = zeros( shape=(100, 10), store=StoreExpectingTestBuffer(), codecs=[BytesCodec(), Crc32cCodec()], ) arr_Crc32c[:] = data2d assert np.array_equal(arr_Crc32c[:], data2d) def test_config_buffer_backwards_compatibility() -> None: # This should warn once zarr.core is private # 
https://github.com/zarr-developers/zarr-python/issues/2621 with zarr.config.set( {"buffer": "zarr.core.buffer.cpu.Buffer", "ndbuffer": "zarr.core.buffer.cpu.NDBuffer"} ): get_buffer_class() get_ndbuffer_class() @pytest.mark.gpu def test_config_buffer_backwards_compatibility_gpu() -> None: # This should warn once zarr.core is private # https://github.com/zarr-developers/zarr-python/issues/2621 with zarr.config.set( {"buffer": "zarr.core.buffer.gpu.Buffer", "ndbuffer": "zarr.core.buffer.gpu.NDBuffer"} ): get_buffer_class() get_ndbuffer_class() @pytest.mark.filterwarnings("error") def test_warning_on_missing_codec_config() -> None: class NewCodec(BytesCodec): pass class NewCodec2(BytesCodec): pass # error if codec is not registered with pytest.raises(KeyError): get_codec_class("missing_codec") # no warning if only one implementation is available register_codec("new_codec", NewCodec) get_codec_class("new_codec") # warning because multiple implementations are available but none is selected in the config register_codec("new_codec", NewCodec2) with pytest.warns( ZarrUserWarning, match="not configured in config. Selecting any implementation" ): get_codec_class("new_codec") # no warning if multiple implementations are available and one is selected in the config with config.set({"codecs.new_codec": fully_qualified_name(NewCodec)}): get_codec_class("new_codec") @pytest.mark.parametrize( "key", [ "array.v2_default_compressor.numeric", "array.v2_default_compressor.string", "array.v2_default_compressor.bytes", "array.v2_default_filters.string", "array.v2_default_filters.bytes", "array.v3_default_filters.numeric", "array.v3_default_filters.raw", "array.v3_default_filters.bytes", "array.v3_default_serializer.numeric", "array.v3_default_serializer.string", "array.v3_default_serializer.bytes", "array.v3_default_compressors.string", "array.v3_default_compressors.bytes", "array.v3_default_compressors", ], ) def test_deprecated_config(key: str) -> None: """ Test that a valuerror is raised when setting the default chunk encoding for a given data type category """ with pytest.raises(ValueError): with zarr.config.set({key: "foo"}): pass zarr-python-3.1.5/tests/test_docs.py000066400000000000000000000101321511007055700175160ustar00rootroot00000000000000""" Tests for executable code blocks in markdown documentation. This module uses pytest-examples to validate that all Python code examples with exec="true" in the documentation execute successfully. """ from __future__ import annotations from collections import defaultdict from pathlib import Path import pytest pytest.importorskip("pytest_examples") from pytest_examples import CodeExample, EvalExample, find_examples # Find all markdown files with executable code blocks DOCS_ROOT = Path(__file__).parent.parent / "docs" SOURCES_ROOT = Path(__file__).parent.parent / "src" / "zarr" def find_markdown_files_with_exec() -> list[Path]: """Find all markdown files containing exec="true" code blocks.""" markdown_files = [] for md_file in DOCS_ROOT.rglob("*.md"): try: content = md_file.read_text(encoding="utf-8") if 'exec="true"' in content: markdown_files.append(md_file) except Exception: # Skip files that can't be read continue return sorted(markdown_files) def group_examples_by_session() -> list[tuple[str, str]]: """ Group examples by their session and file, maintaining order. Returns a list of session_key tuples where session_key is (file_path, session_name). 
""" all_examples = list(find_examples(DOCS_ROOT)) # Group by file and session sessions = defaultdict(list) for example in all_examples: settings = example.prefix_settings() if settings.get("exec") != "true": continue # Use file path and session name as key file_path = example.path session_name = settings.get("session", "_default") session_key = (str(file_path), session_name) sessions[session_key].append(example) # Return sorted list of session keys for consistent test ordering return sorted(sessions.keys(), key=lambda x: (x[0], x[1])) def name_example(path: str, session: str) -> str: """Generate a readable name for a test case from file path and session.""" return f"{Path(path).relative_to(DOCS_ROOT)}:{session}" # Get all example sessions @pytest.mark.parametrize( "session_key", group_examples_by_session(), ids=lambda v: name_example(v[0], v[1]) ) def test_documentation_examples( session_key: tuple[str, str], eval_example: EvalExample, ) -> None: """ Test that all exec="true" code examples in documentation execute successfully. This test groups examples by session (file + session name) and runs them sequentially in the same execution context, allowing code to build on previous examples. This test uses pytest-examples to: - Find all code examples with exec="true" in markdown files - Group them by session - Execute them in order within the same context - Verify no exceptions are raised """ file_path, session_name = session_key # Get examples for this session all_examples = list(find_examples(DOCS_ROOT)) examples = [] for example in all_examples: settings = example.prefix_settings() if settings.get("exec") != "true": continue if str(example.path) == file_path and settings.get("session", "_default") == session_name: examples.append(example) # Run all examples in this session sequentially, preserving state module_globals: dict[str, object] = {} for example in examples: # TODO: uncomment this line when we are ready to fix output checks # result = eval_example.run_print_check(example, module_globals=module_globals) result = eval_example.run(example, module_globals=module_globals) # Update globals with the results from this execution module_globals.update(result) @pytest.mark.parametrize("example", find_examples(str(SOURCES_ROOT)), ids=str) def test_docstrings(example: CodeExample, eval_example: EvalExample) -> None: """Test our docstring examples.""" if example.path.name == "config.py" and "your.module" in example.source: pytest.skip("Skip testing docstring example that assumes nonexistent module.") eval_example.run_print_check(example) zarr-python-3.1.5/tests/test_dtype/000077500000000000000000000000001511007055700173445ustar00rootroot00000000000000zarr-python-3.1.5/tests/test_dtype/__init__.py000066400000000000000000000000001511007055700214430ustar00rootroot00000000000000zarr-python-3.1.5/tests/test_dtype/conftest.py000066400000000000000000000051421511007055700215450ustar00rootroot00000000000000# Generate a collection of zdtype instances for use in testing. import warnings from typing import Any import numpy as np from zarr.core.dtype import data_type_registry from zarr.core.dtype.common import HasLength from zarr.core.dtype.npy.structured import Structured from zarr.core.dtype.npy.time import DateTime64, TimeDelta64 from zarr.core.dtype.wrapper import ZDType zdtype_examples: tuple[ZDType[Any, Any], ...] 
= () for wrapper_cls in data_type_registry.contents.values(): # The Structured dtype has to be constructed with some actual fields if wrapper_cls is Structured: with warnings.catch_warnings(): warnings.simplefilter("ignore") zdtype_examples += ( wrapper_cls.from_native_dtype(np.dtype([("a", np.float64), ("b", np.int8)])), ) elif issubclass(wrapper_cls, HasLength): zdtype_examples += (wrapper_cls(length=1),) elif issubclass(wrapper_cls, DateTime64 | TimeDelta64): zdtype_examples += (wrapper_cls(unit="s", scale_factor=10),) else: zdtype_examples += (wrapper_cls(),) def pytest_generate_tests(metafunc: Any) -> None: """ This is a pytest hook to parametrize class-scoped fixtures. This hook allows us to define class-scoped fixtures as class attributes and then generate the parametrize calls for pytest. This allows the fixtures to be reused across multiple tests within the same class. For example, if you had a regular pytest class like this: class TestClass: @pytest.mark.parametrize("param_a", [1, 2, 3]) def test_method(self, param_a): ... Child classes inheriting from ``TestClass`` would not be able to override the ``param_a`` fixture this implementation of ``pytest_generate_tests`` allows you to define class-scoped fixtures as class attributes, which allows the following to work: class TestExample: param_a = [1, 2, 3] def test_example(self, param_a): ... # this class will have its test_example method parametrized with the values of TestB.param_a class TestB(TestExample): param_a = [1, 2, 100, 10] """ # Iterate over all the fixtures defined in the class # and parametrize them with the values defined in the class # This allows us to define class-scoped fixtures as class attributes # and then generate the parametrize calls for pytest for fixture_name in metafunc.fixturenames: if hasattr(metafunc.cls, fixture_name): params = getattr(metafunc.cls, fixture_name) metafunc.parametrize(fixture_name, params, scope="class", ids=str) zarr-python-3.1.5/tests/test_dtype/test_npy/000077500000000000000000000000001511007055700212115ustar00rootroot00000000000000zarr-python-3.1.5/tests/test_dtype/test_npy/test_bool.py000066400000000000000000000020501511007055700235520ustar00rootroot00000000000000from __future__ import annotations import numpy as np from tests.test_dtype.test_wrapper import BaseTestZDType from zarr.core.dtype.npy.bool import Bool class TestBool(BaseTestZDType): test_cls = Bool valid_dtype = (np.dtype(np.bool_),) invalid_dtype = ( np.dtype(np.int8), np.dtype(np.float64), np.dtype(np.uint16), ) valid_json_v2 = ({"name": "|b1", "object_codec_id": None},) valid_json_v3 = ("bool",) invalid_json_v2 = ( "|b1", "bool", "|f8", ) invalid_json_v3 = ( "|b1", "|f8", {"name": "bool", "configuration": {"endianness": "little"}}, ) scalar_v2_params = ((Bool(), True), (Bool(), False)) scalar_v3_params = ((Bool(), True), (Bool(), False)) cast_value_params = ( (Bool(), "true", np.True_), (Bool(), True, np.True_), (Bool(), False, np.False_), (Bool(), np.True_, np.True_), (Bool(), np.False_, np.False_), ) invalid_scalar_params = (None,) item_size_params = (Bool(),) zarr-python-3.1.5/tests/test_dtype/test_npy/test_bytes.py000066400000000000000000000126131511007055700237530ustar00rootroot00000000000000import numpy as np import pytest from tests.test_dtype.test_wrapper import BaseTestZDType from zarr.core.dtype.npy.bytes import NullTerminatedBytes, RawBytes, VariableLengthBytes from zarr.errors import UnstableSpecificationWarning class TestNullTerminatedBytes(BaseTestZDType): test_cls = NullTerminatedBytes valid_dtype = 
(np.dtype("|S10"), np.dtype("|S4")) invalid_dtype = ( np.dtype(np.int8), np.dtype(np.float64), np.dtype("|U10"), ) valid_json_v2 = ( {"name": "|S1", "object_codec_id": None}, {"name": "|S2", "object_codec_id": None}, {"name": "|S4", "object_codec_id": None}, ) valid_json_v3 = ({"name": "null_terminated_bytes", "configuration": {"length_bytes": 10}},) invalid_json_v2 = ( "|S", "|U10", "|f8", {"name": "|S4", "object_codec_id": "vlen-bytes"}, ) invalid_json_v3 = ( {"name": "fixed_length_ascii", "configuration": {"length_bits": 0}}, {"name": "numpy.fixed_length_ascii", "configuration": {"length_bits": "invalid"}}, ) scalar_v2_params = ( (NullTerminatedBytes(length=1), "MA=="), (NullTerminatedBytes(length=2), "YWI="), (NullTerminatedBytes(length=4), "YWJjZA=="), ) scalar_v3_params = ( (NullTerminatedBytes(length=1), "MA=="), (NullTerminatedBytes(length=2), "YWI="), (NullTerminatedBytes(length=4), "YWJjZA=="), ) cast_value_params = ( (NullTerminatedBytes(length=1), "", np.bytes_("")), (NullTerminatedBytes(length=2), "ab", np.bytes_("ab")), (NullTerminatedBytes(length=4), "abcdefg", np.bytes_("abcd")), ) invalid_scalar_params = ((NullTerminatedBytes(length=1), 1.0),) item_size_params = ( NullTerminatedBytes(length=1), NullTerminatedBytes(length=4), NullTerminatedBytes(length=10), ) class TestRawBytes(BaseTestZDType): test_cls = RawBytes valid_dtype = (np.dtype("|V10"),) invalid_dtype = ( np.dtype(np.int8), np.dtype(np.float64), np.dtype("|S10"), ) valid_json_v2 = ({"name": "|V10", "object_codec_id": None},) valid_json_v3 = ( {"name": "raw_bytes", "configuration": {"length_bytes": 1}}, {"name": "raw_bytes", "configuration": {"length_bytes": 8}}, ) invalid_json_v2 = ( "|V", "|S10", "|f8", ) invalid_json_v3 = ( {"name": "r10"}, {"name": "r-80"}, ) scalar_v2_params = ( (RawBytes(length=1), "AA=="), (RawBytes(length=2), "YWI="), (RawBytes(length=4), "YWJjZA=="), ) scalar_v3_params = ( (RawBytes(length=1), "AA=="), (RawBytes(length=2), "YWI="), (RawBytes(length=4), "YWJjZA=="), ) cast_value_params = ( (RawBytes(length=1), b"\x00", np.void(b"\x00")), (RawBytes(length=2), b"ab", np.void(b"ab")), (RawBytes(length=4), b"abcd", np.void(b"abcd")), ) invalid_scalar_params = ((RawBytes(length=1), 1.0),) item_size_params = ( RawBytes(length=1), RawBytes(length=4), RawBytes(length=10), ) class TestVariableLengthBytes(BaseTestZDType): test_cls = VariableLengthBytes valid_dtype = (np.dtype("|O"),) invalid_dtype = ( np.dtype(np.int8), np.dtype(np.float64), np.dtype("|U10"), ) valid_json_v2 = ({"name": "|O", "object_codec_id": "vlen-bytes"},) valid_json_v3 = ("variable_length_bytes",) invalid_json_v2 = ( "|S", "|U10", "|f8", ) invalid_json_v3 = ( {"name": "fixed_length_ascii", "configuration": {"length_bits": 0}}, {"name": "numpy.fixed_length_ascii", "configuration": {"length_bits": "invalid"}}, ) scalar_v2_params = ( (VariableLengthBytes(), ""), (VariableLengthBytes(), "YWI="), (VariableLengthBytes(), "YWJjZA=="), ) scalar_v3_params = ( (VariableLengthBytes(), ""), (VariableLengthBytes(), "YWI="), (VariableLengthBytes(), "YWJjZA=="), ) cast_value_params = ( (VariableLengthBytes(), "", b""), (VariableLengthBytes(), "ab", b"ab"), (VariableLengthBytes(), "abcdefg", b"abcdefg"), ) invalid_scalar_params = ((VariableLengthBytes(), 1.0),) item_size_params = (VariableLengthBytes(),) def test_vlen_bytes_alias() -> None: """Test that "bytes" is an accepted alias for "variable_length_bytes" in JSON metadata""" a = VariableLengthBytes.from_json("bytes", zarr_format=3) b = VariableLengthBytes.from_json("variable_length_bytes", 
zarr_format=3) assert a == b @pytest.mark.parametrize( "zdtype", [NullTerminatedBytes(length=10), RawBytes(length=10), VariableLengthBytes()] ) def test_unstable_dtype_warning( zdtype: NullTerminatedBytes | RawBytes | VariableLengthBytes, ) -> None: """ Test that we get a warning when serializing a dtype without a zarr v3 spec to json when zarr_format is 3 """ with pytest.warns(UnstableSpecificationWarning): zdtype.to_json(zarr_format=3) @pytest.mark.parametrize("zdtype_cls", [NullTerminatedBytes, RawBytes]) def test_invalid_size(zdtype_cls: type[NullTerminatedBytes] | type[RawBytes]) -> None: """ Test that it's impossible to create a data type that has no length """ length = 0 msg = f"length must be >= 1, got {length}." with pytest.raises(ValueError, match=msg): zdtype_cls(length=length) zarr-python-3.1.5/tests/test_dtype/test_npy/test_common.py000066400000000000000000000275311511007055700241220ustar00rootroot00000000000000from __future__ import annotations import base64 import re import sys from typing import TYPE_CHECKING, Any, get_args import numpy as np import pytest from tests.conftest import nan_equal from zarr.core.dtype.common import ENDIANNESS_STR, JSONFloatV2, SpecialFloatStrings from zarr.core.dtype.npy.common import ( NumpyEndiannessStr, bytes_from_json, bytes_to_json, check_json_bool, check_json_complex_float_v2, check_json_complex_float_v3, check_json_float_v2, check_json_float_v3, check_json_int, check_json_intish_float, check_json_str, complex_float_to_json_v2, complex_float_to_json_v3, endianness_from_numpy_str, endianness_to_numpy_str, float_from_json_v2, float_from_json_v3, float_to_json_v2, float_to_json_v3, ) if TYPE_CHECKING: from zarr.core.common import JSON, ZarrFormat json_float_v2_roundtrip_cases: tuple[tuple[JSONFloatV2, float | np.floating[Any]], ...] = ( ("Infinity", float("inf")), ("Infinity", np.inf), ("-Infinity", float("-inf")), ("-Infinity", -np.inf), ("NaN", float("nan")), ("NaN", np.nan), (1.0, 1.0), ) json_float_v3_cases = json_float_v2_roundtrip_cases @pytest.mark.parametrize( ("data", "expected"), [(">", "big"), ("<", "little"), ("=", sys.byteorder), ("|", None), ("err", "")], ) def test_endianness_from_numpy_str(data: str, expected: str | None) -> None: """ Test that endianness_from_numpy_str correctly converts a numpy str literal to a human-readable literal value. This test also checks that an invalid string input raises a ``ValueError`` """ if data in get_args(NumpyEndiannessStr): assert endianness_from_numpy_str(data) == expected # type: ignore[arg-type] else: msg = f"Invalid endianness: {data!r}. Expected one of {get_args(NumpyEndiannessStr)}" with pytest.raises(ValueError, match=re.escape(msg)): endianness_from_numpy_str(data) # type: ignore[arg-type] @pytest.mark.parametrize( ("data", "expected"), [("big", ">"), ("little", "<"), (None, "|"), ("err", "")], ) def test_endianness_to_numpy_str(data: str | None, expected: str) -> None: """ Test that endianness_to_numpy_str correctly converts a human-readable literal value to a numpy str literal. This test also checks that an invalid string input raises a ``ValueError`` """ if data in ENDIANNESS_STR: assert endianness_to_numpy_str(data) == expected # type: ignore[arg-type] else: msg = f"Invalid endianness: {data!r}. 
Expected one of {ENDIANNESS_STR}" with pytest.raises(ValueError, match=re.escape(msg)): endianness_to_numpy_str(data) # type: ignore[arg-type] @pytest.mark.parametrize( ("data", "expected"), json_float_v2_roundtrip_cases + (("SHOULD_ERR", ""),) ) def test_float_from_json_v2(data: JSONFloatV2 | str, expected: float | str) -> None: """ Test that float_from_json_v2 correctly converts a JSON string representation of a float to a float. This test also checks that an invalid string input raises a ``ValueError`` """ if data != "SHOULD_ERR": assert nan_equal(float_from_json_v2(data), expected) # type: ignore[arg-type] else: msg = f"could not convert string to float: {data!r}" with pytest.raises(ValueError, match=msg): float_from_json_v2(data) # type: ignore[arg-type] @pytest.mark.parametrize( ("data", "expected"), json_float_v3_cases + (("SHOULD_ERR", ""), ("0x", "")) ) def test_float_from_json_v3(data: JSONFloatV2 | str, expected: float | str) -> None: """ Test that float_from_json_v3 correctly converts a JSON string representation of a float to a float. This test also checks that an invalid string input raises a ``ValueError`` """ if data == "SHOULD_ERR": msg = ( f"Invalid float value: {data!r}. Expected a string starting with the hex prefix" " '0x', or one of 'NaN', 'Infinity', or '-Infinity'." ) with pytest.raises(ValueError, match=msg): float_from_json_v3(data) elif data == "0x": msg = ( f"Invalid hexadecimal float value: {data!r}. " "Expected the '0x' prefix to be followed by 4, 8, or 16 numeral characters" ) with pytest.raises(ValueError, match=msg): float_from_json_v3(data) else: assert nan_equal(float_from_json_v3(data), expected) # note the order of parameters relative to the order of the parametrized variable. @pytest.mark.parametrize(("expected", "data"), json_float_v2_roundtrip_cases) def test_float_to_json_v2(data: float | np.floating[Any], expected: JSONFloatV2) -> None: """ Test that floats are JSON-encoded properly for zarr v2 """ observed = float_to_json_v2(data) assert observed == expected # note the order of parameters relative to the order of the parametrized variable. @pytest.mark.parametrize(("expected", "data"), json_float_v3_cases) def test_float_to_json_v3(data: float | np.floating[Any], expected: JSONFloatV2) -> None: """ Test that floats are JSON-encoded properly for zarr v3 """ observed = float_to_json_v3(data) assert observed == expected def test_bytes_from_json(zarr_format: ZarrFormat) -> None: """ Test that a string is interpreted as base64-encoded bytes using the ascii alphabet. This test takes zarr_format as a parameter but doesn't actually do anything with it, because at present there is no zarr-format-specific logic in the code being tested, but such logic may exist in the future. """ data = "\00" assert bytes_from_json(data, zarr_format=zarr_format) == base64.b64decode(data.encode("ascii")) def test_bytes_to_json(zarr_format: ZarrFormat) -> None: """ Test that bytes are encoded with base64 using the ascii alphabet. This test takes zarr_format as a parameter but doesn't actually do anything with it, because at present there is no zarr-format-specific logic in the code being tested, but such logic may exist in the future. """ data = b"asdas" assert bytes_to_json(data, zarr_format=zarr_format) == base64.b64encode(data).decode("ascii") # note the order of parameters relative to the order of the parametrized variable. 
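# Before the parametrized round-trip tests, a minimal self-contained sketch of the
# float-to-JSON encoding they exercise. It only uses ``float_to_json_v2`` and
# ``float_to_json_v3`` (already imported above); the expected strings are taken
# directly from ``json_float_v2_roundtrip_cases``, so nothing here asserts more than
# those cases already do.
def test_float_to_json_special_values_sketch() -> None:
    """Spot-check the JSON string forms used for non-finite floats."""
    # Non-finite floats are encoded as the strings "Infinity", "-Infinity", and "NaN".
    assert float_to_json_v2(float("inf")) == "Infinity"
    assert float_to_json_v2(float("-inf")) == "-Infinity"
    assert float_to_json_v2(float("nan")) == "NaN"
    # Finite values pass through unchanged as JSON numbers.
    assert float_to_json_v2(1.0) == 1.0
    # The Zarr v3 encoding agrees for these values.
    assert float_to_json_v3(np.float64("inf")) == "Infinity"
    assert float_to_json_v3(np.float64("nan")) == "NaN"
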
@pytest.mark.parametrize(("json_expected", "float_data"), json_float_v2_roundtrip_cases) def test_complex_to_json_v2( float_data: float | np.floating[Any], json_expected: JSONFloatV2 ) -> None: """ Test that complex numbers are correctly converted to JSON in v2 format. This use the same test input as the float tests, but the conversion is tested for complex numbers with real and imaginary parts equal to the float values provided in the test cases. """ cplx = complex(float_data, float_data) cplx_npy = np.complex128(cplx) assert complex_float_to_json_v2(cplx) == (json_expected, json_expected) assert complex_float_to_json_v2(cplx_npy) == (json_expected, json_expected) # note the order of parameters relative to the order of the parametrized variable. @pytest.mark.parametrize(("json_expected", "float_data"), json_float_v3_cases) def test_complex_to_json_v3( float_data: float | np.floating[Any], json_expected: JSONFloatV2 ) -> None: """ Test that complex numbers are correctly converted to JSON in v3 format. This use the same test input as the float tests, but the conversion is tested for complex numbers with real and imaginary parts equal to the float values provided in the test cases. """ cplx = complex(float_data, float_data) cplx_npy = np.complex128(cplx) assert complex_float_to_json_v3(cplx) == (json_expected, json_expected) assert complex_float_to_json_v3(cplx_npy) == (json_expected, json_expected) @pytest.mark.parametrize(("json_expected", "float_data"), json_float_v3_cases) def test_complex_float_to_json( float_data: float | np.floating[Any], json_expected: JSONFloatV2, zarr_format: ZarrFormat ) -> None: """ Test that complex numbers are correctly converted to JSON in v2 or v3 formats, depending on the ``zarr_format`` keyword argument. This use the same test input as the float tests, but the conversion is tested for complex numbers with real and imaginary parts equal to the float values provided in the test cases. """ cplx = complex(float_data, float_data) cplx_npy = np.complex128(cplx) if zarr_format == 2: assert complex_float_to_json_v2(cplx) == (json_expected, json_expected) assert complex_float_to_json_v2(cplx_npy) == ( json_expected, json_expected, ) elif zarr_format == 3: assert complex_float_to_json_v3(cplx) == (json_expected, json_expected) assert complex_float_to_json_v3(cplx_npy) == ( json_expected, json_expected, ) else: raise ValueError("zarr_format must be 2 or 3") # pragma: no cover check_json_float_cases = get_args(SpecialFloatStrings) + (1.0, 2) @pytest.mark.parametrize("data", check_json_float_cases) def test_check_json_float_v2_valid(data: JSONFloatV2 | int) -> None: assert check_json_float_v2(data) def test_check_json_float_v2_invalid() -> None: assert not check_json_float_v2("invalid") @pytest.mark.parametrize("data", check_json_float_cases) def test_check_json_float_v3_valid(data: JSONFloatV2 | int) -> None: assert check_json_float_v3(data) def test_check_json_float_v3_invalid() -> None: assert not check_json_float_v3("invalid") check_json_complex_float_true_cases: tuple[list[JSONFloatV2], ...] = ( [0.0, 1.0], [0.0, 1.0], [-1.0, "NaN"], ["Infinity", 1.0], ["Infinity", "NaN"], ) check_json_complex_float_false_cases: tuple[object, ...] 
= ( 0.0, "foo", [0.0], [1.0, 2.0, 3.0], [1.0, "_infinity_"], {"hello": 1.0}, ) @pytest.mark.parametrize("data", check_json_complex_float_true_cases) def test_check_json_complex_float_v2_true(data: JSON) -> None: assert check_json_complex_float_v2(data) @pytest.mark.parametrize("data", check_json_complex_float_false_cases) def test_check_json_complex_float_v2_false(data: JSON) -> None: assert not check_json_complex_float_v2(data) @pytest.mark.parametrize("data", check_json_complex_float_true_cases) def test_check_json_complex_float_v3_true(data: JSON) -> None: assert check_json_complex_float_v3(data) @pytest.mark.parametrize("data", check_json_complex_float_false_cases) def test_check_json_complex_float_v3_false(data: JSON) -> None: assert not check_json_complex_float_v3(data) @pytest.mark.parametrize("data", check_json_complex_float_true_cases) def test_check_json_complex_float_true(data: JSON, zarr_format: ZarrFormat) -> None: if zarr_format == 2: assert check_json_complex_float_v2(data) elif zarr_format == 3: assert check_json_complex_float_v3(data) else: raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @pytest.mark.parametrize("data", check_json_complex_float_false_cases) def test_check_json_complex_float_false(data: JSON, zarr_format: ZarrFormat) -> None: if zarr_format == 2: assert not check_json_complex_float_v2(data) elif zarr_format == 3: assert not check_json_complex_float_v3(data) else: raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def test_check_json_int() -> None: assert check_json_int(0) assert not check_json_int(1.0) def test_check_json_intish_float() -> None: assert check_json_intish_float(0.0) assert check_json_intish_float(1.0) assert not check_json_intish_float("0") assert not check_json_intish_float(1.1) def test_check_json_str() -> None: assert check_json_str("0") assert not check_json_str(1.0) def test_check_json_bool() -> None: assert check_json_bool(True) assert check_json_bool(False) assert not check_json_bool(1.0) assert not check_json_bool("True") zarr-python-3.1.5/tests/test_dtype/test_npy/test_complex.py000066400000000000000000000057411511007055700243000ustar00rootroot00000000000000from __future__ import annotations import math import numpy as np from tests.test_dtype.test_wrapper import BaseTestZDType from zarr.core.dtype.npy.complex import Complex64, Complex128 class _BaseTestFloat(BaseTestZDType): def scalar_equals(self, scalar1: object, scalar2: object) -> bool: if np.isnan(scalar1) and np.isnan(scalar2): # type: ignore[call-overload] return True return super().scalar_equals(scalar1, scalar2) class TestComplex64(_BaseTestFloat): test_cls = Complex64 valid_dtype = (np.dtype(">c8"), np.dtype("c8", "object_codec_id": None}, {"name": "c16"), np.dtype("c16", "object_codec_id": None}, {"name": " bool: if np.isnan(scalar1) and np.isnan(scalar2): # type: ignore[call-overload] return True return super().scalar_equals(scalar1, scalar2) hex_string_params: tuple[tuple[str, float], ...] 
= () def test_hex_encoding(self, hex_string_params: tuple[str, float]) -> None: """ Test that hexadecimal strings can be read as NaN values """ hex_string, expected = hex_string_params zdtype = self.test_cls() observed = zdtype.from_json_scalar(hex_string, zarr_format=3) assert self.scalar_equals(observed, expected) class TestFloat16(_BaseTestFloat): test_cls = Float16 valid_dtype = (np.dtype(">f2"), np.dtype("f2", "object_codec_id": None}, {"name": "f4"), np.dtype("f4", "object_codec_id": None}, {"name": "f8"), np.dtype("f8", "object_codec_id": None}, {"name": " None: """Test the check_json_floatish_str function.""" from zarr.core.dtype.npy.common import check_json_floatish_str # Test valid string floats assert check_json_floatish_str("3.14") assert check_json_floatish_str("0.0") assert check_json_floatish_str("-2.5") assert check_json_floatish_str("1.0") # Test invalid cases assert not check_json_floatish_str("not_a_number") assert not check_json_floatish_str("") assert not check_json_floatish_str(3.14) # actual float, not string assert not check_json_floatish_str(42) # int assert not check_json_floatish_str(None) # Test that special cases still work via float() conversion # (these will be handled by existing functions first in practice) assert check_json_floatish_str("NaN") assert check_json_floatish_str("Infinity") assert check_json_floatish_str("-Infinity") def test_string_float_from_json_scalar() -> None: """Test that string representations of floats can be parsed by from_json_scalar.""" # Test with Float32 dtype_instance = Float32() result = dtype_instance.from_json_scalar("3.14", zarr_format=3) assert abs(result - np.float32(3.14)) < 1e-6 assert isinstance(result, np.float32) # Test other cases result = dtype_instance.from_json_scalar("0.0", zarr_format=3) assert result == np.float32(0.0) result = dtype_instance.from_json_scalar("-2.5", zarr_format=3) assert result == np.float32(-2.5) # Test that it works for v2 format too result = dtype_instance.from_json_scalar("1.5", zarr_format=2) assert result == np.float32(1.5) zarr-python-3.1.5/tests/test_dtype/test_npy/test_int.py000066400000000000000000000221471511007055700234220ustar00rootroot00000000000000from __future__ import annotations import numpy as np from tests.test_dtype.test_wrapper import BaseTestZDType from zarr.core.dtype.npy.int import Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64 class TestInt8(BaseTestZDType): test_cls = Int8 scalar_type = np.int8 valid_dtype = (np.dtype(np.int8),) invalid_dtype = ( np.dtype(np.int16), np.dtype(np.uint16), np.dtype(np.float64), ) valid_json_v2 = ({"name": "|i1", "object_codec_id": None},) valid_json_v3 = ("int8",) invalid_json_v2 = ( ">i1", "int8", "|f8", ) invalid_json_v3 = ( "|i1", "|f8", {"name": "int8", "configuration": {"endianness": "little"}}, ) scalar_v2_params = ((Int8(), 1), (Int8(), -1), (Int8(), 1.0)) scalar_v3_params = ((Int8(), 1), (Int8(), -1)) cast_value_params = ( (Int8(), 1, np.int8(1)), (Int8(), -1, np.int8(-1)), ) invalid_scalar_params = ((Int8(), {"set!"}), (Int8(), ("tuple",))) item_size_params = (Int8(),) class TestInt16(BaseTestZDType): test_cls = Int16 scalar_type = np.int16 valid_dtype = (np.dtype(">i2"), np.dtype("i2", "object_codec_id": None}, {"name": "i4"), np.dtype("i4", "object_codec_id": None}, {"name": "i8"), np.dtype("i8", "object_codec_id": None}, {"name": "u2"), np.dtype("u2", "object_codec_id": None}, {"name": "u4"), np.dtype("u4", "object_codec_id": None}, {"name": "u8"), np.dtype("u8", "object_codec_id": None}, {"name": " None: """Test 
the check_json_intish_str function.""" from zarr.core.dtype.npy.common import check_json_intish_str # Test valid string integers assert check_json_intish_str("0") assert check_json_intish_str("42") assert check_json_intish_str("-5") assert check_json_intish_str("123") # Test invalid cases assert not check_json_intish_str("3.14") assert not check_json_intish_str("not_a_number") assert not check_json_intish_str("") assert not check_json_intish_str(42) # actual int, not string assert not check_json_intish_str(3.14) # float assert not check_json_intish_str(None) def test_string_integer_from_json_scalar() -> None: """Test that string representations of integers can be parsed by from_json_scalar.""" # Test the specific reproducer case dtype_instance = Int32() result = dtype_instance.from_json_scalar("0", zarr_format=3) assert result == np.int32(0) assert isinstance(result, np.int32) # Test other cases result = dtype_instance.from_json_scalar("42", zarr_format=3) assert result == np.int32(42) result = dtype_instance.from_json_scalar("-5", zarr_format=3) assert result == np.int32(-5) # Test that it works for v2 format too result = dtype_instance.from_json_scalar("123", zarr_format=2) assert result == np.int32(123) zarr-python-3.1.5/tests/test_dtype/test_npy/test_string.py000066400000000000000000000113261511007055700241330ustar00rootroot00000000000000from __future__ import annotations import numpy as np import pytest from tests.test_dtype.test_wrapper import BaseTestZDType from zarr.core.dtype import FixedLengthUTF32 from zarr.core.dtype.npy.string import _NUMPY_SUPPORTS_VLEN_STRING, VariableLengthUTF8 from zarr.errors import UnstableSpecificationWarning if _NUMPY_SUPPORTS_VLEN_STRING: class TestVariableLengthString(BaseTestZDType): test_cls = VariableLengthUTF8 # type: ignore[assignment] valid_dtype = (np.dtypes.StringDType(),) # type: ignore[assignment] invalid_dtype = ( np.dtype(np.int8), np.dtype(np.float64), np.dtype("|S10"), ) valid_json_v2 = ({"name": "|O", "object_codec_id": "vlen-utf8"},) valid_json_v3 = ("string",) invalid_json_v2 = ( "|S10", "|f8", "invalid", ) invalid_json_v3 = ( {"name": "variable_length_utf8", "configuration": {"invalid_key": "value"}}, {"name": "invalid_name"}, ) scalar_v2_params = ((VariableLengthUTF8(), ""), (VariableLengthUTF8(), "hi")) scalar_v3_params = ( (VariableLengthUTF8(), ""), (VariableLengthUTF8(), "hi"), ) cast_value_params = ( (VariableLengthUTF8(), "", np.str_("")), (VariableLengthUTF8(), "hi", np.str_("hi")), ) # anything can become a string invalid_scalar_params = (None,) item_size_params = (VariableLengthUTF8(),) else: class TestVariableLengthString(BaseTestZDType): # type: ignore[no-redef] test_cls = VariableLengthUTF8 # type: ignore[assignment] valid_dtype = (np.dtype("O"),) invalid_dtype = ( np.dtype(np.int8), np.dtype(np.float64), np.dtype("|S10"), ) valid_json_v2 = ({"name": "|O", "object_codec_id": "vlen-utf8"},) valid_json_v3 = ("string",) invalid_json_v2 = ( "|S10", "|f8", "invalid", ) invalid_json_v3 = ( {"name": "numpy.variable_length_utf8", "configuration": {"invalid_key": "value"}}, {"name": "invalid_name"}, ) scalar_v2_params = ((VariableLengthUTF8(), ""), (VariableLengthUTF8(), "hi")) scalar_v3_params = ( (VariableLengthUTF8(), ""), (VariableLengthUTF8(), "hi"), ) cast_value_params = ( (VariableLengthUTF8(), "", np.str_("")), (VariableLengthUTF8(), "hi", np.str_("hi")), ) # anything can become a string invalid_scalar_params = (None,) item_size_params = (VariableLengthUTF8(),) class TestFixedLengthUTF32(BaseTestZDType): test_cls = 
FixedLengthUTF32 valid_dtype = (np.dtype(">U10"), np.dtype("U10", "object_codec_id": None}, {"name": " None: """ Test that we get a warning when serializing a dtype without a zarr v3 spec to json when zarr_format is 3 """ with pytest.warns(UnstableSpecificationWarning): zdtype.to_json(zarr_format=3) def test_invalid_size() -> None: """ Test that it's impossible to create a data type that has no length """ length = 0 msg = f"length must be >= 1, got {length}." with pytest.raises(ValueError, match=msg): FixedLengthUTF32(length=length) zarr-python-3.1.5/tests/test_dtype/test_npy/test_structured.py000066400000000000000000000075411511007055700250350ustar00rootroot00000000000000from __future__ import annotations from typing import Any import numpy as np import pytest from tests.test_dtype.test_wrapper import BaseTestZDType from zarr.core.dtype import ( Float16, Float64, Int32, Int64, Structured, ) class TestStructured(BaseTestZDType): test_cls = Structured valid_dtype = ( np.dtype([("field1", np.int32), ("field2", np.float64)]), np.dtype([("field1", np.int64), ("field2", np.int32)]), ) invalid_dtype = ( np.dtype(np.int8), np.dtype(np.float64), np.dtype("|S10"), ) valid_json_v2 = ( {"name": [["field1", ">i4"], ["field2", ">f8"]], "object_codec_id": None}, {"name": [["field1", ">i8"], ["field2", ">i4"]], "object_codec_id": None}, ) valid_json_v3 = ( { "name": "structured", "configuration": { "fields": [ ["field1", "int32"], ["field2", "float64"], ] }, }, { "name": "structured", "configuration": { "fields": [ [ "field1", { "name": "numpy.datetime64", "configuration": {"unit": "s", "scale_factor": 1}, }, ], [ "field2", {"name": "fixed_length_utf32", "configuration": {"length_bytes": 32}}, ], ] }, }, ) invalid_json_v2 = ( [("field1", "|i1"), ("field2", "|f8")], [("field1", "|S10"), ("field2", "|f8")], ) invalid_json_v3 = ( { "name": "structured", "configuration": { "fields": [ ("field1", {"name": "int32", "configuration": {"endianness": "invalid"}}), ("field2", {"name": "float64", "configuration": {"endianness": "big"}}), ] }, }, {"name": "invalid_name"}, ) scalar_v2_params = ( (Structured(fields=(("field1", Int32()), ("field2", Float64()))), "AQAAAAAAAAAAAPA/"), (Structured(fields=(("field1", Float16()), ("field2", Int32()))), "AQAAAAAA"), ) scalar_v3_params = ( (Structured(fields=(("field1", Int32()), ("field2", Float64()))), "AQAAAAAAAAAAAPA/"), (Structured(fields=(("field1", Int64()), ("field2", Int32()))), "AQAAAAAAAAAAAPA/"), ) cast_value_params = ( ( Structured(fields=(("field1", Int32()), ("field2", Float64()))), (1, 2.0), np.array((1, 2.0), dtype=[("field1", np.int32), ("field2", np.float64)]), ), ( Structured(fields=(("field1", Int64()), ("field2", Int32()))), (3, 4.5), np.array((3, 4.5), dtype=[("field1", np.int64), ("field2", np.int32)]), ), ) item_size_params = ( Structured(fields=(("field1", Int32()), ("field2", Float64()))), Structured(fields=(("field1", Int64()), ("field2", Int32()))), ) invalid_scalar_params = ( (Structured(fields=(("field1", Int32()), ("field2", Float64()))), "i am a string"), (Structured(fields=(("field1", Int32()), ("field2", Float64()))), {"type": "dict"}), ) def scalar_equals(self, scalar1: Any, scalar2: Any) -> bool: if hasattr(scalar1, "shape") and hasattr(scalar2, "shape"): return np.array_equal(scalar1, scalar2) return super().scalar_equals(scalar1, scalar2) def test_invalid_size() -> None: """ Test that it's impossible to create a data type that has no fields """ fields = () msg = f"must have at least one field. 
Got {fields!r}" with pytest.raises(ValueError, match=msg): Structured(fields=fields) zarr-python-3.1.5/tests/test_dtype/test_npy/test_time.py000066400000000000000000000137411511007055700235660ustar00rootroot00000000000000from __future__ import annotations import re from typing import get_args import numpy as np import pytest from tests.test_dtype.test_wrapper import BaseTestZDType from zarr.core.dtype.npy.common import DateTimeUnit from zarr.core.dtype.npy.time import DateTime64, TimeDelta64, datetime_from_int class _TestTimeBase(BaseTestZDType): def json_scalar_equals(self, scalar1: object, scalar2: object) -> bool: # This method gets overridden here to support the equivalency between NaT and # -9223372036854775808 fill values nat_scalars = (-9223372036854775808, "NaT") if scalar1 in nat_scalars and scalar2 in nat_scalars: return True return scalar1 == scalar2 def scalar_equals(self, scalar1: object, scalar2: object) -> bool: if np.isnan(scalar1) and np.isnan(scalar2): # type: ignore[call-overload] return True return super().scalar_equals(scalar1, scalar2) class TestDateTime64(_TestTimeBase): test_cls = DateTime64 valid_dtype = (np.dtype("datetime64[10ns]"), np.dtype("datetime64[us]"), np.dtype("datetime64")) invalid_dtype = ( np.dtype(np.int8), np.dtype(np.float64), np.dtype("timedelta64[ns]"), ) valid_json_v2 = ( {"name": ">M8", "object_codec_id": None}, {"name": ">M8[s]", "object_codec_id": None}, {"name": "m8", "object_codec_id": None}, {"name": ">m8[s]", "object_codec_id": None}, {"name": " None: """ Test that an invalid unit raises a ValueError. """ unit = "invalid" msg = f"unit must be one of ('Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', 'us', 'μs', 'ns', 'ps', 'fs', 'as', 'generic'), got {unit!r}." with pytest.raises(ValueError, match=re.escape(msg)): DateTime64(unit=unit) # type: ignore[arg-type] with pytest.raises(ValueError, match=re.escape(msg)): TimeDelta64(unit=unit) # type: ignore[arg-type] def test_time_scale_factor_too_low() -> None: """ Test that an invalid unit raises a ValueError. """ scale_factor = 0 msg = f"scale_factor must be > 0, got {scale_factor}." with pytest.raises(ValueError, match=msg): DateTime64(scale_factor=scale_factor) with pytest.raises(ValueError, match=msg): TimeDelta64(scale_factor=scale_factor) def test_time_scale_factor_too_high() -> None: """ Test that an invalid unit raises a ValueError. """ scale_factor = 2**31 msg = f"scale_factor must be < 2147483648, got {scale_factor}." with pytest.raises(ValueError, match=msg): DateTime64(scale_factor=scale_factor) with pytest.raises(ValueError, match=msg): TimeDelta64(scale_factor=scale_factor) @pytest.mark.parametrize("unit", get_args(DateTimeUnit)) @pytest.mark.parametrize("scale_factor", [1, 10]) @pytest.mark.parametrize("value", [0, 1, 10]) def test_datetime_from_int(unit: DateTimeUnit, scale_factor: int, value: int) -> None: """ Test datetime_from_int. 
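For example, with ``value=1``, ``unit="s"`` and ``scale_factor=10`` (a case covered by the parametrization below), the integer is viewed as ``datetime64[10s]``, i.e. one 10-second tick after the epoch, which equals ``np.datetime64(10, "s")``.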
""" expected = np.int64(value).view(f"datetime64[{scale_factor}{unit}]") assert datetime_from_int(value, unit=unit, scale_factor=scale_factor) == expected zarr-python-3.1.5/tests/test_dtype/test_wrapper.py000066400000000000000000000176671511007055700224560ustar00rootroot00000000000000from __future__ import annotations import re from typing import TYPE_CHECKING, Any, ClassVar import pytest from zarr.core.dtype.common import DTypeSpec_V2, DTypeSpec_V3, HasItemSize if TYPE_CHECKING: from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType """ class _TestZDTypeSchema: # subclasses define the URL for the schema, if available schema_url: ClassVar[str] = "" @pytest.fixture(scope="class") def get_schema(self) -> object: response = requests.get(self.schema_url) response.raise_for_status() return json_schema.loads(response.text) def test_schema(self, schema: json_schema.Schema) -> None: assert schema.is_valid(self.test_cls.to_json(zarr_format=2)) """ class BaseTestZDType: """ A base class for testing ZDType subclasses. This class works in conjunction with the custom pytest collection function ``pytest_generate_tests`` defined in conftest.py, which applies the following procedure when generating tests: At test generation time, for each test fixture referenced by a method on this class pytest will look for an attribute with the same name as that fixture. Pytest will assume that this class attribute is a tuple of values to be used for generating a parametrized test fixture. This means that child classes can, by using different values for these class attributes, have customized test parametrization. Attributes ---------- test_cls : type[ZDType[TBaseDType, TBaseScalar]] The ZDType subclass being tested. scalar_type : ClassVar[type[TBaseScalar]] The expected scalar type for the ZDType. valid_dtype : ClassVar[tuple[TBaseDType, ...]] A tuple of valid numpy dtypes for the ZDType. invalid_dtype : ClassVar[tuple[TBaseDType, ...]] A tuple of invalid numpy dtypes for the ZDType. valid_json_v2 : ClassVar[tuple[str | dict[str, object] | list[object], ...]] A tuple of valid JSON representations for Zarr format version 2. invalid_json_v2 : ClassVar[tuple[str | dict[str, object] | list[object], ...]] A tuple of invalid JSON representations for Zarr format version 2. valid_json_v3 : ClassVar[tuple[str | dict[str, object], ...]] A tuple of valid JSON representations for Zarr format version 3. invalid_json_v3 : ClassVar[tuple[str | dict[str, object], ...]] A tuple of invalid JSON representations for Zarr format version 3. cast_value_params : ClassVar[tuple[tuple[Any, Any, Any], ...]] A tuple of (dtype, value, expected) tuples for testing ZDType.cast_value. scalar_v2_params : ClassVar[tuple[Any, ...]] A tuple of (dtype, scalar json) tuples for testing ZDType.from_json_scalar / ZDType.to_json_scalar for zarr v2 scalar_v3_params : ClassVar[tuple[Any, ...]] A tuple of (dtype, scalar json) tuples for testing ZDType.from_json_scalar / ZDType.to_json_scalar for zarr v3 invalid_scalar_params : ClassVar[tuple[Any, ...]] A tuple of (dtype, value) tuples, where each value is expected to fail ZDType.cast_value. 
item_size_params : ClassVar[tuple[Any, ...]] A tuple of (dtype, expected) tuples for testing ZDType.item_size """ test_cls: type[ZDType[TBaseDType, TBaseScalar]] scalar_type: ClassVar[type[TBaseScalar]] valid_dtype: ClassVar[tuple[TBaseDType, ...]] = () invalid_dtype: ClassVar[tuple[TBaseDType, ...]] = () valid_json_v2: ClassVar[tuple[DTypeSpec_V2, ...]] = () invalid_json_v2: ClassVar[tuple[str | dict[str, object] | list[object], ...]] = () valid_json_v3: ClassVar[tuple[DTypeSpec_V3, ...]] = () invalid_json_v3: ClassVar[tuple[str | dict[str, object], ...]] = () # for testing scalar round-trip serialization, we need a tuple of (data type json, scalar json) # pairs. the first element of the pair is used to create a dtype instance, and the second # element is the json serialization of the scalar that we want to round-trip. scalar_v2_params: ClassVar[tuple[tuple[ZDType[Any, Any], Any], ...]] = () scalar_v3_params: ClassVar[tuple[tuple[Any, Any], ...]] = () cast_value_params: ClassVar[tuple[tuple[ZDType[Any, Any], Any, Any], ...]] = () # Some data types, like bool and string, can consume any python object as a scalar. # So we allow passing None in to this test to indicate that it should be skipped. invalid_scalar_params: ClassVar[tuple[tuple[ZDType[Any, Any], Any], ...] | tuple[None]] = () item_size_params: ClassVar[tuple[ZDType[Any, Any], ...]] = () def json_scalar_equals(self, scalar1: object, scalar2: object) -> bool: # An equality check for json-encoded scalars. This defaults to regular equality, # but some classes may need to override this for special cases return scalar1 == scalar2 def scalar_equals(self, scalar1: object, scalar2: object) -> bool: # An equality check for scalars. This defaults to regular equality, # but some classes may need to override this for special cases return scalar1 == scalar2 def test_check_dtype_valid(self, valid_dtype: TBaseDType) -> None: assert self.test_cls._check_native_dtype(valid_dtype) def test_check_dtype_invalid(self, invalid_dtype: object) -> None: assert not self.test_cls._check_native_dtype(invalid_dtype) # type: ignore[arg-type] def test_from_dtype_roundtrip(self, valid_dtype: Any) -> None: zdtype = self.test_cls.from_native_dtype(valid_dtype) assert zdtype.to_native_dtype() == valid_dtype def test_from_json_roundtrip_v2(self, valid_json_v2: DTypeSpec_V2) -> None: zdtype = self.test_cls.from_json(valid_json_v2, zarr_format=2) assert zdtype.to_json(zarr_format=2) == valid_json_v2 @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") def test_from_json_roundtrip_v3(self, valid_json_v3: DTypeSpec_V3) -> None: zdtype = self.test_cls.from_json(valid_json_v3, zarr_format=3) assert zdtype.to_json(zarr_format=3) == valid_json_v3 def test_scalar_roundtrip_v2(self, scalar_v2_params: tuple[ZDType[Any, Any], Any]) -> None: zdtype, scalar_json = scalar_v2_params scalar = zdtype.from_json_scalar(scalar_json, zarr_format=2) assert self.json_scalar_equals(scalar_json, zdtype.to_json_scalar(scalar, zarr_format=2)) def test_scalar_roundtrip_v3(self, scalar_v3_params: tuple[ZDType[Any, Any], Any]) -> None: zdtype, scalar_json = scalar_v3_params scalar = zdtype.from_json_scalar(scalar_json, zarr_format=3) assert self.json_scalar_equals(scalar_json, zdtype.to_json_scalar(scalar, zarr_format=3)) def test_cast_value(self, cast_value_params: tuple[ZDType[Any, Any], Any, Any]) -> None: zdtype, value, expected = cast_value_params observed = zdtype.cast_scalar(value) assert self.scalar_equals(expected, observed) # check that casting is 
idempotent assert self.scalar_equals(zdtype.cast_scalar(observed), observed) def test_invalid_scalar( self, invalid_scalar_params: tuple[ZDType[Any, Any], Any] | None ) -> None: if invalid_scalar_params is None: pytest.skip(f"No test data provided for {self}.{__name__}") zdtype, data = invalid_scalar_params msg = ( f"Cannot convert object {data!r} with type {type(data)} to a scalar compatible with the " f"data type {zdtype}." ) with pytest.raises(TypeError, match=re.escape(msg)): zdtype.cast_scalar(data) def test_item_size(self, item_size_params: ZDType[Any, Any]) -> None: """ Test that the item_size attribute matches the numpy dtype itemsize attribute, for dtypes with a fixed scalar size. """ if isinstance(item_size_params, HasItemSize): assert item_size_params.item_size == item_size_params.to_native_dtype().itemsize else: pytest.skip(f"Data type {item_size_params} does not implement HasItemSize") zarr-python-3.1.5/tests/test_dtype_registry.py000066400000000000000000000200071511007055700216450ustar00rootroot00000000000000from __future__ import annotations import re from typing import TYPE_CHECKING, Any, Literal, get_args import numpy as np import pytest from tests.conftest import skip_object_dtype from zarr.core.dtype import ( AnyDType, DataTypeRegistry, TBaseDType, TBaseScalar, get_data_type_from_json, ) from zarr.core.dtype.common import unpack_dtype_json from zarr.dtype import ( # type: ignore[attr-defined] Bool, FixedLengthUTF32, ZDType, data_type_registry, parse_data_type, parse_dtype, ) if TYPE_CHECKING: from zarr.core.common import ZarrFormat from .test_dtype.conftest import zdtype_examples @pytest.fixture def data_type_registry_fixture() -> DataTypeRegistry: return DataTypeRegistry() class TestRegistry: @staticmethod def test_register(data_type_registry_fixture: DataTypeRegistry) -> None: """ Test that registering a dtype in a data type registry works. """ data_type_registry_fixture.register(Bool._zarr_v3_name, Bool) assert data_type_registry_fixture.get(Bool._zarr_v3_name) == Bool assert isinstance(data_type_registry_fixture.match_dtype(np.dtype("bool")), Bool) @staticmethod def test_override(data_type_registry_fixture: DataTypeRegistry) -> None: """ Test that registering a new dtype with the same name works (overriding the previous one). """ data_type_registry_fixture.register(Bool._zarr_v3_name, Bool) class NewBool(Bool): def default_scalar(self) -> np.bool_: return np.True_ data_type_registry_fixture.register(NewBool._zarr_v3_name, NewBool) assert isinstance(data_type_registry_fixture.match_dtype(np.dtype("bool")), NewBool) @staticmethod @pytest.mark.parametrize( ("wrapper_cls", "dtype_str"), [(Bool, "bool"), (FixedLengthUTF32, "|U4")] ) def test_match_dtype( data_type_registry_fixture: DataTypeRegistry, wrapper_cls: type[ZDType[TBaseDType, TBaseScalar]], dtype_str: str, ) -> None: """ Test that match_dtype resolves a numpy dtype into an instance of the correspond wrapper for that dtype. """ data_type_registry_fixture.register(wrapper_cls._zarr_v3_name, wrapper_cls) assert isinstance(data_type_registry_fixture.match_dtype(np.dtype(dtype_str)), wrapper_cls) @staticmethod def test_unregistered_dtype(data_type_registry_fixture: DataTypeRegistry) -> None: """ Test that match_dtype raises an error if the dtype is not registered. 
""" outside_dtype_name = "int8" outside_dtype = np.dtype(outside_dtype_name) msg = f"No Zarr data type found that matches dtype '{outside_dtype!r}'" with pytest.raises(ValueError, match=re.escape(msg)): data_type_registry_fixture.match_dtype(outside_dtype) with pytest.raises(KeyError): data_type_registry_fixture.get(outside_dtype_name) @staticmethod @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") @pytest.mark.parametrize("zdtype", zdtype_examples) def test_registered_dtypes_match_dtype(zdtype: ZDType[TBaseDType, TBaseScalar]) -> None: """ Test that the registered dtypes can be retrieved from the registry. """ skip_object_dtype(zdtype) assert data_type_registry.match_dtype(zdtype.to_native_dtype()) == zdtype @staticmethod @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") @pytest.mark.parametrize("zdtype", zdtype_examples) def test_registered_dtypes_match_json( zdtype: ZDType[TBaseDType, TBaseScalar], zarr_format: ZarrFormat ) -> None: assert ( data_type_registry.match_json( zdtype.to_json(zarr_format=zarr_format), zarr_format=zarr_format ) == zdtype ) @staticmethod @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") @pytest.mark.parametrize("zdtype", zdtype_examples) def test_match_dtype_unique( zdtype: ZDType[Any, Any], data_type_registry_fixture: DataTypeRegistry, zarr_format: ZarrFormat, ) -> None: """ Test that the match_dtype method uniquely specifies a registered data type. We create a local registry that excludes the data type class being tested, and ensure that an instance of the wrapped data type fails to match anything in the registry """ skip_object_dtype(zdtype) for _cls in get_args(AnyDType): if _cls is not type(zdtype): data_type_registry_fixture.register(_cls._zarr_v3_name, _cls) dtype_instance = zdtype.to_native_dtype() msg = f"No Zarr data type found that matches dtype '{dtype_instance!r}'" with pytest.raises(ValueError, match=re.escape(msg)): data_type_registry_fixture.match_dtype(dtype_instance) instance_dict = zdtype.to_json(zarr_format=zarr_format) msg = f"No Zarr data type found that matches {instance_dict!r}" with pytest.raises(ValueError, match=re.escape(msg)): data_type_registry_fixture.match_json(instance_dict, zarr_format=zarr_format) @pytest.mark.usefixtures("set_path") def test_entrypoint_dtype(zarr_format: ZarrFormat) -> None: from package_with_entrypoint import TestDataType data_type_registry._lazy_load() instance = TestDataType() dtype_json = instance.to_json(zarr_format=zarr_format) assert get_data_type_from_json(dtype_json, zarr_format=zarr_format) == instance data_type_registry.unregister(TestDataType._zarr_v3_name) @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") @pytest.mark.parametrize("data_type", zdtype_examples, ids=str) @pytest.mark.parametrize("json_style", [(2, "internal"), (2, "metadata"), (3, None)], ids=str) @pytest.mark.parametrize( "dtype_parser_func", [parse_dtype, parse_data_type], ids=["parse_dtype", "parse_data_type"] ) def test_parse_data_type( data_type: ZDType[Any, Any], json_style: tuple[ZarrFormat, None | Literal["internal", "metadata"]], dtype_parser_func: Any, ) -> None: """ Test the parsing of data types into ZDType instances. This function tests the ability of `dtype_parser_func` to correctly interpret and parse data type specifications into `ZDType` instances according to the specified Zarr format and JSON style. 
Parameters ---------- data_type : ZDType[Any, Any] The data type to be tested for parsing. json_style : tuple[ZarrFormat, None or Literal["internal", "metadata"]] A tuple specifying the Zarr format version and the JSON style for Zarr V2 2. For Zarr V2 there are 2 JSON styles: "internal", and "metadata". The internal style takes the form {"name": , "object_codec_id": }, while the metadata style is just . dtype_parser_func : Any The function to be tested for parsing the data type. This is necessary for compatibility reasons, as we support multiple functions that perform the same data type parsing operation. """ zarr_format, style = json_style dtype_spec: Any if zarr_format == 2: dtype_spec = data_type.to_json(zarr_format=zarr_format) if style == "internal": pass elif style == "metadata": dtype_spec = unpack_dtype_json(dtype_spec) else: raise ValueError(f"Invalid zarr v2 json style: {style}") else: dtype_spec = data_type.to_json(zarr_format=zarr_format) if dtype_spec == "|O": # The object data type on its own is ambiguous and should fail to resolve. msg = "Zarr data type resolution from object failed." with pytest.raises(ValueError, match=msg): dtype_parser_func(dtype_spec, zarr_format=zarr_format) else: observed = dtype_parser_func(dtype_spec, zarr_format=zarr_format) assert observed == data_type zarr-python-3.1.5/tests/test_errors.py000066400000000000000000000051101511007055700201020ustar00rootroot00000000000000"""Test errors""" from zarr.errors import ( ArrayNotFoundError, ContainsArrayAndGroupError, ContainsArrayError, ContainsGroupError, GroupNotFoundError, MetadataValidationError, NodeTypeValidationError, ) def test_group_not_found_error() -> None: """ Test that calling GroupNotFoundError with multiple arguments returns a formatted string. This is deprecated behavior. """ err = GroupNotFoundError("store", "path") assert str(err) == "No group found in store 'store' at path 'path'" def test_array_not_found_error() -> None: """ Test that calling ArrayNotFoundError with multiple arguments returns a formatted string. This is deprecated behavior. """ err = ArrayNotFoundError("store", "path") assert str(err) == "No array found in store 'store' at path 'path'" def test_metadata_validation_error() -> None: """ Test that calling MetadataValidationError with multiple arguments returns a formatted string. This is deprecated behavior. """ err = MetadataValidationError("a", "b", "c") assert str(err) == "Invalid value for 'a'. Expected 'b'. Got 'c'." def test_contains_group_error() -> None: """ Test that calling ContainsGroupError with multiple arguments returns a formatted string. This is deprecated behavior. """ err = ContainsGroupError("store", "path") assert str(err) == "A group exists in store 'store' at path 'path'." def test_contains_array_error() -> None: """ Test that calling ContainsArrayError with multiple arguments returns a formatted string. This is deprecated behavior. """ err = ContainsArrayError("store", "path") assert str(err) == "An array exists in store 'store' at path 'path'." def test_contains_array_and_group_error() -> None: """ Test that calling ContainsArrayAndGroupError with multiple arguments returns a formatted string. This is deprecated behavior. """ err = ContainsArrayAndGroupError("store", "path") assert str(err) == ( "Array and group metadata documents (.zarray and .zgroup) were both found in store 'store' " "at path 'path'. Only one of these files may be present in a given directory / prefix. " "Remove the .zarray file, or the .zgroup file, or both." 
) def test_node_type_validation_error() -> None: """ Test that calling NodeTypeValidationError with multiple arguments returns a formatted string. This is deprecated behavior. """ err = NodeTypeValidationError("a", "b", "c") assert str(err) == "Invalid value for 'a'. Expected 'b'. Got 'c'." zarr-python-3.1.5/tests/test_examples.py000066400000000000000000000055131511007055700204130ustar00rootroot00000000000000from __future__ import annotations import re import subprocess import sys from pathlib import Path from typing import Final import pytest import tomlkit from packaging.requirements import Requirement examples_dir = "examples" script_paths = Path(examples_dir).glob("*.py") PEP_723_REGEX: Final = r"(?m)^# /// (?P[a-zA-Z0-9-]+)$\s(?P(^#(| .*)$\s)+)^# ///$" # This is the absolute path to the local Zarr installation. Moving this test to a different directory will break it. ZARR_PROJECT_PATH = Path(".").absolute() def set_dep(script: str, dependency: str) -> str: """ Set a dependency in a PEP-723 script header. If the package is already in the list, it will be replaced. If the package is not already in the list, it will be added. Source code modified from https://packaging.python.org/en/latest/specifications/inline-script-metadata/#reference-implementation """ match = re.search(PEP_723_REGEX, script) if match is None: raise ValueError(f"PEP-723 header not found in {script}") content = "".join( line[2:] if line.startswith("# ") else line[1:] for line in match.group("content").splitlines(keepends=True) ) config = tomlkit.parse(content) for idx, dep in enumerate(tuple(config["dependencies"])): if Requirement(dep).name == Requirement(dependency).name: config["dependencies"][idx] = dependency new_content = "".join( f"# {line}" if line.strip() else f"#{line}" for line in tomlkit.dumps(config).splitlines(keepends=True) ) start, end = match.span("content") return script[:start] + new_content + script[end:] def resave_script(source_path: Path, dest_path: Path) -> None: """ Read a script from source_path and save it to dest_path after inserting the absolute path to the local Zarr project directory in the PEP-723 header. """ source_text = source_path.read_text() dest_text = set_dep(source_text, f"zarr @ file:///{ZARR_PROJECT_PATH}") dest_path.write_text(dest_text) @pytest.mark.skipif( sys.platform in ("win32",), reason="This test fails due for unknown reasons on Windows in CI." ) @pytest.mark.parametrize("script_path", script_paths) def test_scripts_can_run(script_path: Path, tmp_path: Path) -> None: dest_path = tmp_path / script_path.name # We resave the script after inserting the absolute path to the local Zarr project directory, # and then test its behavior. # This allows the example to be useful to users who don't have Zarr installed, but also testable. resave_script(script_path, dest_path) result = subprocess.run( ["uv", "run", "--refresh", str(dest_path)], capture_output=True, text=True ) assert result.returncode == 0, ( f"Script at {script_path} failed to run. Output: {result.stdout} Error: {result.stderr}" ) zarr-python-3.1.5/tests/test_experimental/000077500000000000000000000000001511007055700207145ustar00rootroot00000000000000zarr-python-3.1.5/tests/test_experimental/test_cache_store.py000066400000000000000000001027411511007055700246110ustar00rootroot00000000000000""" Tests for the dual-store cache implementation. 
""" import asyncio import time import pytest from zarr.abc.store import Store from zarr.core.buffer.core import default_buffer_prototype from zarr.core.buffer.cpu import Buffer as CPUBuffer from zarr.experimental.cache_store import CacheStore from zarr.storage import MemoryStore class TestCacheStore: """Test the dual-store cache implementation.""" @pytest.fixture def source_store(self) -> MemoryStore: """Create a source store with some test data.""" return MemoryStore() @pytest.fixture def cache_store(self) -> MemoryStore: """Create an empty cache store.""" return MemoryStore() @pytest.fixture def cached_store(self, source_store: Store, cache_store: Store) -> CacheStore: """Create a cached store instance.""" return CacheStore(source_store, cache_store=cache_store, key_insert_times={}) async def test_basic_caching(self, cached_store: CacheStore, source_store: Store) -> None: """Test basic cache functionality.""" # Store some data test_data = CPUBuffer.from_bytes(b"test data") await cached_store.set("test_key", test_data) # Verify it's in both stores assert await source_store.exists("test_key") assert await cached_store._cache.exists("test_key") # Retrieve and verify caching works result = await cached_store.get("test_key", default_buffer_prototype()) assert result is not None assert result.to_bytes() == b"test data" async def test_cache_miss_and_population( self, cached_store: CacheStore, source_store: Store ) -> None: """Test cache miss and subsequent population.""" # Put data directly in source store (bypassing cache) test_data = CPUBuffer.from_bytes(b"source data") await source_store.set("source_key", test_data) # First access should miss cache but populate it result = await cached_store.get("source_key", default_buffer_prototype()) assert result is not None assert result.to_bytes() == b"source data" # Verify data is now in cache assert await cached_store._cache.exists("source_key") async def test_cache_expiration(self) -> None: """Test cache expiration based on max_age_seconds.""" source_store = MemoryStore() cache_store = MemoryStore() cached_store = CacheStore( source_store, cache_store=cache_store, max_age_seconds=1, # 1 second expiration key_insert_times={}, ) # Store data test_data = CPUBuffer.from_bytes(b"expiring data") await cached_store.set("expire_key", test_data) # Should be fresh initially (if _is_key_fresh method exists) if hasattr(cached_store, "_is_key_fresh"): assert cached_store._is_key_fresh("expire_key") # Wait for expiration await asyncio.sleep(1.1) # Should now be stale assert not cached_store._is_key_fresh("expire_key") else: # Skip freshness check if method doesn't exist await asyncio.sleep(1.1) # Just verify the data is still accessible result = await cached_store.get("expire_key", default_buffer_prototype()) assert result is not None async def test_cache_set_data_false(self, source_store: Store, cache_store: Store) -> None: """Test behavior when cache_set_data=False.""" cached_store = CacheStore( source_store, cache_store=cache_store, cache_set_data=False, key_insert_times={} ) test_data = CPUBuffer.from_bytes(b"no cache data") await cached_store.set("no_cache_key", test_data) # Data should be in source but not cache assert await source_store.exists("no_cache_key") assert not await cache_store.exists("no_cache_key") async def test_delete_removes_from_both_stores(self, cached_store: CacheStore) -> None: """Test that delete removes from both source and cache.""" test_data = CPUBuffer.from_bytes(b"delete me") await cached_store.set("delete_key", test_data) # Verify 
in both stores assert await cached_store._store.exists("delete_key") assert await cached_store._cache.exists("delete_key") # Delete await cached_store.delete("delete_key") # Verify removed from both assert not await cached_store._store.exists("delete_key") assert not await cached_store._cache.exists("delete_key") async def test_exists_checks_source_store( self, cached_store: CacheStore, source_store: Store ) -> None: """Test that exists() checks the source store (source of truth).""" # Put data directly in source test_data = CPUBuffer.from_bytes(b"exists test") await source_store.set("exists_key", test_data) # Should exist even though not in cache assert await cached_store.exists("exists_key") async def test_list_operations(self, cached_store: CacheStore, source_store: Store) -> None: """Test listing operations delegate to source store.""" # Add some test data test_data = CPUBuffer.from_bytes(b"list test") await cached_store.set("list/item1", test_data) await cached_store.set("list/item2", test_data) await cached_store.set("other/item3", test_data) # Test list_dir list_items = [key async for key in cached_store.list_dir("list/")] assert len(list_items) >= 2 # Should include our items # Test list_prefix prefix_items = [key async for key in cached_store.list_prefix("list/")] assert len(prefix_items) >= 2 async def test_stale_cache_refresh(self) -> None: """Test that stale cache entries are refreshed from source.""" source_store = MemoryStore() cache_store = MemoryStore() cached_store = CacheStore( source_store, cache_store=cache_store, max_age_seconds=1, key_insert_times={} ) # Store initial data old_data = CPUBuffer.from_bytes(b"old data") await cached_store.set("refresh_key", old_data) # Wait for expiration await asyncio.sleep(1.1) # Update source store directly (simulating external update) new_data = CPUBuffer.from_bytes(b"new data") await source_store.set("refresh_key", new_data) # Access should refresh from source when cache is stale result = await cached_store.get("refresh_key", default_buffer_prototype()) assert result is not None assert result.to_bytes() == b"new data" async def test_infinity_max_age(self, cached_store: CacheStore) -> None: """Test that 'infinity' max_age means cache never expires.""" # Skip test if _is_key_fresh method doesn't exist if not hasattr(cached_store, "_is_key_fresh"): pytest.skip("_is_key_fresh method not implemented") test_data = CPUBuffer.from_bytes(b"eternal data") await cached_store.set("eternal_key", test_data) # Should always be fresh assert cached_store._is_key_fresh("eternal_key") # Even after time passes await asyncio.sleep(0.1) assert cached_store._is_key_fresh("eternal_key") async def test_cache_returns_cached_data_for_performance( self, cached_store: CacheStore, source_store: Store ) -> None: """Test that cache returns cached data for performance, even if not in source.""" # Skip test if key_insert_times attribute doesn't exist if not hasattr(cached_store, "key_insert_times"): pytest.skip("key_insert_times attribute not implemented") # Put data in cache but not source (simulates orphaned cache entry) test_data = CPUBuffer.from_bytes(b"orphaned data") await cached_store._cache.set("orphan_key", test_data) cached_store.key_insert_times["orphan_key"] = time.monotonic() # Cache should return data for performance (no source verification) result = await cached_store.get("orphan_key", default_buffer_prototype()) assert result is not None assert result.to_bytes() == b"orphaned data" # Cache entry should remain (performance optimization) assert await 
cached_store._cache.exists("orphan_key") assert "orphan_key" in cached_store.key_insert_times async def test_cache_coherency_through_expiration(self) -> None: """Test that cache coherency is managed through cache expiration, not source verification.""" source_store = MemoryStore() cache_store = MemoryStore() cached_store = CacheStore( source_store, cache_store=cache_store, max_age_seconds=1, # Short expiration for coherency ) # Add data to both stores test_data = CPUBuffer.from_bytes(b"original data") await cached_store.set("coherency_key", test_data) # Remove from source (simulating external deletion) await source_store.delete("coherency_key") # Cache should still return cached data (performance optimization) result = await cached_store.get("coherency_key", default_buffer_prototype()) assert result is not None assert result.to_bytes() == b"original data" # Wait for cache expiration await asyncio.sleep(1.1) # Now stale cache should be refreshed from source result = await cached_store.get("coherency_key", default_buffer_prototype()) assert result is None # Key no longer exists in source async def test_cache_info(self, cached_store: CacheStore) -> None: """Test cache_info method returns correct information.""" # Test initial state info = cached_store.cache_info() # Check all expected keys are present expected_keys = { "cache_store_type", "max_age_seconds", "max_size", "current_size", "cache_set_data", "tracked_keys", "cached_keys", } assert set(info.keys()) == expected_keys # Check initial values assert info["cache_store_type"] == "MemoryStore" assert info["max_age_seconds"] == "infinity" assert info["max_size"] is None # Default unlimited assert info["current_size"] == 0 assert info["cache_set_data"] is True assert info["tracked_keys"] == 0 assert info["cached_keys"] == 0 # Add some data and verify tracking test_data = CPUBuffer.from_bytes(b"test data for cache info") await cached_store.set("info_test_key", test_data) # Check updated info updated_info = cached_store.cache_info() assert updated_info["tracked_keys"] == 1 assert updated_info["cached_keys"] == 1 assert updated_info["current_size"] > 0 # Should have some size now async def test_cache_info_with_max_size(self) -> None: """Test cache_info with max_size configuration.""" source_store = MemoryStore() cache_store = MemoryStore() # Create cache with specific max_size and max_age cached_store = CacheStore( source_store, cache_store=cache_store, max_size=1024, max_age_seconds=300, key_insert_times={}, ) info = cached_store.cache_info() assert info["max_size"] == 1024 assert info["max_age_seconds"] == 300 assert info["current_size"] == 0 async def test_clear_cache(self, cached_store: CacheStore) -> None: """Test clear_cache method clears all cache data and tracking.""" # Add some test data test_data1 = CPUBuffer.from_bytes(b"test data 1") test_data2 = CPUBuffer.from_bytes(b"test data 2") await cached_store.set("clear_test_1", test_data1) await cached_store.set("clear_test_2", test_data2) # Verify data is cached info_before = cached_store.cache_info() assert info_before["tracked_keys"] == 2 assert info_before["cached_keys"] == 2 assert info_before["current_size"] > 0 # Verify data exists in cache assert await cached_store._cache.exists("clear_test_1") assert await cached_store._cache.exists("clear_test_2") # Clear the cache await cached_store.clear_cache() # Verify cache is cleared info_after = cached_store.cache_info() assert info_after["tracked_keys"] == 0 assert info_after["cached_keys"] == 0 assert info_after["current_size"] == 0 # 
Verify data is removed from cache store (if it supports clear) if hasattr(cached_store._cache, "clear"): # If cache store supports clear, all data should be gone assert not await cached_store._cache.exists("clear_test_1") assert not await cached_store._cache.exists("clear_test_2") # Verify data still exists in source store assert await cached_store._store.exists("clear_test_1") assert await cached_store._store.exists("clear_test_2") async def test_max_age_infinity(self) -> None: """Test cache with infinite max age.""" source_store = MemoryStore() cache_store = MemoryStore() cached_store = CacheStore(source_store, cache_store=cache_store, max_age_seconds="infinity") # Add data and verify it never expires test_data = CPUBuffer.from_bytes(b"test data") await cached_store.set("test_key", test_data) # Even after time passes, key should be fresh assert cached_store._is_key_fresh("test_key") async def test_max_age_numeric(self) -> None: """Test cache with numeric max age.""" source_store = MemoryStore() cache_store = MemoryStore() cached_store = CacheStore( source_store, cache_store=cache_store, max_age_seconds=1, # 1 second ) # Add data test_data = CPUBuffer.from_bytes(b"test data") await cached_store.set("test_key", test_data) # Key should be fresh initially assert cached_store._is_key_fresh("test_key") # Manually set old timestamp to test expiration cached_store.key_insert_times["test_key"] = time.monotonic() - 2 # 2 seconds ago # Key should now be stale assert not cached_store._is_key_fresh("test_key") async def test_cache_set_data_disabled(self) -> None: """Test cache behavior when cache_set_data is False.""" source_store = MemoryStore() cache_store = MemoryStore() cached_store = CacheStore(source_store, cache_store=cache_store, cache_set_data=False) # Set data test_data = CPUBuffer.from_bytes(b"test data") await cached_store.set("test_key", test_data) # Data should be in source but not in cache assert await source_store.exists("test_key") assert not await cache_store.exists("test_key") # Cache info should show no cached data info = cached_store.cache_info() assert info["cache_set_data"] is False assert info["cached_keys"] == 0 async def test_eviction_with_max_size(self) -> None: """Test LRU eviction when max_size is exceeded.""" source_store = MemoryStore() cache_store = MemoryStore() cached_store = CacheStore( source_store, cache_store=cache_store, max_size=100, # Small cache size ) # Add data that exceeds cache size small_data = CPUBuffer.from_bytes(b"a" * 40) # 40 bytes medium_data = CPUBuffer.from_bytes(b"b" * 40) # 40 bytes large_data = CPUBuffer.from_bytes(b"c" * 40) # 40 bytes (would exceed 100 byte limit) # Set first two items await cached_store.set("key1", small_data) await cached_store.set("key2", medium_data) # Cache should have 2 items info = cached_store.cache_info() assert info["cached_keys"] == 2 assert info["current_size"] == 80 # Add third item - should trigger eviction of first item await cached_store.set("key3", large_data) # Cache should still have items but first one may be evicted info = cached_store.cache_info() assert info["current_size"] <= 100 async def test_value_exceeds_max_size(self) -> None: """Test behavior when a single value exceeds max_size.""" source_store = MemoryStore() cache_store = MemoryStore() cached_store = CacheStore( source_store, cache_store=cache_store, max_size=50, # Small cache size ) # Try to cache data larger than max_size large_data = CPUBuffer.from_bytes(b"x" * 100) # 100 bytes > 50 byte limit await cached_store.set("large_key", 
large_data) # Data should be in source but not cached assert await source_store.exists("large_key") info = cached_store.cache_info() assert info["cached_keys"] == 0 assert info["current_size"] == 0 async def test_get_nonexistent_key(self) -> None: """Test getting a key that doesn't exist in either store.""" source_store = MemoryStore() cache_store = MemoryStore() cached_store = CacheStore(source_store, cache_store=cache_store) # Try to get nonexistent key result = await cached_store.get("nonexistent", default_buffer_prototype()) assert result is None # Should not create any cache entries info = cached_store.cache_info() assert info["cached_keys"] == 0 async def test_delete_both_stores(self) -> None: """Test that delete removes from both source and cache stores.""" source_store = MemoryStore() cache_store = MemoryStore() cached_store = CacheStore(source_store, cache_store=cache_store) # Add data test_data = CPUBuffer.from_bytes(b"test data") await cached_store.set("test_key", test_data) # Verify it's in both stores assert await source_store.exists("test_key") assert await cache_store.exists("test_key") # Delete await cached_store.delete("test_key") # Verify it's removed from both assert not await source_store.exists("test_key") assert not await cache_store.exists("test_key") # Verify tracking is updated info = cached_store.cache_info() assert info["cached_keys"] == 0 async def test_invalid_max_age_seconds(self) -> None: """Test that invalid max_age_seconds values raise ValueError.""" source_store = MemoryStore() cache_store = MemoryStore() with pytest.raises(ValueError, match="max_age_seconds string value must be 'infinity'"): CacheStore(source_store, cache_store=cache_store, max_age_seconds="invalid") async def test_unlimited_cache_size(self) -> None: """Test behavior when max_size is None (unlimited).""" source_store = MemoryStore() cache_store = MemoryStore() cached_store = CacheStore( source_store, cache_store=cache_store, max_size=None, # Unlimited cache ) # Add large amounts of data for i in range(10): large_data = CPUBuffer.from_bytes(b"x" * 1000) # 1KB each await cached_store.set(f"large_key_{i}", large_data) # All should be cached since there's no size limit info = cached_store.cache_info() assert info["cached_keys"] == 10 assert info["current_size"] == 10000 # 10 * 1000 bytes async def test_evict_key_exception_handling(self) -> None: """Test exception handling in _evict_key method.""" source_store = MemoryStore() cache_store = MemoryStore() cached_store = CacheStore(source_store, cache_store=cache_store, max_size=100) # Add some data test_data = CPUBuffer.from_bytes(b"test data") await cached_store.set("test_key", test_data) # Manually corrupt the tracking to trigger exception # Remove from one structure but not others to create inconsistency del cached_store._cache_order["test_key"] # Try to evict - should handle the KeyError gracefully await cached_store._evict_key("test_key") # Should still work and not crash info = cached_store.cache_info() assert isinstance(info, dict) async def test_get_no_cache_delete_tracking(self) -> None: """Test _get_no_cache when key doesn't exist and needs cleanup.""" source_store = MemoryStore() cache_store = MemoryStore() cached_store = CacheStore(source_store, cache_store=cache_store) # First, add key to cache tracking but not to source test_data = CPUBuffer.from_bytes(b"test data") await cache_store.set("phantom_key", test_data) await cached_store._cache_value("phantom_key", test_data) # Verify it's in tracking assert "phantom_key" in 
cached_store._cache_order assert "phantom_key" in cached_store.key_insert_times # Now try to get it - since it's not in source, should clean up tracking result = await cached_store._get_no_cache("phantom_key", default_buffer_prototype()) assert result is None # Should have cleaned up tracking assert "phantom_key" not in cached_store._cache_order assert "phantom_key" not in cached_store.key_insert_times async def test_accommodate_value_no_max_size(self) -> None: """Test _accommodate_value early return when max_size is None.""" source_store = MemoryStore() cache_store = MemoryStore() cached_store = CacheStore( source_store, cache_store=cache_store, max_size=None, # No size limit ) # This should return early without doing anything await cached_store._accommodate_value(1000000) # Large value # Should not affect anything since max_size is None info = cached_store.cache_info() assert info["current_size"] == 0 async def test_concurrent_set_operations(self) -> None: """Test that concurrent set operations don't corrupt cache size tracking.""" source_store = MemoryStore() cache_store = MemoryStore() cached_store = CacheStore(source_store, cache_store=cache_store, max_size=1000) # Create 10 concurrent set operations async def set_data(key: str) -> None: data = CPUBuffer.from_bytes(b"x" * 50) await cached_store.set(key, data) # Run concurrently await asyncio.gather(*[set_data(f"key_{i}") for i in range(10)]) info = cached_store.cache_info() # Expected: 10 keys * 50 bytes = 500 bytes assert info["cached_keys"] == 10 assert info["current_size"] == 500 # WOULD FAIL due to race condition async def test_concurrent_eviction_race(self) -> None: """Test concurrent evictions don't corrupt size tracking.""" source_store = MemoryStore() cache_store = MemoryStore() cached_store = CacheStore(source_store, cache_store=cache_store, max_size=200) # Fill cache to near capacity data = CPUBuffer.from_bytes(b"x" * 80) await cached_store.set("key1", data) await cached_store.set("key2", data) # Now trigger two concurrent sets that both need to evict async def set_large(key: str) -> None: large_data = CPUBuffer.from_bytes(b"y" * 100) await cached_store.set(key, large_data) await asyncio.gather(set_large("key3"), set_large("key4")) info = cached_store.cache_info() # Size should be consistent with tracked keys assert info["current_size"] <= 200 # Might pass # But verify actual cache store size matches tracking total_size = sum(cached_store._key_sizes.get(k, 0) for k in cached_store._cache_order) assert total_size == info["current_size"] # WOULD FAIL async def test_concurrent_get_and_evict(self) -> None: """Test get operations during eviction don't cause corruption.""" source_store = MemoryStore() cache_store = MemoryStore() cached_store = CacheStore(source_store, cache_store=cache_store, max_size=100) # Setup data = CPUBuffer.from_bytes(b"x" * 40) await cached_store.set("key1", data) await cached_store.set("key2", data) # Concurrent: read key1 while adding key3 (triggers eviction) async def read_key() -> None: for _ in range(100): await cached_store.get("key1", default_buffer_prototype()) async def write_key() -> None: for i in range(10): new_data = CPUBuffer.from_bytes(b"y" * 40) await cached_store.set(f"new_{i}", new_data) await asyncio.gather(read_key(), write_key()) # Verify consistency info = cached_store.cache_info() assert info["current_size"] <= 100 assert len(cached_store._cache_order) == len(cached_store._key_sizes) async def test_eviction_actually_deletes_from_cache_store(self) -> None: """Test that eviction 
        removes keys from cache_store, not just tracking."""
        source_store = MemoryStore()
        cache_store = MemoryStore()
        cached_store = CacheStore(source_store, cache_store=cache_store, max_size=100)

        # Add data that will be evicted
        data1 = CPUBuffer.from_bytes(b"x" * 60)
        data2 = CPUBuffer.from_bytes(b"y" * 60)

        await cached_store.set("key1", data1)

        # Verify key1 is in cache_store
        assert await cache_store.exists("key1")

        # Add key2, which should evict key1
        await cached_store.set("key2", data2)

        # Check tracking - key1 should be removed
        assert "key1" not in cached_store._cache_order
        assert "key1" not in cached_store._key_sizes

        # CRITICAL: key1 should also be removed from cache_store
        assert not await cache_store.exists("key1"), (
            "Evicted key still exists in cache_store! _evict_key doesn't actually delete."
        )

        # But key1 should still exist in source store
        assert await source_store.exists("key1")

    async def test_eviction_no_orphaned_keys(self) -> None:
        """Test that eviction doesn't leave orphaned keys in cache_store."""
        source_store = MemoryStore()
        cache_store = MemoryStore()
        cached_store = CacheStore(source_store, cache_store=cache_store, max_size=150)

        # Add multiple keys that will cause evictions
        for i in range(10):
            data = CPUBuffer.from_bytes(b"x" * 60)
            await cached_store.set(f"key_{i}", data)

        # Check tracking
        info = cached_store.cache_info()
        tracked_keys = info["cached_keys"]

        # Count actual keys in cache_store
        actual_keys = 0
        async for _ in cache_store.list():
            actual_keys += 1

        # Cache store should have same number of keys as tracking
        assert actual_keys == tracked_keys, (
            f"Cache store has {actual_keys} keys but tracking shows {tracked_keys}. "
            f"Eviction doesn't delete from cache_store!"
        )

    async def test_size_accounting_with_key_updates(self) -> None:
        """Test that updating the same key replaces size instead of accumulating."""
        source_store = MemoryStore()
        cache_store = MemoryStore()
        cached_store = CacheStore(source_store, cache_store=cache_store, max_size=500)

        # Set initial value
        data1 = CPUBuffer.from_bytes(b"x" * 100)
        await cached_store.set("same_key", data1)

        info1 = cached_store.cache_info()
        assert info1["current_size"] == 100

        # Update with different size
        data2 = CPUBuffer.from_bytes(b"y" * 200)
        await cached_store.set("same_key", data2)

        info2 = cached_store.cache_info()
        # Should be 200, not 300 (update replaces, doesn't accumulate)
        assert info2["current_size"] == 200, (
            f"Expected size 200 but got {info2['current_size']}. "
            "Updating same key should replace, not accumulate."
        )

    async def test_all_tracked_keys_exist_in_cache_store(self) -> None:
        """Test invariant: all keys in tracking should exist in cache_store."""
        source_store = MemoryStore()
        cache_store = MemoryStore()
        cached_store = CacheStore(source_store, cache_store=cache_store, max_size=500)

        # Add some data
        for i in range(5):
            data = CPUBuffer.from_bytes(b"x" * 50)
            await cached_store.set(f"key_{i}", data)

        # Every key in tracking should exist in cache_store
        for key in cached_store._cache_order:
            assert await cache_store.exists(key), (
                f"Key '{key}' is tracked but doesn't exist in cache_store"
            )

        # Every key in _key_sizes should exist in cache_store
        for key in cached_store._key_sizes:
            assert await cache_store.exists(key), (
                f"Key '{key}' has size tracked but doesn't exist in cache_store"
            )

    # Additional coverage tests for 100% coverage

    async def test_cache_store_requires_delete_support(self) -> None:
        """Test that CacheStore validates cache_store supports deletes."""
        from unittest.mock import MagicMock

        # Create a mock store that doesn't support deletes
        source_store = MemoryStore()
        cache_store = MagicMock()
        cache_store.supports_deletes = False

        with pytest.raises(ValueError, match="does not support deletes"):
            CacheStore(store=source_store, cache_store=cache_store)

    async def test_evict_key_exception_handling_with_real_error(
        self, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """Test _evict_key exception handling when deletion fails."""
        source_store = MemoryStore()
        cache_store = MemoryStore()
        cached_store = CacheStore(store=source_store, cache_store=cache_store, max_size=100)

        # Set up a key in tracking
        buffer = CPUBuffer.from_bytes(b"test data")
        await cached_store.set("test_key", buffer)

        # Mock the cache delete to raise an exception
        async def failing_delete(key: str) -> None:
            raise RuntimeError("Simulated cache deletion failure")

        monkeypatch.setattr(cache_store, "delete", failing_delete)

        # Attempt to evict should raise the exception
        with pytest.raises(RuntimeError, match="Simulated cache deletion failure"):
            async with cached_store._lock:
                await cached_store._evict_key("test_key")

    async def test_cache_stats_method(self) -> None:
        """Test cache_stats method returns correct statistics."""
        source_store = MemoryStore()
        cache_store = MemoryStore()
        cached_store = CacheStore(store=source_store, cache_store=cache_store, max_size=1000)

        # Initially, stats should be zero
        stats = cached_store.cache_stats()
        assert stats["hits"] == 0
        assert stats["misses"] == 0
        assert stats["evictions"] == 0
        assert stats["total_requests"] == 0
        assert stats["hit_rate"] == 0.0

        # Perform some operations
        buffer = CPUBuffer.from_bytes(b"x" * 100)

        # Write to source store directly to avoid affecting stats
        await source_store.set("key1", buffer)

        # First get is a miss (not in cache yet)
        result1 = await cached_store.get("key1", default_buffer_prototype())
        assert result1 is not None

        # Second get is a hit (now in cache)
        result2 = await cached_store.get("key1", default_buffer_prototype())
        assert result2 is not None

        stats = cached_store.cache_stats()
        assert stats["hits"] == 1
        assert stats["misses"] == 1
        assert stats["total_requests"] == 2
        assert stats["hit_rate"] == 0.5

    async def test_cache_stats_with_evictions(self) -> None:
        """Test cache_stats tracks evictions correctly."""
        source_store = MemoryStore()
        cache_store = MemoryStore()
        cached_store = CacheStore(
            store=source_store,
            cache_store=cache_store,
            max_size=150,  # Small size to force eviction
        )

        # Add items that will trigger eviction
        buffer1 = CPUBuffer.from_bytes(b"x" * 100)
        buffer2 = CPUBuffer.from_bytes(b"y"
* 100) await cached_store.set("key1", buffer1) await cached_store.set("key2", buffer2) # Should evict key1 stats = cached_store.cache_stats() assert stats["evictions"] == 1 def test_repr_method(self) -> None: """Test __repr__ returns useful string representation.""" source_store = MemoryStore() cache_store = MemoryStore() cached_store = CacheStore( store=source_store, cache_store=cache_store, max_age_seconds=60, max_size=1024 ) repr_str = repr(cached_store) # Check that repr contains key information assert "CacheStore" in repr_str assert "max_age_seconds=60" in repr_str assert "max_size=1024" in repr_str assert "current_size=0" in repr_str assert "cached_keys=0" in repr_str async def test_cache_stats_zero_division_protection(self) -> None: """Test cache_stats handles zero requests correctly.""" source_store = MemoryStore() cache_store = MemoryStore() cached_store = CacheStore(store=source_store, cache_store=cache_store) # With no requests, hit_rate should be 0.0 (not NaN or error) stats = cached_store.cache_stats() assert stats["hit_rate"] == 0.0 assert stats["total_requests"] == 0 zarr-python-3.1.5/tests/test_group.py000066400000000000000000002476721511007055700177470ustar00rootroot00000000000000from __future__ import annotations import contextlib import inspect import json import operator import pickle import re import time import warnings from typing import TYPE_CHECKING, Any, Literal, get_args import numpy as np import pytest from numcodecs import Blosc import zarr import zarr.api.asynchronous import zarr.api.synchronous import zarr.storage from zarr import Array, AsyncArray, AsyncGroup, Group from zarr.abc.store import Store from zarr.core import sync_group from zarr.core._info import GroupInfo from zarr.core.buffer import default_buffer_prototype from zarr.core.config import config as zarr_config from zarr.core.dtype.common import unpack_dtype_json from zarr.core.dtype.npy.int import UInt8 from zarr.core.group import ( ConsolidatedMetadata, GroupMetadata, ImplicitGroupMarker, _build_metadata_v3, _get_roots, _parse_hierarchy_dict, create_hierarchy, create_nodes, create_rooted_hierarchy, get_node, ) from zarr.core.metadata.v3 import ArrayV3Metadata from zarr.core.sync import _collect_aiterator, sync from zarr.errors import ( ContainsArrayError, ContainsGroupError, MetadataValidationError, ZarrDeprecationWarning, ZarrUserWarning, ) from zarr.storage import LocalStore, MemoryStore, StorePath, ZipStore from zarr.storage._common import make_store_path from zarr.storage._utils import _join_paths, normalize_path from zarr.testing.store import LatencyStore from .conftest import meta_from_array, parse_store if TYPE_CHECKING: from collections.abc import Callable from _pytest.compat import LEGACY_PATH from zarr.core.buffer.core import Buffer from zarr.core.common import JSON, ZarrFormat @pytest.fixture(params=["local", "memory", "zip"]) async def store(request: pytest.FixtureRequest, tmpdir: LEGACY_PATH) -> Store: result = await parse_store(request.param, str(tmpdir)) if not isinstance(result, Store): raise TypeError("Wrong store class returned by test fixture! got " + result + " instead") return result @pytest.fixture(params=[True, False]) def overwrite(request: pytest.FixtureRequest) -> bool: result = request.param if not isinstance(result, bool): raise TypeError("Wrong type returned by test fixture.") return result def test_group_init(store: Store, zarr_format: ZarrFormat) -> None: """ Test that initializing a group from an asyncgroup works. 
""" agroup = sync(AsyncGroup.from_store(store=store, zarr_format=zarr_format)) group = Group(agroup) assert group._async_group == agroup async def test_create_creates_parents(store: Store, zarr_format: ZarrFormat) -> None: # prepare a root node, with some data set await zarr.api.asynchronous.open_group( store=store, path="a", zarr_format=zarr_format, attributes={"key": "value"} ) objs = {x async for x in store.list()} if zarr_format == 2: assert objs == {".zgroup", ".zattrs", "a/.zgroup", "a/.zattrs"} else: assert objs == {"zarr.json", "a/zarr.json"} # test that root group node was created root = await zarr.api.asynchronous.open_group( store=store, ) agroup = await root.getitem("a") assert agroup.attrs == {"key": "value"} # create a child node with a couple intermediates await zarr.api.asynchronous.open_group(store=store, path="a/b/c/d", zarr_format=zarr_format) parts = ["a", "a/b", "a/b/c"] if zarr_format == 2: files = [".zattrs", ".zgroup"] else: files = ["zarr.json"] expected = [f"{part}/{file}" for file in files for part in parts] if zarr_format == 2: expected.extend([".zgroup", ".zattrs", "a/b/c/d/.zgroup", "a/b/c/d/.zattrs"]) else: expected.extend(["zarr.json", "a/b/c/d/zarr.json"]) expected = sorted(expected) result = sorted([x async for x in store.list_prefix("")]) assert result == expected paths = ["a", "a/b", "a/b/c"] for path in paths: g = await zarr.api.asynchronous.open_group(store=store, path=path) assert isinstance(g, AsyncGroup) if path == "a": # ensure we didn't overwrite the root attributes assert g.attrs == {"key": "value"} else: assert g.attrs == {} @pytest.mark.parametrize("store", ["memory"], indirect=True) @pytest.mark.parametrize("root_name", ["", "/", "a", "/a"]) @pytest.mark.parametrize("branch_name", ["foo", "/foo", "foo/bar", "/foo/bar"]) def test_group_name_properties( store: Store, zarr_format: ZarrFormat, root_name: str, branch_name: str ) -> None: """ Test that the path, name, and basename attributes of a group and its subgroups are consistent """ root = Group.from_store(store=StorePath(store=store, path=root_name), zarr_format=zarr_format) assert root.path == normalize_path(root_name) assert root.name == "/" + root.path assert root.basename == root.path branch = root.create_group(branch_name) if root.path == "": assert branch.path == normalize_path(branch_name) else: assert branch.path == "/".join([root.path, normalize_path(branch_name)]) assert branch.name == "/" + branch.path assert branch.basename == branch_name.split("/")[-1] @pytest.mark.parametrize("consolidated_metadata", [True, False]) def test_group_members(store: Store, zarr_format: ZarrFormat, consolidated_metadata: bool) -> None: """ Test that `Group.members` returns correct values, i.e. the arrays and groups (explicit and implicit) contained in that group. """ # group/ # subgroup/ # subsubgroup/ # subsubsubgroup # subarray path = "group" group = Group.from_store( store=store, zarr_format=zarr_format, ) members_expected: dict[str, Array | Group] = {} members_expected["subgroup"] = group.create_group("subgroup") # make a sub-sub-subgroup, to ensure that the children calculation doesn't go # too deep in the hierarchy subsubgroup = members_expected["subgroup"].create_group("subsubgroup") subsubsubgroup = subsubgroup.create_group("subsubsubgroup") members_expected["subarray"] = group.create_array( "subarray", shape=(100,), dtype="uint8", chunks=(10,), overwrite=True ) # add an extra object to the domain of the group. # the list of children should ignore this object. 
sync( store.set( f"{path}/extra_object-1", default_buffer_prototype().buffer.from_bytes(b"000000"), ) ) # add an extra object under a directory-like prefix in the domain of the group. # this creates a directory with a random key in it # this should not show up as a member sync( store.set( f"{path}/extra_directory/extra_object-2", default_buffer_prototype().buffer.from_bytes(b"000000"), ) ) # this warning shows up when extra objects show up in the hierarchy warn_context = pytest.warns( ZarrUserWarning, match=r"(?:Object at .* is not recognized as a component of a Zarr hierarchy.)|(?:Consolidated metadata is currently not part in the Zarr format 3 specification.)", ) if consolidated_metadata: if isinstance(store, ZipStore): with warn_context: with pytest.warns(UserWarning, match="Duplicate name: "): zarr.consolidate_metadata(store=store, zarr_format=zarr_format) else: with warn_context: zarr.consolidate_metadata(store=store, zarr_format=zarr_format) # now that we've consolidated the store, we shouldn't get the warnings from the unrecognized objects anymore # we use a nullcontext to handle these cases warn_context = contextlib.nullcontext() group = zarr.open_consolidated(store=store, zarr_format=zarr_format) with warn_context: members_observed = group.members() # members are not guaranteed to be ordered, so sort before comparing assert sorted(dict(members_observed)) == sorted(members_expected) # partial with warn_context: members_observed = group.members(max_depth=1) members_expected["subgroup/subsubgroup"] = subsubgroup # members are not guaranteed to be ordered, so sort before comparing assert sorted(dict(members_observed)) == sorted(members_expected) # total with warn_context: members_observed = group.members(max_depth=None) members_expected["subgroup/subsubgroup/subsubsubgroup"] = subsubsubgroup # members are not guaranteed to be ordered, so sort before comparing assert sorted(dict(members_observed)) == sorted(members_expected) with pytest.raises(ValueError, match="max_depth"): members_observed = group.members(max_depth=-1) def test_group(store: Store, zarr_format: ZarrFormat) -> None: """ Test basic Group routines. """ store_path = StorePath(store) agroup = AsyncGroup(metadata=GroupMetadata(zarr_format=zarr_format), store_path=store_path) group = Group(agroup) assert agroup.metadata is group.metadata assert agroup.store_path == group.store_path == store_path # create two groups foo = group.create_group("foo") bar = foo.create_group("bar", attributes={"baz": "qux"}) # create an array from the "bar" group data = np.arange(0, 4 * 4, dtype="uint16").reshape((4, 4)) arr = bar.create_array("baz", shape=data.shape, dtype=data.dtype, chunks=(2, 2), overwrite=True) arr[:] = data # check the array assert arr == bar["baz"] assert arr.shape == data.shape assert arr.dtype == data.dtype # TODO: update this once the array api settles down assert arr.chunks == (2, 2) bar2 = foo["bar"] assert dict(bar2.attrs) == {"baz": "qux"} # update a group's attributes if isinstance(store, ZipStore): with pytest.warns(UserWarning, match="Duplicate name: "): bar2.attrs.update({"name": "bar"}) else: bar2.attrs.update({"name": "bar"}) # bar.attrs was modified in-place assert dict(bar2.attrs) == {"baz": "qux", "name": "bar"} # and the attrs were modified in the store bar3 = foo["bar"] assert dict(bar3.attrs) == {"baz": "qux", "name": "bar"} def test_group_create(store: Store, overwrite: bool, zarr_format: ZarrFormat) -> None: """ Test that `Group.from_store` works as expected. 
""" attributes = {"foo": 100} group = Group.from_store( store, attributes=attributes, zarr_format=zarr_format, overwrite=overwrite ) assert group.attrs == attributes if not overwrite: with pytest.raises(ContainsGroupError): _ = Group.from_store(store, overwrite=overwrite, zarr_format=zarr_format) def test_group_open(store: Store, zarr_format: ZarrFormat, overwrite: bool) -> None: """ Test the `Group.open` method. """ spath = StorePath(store) # attempt to open a group that does not exist with pytest.raises(FileNotFoundError): Group.open(store) # create the group attrs = {"path": "foo"} group_created = Group.from_store( store, attributes=attrs, zarr_format=zarr_format, overwrite=overwrite ) assert group_created.attrs == attrs assert group_created.metadata.zarr_format == zarr_format assert group_created.store_path == spath # attempt to create a new group in place, to test overwrite new_attrs = {"path": "bar"} if not overwrite: with pytest.raises(ContainsGroupError): Group.from_store(store, attributes=attrs, zarr_format=zarr_format, overwrite=overwrite) else: if not store.supports_deletes: pytest.skip( "Store does not support deletes but `overwrite` is True, requiring deletes to override a group" ) group_created_again = Group.from_store( store, attributes=new_attrs, zarr_format=zarr_format, overwrite=overwrite ) assert group_created_again.attrs == new_attrs assert group_created_again.metadata.zarr_format == zarr_format assert group_created_again.store_path == spath @pytest.mark.parametrize("consolidated", [True, False]) def test_group_getitem(store: Store, zarr_format: ZarrFormat, consolidated: bool) -> None: """ Test the `Group.__getitem__` method. """ group = Group.from_store(store, zarr_format=zarr_format) subgroup = group.create_group(name="subgroup") subarray = group.create_array(name="subarray", shape=(10,), chunks=(10,), dtype="uint8") subsubarray = subgroup.create_array(name="subarray", shape=(10,), chunks=(10,), dtype="uint8") if consolidated: if zarr_format == 3: with pytest.warns( # noqa: PT031 ZarrUserWarning, match="Consolidated metadata is currently not part in the Zarr format 3 specification.", ): if isinstance(store, ZipStore): with pytest.warns(UserWarning, match="Duplicate name: "): group = zarr.api.synchronous.consolidate_metadata( store=store, zarr_format=zarr_format ) else: group = zarr.api.synchronous.consolidate_metadata( store=store, zarr_format=zarr_format ) else: if isinstance(store, ZipStore): with pytest.warns(UserWarning, match="Duplicate name: "): group = zarr.api.synchronous.consolidate_metadata( store=store, zarr_format=zarr_format ) else: group = zarr.api.synchronous.consolidate_metadata( store=store, zarr_format=zarr_format ) # we're going to assume that `group.metadata` is correct, and reuse that to focus # on indexing in this test. 
Other tests verify the correctness of group.metadata object.__setattr__( subgroup.metadata, "consolidated_metadata", ConsolidatedMetadata( metadata={"subarray": group.metadata.consolidated_metadata.metadata["subarray"]} ), ) assert group["subgroup"] == subgroup assert group["subarray"] == subarray assert group["subgroup"]["subarray"] == subsubarray assert group["subgroup/subarray"] == subsubarray with pytest.raises(KeyError): group["nope"] with pytest.raises(KeyError, match="subarray/subsubarray"): group["subarray/subsubarray"] # Now test the mixed case if consolidated: object.__setattr__( group.metadata.consolidated_metadata.metadata["subgroup"], "consolidated_metadata", None, ) # test the implementation directly with pytest.raises(KeyError): group._async_group._getitem_consolidated( group.store_path, "subgroup/subarray", prefix="/" ) with pytest.raises(KeyError): # We've chosen to trust the consolidated metadata, which doesn't # contain this array group["subgroup/subarray"] with pytest.raises(KeyError, match="subarray/subsubarray"): group["subarray/subsubarray"] def test_group_get_with_default(store: Store, zarr_format: ZarrFormat) -> None: group = Group.from_store(store, zarr_format=zarr_format) # default behavior result = group.get("subgroup") assert result is None # custom default result = group.get("subgroup", 8) assert result == 8 # now with a group subgroup = group.require_group("subgroup") if isinstance(store, ZipStore): with pytest.warns(UserWarning, match="Duplicate name: "): subgroup.attrs["foo"] = "bar" else: subgroup.attrs["foo"] = "bar" result = group.get("subgroup", 8) assert result.attrs["foo"] == "bar" @pytest.mark.parametrize("consolidated", [True, False]) def test_group_delitem(store: Store, zarr_format: ZarrFormat, consolidated: bool) -> None: """ Test the `Group.__delitem__` method. """ if not store.supports_deletes: pytest.skip("store does not support deletes") group = Group.from_store(store, zarr_format=zarr_format) subgroup = group.create_group(name="subgroup") subarray = group.create_array(name="subarray", shape=(10,), chunks=(10,), dtype="uint8") if consolidated: if zarr_format == 3: with pytest.warns( # noqa: PT031 ZarrUserWarning, match="Consolidated metadata is currently not part in the Zarr format 3 specification.", ): if isinstance(store, ZipStore): with pytest.warns(UserWarning, match="Duplicate name: "): group = zarr.api.synchronous.consolidate_metadata( store=store, zarr_format=zarr_format ) else: group = zarr.api.synchronous.consolidate_metadata( store=store, zarr_format=zarr_format ) else: group = zarr.api.synchronous.consolidate_metadata(store=store, zarr_format=zarr_format) object.__setattr__( subgroup.metadata, "consolidated_metadata", ConsolidatedMetadata(metadata={}) ) assert group["subgroup"] == subgroup assert group["subarray"] == subarray del group["subgroup"] with pytest.raises(KeyError): group["subgroup"] del group["subarray"] with pytest.raises(KeyError): group["subarray"] def test_group_iter(store: Store, zarr_format: ZarrFormat) -> None: """ Test the `Group.__iter__` method. """ group = Group.from_store(store, zarr_format=zarr_format) assert list(group) == [] def test_group_len(store: Store, zarr_format: ZarrFormat) -> None: """ Test the `Group.__len__` method. """ group = Group.from_store(store, zarr_format=zarr_format) assert len(group) == 0 def test_group_setitem(store: Store, zarr_format: ZarrFormat) -> None: """ Test the `Group.__setitem__` method. 
""" group = Group.from_store(store, zarr_format=zarr_format) arr = np.ones((2, 4)) group["key"] = arr assert list(group.array_keys()) == ["key"] assert group["key"].shape == (2, 4) np.testing.assert_array_equal(group["key"][:], arr) if store.supports_deletes: key = "key" else: # overwriting with another array requires deletes # for stores that don't support this, we just use a new key key = "key2" # overwrite with another array arr = np.zeros((3, 5)) group[key] = arr assert key in list(group.array_keys()) assert group[key].shape == (3, 5) np.testing.assert_array_equal(group[key], arr) def test_group_contains(store: Store, zarr_format: ZarrFormat) -> None: """ Test the `Group.__contains__` method """ group = Group.from_store(store, zarr_format=zarr_format) assert "foo" not in group _ = group.create_group(name="foo") assert "foo" in group @pytest.mark.parametrize("consolidate", [True, False]) def test_group_child_iterators(store: Store, zarr_format: ZarrFormat, consolidate: bool): group = Group.from_store(store, zarr_format=zarr_format) expected_group_keys = ["g0", "g1"] expected_group_values = [group.create_group(name=name) for name in expected_group_keys] expected_groups = list(zip(expected_group_keys, expected_group_values, strict=False)) fill_value = 3 dtype = UInt8() expected_group_values[0].create_group("subgroup") expected_group_values[0].create_array( "subarray", shape=(1,), dtype=dtype, fill_value=fill_value ) expected_array_keys = ["a0", "a1"] expected_array_values = [ group.create_array(name=name, shape=(1,), dtype=dtype, fill_value=fill_value) for name in expected_array_keys ] expected_arrays = list(zip(expected_array_keys, expected_array_values, strict=False)) if consolidate: if zarr_format == 3: with pytest.warns( # noqa: PT031 ZarrUserWarning, match="Consolidated metadata is currently not part in the Zarr format 3 specification.", ): if isinstance(store, ZipStore): with pytest.warns(UserWarning, match="Duplicate name: "): group = zarr.consolidate_metadata(store) else: group = zarr.consolidate_metadata(store) else: if isinstance(store, ZipStore): with pytest.warns(UserWarning, match="Duplicate name: "): group = zarr.consolidate_metadata(store) else: group = zarr.consolidate_metadata(store) if zarr_format == 2: metadata = { "subarray": { "attributes": {}, "dtype": unpack_dtype_json(dtype.to_json(zarr_format=zarr_format)), "fill_value": fill_value, "shape": (1,), "chunks": (1,), "order": "C", "filters": None, "compressor": Blosc(), "zarr_format": zarr_format, }, "subgroup": { "attributes": {}, "consolidated_metadata": { "metadata": {}, "kind": "inline", "must_understand": False, }, "node_type": "group", "zarr_format": zarr_format, }, } else: metadata = { "subarray": { "attributes": {}, "chunk_grid": { "configuration": {"chunk_shape": (1,)}, "name": "regular", }, "chunk_key_encoding": { "configuration": {"separator": "/"}, "name": "default", }, "codecs": ( {"configuration": {"endian": "little"}, "name": "bytes"}, {"configuration": {}, "name": "zstd"}, ), "data_type": unpack_dtype_json(dtype.to_json(zarr_format=zarr_format)), "fill_value": fill_value, "node_type": "array", "shape": (1,), "zarr_format": zarr_format, }, "subgroup": { "attributes": {}, "consolidated_metadata": { "metadata": {}, "kind": "inline", "must_understand": False, }, "node_type": "group", "zarr_format": zarr_format, }, } object.__setattr__( expected_group_values[0].metadata, "consolidated_metadata", ConsolidatedMetadata.from_dict( { "kind": "inline", "metadata": metadata, "must_understand": False, } ), ) 
object.__setattr__( expected_group_values[1].metadata, "consolidated_metadata", ConsolidatedMetadata(metadata={}), ) result = sorted(group.groups(), key=operator.itemgetter(0)) assert result == expected_groups assert sorted(group.groups(), key=operator.itemgetter(0)) == expected_groups assert sorted(group.group_keys()) == expected_group_keys assert sorted(group.group_values(), key=lambda x: x.name) == expected_group_values assert sorted(group.arrays(), key=operator.itemgetter(0)) == expected_arrays assert sorted(group.array_keys()) == expected_array_keys assert sorted(group.array_values(), key=lambda x: x.name) == expected_array_values def test_group_update_attributes(store: Store, zarr_format: ZarrFormat) -> None: """ Test the behavior of `Group.update_attributes` """ attrs = {"foo": 100} group = Group.from_store(store, zarr_format=zarr_format, attributes=attrs) assert group.attrs == attrs new_attrs = {"bar": 100} if isinstance(store, ZipStore): with pytest.warns(UserWarning, match="Duplicate name: "): new_group = group.update_attributes(new_attrs) else: new_group = group.update_attributes(new_attrs) updated_attrs = attrs.copy() updated_attrs.update(new_attrs) assert new_group.attrs == updated_attrs async def test_group_update_attributes_async(store: Store, zarr_format: ZarrFormat) -> None: """ Test the behavior of `Group.update_attributes_async` """ attrs = {"foo": 100} group = Group.from_store(store, zarr_format=zarr_format, attributes=attrs) assert group.attrs == attrs new_attrs = {"bar": 100} if isinstance(store, ZipStore): with pytest.warns(UserWarning, match="Duplicate name: "): new_group = await group.update_attributes_async(new_attrs) else: new_group = await group.update_attributes_async(new_attrs) assert new_group.attrs == new_attrs @pytest.mark.parametrize("method", ["create_array", "array"]) @pytest.mark.parametrize("name", ["a", "/a"]) def test_group_create_array( store: Store, zarr_format: ZarrFormat, overwrite: bool, method: Literal["create_array", "array"], name: str, ) -> None: """ Test `Group.from_store` """ group = Group.from_store(store, zarr_format=zarr_format) shape = (10, 10) dtype = "uint8" data = np.arange(np.prod(shape)).reshape(shape).astype(dtype) if method == "create_array": array = group.create_array(name=name, shape=shape, dtype=dtype) array[:] = data elif method == "array": with pytest.warns(ZarrDeprecationWarning, match=r"Group\.create_array instead\."): with pytest.warns( ZarrUserWarning, match="The `compressor` argument is deprecated. Use `compressors` instead.", ): array = group.array(name=name, data=data, shape=shape, dtype=dtype) else: raise AssertionError if not overwrite: if method == "create_array": with pytest.raises(ContainsArrayError): # noqa: PT012 a = group.create_array(name=name, shape=shape, dtype=dtype) a[:] = data elif method == "array": with pytest.raises(ContainsArrayError): # noqa: PT012 with pytest.warns(ZarrDeprecationWarning, match=r"Group\.create_array instead\."): with pytest.warns( ZarrUserWarning, match="The `compressor` argument is deprecated. 
Use `compressors` instead.", ): a = group.array(name=name, shape=shape, dtype=dtype) a[:] = data assert array.path == normalize_path(name) assert array.name == "/" + array.path assert array.shape == shape assert array.dtype == np.dtype(dtype) assert np.array_equal(array[:], data) @pytest.mark.parametrize("method", ["create_array", "create_group"]) def test_create_with_parent_array(store: Store, zarr_format: ZarrFormat, method: str): """Test that groups/arrays cannot be created under a parent array.""" # create a group with a child array group = Group.from_store(store, zarr_format=zarr_format) group.create_array(name="arr_1", shape=(10, 10), dtype="uint8") error_msg = r"A parent of .* is an array - only groups may have child nodes." if method == "create_array": with pytest.raises(ValueError, match=error_msg): group.create_array("arr_1/group_1/group_2/arr_2", shape=(10, 10), dtype="uint8") else: with pytest.raises(ValueError, match=error_msg): group.create_group("arr_1/group_1/group_2/group_3") LikeMethodName = Literal["zeros_like", "ones_like", "empty_like", "full_like"] @pytest.mark.parametrize("method_name", get_args(LikeMethodName)) @pytest.mark.parametrize("out_shape", ["keep", (10, 10)]) @pytest.mark.parametrize("out_chunks", ["keep", (10, 10)]) @pytest.mark.parametrize("out_dtype", ["keep", "int8"]) def test_group_array_like_creation( zarr_format: ZarrFormat, method_name: LikeMethodName, out_shape: Literal["keep"] | tuple[int, ...], out_chunks: Literal["keep"] | tuple[int, ...], out_dtype: str, ) -> None: """ Test Group.{zeros_like, ones_like, empty_like, full_like}, ensuring that we can override the shape, chunks, and dtype of the array-like object provided to these functions with appropriate keyword arguments """ ref_arr = zarr.ones(store={}, shape=(11, 12), dtype="uint8", chunks=(11, 12)) group = Group.from_store({}, zarr_format=zarr_format) kwargs = {} if method_name == "full_like": expect_fill = 4 kwargs["fill_value"] = expect_fill meth = group.full_like elif method_name == "zeros_like": expect_fill = 0 meth = group.zeros_like elif method_name == "ones_like": expect_fill = 1 meth = group.ones_like elif method_name == "empty_like": expect_fill = ref_arr.fill_value meth = group.empty_like else: raise AssertionError if out_shape != "keep": kwargs["shape"] = out_shape expect_shape = out_shape else: expect_shape = ref_arr.shape if out_chunks != "keep": kwargs["chunks"] = out_chunks expect_chunks = out_chunks else: expect_chunks = ref_arr.chunks if out_dtype != "keep": kwargs["dtype"] = out_dtype expect_dtype = out_dtype else: expect_dtype = ref_arr.dtype new_arr = meth(name="foo", data=ref_arr, **kwargs) assert new_arr.shape == expect_shape assert new_arr.chunks == expect_chunks assert new_arr.dtype == expect_dtype assert np.all(new_arr[:] == expect_fill) def test_group_array_creation( store: Store, zarr_format: ZarrFormat, ): group = Group.from_store(store, zarr_format=zarr_format) shape = (10, 10) empty_array = group.empty(name="empty", shape=shape) assert isinstance(empty_array, Array) assert empty_array.fill_value == 0 assert empty_array.shape == shape assert empty_array.store_path.store == store assert empty_array.store_path.path == "empty" empty_like_array = group.empty_like(name="empty_like", data=empty_array) assert isinstance(empty_like_array, Array) assert empty_like_array.fill_value == 0 assert empty_like_array.shape == shape assert empty_like_array.store_path.store == store empty_array_bool = group.empty(name="empty_bool", shape=shape, dtype=np.dtype("bool")) assert 
isinstance(empty_array_bool, Array) assert not empty_array_bool.fill_value assert empty_array_bool.shape == shape assert empty_array_bool.store_path.store == store empty_like_array_bool = group.empty_like(name="empty_like_bool", data=empty_array_bool) assert isinstance(empty_like_array_bool, Array) assert not empty_like_array_bool.fill_value assert empty_like_array_bool.shape == shape assert empty_like_array_bool.store_path.store == store zeros_array = group.zeros(name="zeros", shape=shape) assert isinstance(zeros_array, Array) assert zeros_array.fill_value == 0 assert zeros_array.shape == shape assert zeros_array.store_path.store == store zeros_like_array = group.zeros_like(name="zeros_like", data=zeros_array) assert isinstance(zeros_like_array, Array) assert zeros_like_array.fill_value == 0 assert zeros_like_array.shape == shape assert zeros_like_array.store_path.store == store ones_array = group.ones(name="ones", shape=shape) assert isinstance(ones_array, Array) assert ones_array.fill_value == 1 assert ones_array.shape == shape assert ones_array.store_path.store == store ones_like_array = group.ones_like(name="ones_like", data=ones_array) assert isinstance(ones_like_array, Array) assert ones_like_array.fill_value == 1 assert ones_like_array.shape == shape assert ones_like_array.store_path.store == store full_array = group.full(name="full", shape=shape, fill_value=42) assert isinstance(full_array, Array) assert full_array.fill_value == 42 assert full_array.shape == shape assert full_array.store_path.store == store full_like_array = group.full_like(name="full_like", data=full_array, fill_value=43) assert isinstance(full_like_array, Array) assert full_like_array.fill_value == 43 assert full_like_array.shape == shape assert full_like_array.store_path.store == store @pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) @pytest.mark.parametrize("zarr_format", [2, 3]) @pytest.mark.parametrize("overwrite", [True, False]) @pytest.mark.parametrize("extant_node", ["array", "group"]) def test_group_creation_existing_node( store: Store, zarr_format: ZarrFormat, overwrite: bool, extant_node: Literal["array", "group"], ) -> None: """ Check that an existing array or group is handled as expected during group creation. """ spath = StorePath(store) group = Group.from_store(spath, zarr_format=zarr_format) expected_exception: type[ContainsArrayError | ContainsGroupError] attributes: dict[str, JSON] = {"old": True} if extant_node == "array": expected_exception = ContainsArrayError _ = group.create_array("extant", shape=(10,), dtype="uint8", attributes=attributes) elif extant_node == "group": expected_exception = ContainsGroupError _ = group.create_group("extant", attributes=attributes) else: raise AssertionError new_attributes = {"new": True} if overwrite: if not store.supports_deletes: pytest.skip("store does not support deletes but overwrite is True") node_new = Group.from_store( spath / "extant", attributes=new_attributes, zarr_format=zarr_format, overwrite=overwrite, ) assert node_new.attrs == new_attributes else: with pytest.raises(expected_exception): node_new = Group.from_store( spath / "extant", attributes=new_attributes, zarr_format=zarr_format, overwrite=overwrite, ) async def test_asyncgroup_create( store: Store, overwrite: bool, zarr_format: ZarrFormat, ) -> None: """ Test that `AsyncGroup.from_store` works as expected. 
""" spath = StorePath(store=store) attributes = {"foo": 100} agroup = await AsyncGroup.from_store( store, attributes=attributes, overwrite=overwrite, zarr_format=zarr_format, ) assert agroup.metadata == GroupMetadata(zarr_format=zarr_format, attributes=attributes) assert agroup.store_path == await make_store_path(store) if not overwrite: with pytest.raises(ContainsGroupError): agroup = await AsyncGroup.from_store( spath, attributes=attributes, overwrite=overwrite, zarr_format=zarr_format, ) # create an array at our target path collision_name = "foo" _ = await zarr.api.asynchronous.create_array( spath / collision_name, shape=(10,), dtype="uint8", zarr_format=zarr_format ) with pytest.raises(ContainsArrayError): _ = await AsyncGroup.from_store( StorePath(store=store) / collision_name, attributes=attributes, overwrite=overwrite, zarr_format=zarr_format, ) async def test_asyncgroup_attrs(store: Store, zarr_format: ZarrFormat) -> None: attributes = {"foo": 100} agroup = await AsyncGroup.from_store(store, zarr_format=zarr_format, attributes=attributes) assert agroup.attrs == agroup.metadata.attributes == attributes async def test_asyncgroup_open( store: Store, zarr_format: ZarrFormat, ) -> None: """ Create an `AsyncGroup`, then ensure that we can open it using `AsyncGroup.open` """ attributes = {"foo": 100} group_w = await AsyncGroup.from_store( store=store, attributes=attributes, overwrite=False, zarr_format=zarr_format, ) group_r = await AsyncGroup.open(store=store, zarr_format=zarr_format) assert group_w.attrs == group_w.attrs == attributes assert group_w == group_r async def test_asyncgroup_open_wrong_format( store: Store, zarr_format: ZarrFormat, ) -> None: _ = await AsyncGroup.from_store(store=store, overwrite=False, zarr_format=zarr_format) zarr_format_wrong: ZarrFormat # try opening with the wrong zarr format if zarr_format == 3: zarr_format_wrong = 2 elif zarr_format == 2: zarr_format_wrong = 3 else: raise AssertionError with pytest.raises(FileNotFoundError): await AsyncGroup.open(store=store, zarr_format=zarr_format_wrong) # todo: replace the dict[str, Any] type with something a bit more specific # should this be async? @pytest.mark.parametrize( "data", [ {"zarr_format": 3, "node_type": "group", "attributes": {"foo": 100}}, {"zarr_format": 2, "attributes": {"foo": 100}}, ], ) def test_asyncgroup_from_dict(store: Store, data: dict[str, Any]) -> None: """ Test that we can create an AsyncGroup from a dict """ path = "test" store_path = StorePath(store=store, path=path) group = AsyncGroup.from_dict(store_path, data=data) assert group.metadata.zarr_format == data["zarr_format"] assert group.metadata.attributes == data["attributes"] # todo: replace this with a declarative API where we model a full hierarchy async def test_asyncgroup_getitem(store: Store, zarr_format: ZarrFormat) -> None: """ Create an `AsyncGroup`, then create members of that group, and ensure that we can access those members via the `AsyncGroup.getitem` method. 
""" agroup = await AsyncGroup.from_store(store=store, zarr_format=zarr_format) array_name = "sub_array" sub_array = await agroup.create_array(name=array_name, shape=(10,), dtype="uint8", chunks=(2,)) assert await agroup.getitem(array_name) == sub_array sub_group_path = "sub_group" sub_group = await agroup.create_group(sub_group_path, attributes={"foo": 100}) assert await agroup.getitem(sub_group_path) == sub_group # check that asking for a nonexistent key raises KeyError with pytest.raises(KeyError): await agroup.getitem("foo") async def test_asyncgroup_delitem(store: Store, zarr_format: ZarrFormat) -> None: if not store.supports_deletes: pytest.skip("store does not support deletes") agroup = await AsyncGroup.from_store(store=store, zarr_format=zarr_format) array_name = "sub_array" _ = await agroup.create_array( name=array_name, shape=(10,), dtype="uint8", chunks=(2,), attributes={"foo": 100}, ) await agroup.delitem(array_name) # todo: clean up the code duplication here if zarr_format == 2: assert not await agroup.store_path.store.exists(array_name + "/" + ".zarray") assert not await agroup.store_path.store.exists(array_name + "/" + ".zattrs") elif zarr_format == 3: assert not await agroup.store_path.store.exists(array_name + "/" + "zarr.json") else: raise AssertionError sub_group_path = "sub_group" _ = await agroup.create_group(sub_group_path, attributes={"foo": 100}) await agroup.delitem(sub_group_path) if zarr_format == 2: assert not await agroup.store_path.store.exists(array_name + "/" + ".zgroup") assert not await agroup.store_path.store.exists(array_name + "/" + ".zattrs") elif zarr_format == 3: assert not await agroup.store_path.store.exists(array_name + "/" + "zarr.json") else: raise AssertionError @pytest.mark.parametrize("name", ["a", "/a"]) async def test_asyncgroup_create_group( store: Store, name: str, zarr_format: ZarrFormat, ) -> None: agroup = await AsyncGroup.from_store(store=store, zarr_format=zarr_format) attributes = {"foo": 999} subgroup = await agroup.create_group(name=name, attributes=attributes) assert isinstance(subgroup, AsyncGroup) assert subgroup.path == normalize_path(name) assert subgroup.name == "/" + subgroup.path assert subgroup.attrs == attributes assert subgroup.store_path.path == subgroup.path assert subgroup.store_path.store == store assert subgroup.metadata.zarr_format == zarr_format async def test_asyncgroup_create_array( store: Store, zarr_format: ZarrFormat, overwrite: bool ) -> None: """ Test that the AsyncGroup.create_array method works correctly. We ensure that array properties specified in create_array are present on the resulting array. """ agroup = await AsyncGroup.from_store(store=store, zarr_format=zarr_format) if not overwrite: with pytest.raises(ContainsGroupError): agroup = await AsyncGroup.from_store(store=store, zarr_format=zarr_format) shape = (10,) dtype = "uint8" chunk_shape = (4,) attributes: dict[str, JSON] = {"foo": 100} sub_node_path = "sub_array" subnode = await agroup.create_array( name=sub_node_path, shape=shape, dtype=dtype, chunks=chunk_shape, attributes=attributes, ) assert isinstance(subnode, AsyncArray) assert subnode.attrs == attributes assert subnode.store_path.path == sub_node_path assert subnode.store_path.store == store assert subnode.shape == shape assert subnode.dtype == dtype # todo: fix the type annotation of array.metadata.chunk_grid so that we get some autocomplete # here. 
assert subnode.metadata.chunk_grid.chunk_shape == chunk_shape assert subnode.metadata.zarr_format == zarr_format async def test_asyncgroup_update_attributes(store: Store, zarr_format: ZarrFormat) -> None: """ Test that the AsyncGroup.update_attributes method works correctly. """ attributes_old = {"foo": 10} attributes_new = {"baz": "new"} agroup = await AsyncGroup.from_store( store=store, zarr_format=zarr_format, attributes=attributes_old ) if isinstance(store, ZipStore): with pytest.warns(UserWarning, match="Duplicate name"): agroup_new_attributes = await agroup.update_attributes(attributes_new) else: agroup_new_attributes = await agroup.update_attributes(attributes_new) attributes_updated = attributes_old.copy() attributes_updated.update(attributes_new) assert agroup_new_attributes.attrs == attributes_updated @pytest.mark.parametrize("store", ["local"], indirect=["store"]) @pytest.mark.parametrize("zarr_format", [2, 3]) async def test_serializable_async_group(store: LocalStore, zarr_format: ZarrFormat) -> None: expected = await AsyncGroup.from_store( store=store, attributes={"foo": 999}, zarr_format=zarr_format ) p = pickle.dumps(expected) actual = pickle.loads(p) assert actual == expected @pytest.mark.parametrize("store", ["local"], indirect=["store"]) @pytest.mark.parametrize("zarr_format", [2, 3]) def test_serializable_sync_group(store: LocalStore, zarr_format: ZarrFormat) -> None: expected = Group.from_store(store=store, attributes={"foo": 999}, zarr_format=zarr_format) p = pickle.dumps(expected) actual = pickle.loads(p) assert actual == expected @pytest.mark.parametrize("consolidated_metadata", [True, False]) async def test_group_members_async(store: Store, consolidated_metadata: bool) -> None: group = await AsyncGroup.from_store( store=store, ) a0 = await group.create_array("a0", shape=(1,), dtype="uint8") g0 = await group.create_group("g0") a1 = await g0.create_array("a1", shape=(1,), dtype="uint8") g1 = await g0.create_group("g1") a2 = await g1.create_array("a2", shape=(1,), dtype="uint8") g2 = await g1.create_group("g2") # immediate children children = sorted([x async for x in group.members()], key=operator.itemgetter(0)) assert children == [ ("a0", a0), ("g0", g0), ] nmembers = await group.nmembers() assert nmembers == 2 # partial children = sorted([x async for x in group.members(max_depth=1)], key=operator.itemgetter(0)) expected = [ ("a0", a0), ("g0", g0), ("g0/a1", a1), ("g0/g1", g1), ] assert children == expected nmembers = await group.nmembers(max_depth=1) assert nmembers == 4 # all children all_children = sorted( [x async for x in group.members(max_depth=None)], key=operator.itemgetter(0) ) expected = [ ("a0", a0), ("g0", g0), ("g0/a1", a1), ("g0/g1", g1), ("g0/g1/a2", a2), ("g0/g1/g2", g2), ] assert all_children == expected if consolidated_metadata: with pytest.warns( # noqa: PT031 ZarrUserWarning, match="Consolidated metadata is currently not part in the Zarr format 3 specification.", ): if isinstance(store, ZipStore): with pytest.warns(UserWarning, match="Duplicate name"): await zarr.api.asynchronous.consolidate_metadata(store=store) else: await zarr.api.asynchronous.consolidate_metadata(store=store) group = await zarr.api.asynchronous.open_group(store=store) nmembers = await group.nmembers(max_depth=None) assert nmembers == 6 with pytest.raises(ValueError, match="max_depth"): [x async for x in group.members(max_depth=-1)] if consolidated_metadata: # test for mixed known and unknown metadata. # For now, we trust the consolidated metadata. 
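        # ``object.__setattr__`` is needed below because the metadata objects are
        # immutable; clearing ``consolidated_metadata`` on the nested "g0/g1" entry
        # simulates a hierarchy in which only part of the tree was consolidated, so
        # the member counts that follow can only include the nodes still known.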
object.__setattr__( group.metadata.consolidated_metadata.metadata["g0"].consolidated_metadata.metadata[ "g1" ], "consolidated_metadata", None, ) # test depth=0 nmembers = await group.nmembers(max_depth=0) assert nmembers == 2 # test depth=1 nmembers = await group.nmembers(max_depth=1) assert nmembers == 4 # test depth=None all_children = sorted( [x async for x in group.members(max_depth=None)], key=operator.itemgetter(0) ) assert len(all_children) == 4 nmembers = await group.nmembers(max_depth=None) assert nmembers == 4 # test depth<0 with pytest.raises(ValueError, match="max_depth"): await group.nmembers(max_depth=-1) async def test_require_group(store: LocalStore | MemoryStore, zarr_format: ZarrFormat) -> None: root = await AsyncGroup.from_store(store=store, zarr_format=zarr_format) # create foo group _ = await root.create_group("foo", attributes={"foo": 100}) # test that we can get the group using require_group foo_group = await root.require_group("foo") assert foo_group.attrs == {"foo": 100} # test that we can get the group using require_group and overwrite=True if store.supports_deletes: foo_group = await root.require_group("foo", overwrite=True) assert foo_group.attrs == {} _ = await foo_group.create_array( "bar", shape=(10,), dtype="uint8", chunks=(2,), attributes={"foo": 100} ) # test that overwriting a group w/ children fails # TODO: figure out why ensure_no_existing_node is not catching the foo.bar array # # with pytest.raises(ContainsArrayError): # await root.require_group("foo", overwrite=True) # test that requiring a group where an array is fails with pytest.raises(TypeError): await foo_group.require_group("bar") async def test_require_groups(store: LocalStore | MemoryStore, zarr_format: ZarrFormat) -> None: root = await AsyncGroup.from_store(store=store, zarr_format=zarr_format) # create foo group _ = await root.create_group("foo", attributes={"foo": 100}) # create bar group _ = await root.create_group("bar", attributes={"bar": 200}) foo_group, bar_group = await root.require_groups("foo", "bar") assert foo_group.attrs == {"foo": 100} assert bar_group.attrs == {"bar": 200} # get a mix of existing and new groups foo_group, spam_group = await root.require_groups("foo", "spam") assert foo_group.attrs == {"foo": 100} assert spam_group.attrs == {} # no names no_group = await root.require_groups() assert no_group == () def test_create_dataset_with_data(store: Store, zarr_format: ZarrFormat) -> None: """Check that deprecated create_dataset method allows input data. See https://github.com/zarr-developers/zarr-python/issues/2631. 
""" root = Group.from_store(store=store, zarr_format=zarr_format) arr = np.random.random((5, 5)) with pytest.warns(ZarrDeprecationWarning, match=r"Group\.create_array instead\."): data = root.create_dataset("random", data=arr, shape=arr.shape) np.testing.assert_array_equal(np.asarray(data), arr) async def test_create_dataset(store: Store, zarr_format: ZarrFormat) -> None: root = await AsyncGroup.from_store(store=store, zarr_format=zarr_format) with pytest.warns(ZarrDeprecationWarning, match=r"Group\.create_array instead\."): foo = await root.create_dataset("foo", shape=(10,), dtype="uint8") assert foo.shape == (10,) with ( pytest.raises(ContainsArrayError), pytest.warns(ZarrDeprecationWarning, match=r"Group\.create_array instead\."), ): await root.create_dataset("foo", shape=(100,), dtype="int8") _ = await root.create_group("bar") with ( pytest.raises(ContainsGroupError), pytest.warns(ZarrDeprecationWarning, match=r"Group\.create_array instead\."), ): await root.create_dataset("bar", shape=(100,), dtype="int8") async def test_require_array(store: Store, zarr_format: ZarrFormat) -> None: root = await AsyncGroup.from_store(store=store, zarr_format=zarr_format) foo1 = await root.require_array("foo", shape=(10,), dtype="i8", attributes={"foo": 101}) assert foo1.attrs == {"foo": 101} foo2 = await root.require_array("foo", shape=(10,), dtype="i8") assert foo2.attrs == {"foo": 101} # exact = False _ = await root.require_array("foo", shape=10, dtype="f8") # errors w/ exact True with pytest.raises(TypeError, match="Incompatible dtype"): await root.require_array("foo", shape=(10,), dtype="f8", exact=True) with pytest.raises(TypeError, match="Incompatible shape"): await root.require_array("foo", shape=(100, 100), dtype="i8") with pytest.raises(TypeError, match="Incompatible dtype"): await root.require_array("foo", shape=(10,), dtype="f4") _ = await root.create_group("bar") with pytest.raises(TypeError, match="Incompatible object"): await root.require_array("bar", shape=(10,), dtype="int8") @pytest.mark.parametrize("consolidate", [True, False]) async def test_members_name(store: Store, consolidate: bool, zarr_format: ZarrFormat): group = Group.from_store(store=store, zarr_format=zarr_format) a = group.create_group(name="a") a.create_array("array", shape=(1,), dtype="uint8") b = a.create_group(name="b") b.create_array("array", shape=(1,), dtype="uint8") if consolidate: if isinstance(store, ZipStore): with pytest.warns(UserWarning, match="Duplicate name"): # noqa: PT031 if zarr_format == 3: with pytest.warns( ZarrUserWarning, match="Consolidated metadata is currently not part in the Zarr format 3 specification.", ): group = zarr.api.synchronous.consolidate_metadata(store) else: group = zarr.api.synchronous.consolidate_metadata(store) else: if zarr_format == 3: with pytest.warns( ZarrUserWarning, match="Consolidated metadata is currently not part in the Zarr format 3 specification.", ): group = zarr.api.synchronous.consolidate_metadata(store) else: group = zarr.api.synchronous.consolidate_metadata(store) result = group["a"]["b"] assert result.name == "/a/b" paths = sorted(x.name for _, x in group.members(max_depth=None)) expected = ["/a", "/a/array", "/a/b", "/a/b/array"] assert paths == expected # regression test for https://github.com/zarr-developers/zarr-python/pull/2356 g = zarr.open_group(store, use_consolidated=False) with warnings.catch_warnings(): warnings.simplefilter("error") assert list(g) async def test_open_mutable_mapping(): group = await zarr.api.asynchronous.open_group( store={}, ) assert 
isinstance(group.store_path.store, MemoryStore) def test_open_mutable_mapping_sync(): group = zarr.open_group( store={}, ) assert isinstance(group.store_path.store, MemoryStore) async def test_open_ambiguous_node(): zarr_json_bytes = default_buffer_prototype().buffer.from_bytes( json.dumps({"zarr_format": 3, "node_type": "group"}).encode("utf-8") ) zgroup_bytes = default_buffer_prototype().buffer.from_bytes( json.dumps({"zarr_format": 2}).encode("utf-8") ) store: dict[str, Buffer] = {"zarr.json": zarr_json_bytes, ".zgroup": zgroup_bytes} with pytest.warns( ZarrUserWarning, match=r"Both zarr\.json \(Zarr format 3\) and \.zgroup \(Zarr format 2\) metadata objects exist at", ): await AsyncGroup.open(store, zarr_format=None) class TestConsolidated: async def test_group_getitem_consolidated(self, store: Store) -> None: root = await AsyncGroup.from_store(store=store) # Set up the test structure with # / # g0/ # group /g0 # g1/ # group /g0/g1 # g2/ # group /g0/g1/g2 # x1/ # group /x0 # x2/ # group /x0/x1 # x3/ # group /x0/x1/x2 g0 = await root.create_group("g0") g1 = await g0.create_group("g1") await g1.create_group("g2") x0 = await root.create_group("x0") x1 = await x0.create_group("x1") await x1.create_group("x2") with pytest.warns( # noqa: PT031 ZarrUserWarning, match="Consolidated metadata is currently not part in the Zarr format 3 specification.", ): if isinstance(store, ZipStore): with pytest.warns(UserWarning, match="Duplicate name"): await zarr.api.asynchronous.consolidate_metadata(store) else: await zarr.api.asynchronous.consolidate_metadata(store) # On disk, we've consolidated all the metadata in the root zarr.json group = await zarr.api.asynchronous.open(store=store) rg0 = await group.getitem("g0") expected = ConsolidatedMetadata( metadata={ "g1": GroupMetadata( attributes={}, zarr_format=3, consolidated_metadata=ConsolidatedMetadata( metadata={ "g2": GroupMetadata( attributes={}, zarr_format=3, consolidated_metadata=ConsolidatedMetadata(metadata={}), ) } ), ), } ) assert rg0.metadata.consolidated_metadata == expected rg1 = await rg0.getitem("g1") assert rg1.metadata.consolidated_metadata == expected.metadata["g1"].consolidated_metadata rg2 = await rg1.getitem("g2") assert rg2.metadata.consolidated_metadata == ConsolidatedMetadata(metadata={}) async def test_group_delitem_consolidated(self, store: Store) -> None: if isinstance(store, ZipStore): raise pytest.skip("Not implemented") root = await AsyncGroup.from_store(store=store) # Set up the test structure with # / # g0/ # group /g0 # g1/ # group /g0/g1 # g2/ # group /g0/g1/g2 # data # array # x1/ # group /x0 # x2/ # group /x0/x1 # x3/ # group /x0/x1/x2 # data # array g0 = await root.create_group("g0") g1 = await g0.create_group("g1") g2 = await g1.create_group("g2") await g2.create_array("data", shape=(1,), dtype="uint8") x0 = await root.create_group("x0") x1 = await x0.create_group("x1") x2 = await x1.create_group("x2") await x2.create_array("data", shape=(1,), dtype="uint8") with pytest.warns( # noqa: PT031 ZarrUserWarning, match="Consolidated metadata is currently not part in the Zarr format 3 specification.", ): if isinstance(store, ZipStore): with pytest.warns(UserWarning, match="Duplicate name"): await zarr.api.asynchronous.consolidate_metadata(store) else: await zarr.api.asynchronous.consolidate_metadata(store) group = await zarr.api.asynchronous.open_consolidated(store=store) assert len(group.metadata.consolidated_metadata.metadata) == 2 assert "g0" in group.metadata.consolidated_metadata.metadata await group.delitem("g0") 
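        # Deleting a member through a group opened with consolidated metadata should
        # also prune that subtree from the in-memory consolidated metadata; the
        # assertions below verify that "g0" is gone while the other root entry remains.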
assert len(group.metadata.consolidated_metadata.metadata) == 1 assert "g0" not in group.metadata.consolidated_metadata.metadata def test_open_consolidated_raises(self, store: Store) -> None: if isinstance(store, ZipStore): raise pytest.skip("Not implemented") root = Group.from_store(store=store) # fine to be missing by default zarr.open_group(store=store) with pytest.raises(ValueError, match="Consolidated metadata requested."): zarr.open_group(store=store, use_consolidated=True) # Now create consolidated metadata... root.create_group("g0") with pytest.warns( ZarrUserWarning, match="Consolidated metadata is currently not part in the Zarr format 3 specification.", ): zarr.consolidate_metadata(store) # and explicitly ignore it. group = zarr.open_group(store=store, use_consolidated=False) assert group.metadata.consolidated_metadata is None async def test_open_consolidated_raises_async(self, store: Store) -> None: if isinstance(store, ZipStore): raise pytest.skip("Not implemented") root = await AsyncGroup.from_store(store=store) # fine to be missing by default await zarr.api.asynchronous.open_group(store=store) with pytest.raises(ValueError, match="Consolidated metadata requested."): await zarr.api.asynchronous.open_group(store=store, use_consolidated=True) # Now create consolidated metadata... await root.create_group("g0") with pytest.warns( ZarrUserWarning, match="Consolidated metadata is currently not part in the Zarr format 3 specification.", ): await zarr.api.asynchronous.consolidate_metadata(store) # and explicitly ignore it. group = await zarr.api.asynchronous.open_group(store=store, use_consolidated=False) assert group.metadata.consolidated_metadata is None class TestGroupMetadata: def test_from_dict_extra_fields(self): data = { "attributes": {"key": "value"}, "_nczarr_superblock": {"version": "2.0.0"}, "zarr_format": 2, } result = GroupMetadata.from_dict(data) expected = GroupMetadata(attributes={"key": "value"}, zarr_format=2) assert result == expected class TestInfo: def test_info(self): store = zarr.storage.MemoryStore() A = zarr.group(store=store, path="A") B = A.create_group(name="B") B.create_array(name="x", shape=(1,), dtype="uint8") B.create_array(name="y", shape=(2,), dtype="uint8") result = A.info expected = GroupInfo( _name="A", _read_only=False, _store_type="MemoryStore", _zarr_format=3, ) assert result == expected result = A.info_complete() expected = GroupInfo( _name="A", _read_only=False, _store_type="MemoryStore", _zarr_format=3, _count_members=3, _count_arrays=2, _count_groups=1, ) assert result == expected def test_update_attrs() -> None: # regression test for https://github.com/zarr-developers/zarr-python/issues/2328 root = Group.from_store( MemoryStore(), ) root.attrs["foo"] = "bar" assert root.attrs["foo"] == "bar" @pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) def test_delitem_removes_children(store: Store, zarr_format: ZarrFormat) -> None: # https://github.com/zarr-developers/zarr-python/issues/2191 g1 = zarr.group(store=store, zarr_format=zarr_format) g1.create_group("0") g1.create_group("0/0") arr = g1.create_array("0/0/0", shape=(1,), dtype="uint8") arr[:] = 1 del g1["0"] with pytest.raises(KeyError): g1["0/0"] @pytest.mark.parametrize("store", ["memory"], indirect=True) @pytest.mark.parametrize("impl", ["async", "sync"]) async def test_create_nodes( impl: Literal["async", "sync"], store: Store, zarr_format: ZarrFormat ) -> None: """ Ensure that ``create_nodes`` can create a zarr hierarchy from a model of that hierarchy in dict 
form. Note that this creates an incomplete Zarr hierarchy. """ node_spec = { "group": GroupMetadata(attributes={"foo": 10}), "group/array_0": meta_from_array(np.arange(3), zarr_format=zarr_format), "group/array_1": meta_from_array(np.arange(4), zarr_format=zarr_format), "group/subgroup/array_0": meta_from_array(np.arange(4), zarr_format=zarr_format), "group/subgroup/array_1": meta_from_array(np.arange(5), zarr_format=zarr_format), } if impl == "sync": observed_nodes = dict(sync_group.create_nodes(store=store, nodes=node_spec)) elif impl == "async": observed_nodes = dict(await _collect_aiterator(create_nodes(store=store, nodes=node_spec))) else: raise ValueError(f"Invalid impl: {impl}") assert node_spec == {k: v.metadata for k, v in observed_nodes.items()} @pytest.mark.parametrize("store", ["memory"], indirect=True) def test_create_nodes_concurrency_limit(store: MemoryStore) -> None: """ Test that the execution time of create_nodes can be constrained by the async concurrency configuration setting. """ set_latency = 0.02 num_groups = 10 groups = {str(idx): GroupMetadata() for idx in range(num_groups)} latency_store = LatencyStore(store, set_latency=set_latency) # check how long it takes to iterate over the groups # if create_nodes is sensitive to IO latency, # this should take (num_groups * get_latency) seconds # otherwise, it should take only marginally more than get_latency seconds with zarr_config.set({"async.concurrency": 1}): start = time.time() _ = tuple(sync_group.create_nodes(store=latency_store, nodes=groups)) elapsed = time.time() - start assert elapsed > num_groups * set_latency @pytest.mark.parametrize( ("a_func", "b_func"), [ (zarr.core.group.AsyncGroup.create_array, zarr.core.group.Group.create_array), (zarr.core.group.AsyncGroup.create_hierarchy, zarr.core.group.Group.create_hierarchy), (zarr.core.group.create_hierarchy, zarr.core.sync_group.create_hierarchy), (zarr.core.group.create_nodes, zarr.core.sync_group.create_nodes), (zarr.core.group.create_rooted_hierarchy, zarr.core.sync_group.create_rooted_hierarchy), (zarr.core.group.get_node, zarr.core.sync_group.get_node), ], ) def test_consistent_signatures( a_func: Callable[[object], object], b_func: Callable[[object], object] ) -> None: """ Ensure that pairs of functions have consistent signatures """ base_sig = inspect.signature(a_func) test_sig = inspect.signature(b_func) wrong: dict[str, list[object]] = { "missing_from_test": [], "missing_from_base": [], "wrong_type": [], } for key, value in base_sig.parameters.items(): if key not in test_sig.parameters: wrong["missing_from_test"].append((key, value)) for key, value in test_sig.parameters.items(): if key not in base_sig.parameters: wrong["missing_from_base"].append((key, value)) if base_sig.parameters[key] != value: wrong["wrong_type"].append({key: {"test": value, "base": base_sig.parameters[key]}}) assert wrong["missing_from_base"] == [] assert wrong["missing_from_test"] == [] assert wrong["wrong_type"] == [] @pytest.mark.parametrize("store", ["memory"], indirect=True) @pytest.mark.parametrize("overwrite", [True, False]) @pytest.mark.parametrize("impl", ["async", "sync"]) async def test_create_hierarchy( impl: Literal["async", "sync"], store: Store, overwrite: bool, zarr_format: ZarrFormat ) -> None: """ Test that ``create_hierarchy`` can create a complete Zarr hierarchy, even if the input describes an incomplete one. 
""" hierarchy_spec = { "group": GroupMetadata(attributes={"path": "group"}, zarr_format=zarr_format), "group/array_0": meta_from_array( np.arange(3), attributes={"path": "group/array_0"}, zarr_format=zarr_format ), "group/subgroup/array_0": meta_from_array( np.arange(4), attributes={"path": "group/subgroup/array_0"}, zarr_format=zarr_format ), } pre_existing_nodes = { "group/extra": GroupMetadata(zarr_format=zarr_format, attributes={"path": "group/extra"}), "": GroupMetadata(zarr_format=zarr_format, attributes={"name": "root"}), } # we expect create_hierarchy to insert a group that was missing from the hierarchy spec expected_meta = hierarchy_spec | {"group/subgroup": GroupMetadata(zarr_format=zarr_format)} # initialize the group with some nodes _ = dict(sync_group.create_nodes(store=store, nodes=pre_existing_nodes)) if impl == "sync": created = dict( sync_group.create_hierarchy(store=store, nodes=hierarchy_spec, overwrite=overwrite) ) elif impl == "async": created = { k: v async for k, v in create_hierarchy( store=store, nodes=hierarchy_spec, overwrite=overwrite ) } else: raise ValueError(f"Invalid impl: {impl}") if not overwrite: extra_group = sync_group.get_node(store=store, path="group/extra", zarr_format=zarr_format) assert extra_group.metadata.attributes == {"path": "group/extra"} else: with pytest.raises(FileNotFoundError): await get_node(store=store, path="group/extra", zarr_format=zarr_format) assert expected_meta == {k: v.metadata for k, v in created.items()} @pytest.mark.parametrize("store", ["memory"], indirect=True) @pytest.mark.parametrize("extant_node", ["array", "group"]) @pytest.mark.parametrize("impl", ["async", "sync"]) async def test_create_hierarchy_existing_nodes( impl: Literal["async", "sync"], store: Store, extant_node: Literal["array", "group"], zarr_format: ZarrFormat, ) -> None: """ Test that create_hierarchy with overwrite = False will not overwrite an existing array or group, and raises an exception instead. """ extant_node_path = "node" if extant_node == "array": extant_metadata = meta_from_array( np.zeros(4), zarr_format=zarr_format, attributes={"extant": True} ) new_metadata = meta_from_array(np.zeros(4), zarr_format=zarr_format) err_cls = ContainsArrayError else: extant_metadata = GroupMetadata(zarr_format=zarr_format, attributes={"extant": True}) new_metadata = GroupMetadata(zarr_format=zarr_format) err_cls = ContainsGroupError # write the extant metadata tuple(sync_group.create_nodes(store=store, nodes={extant_node_path: extant_metadata})) msg = f"{extant_node} exists in store {store!r} at path {extant_node_path!r}." 
# ensure that we cannot invoke create_hierarchy with overwrite=False here if impl == "sync": with pytest.raises(err_cls, match=re.escape(msg)): tuple( sync_group.create_hierarchy( store=store, nodes={"node": new_metadata}, overwrite=False ) ) elif impl == "async": with pytest.raises(err_cls, match=re.escape(msg)): tuple( [ x async for x in create_hierarchy( store=store, nodes={"node": new_metadata}, overwrite=False ) ] ) else: raise ValueError(f"Invalid impl: {impl}") # ensure that the extant metadata was not overwritten assert ( await get_node(store=store, path=extant_node_path, zarr_format=zarr_format) ).metadata.attributes == {"extant": True} @pytest.mark.parametrize("store", ["memory"], indirect=True) @pytest.mark.parametrize("overwrite", [True, False]) @pytest.mark.parametrize("group_path", ["", "foo"]) @pytest.mark.parametrize("impl", ["async", "sync"]) async def test_group_create_hierarchy( store: Store, zarr_format: ZarrFormat, overwrite: bool, group_path: str, impl: Literal["async", "sync"], ) -> None: """ Test that the Group.create_hierarchy method creates specified nodes and returns them in a dict. Also test that off-target nodes are not deleted, and that the root group is not deleted """ root_attrs = {"root": True} g = sync_group.create_rooted_hierarchy( store=store, nodes={group_path: GroupMetadata(zarr_format=zarr_format, attributes=root_attrs)}, ) node_spec = { "a": GroupMetadata(zarr_format=zarr_format, attributes={"name": "a"}), "a/b": GroupMetadata(zarr_format=zarr_format, attributes={"name": "a/b"}), "a/b/c": meta_from_array( np.zeros(5), zarr_format=zarr_format, attributes={"name": "a/b/c"} ), } # This node should be kept if overwrite is True extant_spec = {"b": GroupMetadata(zarr_format=zarr_format, attributes={"name": "b"})} if impl == "async": extant_created = dict( await _collect_aiterator(g._async_group.create_hierarchy(extant_spec, overwrite=False)) ) nodes_created = dict( await _collect_aiterator( g._async_group.create_hierarchy(node_spec, overwrite=overwrite) ) ) elif impl == "sync": extant_created = dict(g.create_hierarchy(extant_spec, overwrite=False)) nodes_created = dict(g.create_hierarchy(node_spec, overwrite=overwrite)) all_members = dict(g.members(max_depth=None)) for k, v in node_spec.items(): assert all_members[k].metadata == v == nodes_created[k].metadata # if overwrite is True, the extant nodes should be erased for k, v in extant_spec.items(): if overwrite: assert k in all_members else: assert all_members[k].metadata == v == extant_created[k].metadata # ensure that we left the root group as-is assert ( sync_group.get_node(store=store, path=group_path, zarr_format=zarr_format).attrs.asdict() == root_attrs ) @pytest.mark.parametrize("store", ["memory"], indirect=True) @pytest.mark.parametrize("overwrite", [True, False]) def test_group_create_hierarchy_no_root( store: Store, zarr_format: ZarrFormat, overwrite: bool ) -> None: """ Test that the Group.create_hierarchy method will error if the dict provided contains a root. """ g = Group.from_store(store, zarr_format=zarr_format) tree = { "": GroupMetadata(zarr_format=zarr_format, attributes={"name": "a"}), } with pytest.raises( ValueError, match="It is an error to use this method to create a root node. 
" ): _ = dict(g.create_hierarchy(tree, overwrite=overwrite)) class TestParseHierarchyDict: """ Tests for the function that parses dicts of str : Metadata pairs, ensuring that the output models a valid Zarr hierarchy """ @staticmethod def test_normed_keys() -> None: """ Test that keys get normalized properly """ nodes = { "a": GroupMetadata(), "/b": GroupMetadata(), "": GroupMetadata(), "/a//c////": GroupMetadata(), } observed = _parse_hierarchy_dict(data=nodes) expected = {normalize_path(k): v for k, v in nodes.items()} assert observed == expected @staticmethod def test_empty() -> None: """ Test that an empty dict passes through """ assert _parse_hierarchy_dict(data={}) == {} @staticmethod def test_implicit_groups() -> None: """ Test that implicit groups were added as needed. """ requested = {"a/b/c": GroupMetadata()} expected = requested | { "": ImplicitGroupMarker(), "a": ImplicitGroupMarker(), "a/b": ImplicitGroupMarker(), } observed = _parse_hierarchy_dict(data=requested) assert observed == expected @pytest.mark.parametrize("store", ["memory"], indirect=True) def test_group_create_hierarchy_invalid_mixed_zarr_format( store: Store, zarr_format: ZarrFormat ) -> None: """ Test that ``Group.create_hierarchy`` will raise an error if the zarr_format of the nodes is different from the parent group. """ other_format = 2 if zarr_format == 3 else 3 g = Group.from_store(store, zarr_format=other_format) tree = { "a": GroupMetadata(zarr_format=zarr_format, attributes={"name": "a"}), "a/b": meta_from_array(np.zeros(5), zarr_format=zarr_format, attributes={"name": "a/c"}), } msg = "The zarr_format of the nodes must be the same as the parent group." with pytest.raises(ValueError, match=msg): _ = tuple(g.create_hierarchy(tree)) @pytest.mark.parametrize("store", ["memory"], indirect=True) @pytest.mark.parametrize("defect", ["array/array", "array/group"]) @pytest.mark.parametrize("impl", ["async", "sync"]) async def test_create_hierarchy_invalid_nested( impl: Literal["async", "sync"], store: Store, defect: tuple[str, str], zarr_format: ZarrFormat ) -> None: """ Test that create_hierarchy will not create a Zarr array that contains a Zarr group or Zarr array. """ if defect == "array/array": hierarchy_spec = { "array_0": meta_from_array(np.arange(3), zarr_format=zarr_format), "array_0/subarray": meta_from_array(np.arange(4), zarr_format=zarr_format), } elif defect == "array/group": hierarchy_spec = { "array_0": meta_from_array(np.arange(3), zarr_format=zarr_format), "array_0/subgroup": GroupMetadata(attributes={"foo": 10}, zarr_format=zarr_format), } msg = "Only Zarr groups can contain other nodes." if impl == "sync": with pytest.raises(ValueError, match=msg): tuple(sync_group.create_hierarchy(store=store, nodes=hierarchy_spec)) elif impl == "async": with pytest.raises(ValueError, match=msg): await _collect_aiterator(create_hierarchy(store=store, nodes=hierarchy_spec)) @pytest.mark.parametrize("store", ["memory"], indirect=True) @pytest.mark.parametrize("impl", ["async", "sync"]) async def test_create_hierarchy_invalid_mixed_format( impl: Literal["async", "sync"], store: Store ) -> None: """ Test that create_hierarchy will not create a Zarr group that contains a both Zarr v2 and Zarr v3 nodes. """ msg = ( "Got data with both Zarr v2 and Zarr v3 nodes, which is invalid. " "The following keys map to Zarr v2 nodes: ['v2']. " "The following keys map to Zarr v3 nodes: ['v3']." "Ensure that all nodes have the same Zarr format." 
) nodes = { "v2": GroupMetadata(zarr_format=2), "v3": GroupMetadata(zarr_format=3), } if impl == "sync": with pytest.raises(ValueError, match=re.escape(msg)): tuple( sync_group.create_hierarchy( store=store, nodes=nodes, ) ) elif impl == "async": with pytest.raises(ValueError, match=re.escape(msg)): await _collect_aiterator( create_hierarchy( store=store, nodes=nodes, ) ) else: raise ValueError(f"Invalid impl: {impl}") @pytest.mark.parametrize("store", ["memory", "local"], indirect=True) @pytest.mark.parametrize("zarr_format", [2, 3]) @pytest.mark.parametrize("root_key", ["", "root"]) @pytest.mark.parametrize("impl", ["async", "sync"]) async def test_create_rooted_hierarchy_group( impl: Literal["async", "sync"], store: Store, zarr_format, root_key: str ) -> None: """ Test that the _create_rooted_hierarchy can create a group. """ root_meta = {root_key: GroupMetadata(zarr_format=zarr_format, attributes={"path": root_key})} group_names = ["a", "a/b"] array_names = ["a/b/c", "a/b/d"] # just to ensure that we don't use the same name twice in tests assert set(group_names) & set(array_names) == set() groups_expected_meta = { _join_paths([root_key, node_name]): GroupMetadata( zarr_format=zarr_format, attributes={"path": node_name} ) for node_name in group_names } arrays_expected_meta = { _join_paths([root_key, node_name]): meta_from_array(np.zeros(4), zarr_format=zarr_format) for node_name in array_names } nodes_create = root_meta | groups_expected_meta | arrays_expected_meta if impl == "sync": g = sync_group.create_rooted_hierarchy(store=store, nodes=nodes_create) assert isinstance(g, Group) members = g.members(max_depth=None) elif impl == "async": g = await create_rooted_hierarchy(store=store, nodes=nodes_create) assert isinstance(g, AsyncGroup) members = await _collect_aiterator(g.members(max_depth=None)) else: raise ValueError(f"Unknown implementation: {impl}") assert g.metadata.attributes == {"path": root_key} members_observed_meta = {k: v.metadata for k, v in members} members_expected_meta_relative = { k.removeprefix(root_key).lstrip("/"): v for k, v in (groups_expected_meta | arrays_expected_meta).items() } assert members_observed_meta == members_expected_meta_relative @pytest.mark.parametrize("store", ["memory", "local"], indirect=True) @pytest.mark.parametrize("zarr_format", [2, 3]) @pytest.mark.parametrize("root_key", ["", "root"]) @pytest.mark.parametrize("impl", ["async", "sync"]) async def test_create_rooted_hierarchy_array( impl: Literal["async", "sync"], store: Store, zarr_format, root_key: str ) -> None: """ Test that _create_rooted_hierarchy can create an array. """ root_meta = { root_key: meta_from_array( np.arange(3), zarr_format=zarr_format, attributes={"path": root_key} ) } nodes_create = root_meta if impl == "sync": a = sync_group.create_rooted_hierarchy(store=store, nodes=nodes_create, overwrite=True) assert isinstance(a, Array) elif impl == "async": a = await create_rooted_hierarchy(store=store, nodes=nodes_create, overwrite=True) assert isinstance(a, AsyncArray) else: raise ValueError(f"Invalid impl: {impl}") assert a.metadata.attributes == {"path": root_key} @pytest.mark.parametrize("impl", ["async", "sync"]) async def test_create_rooted_hierarchy_invalid(impl: Literal["async", "sync"]) -> None: """ Ensure _create_rooted_hierarchy will raise a ValueError if the input does not contain a root node. """ zarr_format = 3 nodes = { "a": GroupMetadata(zarr_format=zarr_format), "b": GroupMetadata(zarr_format=zarr_format), } msg = "The input does not specify a root node. 
" if impl == "sync": with pytest.raises(ValueError, match=msg): sync_group.create_rooted_hierarchy(store=store, nodes=nodes) elif impl == "async": with pytest.raises(ValueError, match=msg): await create_rooted_hierarchy(store=store, nodes=nodes) else: raise ValueError(f"Invalid impl: {impl}") @pytest.mark.parametrize("store", ["memory"], indirect=True) def test_group_members_performance(store: Store) -> None: """ Test that the execution time of Group.members is less than the number of members times the latency for accessing each member. """ get_latency = 0.1 # use the input store to create some groups group_create = zarr.group(store=store) num_groups = 10 # Create some groups for i in range(num_groups): group_create.create_group(f"group{i}") latency_store = LatencyStore(store, get_latency=get_latency) # create a group with some latency on get operations group_read = zarr.group(store=latency_store) # check how long it takes to iterate over the groups # if .members is sensitive to IO latency, # this should take (num_groups * get_latency) seconds # otherwise, it should take only marginally more than get_latency seconds start = time.time() _ = group_read.members() elapsed = time.time() - start assert elapsed < (num_groups * get_latency) @pytest.mark.parametrize("store", ["memory"], indirect=True) def test_group_members_concurrency_limit(store: MemoryStore) -> None: """ Test that the execution time of Group.members can be constrained by the async concurrency configuration setting. """ get_latency = 0.02 # use the input store to create some groups group_create = zarr.group(store=store) num_groups = 10 # Create some groups for i in range(num_groups): group_create.create_group(f"group{i}") latency_store = LatencyStore(store, get_latency=get_latency) # create a group with some latency on get operations group_read = zarr.group(store=latency_store) # check how long it takes to iterate over the groups # if .members is sensitive to IO latency, # this should take (num_groups * get_latency) seconds # otherwise, it should take only marginally more than get_latency seconds with zarr_config.set({"async.concurrency": 1}): start = time.time() _ = group_read.members() elapsed = time.time() - start assert elapsed > num_groups * get_latency @pytest.mark.parametrize("option", ["array", "group", "invalid"]) def test_build_metadata_v3(option: Literal["array", "group", "invalid"]) -> None: """ Test that _build_metadata_v3 returns the correct metadata for a v3 array or group """ match option: case "array": metadata_dict = meta_from_array(np.arange(10), zarr_format=3).to_dict() assert _build_metadata_v3(metadata_dict) == ArrayV3Metadata.from_dict(metadata_dict) case "group": metadata_dict = GroupMetadata(attributes={"foo": 10}, zarr_format=3).to_dict() assert _build_metadata_v3(metadata_dict) == GroupMetadata.from_dict(metadata_dict) case "invalid": metadata_dict = GroupMetadata(zarr_format=3).to_dict() metadata_dict.pop("node_type") # TODO: fix the error message msg = "Required key 'node_type' is missing from the provided metadata document." 
with pytest.raises(MetadataValidationError, match=msg): _build_metadata_v3(metadata_dict) @pytest.mark.parametrize("roots", [("",), ("a", "b")]) def test_get_roots(roots: tuple[str, ...]): root_nodes = {k: GroupMetadata(attributes={"name": k}) for k in roots} child_nodes = { _join_paths([k, "foo"]): GroupMetadata(attributes={"name": _join_paths([k, "foo"])}) for k in roots } data = root_nodes | child_nodes assert set(_get_roots(data)) == set(roots) def test_open_array_as_group(): z = zarr.create_array(shape=(40, 50), chunks=(10, 10), dtype="f8", store={}) with pytest.raises(ContainsArrayError): zarr.open_group(z.store) zarr-python-3.1.5/tests/test_indexing.py000066400000000000000000002133661511007055700204110ustar00rootroot00000000000000from __future__ import annotations import itertools from collections import Counter from typing import TYPE_CHECKING, Any from uuid import uuid4 import numpy as np import numpy.typing as npt import pytest from numpy.testing import assert_array_equal import zarr from zarr import Array from zarr.core.buffer import default_buffer_prototype from zarr.core.indexing import ( BasicSelection, CoordinateSelection, OrthogonalSelection, Selection, _ArrayIndexingOrder, _iter_grid, _iter_regions, ceildiv, make_slice_selection, normalize_integer_selection, oindex, oindex_set, replace_ellipsis, ) from zarr.registry import get_ndbuffer_class from zarr.storage import MemoryStore, StorePath if TYPE_CHECKING: from collections.abc import AsyncGenerator from zarr.core.buffer import BufferPrototype from zarr.core.buffer.core import Buffer @pytest.fixture async def store() -> AsyncGenerator[StorePath]: return StorePath(await MemoryStore.open()) def zarr_array_from_numpy_array( store: StorePath, a: npt.NDArray[Any], chunk_shape: tuple[int, ...] 
| None = None, ) -> zarr.Array: z = zarr.create_array( store=store / str(uuid4()), shape=a.shape, dtype=a.dtype, chunks=chunk_shape or a.shape, chunk_key_encoding={"name": "v2", "separator": "."}, ) z[()] = a return z class CountingDict(MemoryStore): counter: Counter[tuple[str, str]] @classmethod async def open(cls) -> CountingDict: store = await super().open() store.counter = Counter() return store async def get( self, key: str, prototype: BufferPrototype, byte_range: tuple[int | None, int | None] | None = None, ) -> Buffer | None: key_suffix = "/".join(key.split("/")[1:]) self.counter["__getitem__", key_suffix] += 1 return await super().get(key, prototype, byte_range) async def set(self, key: str, value: Buffer, byte_range: tuple[int, int] | None = None) -> None: key_suffix = "/".join(key.split("/")[1:]) self.counter["__setitem__", key_suffix] += 1 return await super().set(key, value, byte_range) def test_normalize_integer_selection() -> None: assert 1 == normalize_integer_selection(1, 100) assert 99 == normalize_integer_selection(-1, 100) with pytest.raises(IndexError): normalize_integer_selection(100, 100) with pytest.raises(IndexError): normalize_integer_selection(1000, 100) with pytest.raises(IndexError): normalize_integer_selection(-1000, 100) def test_replace_ellipsis() -> None: # 1D, single item assert (0,) == replace_ellipsis(0, (100,)) # 1D assert (slice(None),) == replace_ellipsis(Ellipsis, (100,)) assert (slice(None),) == replace_ellipsis(slice(None), (100,)) assert (slice(None, 100),) == replace_ellipsis(slice(None, 100), (100,)) assert (slice(0, None),) == replace_ellipsis(slice(0, None), (100,)) assert (slice(None),) == replace_ellipsis((slice(None), Ellipsis), (100,)) assert (slice(None),) == replace_ellipsis((Ellipsis, slice(None)), (100,)) # 2D, single item assert (0, 0) == replace_ellipsis((0, 0), (100, 100)) assert (-1, 1) == replace_ellipsis((-1, 1), (100, 100)) # 2D, single col/row assert (0, slice(None)) == replace_ellipsis((0, slice(None)), (100, 100)) assert (0, slice(None)) == replace_ellipsis((0,), (100, 100)) assert (slice(None), 0) == replace_ellipsis((slice(None), 0), (100, 100)) # 2D slice assert (slice(None), slice(None)) == replace_ellipsis(Ellipsis, (100, 100)) assert (slice(None), slice(None)) == replace_ellipsis(slice(None), (100, 100)) assert (slice(None), slice(None)) == replace_ellipsis((slice(None), slice(None)), (100, 100)) assert (slice(None), slice(None)) == replace_ellipsis((Ellipsis, slice(None)), (100, 100)) assert (slice(None), slice(None)) == replace_ellipsis((slice(None), Ellipsis), (100, 100)) assert (slice(None), slice(None)) == replace_ellipsis( (slice(None), Ellipsis, slice(None)), (100, 100) ) assert (slice(None), slice(None)) == replace_ellipsis( (Ellipsis, slice(None), slice(None)), (100, 100) ) assert (slice(None), slice(None)) == replace_ellipsis( (slice(None), slice(None), Ellipsis), (100, 100) ) @pytest.mark.parametrize( ("value", "dtype"), [ (42, "uint8"), pytest.param( (b"aaa", 1, 4.2), [("foo", "S3"), ("bar", "i4"), ("baz", "f8")], marks=pytest.mark.xfail ), ], ) @pytest.mark.parametrize("use_out", [True, False]) def test_get_basic_selection_0d(store: StorePath, use_out: bool, value: Any, dtype: Any) -> None: # setup arr_np = np.array(value, dtype=dtype) arr_z = zarr_array_from_numpy_array(store, arr_np) assert_array_equal(arr_np, arr_z.get_basic_selection(Ellipsis)) assert_array_equal(arr_np, arr_z[...]) assert value == arr_z.get_basic_selection(()) assert value == arr_z[()] if use_out: # test out param b = 
default_buffer_prototype().nd_buffer.from_numpy_array(np.zeros_like(arr_np)) arr_z.get_basic_selection(Ellipsis, out=b) assert_array_equal(arr_np, b.as_ndarray_like()) # todo: uncomment the structured array tests when we can make them pass, # or delete them if we formally decide not to support structured dtypes. # test structured array # value = (b"aaa", 1, 4.2) # a = np.array(value, dtype=[("foo", "S3"), ("bar", "i4"), ("baz", "f8")]) # z = zarr_array_from_numpy_array(store, a) # z[()] = value # assert_array_equal(a, z.get_basic_selection(Ellipsis)) # assert_array_equal(a, z[...]) # assert a[()] == z.get_basic_selection(()) # assert a[()] == z[()] # assert b"aaa" == z.get_basic_selection((), fields="foo") # assert b"aaa" == z["foo"] # assert a[["foo", "bar"]] == z.get_basic_selection((), fields=["foo", "bar"]) # assert a[["foo", "bar"]] == z["foo", "bar"] # # test out param # b = NDBuffer.from_numpy_array(np.zeros_like(a)) # z.get_basic_selection(Ellipsis, out=b) # assert_array_equal(a, b) # c = NDBuffer.from_numpy_array(np.zeros_like(a[["foo", "bar"]])) # z.get_basic_selection(Ellipsis, out=c, fields=["foo", "bar"]) # assert_array_equal(a[["foo", "bar"]], c) basic_selections_1d: list[BasicSelection] = [ # single value 42, -1, # slices slice(0, 1050), slice(50, 150), slice(0, 2000), slice(-150, -50), slice(-2000, 2000), slice(0, 0), # empty result slice(-1, 0), # empty result # total selections slice(None), Ellipsis, (), (Ellipsis, slice(None)), # slice with step slice(None), slice(None, None), slice(None, None, 1), slice(None, None, 10), slice(None, None, 100), slice(None, None, 1000), slice(None, None, 10000), slice(0, 1050), slice(0, 1050, 1), slice(0, 1050, 10), slice(0, 1050, 100), slice(0, 1050, 1000), slice(0, 1050, 10000), slice(1, 31, 3), slice(1, 31, 30), slice(1, 31, 300), slice(81, 121, 3), slice(81, 121, 30), slice(81, 121, 300), slice(50, 150), slice(50, 150, 1), slice(50, 150, 10), ] basic_selections_1d_bad = [ # only positive step supported slice(None, None, -1), slice(None, None, -10), slice(None, None, -100), slice(None, None, -1000), slice(None, None, -10000), slice(1050, -1, -1), slice(1050, -1, -10), slice(1050, -1, -100), slice(1050, -1, -1000), slice(1050, -1, -10000), slice(1050, 0, -1), slice(1050, 0, -10), slice(1050, 0, -100), slice(1050, 0, -1000), slice(1050, 0, -10000), slice(150, 50, -1), slice(150, 50, -10), slice(31, 1, -3), slice(121, 81, -3), slice(-1, 0, -1), # bad stuff 2.3, "foo", b"xxx", None, (0, 0), (slice(None), slice(None)), ] def _test_get_basic_selection( a: npt.NDArray[Any] | Array, z: Array, selection: BasicSelection ) -> None: expect = a[selection] actual = z.get_basic_selection(selection) assert_array_equal(expect, actual) actual = z[selection] assert_array_equal(expect, actual) # test out param b = default_buffer_prototype().nd_buffer.from_numpy_array( np.empty(shape=expect.shape, dtype=expect.dtype) ) z.get_basic_selection(selection, out=b) assert_array_equal(expect, b.as_numpy_array()) # noinspection PyStatementEffect def test_get_basic_selection_1d(store: StorePath) -> None: # setup a = np.arange(1050, dtype=int) z = zarr_array_from_numpy_array(store, a, chunk_shape=(100,)) for selection in basic_selections_1d: _test_get_basic_selection(a, z, selection) for selection_bad in basic_selections_1d_bad: with pytest.raises(IndexError): z.get_basic_selection(selection_bad) # type: ignore[arg-type] with pytest.raises(IndexError): z[selection_bad] # type: ignore[index] with pytest.raises(IndexError): z.get_basic_selection([1, 0]) # type: 
ignore[arg-type] basic_selections_2d: list[BasicSelection] = [ # single row 42, -1, (42, slice(None)), (-1, slice(None)), # single col (slice(None), 4), (slice(None), -1), # row slices slice(None), slice(0, 1000), slice(250, 350), slice(0, 2000), slice(-350, -250), slice(0, 0), # empty result slice(-1, 0), # empty result slice(-2000, 0), slice(-2000, 2000), # 2D slices (slice(None), slice(1, 5)), (slice(250, 350), slice(None)), (slice(250, 350), slice(1, 5)), (slice(250, 350), slice(-5, -1)), (slice(250, 350), slice(-50, 50)), (slice(250, 350, 10), slice(1, 5)), (slice(250, 350), slice(1, 5, 2)), (slice(250, 350, 33), slice(1, 5, 3)), # total selections (slice(None), slice(None)), Ellipsis, (), (Ellipsis, slice(None)), (Ellipsis, slice(None), slice(None)), ] basic_selections_2d_bad = [ # bad stuff 2.3, "foo", b"xxx", None, (2.3, slice(None)), # only positive step supported slice(None, None, -1), (slice(None, None, -1), slice(None)), (0, 0, 0), (slice(None), slice(None), slice(None)), ] # noinspection PyStatementEffect def test_get_basic_selection_2d(store: StorePath) -> None: # setup a = np.arange(10000, dtype=int).reshape(1000, 10) z = zarr_array_from_numpy_array(store, a, chunk_shape=(300, 3)) for selection in basic_selections_2d: _test_get_basic_selection(a, z, selection) bad_selections = basic_selections_2d_bad + [ # integer arrays [0, 1], (slice(None), [0, 1]), ] for selection_bad in bad_selections: with pytest.raises(IndexError): z.get_basic_selection(selection_bad) # type: ignore[arg-type] # check fallback on fancy indexing fancy_selection = ([0, 1], [0, 1]) np.testing.assert_array_equal(z[fancy_selection], [0, 11]) def test_fancy_indexing_fallback_on_get_setitem(store: StorePath) -> None: z = zarr_array_from_numpy_array(store, np.zeros((20, 20))) z[[1, 2, 3], [1, 2, 3]] = 1 np.testing.assert_array_equal( z[:4, :4], [ [0, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1], ], ) np.testing.assert_array_equal(z[[1, 2, 3], [1, 2, 3]], 1) # test broadcasting np.testing.assert_array_equal(z[1, [1, 2, 3]], [1, 0, 0]) # test 1D fancy indexing z2 = zarr_array_from_numpy_array(store, np.zeros(5)) z2[[1, 2, 3]] = 1 np.testing.assert_array_equal(z2[:], [0, 1, 1, 1, 0]) @pytest.mark.parametrize( ("index", "expected_result"), [ # Single iterable of integers ([0, 1], [[0, 1, 2], [3, 4, 5]]), # List first, then slice (([0, 1], slice(None)), [[0, 1, 2], [3, 4, 5]]), # List first, then slice (([0, 1], slice(1, None)), [[1, 2], [4, 5]]), # Slice first, then list ((slice(0, 2), [0, 2]), [[0, 2], [3, 5]]), # Slices only ((slice(0, 2), slice(0, 2)), [[0, 1], [3, 4]]), # List with repeated index (([1, 0, 1], slice(1, None)), [[4, 5], [1, 2], [4, 5]]), # 1D indexing (([1, 0, 1]), [[3, 4, 5], [0, 1, 2], [3, 4, 5]]), ], ) def test_orthogonal_indexing_fallback_on_getitem_2d( store: StorePath, index: Selection, expected_result: npt.ArrayLike ) -> None: """ Tests the orthogonal indexing fallback on __getitem__ for a 2D matrix. In addition to checking expected behavior, all indexing is also checked against numpy. 
""" # [0, 1, 2], # [3, 4, 5], # [6, 7, 8] a = np.arange(9).reshape(3, 3) z = zarr_array_from_numpy_array(store, a) np.testing.assert_array_equal(z[index], a[index], err_msg="Indexing disagrees with numpy") np.testing.assert_array_equal(z[index], expected_result) @pytest.mark.skip(reason="fails on ubuntu, windows; numpy=2.2; in CI") def test_setitem_repeated_index(): array = zarr.array(data=np.zeros((4,)), chunks=(1,)) indexer = np.array([-1, -1, 0, 0]) array.oindex[(indexer,)] = [0, 1, 2, 3] np.testing.assert_array_equal(array[:], np.array([3, 0, 0, 1])) indexer = np.array([-1, 0, 0, -1]) array.oindex[(indexer,)] = [0, 1, 2, 3] np.testing.assert_array_equal(array[:], np.array([2, 0, 0, 3])) Index = list[int] | tuple[slice | int | list[int], ...] @pytest.mark.parametrize( ("index", "expected_result"), [ # Single iterable of integers ([0, 1], [[[0, 1, 2], [3, 4, 5], [6, 7, 8]], [[9, 10, 11], [12, 13, 14], [15, 16, 17]]]), # One slice, two integers ((slice(0, 2), 1, 1), [4, 13]), # One integer, two slices ((slice(0, 2), 1, slice(0, 2)), [[3, 4], [12, 13]]), # Two slices and a list ((slice(0, 2), [1, 2], slice(0, 2)), [[[3, 4], [6, 7]], [[12, 13], [15, 16]]]), ], ) def test_orthogonal_indexing_fallback_on_getitem_3d( store: StorePath, index: Selection, expected_result: npt.ArrayLike ) -> None: """ Tests the orthogonal indexing fallback on __getitem__ for a 3D matrix. In addition to checking expected behavior, all indexing is also checked against numpy. """ # [[[ 0, 1, 2], # [ 3, 4, 5], # [ 6, 7, 8]], # [[ 9, 10, 11], # [12, 13, 14], # [15, 16, 17]], # [[18, 19, 20], # [21, 22, 23], # [24, 25, 26]]] a = np.arange(27).reshape(3, 3, 3) z = zarr_array_from_numpy_array(store, a) np.testing.assert_array_equal(z[index], a[index], err_msg="Indexing disagrees with numpy") np.testing.assert_array_equal(z[index], expected_result) @pytest.mark.parametrize( ("index", "expected_result"), [ # Single iterable of integers ([0, 1], [[1, 1, 1], [1, 1, 1], [0, 0, 0]]), # List and slice combined (([0, 1], slice(1, 3)), [[0, 1, 1], [0, 1, 1], [0, 0, 0]]), # Index repetition is ignored on setitem (([0, 1, 1, 1, 1, 1, 1], slice(1, 3)), [[0, 1, 1], [0, 1, 1], [0, 0, 0]]), # Slice with step (([0, 2], slice(None, None, 2)), [[1, 0, 1], [0, 0, 0], [1, 0, 1]]), ], ) def test_orthogonal_indexing_fallback_on_setitem_2d( store: StorePath, index: Selection, expected_result: npt.ArrayLike ) -> None: """ Tests the orthogonal indexing fallback on __setitem__ for a 3D matrix. In addition to checking expected behavior, all indexing is also checked against numpy. 
""" # Slice + fancy index a = np.zeros((3, 3)) z = zarr_array_from_numpy_array(store, a) z[index] = 1 a[index] = 1 np.testing.assert_array_equal(z[:], expected_result) np.testing.assert_array_equal(z[:], a, err_msg="Indexing disagrees with numpy") def test_fancy_indexing_doesnt_mix_with_implicit_slicing(store: StorePath) -> None: z2 = zarr_array_from_numpy_array(store, np.zeros((5, 5, 5))) with pytest.raises(IndexError): z2[[1, 2, 3], [1, 2, 3]] = 2 with pytest.raises(IndexError): np.testing.assert_array_equal(z2[[1, 2, 3], [1, 2, 3]], 0) with pytest.raises(IndexError): z2[..., [1, 2, 3]] = 2 # type: ignore[index] with pytest.raises(IndexError): np.testing.assert_array_equal(z2[..., [1, 2, 3]], 0) # type: ignore[index] @pytest.mark.parametrize( ("value", "dtype"), [ (42, "uint8"), pytest.param( (b"aaa", 1, 4.2), [("foo", "S3"), ("bar", "i4"), ("baz", "f8")], marks=pytest.mark.xfail ), ], ) def test_set_basic_selection_0d( store: StorePath, value: Any, dtype: str | list[tuple[str, str]] ) -> None: arr_np = np.array(value, dtype=dtype) arr_np_zeros = np.zeros_like(arr_np, dtype=dtype) arr_z = zarr_array_from_numpy_array(store, arr_np_zeros) assert_array_equal(arr_np_zeros, arr_z) arr_z.set_basic_selection(Ellipsis, value) assert_array_equal(value, arr_z) arr_z[...] = 0 assert_array_equal(arr_np_zeros, arr_z) arr_z[...] = value assert_array_equal(value, arr_z) # todo: uncomment the structured array tests when we can make them pass, # or delete them if we formally decide not to support structured dtypes. # arr_z.set_basic_selection(Ellipsis, v["foo"], fields="foo") # assert v["foo"] == arr_z["foo"] # assert arr_np_zeros["bar"] == arr_z["bar"] # assert arr_np_zeros["baz"] == arr_z["baz"] # arr_z["bar"] = v["bar"] # assert v["foo"] == arr_z["foo"] # assert v["bar"] == arr_z["bar"] # assert arr_np_zeros["baz"] == arr_z["baz"] # # multiple field assignment not supported # with pytest.raises(IndexError): # arr_z.set_basic_selection(Ellipsis, v[["foo", "bar"]], fields=["foo", "bar"]) # with pytest.raises(IndexError): # arr_z[..., "foo", "bar"] = v[["foo", "bar"]] def _test_get_orthogonal_selection( a: npt.NDArray[Any], z: Array, selection: OrthogonalSelection ) -> None: expect = oindex(a, selection) actual = z.get_orthogonal_selection(selection) assert_array_equal(expect, actual) actual = z.oindex[selection] assert_array_equal(expect, actual) # noinspection PyStatementEffect def test_get_orthogonal_selection_1d_bool(store: StorePath) -> None: # setup a = np.arange(1050, dtype=int) z = zarr_array_from_numpy_array(store, a, chunk_shape=(100,)) np.random.seed(42) # test with different degrees of sparseness for p in 0.5, 0.1, 0.01: ix = np.random.binomial(1, p, size=a.shape[0]).astype(bool) _test_get_orthogonal_selection(a, z, ix) # test errors with pytest.raises(IndexError): z.oindex[np.zeros(50, dtype=bool)] # too short with pytest.raises(IndexError): z.oindex[np.zeros(2000, dtype=bool)] # too long with pytest.raises(IndexError): # too many dimensions z.oindex[[[True, False], [False, True]]] # type: ignore[index] # noinspection PyStatementEffect def test_get_orthogonal_selection_1d_int(store: StorePath) -> None: # setup a = np.arange(550, dtype=int) z = zarr_array_from_numpy_array(store, a, chunk_shape=(100,)) np.random.seed(42) # test with different degrees of sparseness for p in 0.5, 0.01: # sorted integer arrays ix = np.random.choice(a.shape[0], size=int(a.shape[0] * p), replace=True) ix.sort() _test_get_orthogonal_selection(a, z, ix) selections = basic_selections_1d + [ # test wraparound [0, 3, 
10, -23, -12, -1], # explicit test not sorted [3, 105, 23, 127], ] for selection in selections: _test_get_orthogonal_selection(a, z, selection) bad_selections = basic_selections_1d_bad + [ [a.shape[0] + 1], # out of bounds [-(a.shape[0] + 1)], # out of bounds [[2, 4], [6, 8]], # too many dimensions ] for bad_selection in bad_selections: with pytest.raises(IndexError): z.get_orthogonal_selection(bad_selection) # type: ignore[arg-type] with pytest.raises(IndexError): z.oindex[bad_selection] # type: ignore[index] def _test_get_orthogonal_selection_2d( a: npt.NDArray[Any], z: Array, ix0: npt.NDArray[np.bool], ix1: npt.NDArray[np.bool] ) -> None: selections = [ # index both axes with array (ix0, ix1), # mixed indexing with array / slice (ix0, slice(1, 5)), (ix0, slice(1, 5, 2)), (slice(250, 350), ix1), (slice(250, 350, 10), ix1), # mixed indexing with array / int (ix0, 4), (42, ix1), ] for selection in selections: _test_get_orthogonal_selection(a, z, selection) # noinspection PyStatementEffect def test_get_orthogonal_selection_2d(store: StorePath) -> None: # setup a = np.arange(5400, dtype=int).reshape(600, 9) z = zarr_array_from_numpy_array(store, a, chunk_shape=(300, 3)) np.random.seed(42) # test with different degrees of sparseness for p in 0.5, 0.01: # boolean arrays ix0 = np.random.binomial(1, p, size=a.shape[0]).astype(bool) ix1 = np.random.binomial(1, 0.5, size=a.shape[1]).astype(bool) _test_get_orthogonal_selection_2d(a, z, ix0, ix1) # mixed int array / bool array selections = ( (ix0, np.nonzero(ix1)[0]), (np.nonzero(ix0)[0], ix1), ) for selection in selections: _test_get_orthogonal_selection(a, z, selection) # sorted integer arrays ix0 = np.random.choice(a.shape[0], size=int(a.shape[0] * p), replace=True) ix1 = np.random.choice(a.shape[1], size=int(a.shape[1] * 0.5), replace=True) ix0.sort() ix1.sort() _test_get_orthogonal_selection_2d(a, z, ix0, ix1) for selection_2d in basic_selections_2d: _test_get_orthogonal_selection(a, z, selection_2d) for selection_2d_bad in basic_selections_2d_bad: with pytest.raises(IndexError): z.get_orthogonal_selection(selection_2d_bad) # type: ignore[arg-type] with pytest.raises(IndexError): z.oindex[selection_2d_bad] # type: ignore[index] def _test_get_orthogonal_selection_3d( a: npt.NDArray, z: Array, ix0: npt.NDArray[np.bool], ix1: npt.NDArray[np.bool], ix2: npt.NDArray[np.bool], ) -> None: selections = [ # single value (60, 15, 4), (-1, -1, -1), # index all axes with array (ix0, ix1, ix2), # mixed indexing with single array / slices (ix0, slice(10, 20), slice(1, 5)), (slice(30, 50), ix1, slice(1, 5)), (slice(30, 50), slice(10, 20), ix2), (ix0, slice(10, 20, 5), slice(1, 5, 2)), (slice(30, 50, 3), ix1, slice(1, 5, 2)), (slice(30, 50, 3), slice(10, 20, 5), ix2), # mixed indexing with single array / ints (ix0, 15, 4), (60, ix1, 4), (60, 15, ix2), # mixed indexing with single array / slice / int (ix0, slice(10, 20), 4), (15, ix1, slice(1, 5)), (slice(30, 50), 15, ix2), # mixed indexing with two array / slice (ix0, ix1, slice(1, 5)), (slice(30, 50), ix1, ix2), (ix0, slice(10, 20), ix2), # mixed indexing with two array / integer (ix0, ix1, 4), (15, ix1, ix2), (ix0, 15, ix2), ] for selection in selections: _test_get_orthogonal_selection(a, z, selection) def test_get_orthogonal_selection_3d(store: StorePath) -> None: # setup a = np.arange(32400, dtype=int).reshape(120, 30, 9) z = zarr_array_from_numpy_array(store, a, chunk_shape=(60, 20, 3)) np.random.seed(42) # test with different degrees of sparseness for p in 0.5, 0.01: # boolean arrays ix0 = 
np.random.binomial(1, p, size=a.shape[0]).astype(bool) ix1 = np.random.binomial(1, 0.5, size=a.shape[1]).astype(bool) ix2 = np.random.binomial(1, 0.5, size=a.shape[2]).astype(bool) _test_get_orthogonal_selection_3d(a, z, ix0, ix1, ix2) # sorted integer arrays ix0 = np.random.choice(a.shape[0], size=int(a.shape[0] * p), replace=True) ix1 = np.random.choice(a.shape[1], size=int(a.shape[1] * 0.5), replace=True) ix2 = np.random.choice(a.shape[2], size=int(a.shape[2] * 0.5), replace=True) ix0.sort() ix1.sort() ix2.sort() _test_get_orthogonal_selection_3d(a, z, ix0, ix1, ix2) def test_orthogonal_indexing_edge_cases(store: StorePath) -> None: a = np.arange(6).reshape(1, 2, 3) z = zarr_array_from_numpy_array(store, a, chunk_shape=(1, 2, 3)) expect = oindex(a, (0, slice(None), [0, 1, 2])) actual = z.oindex[0, :, [0, 1, 2]] assert_array_equal(expect, actual) expect = oindex(a, (0, slice(None), [True, True, True])) actual = z.oindex[0, :, [True, True, True]] assert_array_equal(expect, actual) def _test_set_orthogonal_selection( v: npt.NDArray[np.int_], a: npt.NDArray[Any], z: Array, selection: OrthogonalSelection ) -> None: for value in 42, oindex(v, selection), oindex(v, selection).tolist(): if isinstance(value, list) and value == []: # skip these cases as cannot preserve all dimensions continue # setup expectation a[:] = 0 oindex_set(a, selection, value) # long-form API z[:] = 0 z.set_orthogonal_selection(selection, value) assert_array_equal(a, z[:]) # short-form API z[:] = 0 z.oindex[selection] = value assert_array_equal(a, z[:]) def test_set_orthogonal_selection_1d(store: StorePath) -> None: # setup v = np.arange(550, dtype=int) a = np.empty(v.shape, dtype=int) z = zarr_array_from_numpy_array(store, a, chunk_shape=(100,)) # test with different degrees of sparseness np.random.seed(42) for p in 0.5, 0.01: # boolean arrays ix = np.random.binomial(1, p, size=a.shape[0]).astype(bool) _test_set_orthogonal_selection(v, a, z, ix) # sorted integer arrays ix = np.random.choice(a.shape[0], size=int(a.shape[0] * p), replace=True) ix.sort() _test_set_orthogonal_selection(v, a, z, ix) # basic selections for selection in basic_selections_1d: _test_set_orthogonal_selection(v, a, z, selection) def test_set_item_1d_last_two_chunks(store: StorePath): # regression test for GH2849 g = zarr.open_group(store=store, zarr_format=3, mode="w") a = g.create_array("bar", shape=(10,), chunks=(3,), dtype=int) data = np.array([7, 8, 9]) a[slice(7, 10)] = data np.testing.assert_array_equal(a[slice(7, 10)], data) z = zarr.open_group(store=store, mode="w") z.create_array("zoo", dtype=float, shape=()) z["zoo"][...] = np.array(1) # why doesn't [:] work? np.testing.assert_equal(z["zoo"][()], np.array(1)) z = zarr.open_group(store=store, mode="w") z.create_array("zoo", dtype=float, shape=()) z["zoo"][...] = 1 # why doesn't [:] work? 
np.testing.assert_equal(z["zoo"][()], np.array(1)) def _test_set_orthogonal_selection_2d( v: npt.NDArray[np.int_], a: npt.NDArray[np.int_], z: Array, ix0: npt.NDArray[np.bool], ix1: npt.NDArray[np.bool], ) -> None: selections = [ # index both axes with array (ix0, ix1), # mixed indexing with array / slice or int (ix0, slice(1, 5)), (slice(250, 350), ix1), (ix0, 4), (42, ix1), ] for selection in selections: _test_set_orthogonal_selection(v, a, z, selection) def test_set_orthogonal_selection_2d(store: StorePath) -> None: # setup v = np.arange(5400, dtype=int).reshape(600, 9) a = np.empty_like(v) z = zarr_array_from_numpy_array(store, a, chunk_shape=(300, 3)) np.random.seed(42) # test with different degrees of sparseness for p in 0.5, 0.01: # boolean arrays ix0 = np.random.binomial(1, p, size=a.shape[0]).astype(bool) ix1 = np.random.binomial(1, 0.5, size=a.shape[1]).astype(bool) _test_set_orthogonal_selection_2d(v, a, z, ix0, ix1) # sorted integer arrays ix0 = np.random.choice(a.shape[0], size=int(a.shape[0] * p), replace=True) ix1 = np.random.choice(a.shape[1], size=int(a.shape[1] * 0.5), replace=True) ix0.sort() ix1.sort() _test_set_orthogonal_selection_2d(v, a, z, ix0, ix1) for selection in basic_selections_2d: _test_set_orthogonal_selection(v, a, z, selection) def _test_set_orthogonal_selection_3d( v: npt.NDArray[np.int_], a: npt.NDArray[np.int_], z: Array, ix0: npt.NDArray[np.bool], ix1: npt.NDArray[np.bool], ix2: npt.NDArray[np.bool], ) -> None: selections = ( # single value (60, 15, 4), (-1, -1, -1), # index all axes with bool array (ix0, ix1, ix2), # mixed indexing with single bool array / slice or int (ix0, slice(10, 20), slice(1, 5)), (slice(30, 50), ix1, slice(1, 5)), (slice(30, 50), slice(10, 20), ix2), (ix0, 15, 4), (60, ix1, 4), (60, 15, ix2), (ix0, slice(10, 20), 4), (slice(30, 50), ix1, 4), (slice(30, 50), 15, ix2), # indexing with two arrays / slice (ix0, ix1, slice(1, 5)), # indexing with two arrays / integer (ix0, ix1, 4), ) for selection in selections: _test_set_orthogonal_selection(v, a, z, selection) def test_set_orthogonal_selection_3d(store: StorePath) -> None: # setup v = np.arange(32400, dtype=int).reshape(120, 30, 9) a = np.empty_like(v) z = zarr_array_from_numpy_array(store, a, chunk_shape=(60, 20, 3)) np.random.seed(42) # test with different degrees of sparseness for p in 0.5, 0.01: # boolean arrays ix0 = np.random.binomial(1, p, size=a.shape[0]).astype(bool) ix1 = np.random.binomial(1, 0.5, size=a.shape[1]).astype(bool) ix2 = np.random.binomial(1, 0.5, size=a.shape[2]).astype(bool) _test_set_orthogonal_selection_3d(v, a, z, ix0, ix1, ix2) # sorted integer arrays ix0 = np.random.choice(a.shape[0], size=int(a.shape[0] * p), replace=True) ix1 = np.random.choice(a.shape[1], size=int(a.shape[1] * 0.5), replace=True) ix2 = np.random.choice(a.shape[2], size=int(a.shape[2] * 0.5), replace=True) ix0.sort() ix1.sort() ix2.sort() _test_set_orthogonal_selection_3d(v, a, z, ix0, ix1, ix2) def test_orthogonal_indexing_fallback_on_get_setitem(store: StorePath) -> None: z = zarr_array_from_numpy_array(store, np.zeros((20, 20))) z[[1, 2, 3], [1, 2, 3]] = 1 np.testing.assert_array_equal( z[:4, :4], [ [0, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1], ], ) np.testing.assert_array_equal(z[[1, 2, 3], [1, 2, 3]], 1) # test broadcasting np.testing.assert_array_equal(z[1, [1, 2, 3]], [1, 0, 0]) # test 1D fancy indexing z2 = zarr_array_from_numpy_array(store, np.zeros(5)) z2[[1, 2, 3]] = 1 np.testing.assert_array_equal(z2[:], [0, 1, 1, 1, 0]) def _test_get_coordinate_selection( a: 
npt.NDArray, z: Array, selection: CoordinateSelection ) -> None: expect = a[selection] actual = z.get_coordinate_selection(selection) assert_array_equal(expect, actual) actual = z.vindex[selection] assert_array_equal(expect, actual) coordinate_selections_1d_bad = [ # slice not supported slice(5, 15), slice(None), Ellipsis, # bad stuff 2.3, "foo", b"xxx", None, (0, 0), (slice(None), slice(None)), ] # noinspection PyStatementEffect def test_get_coordinate_selection_1d(store: StorePath) -> None: # setup a = np.arange(1050, dtype=int) z = zarr_array_from_numpy_array(store, a, chunk_shape=(100,)) np.random.seed(42) # test with different degrees of sparseness for p in 2, 0.5, 0.1, 0.01: n = int(a.size * p) ix = np.random.choice(a.shape[0], size=n, replace=True) _test_get_coordinate_selection(a, z, ix) ix.sort() _test_get_coordinate_selection(a, z, ix) ix = ix[::-1] _test_get_coordinate_selection(a, z, ix) selections = [ # test single item 42, -1, # test wraparound [0, 3, 10, -23, -12, -1], # test out of order [3, 105, 23, 127], # not monotonically increasing # test multi-dimensional selection np.array([[2, 4], [6, 8]]), ] for selection in selections: _test_get_coordinate_selection(a, z, selection) # test errors bad_selections = coordinate_selections_1d_bad + [ [a.shape[0] + 1], # out of bounds [-(a.shape[0] + 1)], # out of bounds ] for selection in bad_selections: with pytest.raises(IndexError): z.get_coordinate_selection(selection) # type: ignore[arg-type] with pytest.raises(IndexError): z.vindex[selection] # type: ignore[index] def test_get_coordinate_selection_2d(store: StorePath) -> None: # setup a = np.arange(10000, dtype=int).reshape(1000, 10) z = zarr_array_from_numpy_array(store, a, chunk_shape=(300, 3)) np.random.seed(42) ix0: npt.ArrayLike ix1: npt.ArrayLike # test with different degrees of sparseness for p in 2, 0.5, 0.1, 0.01: n = int(a.size * p) ix0 = np.random.choice(a.shape[0], size=n, replace=True) ix1 = np.random.choice(a.shape[1], size=n, replace=True) selections = [ # single value (42, 4), (-1, -1), # index both axes with array (ix0, ix1), # mixed indexing with array / int (ix0, 4), (42, ix1), (42, 4), ] for selection in selections: _test_get_coordinate_selection(a, z, selection) # not monotonically increasing (first dim) ix0 = [3, 3, 4, 2, 5] ix1 = [1, 3, 5, 7, 9] _test_get_coordinate_selection(a, z, (ix0, ix1)) # not monotonically increasing (second dim) ix0 = [1, 1, 2, 2, 5] ix1 = [1, 3, 2, 1, 0] _test_get_coordinate_selection(a, z, (ix0, ix1)) # multi-dimensional selection ix0 = np.array([[1, 1, 2], [2, 2, 5]]) ix1 = np.array([[1, 3, 2], [1, 0, 0]]) _test_get_coordinate_selection(a, z, (ix0, ix1)) selection = slice(5, 15), [1, 2, 3] with pytest.raises(IndexError): z.get_coordinate_selection(selection) # type:ignore[arg-type] selection = [1, 2, 3], slice(5, 15) with pytest.raises(IndexError): z.get_coordinate_selection(selection) # type:ignore[arg-type] selection = Ellipsis, [1, 2, 3] with pytest.raises(IndexError): z.get_coordinate_selection(selection) # type:ignore[arg-type] selection = Ellipsis with pytest.raises(IndexError): z.get_coordinate_selection(selection) # type:ignore[arg-type] def _test_set_coordinate_selection( v: npt.NDArray, a: npt.NDArray, z: Array, selection: CoordinateSelection ) -> None: for value in 42, v[selection], v[selection].tolist(): # setup expectation a[:] = 0 a[selection] = value # test long-form API z[:] = 0 z.set_coordinate_selection(selection, value) assert_array_equal(a, z[:]) # test short-form API z[:] = 0 z.vindex[selection] = value 
assert_array_equal(a, z[:]) def test_set_coordinate_selection_1d(store: StorePath) -> None: # setup v = np.arange(550, dtype=int) a = np.empty(v.shape, dtype=v.dtype) z = zarr_array_from_numpy_array(store, a, chunk_shape=(100,)) np.random.seed(42) # test with different degrees of sparseness for p in 0.5, 0.01: n = int(a.size * p) ix = np.random.choice(a.shape[0], size=n, replace=True) _test_set_coordinate_selection(v, a, z, ix) # multi-dimensional selection ix = np.array([[2, 4], [6, 8]]) _test_set_coordinate_selection(v, a, z, ix) for selection in coordinate_selections_1d_bad: with pytest.raises(IndexError): z.set_coordinate_selection(selection, 42) # type:ignore[arg-type] with pytest.raises(IndexError): z.vindex[selection] = 42 # type:ignore[index] def test_set_coordinate_selection_2d(store: StorePath) -> None: # setup v = np.arange(5400, dtype=int).reshape(600, 9) a = np.empty_like(v) z = zarr_array_from_numpy_array(store, a, chunk_shape=(300, 3)) np.random.seed(42) # test with different degrees of sparseness for p in 0.5, 0.01: n = int(a.size * p) ix0 = np.random.choice(a.shape[0], size=n, replace=True) ix1 = np.random.choice(a.shape[1], size=n, replace=True) selections = ( (42, 4), (-1, -1), # index both axes with array (ix0, ix1), # mixed indexing with array / int (ix0, 4), (42, ix1), ) for selection in selections: _test_set_coordinate_selection(v, a, z, selection) # multi-dimensional selection ix0 = np.array([[1, 2, 3], [4, 5, 6]]) ix1 = np.array([[1, 3, 2], [2, 0, 5]]) _test_set_coordinate_selection(v, a, z, (ix0, ix1)) def _test_get_block_selection( a: npt.NDArray[Any], z: Array, selection: BasicSelection, expected_idx: slice | tuple[slice, ...], ) -> None: expect = a[expected_idx] actual = z.get_block_selection(selection) assert_array_equal(expect, actual) actual = z.blocks[selection] assert_array_equal(expect, actual) block_selections_1d: list[BasicSelection] = [ # test single item 0, 5, # test wraparound -1, -4, # test slice slice(5), slice(None, 3), slice(5, 6), slice(-3, -1), slice(None), # Full slice ] block_selections_1d_array_projection: list[slice] = [ # test single item slice(100), slice(500, 600), # test wraparound slice(1000, None), slice(700, 800), # test slice slice(500), slice(None, 300), slice(500, 600), slice(800, 1000), slice(None), ] block_selections_1d_bad = [ # slice not supported slice(3, 8, 2), # bad stuff 2.3, # "foo", # TODO b"xxx", None, (0, 0), (slice(None), slice(None)), [0, 5, 3], ] def test_get_block_selection_1d(store: StorePath) -> None: # setup a = np.arange(1050, dtype=int) z = zarr_array_from_numpy_array(store, a, chunk_shape=(100,)) for selection, expected_idx in zip( block_selections_1d, block_selections_1d_array_projection, strict=True ): _test_get_block_selection(a, z, selection, expected_idx) bad_selections = block_selections_1d_bad + [ z.metadata.chunk_grid.get_nchunks(z.shape) + 1, # out of bounds -(z.metadata.chunk_grid.get_nchunks(z.shape) + 1), # out of bounds ] for selection_bad in bad_selections: with pytest.raises(IndexError): z.get_block_selection(selection_bad) # type:ignore[arg-type] with pytest.raises(IndexError): z.blocks[selection_bad] # type:ignore[index] block_selections_2d: list[BasicSelection] = [ # test single item (0, 0), (1, 2), # test wraparound (-1, -1), (-3, -2), # test slice (slice(1), slice(2)), (slice(None, 2), slice(-2, -1)), (slice(2, 3), slice(-2, None)), (slice(-3, -1), slice(-3, -2)), (slice(None), slice(None)), # Full slice ] block_selections_2d_array_projection: list[tuple[slice, slice]] = [ # test single 
item (slice(300), slice(3)), (slice(300, 600), slice(6, 9)), # test wraparound (slice(900, None), slice(9, None)), (slice(300, 600), slice(6, 9)), # test slice (slice(300), slice(6)), (slice(None, 600), slice(6, 9)), (slice(600, 900), slice(6, None)), (slice(300, 900), slice(3, 6)), (slice(None), slice(None)), # Full slice ] def test_get_block_selection_2d(store: StorePath) -> None: # setup a = np.arange(10000, dtype=int).reshape(1000, 10) z = zarr_array_from_numpy_array(store, a, chunk_shape=(300, 3)) for selection, expected_idx in zip( block_selections_2d, block_selections_2d_array_projection, strict=True ): _test_get_block_selection(a, z, selection, expected_idx) selection = slice(5, 15), [1, 2, 3] with pytest.raises(IndexError): z.get_block_selection(selection) selection = Ellipsis, [1, 2, 3] with pytest.raises(IndexError): z.get_block_selection(selection) selection = slice(15, 20), slice(None) with pytest.raises(IndexError): # out of bounds z.get_block_selection(selection) def _test_set_block_selection( v: npt.NDArray[Any], a: npt.NDArray[Any], z: zarr.Array, selection: BasicSelection, expected_idx: slice, ) -> None: for value in 42, v[expected_idx], v[expected_idx].tolist(): # setup expectation a[:] = 0 a[expected_idx] = value # test long-form API z[:] = 0 z.set_block_selection(selection, value) assert_array_equal(a, z[:]) # test short-form API z[:] = 0 z.blocks[selection] = value assert_array_equal(a, z[:]) def test_set_block_selection_1d(store: StorePath) -> None: # setup v = np.arange(1050, dtype=int) a = np.empty(v.shape, dtype=v.dtype) z = zarr_array_from_numpy_array(store, a, chunk_shape=(100,)) for selection, expected_idx in zip( block_selections_1d, block_selections_1d_array_projection, strict=True ): _test_set_block_selection(v, a, z, selection, expected_idx) for selection_bad in block_selections_1d_bad: with pytest.raises(IndexError): z.set_block_selection(selection_bad, 42) # type:ignore[arg-type] with pytest.raises(IndexError): z.blocks[selection_bad] = 42 # type:ignore[index] def test_set_block_selection_2d(store: StorePath) -> None: # setup v = np.arange(10000, dtype=int).reshape(1000, 10) a = np.empty(v.shape, dtype=v.dtype) z = zarr_array_from_numpy_array(store, a, chunk_shape=(300, 3)) for selection, expected_idx in zip( block_selections_2d, block_selections_2d_array_projection, strict=True ): _test_set_block_selection(v, a, z, selection, expected_idx) selection = slice(5, 15), [1, 2, 3] with pytest.raises(IndexError): z.set_block_selection(selection, 42) selection = Ellipsis, [1, 2, 3] with pytest.raises(IndexError): z.set_block_selection(selection, 42) selection = slice(15, 20), slice(None) with pytest.raises(IndexError): # out of bounds z.set_block_selection(selection, 42) def _test_get_mask_selection(a: npt.NDArray[Any], z: Array, selection: npt.NDArray) -> None: expect = a[selection] actual = z.get_mask_selection(selection) assert_array_equal(expect, actual) actual = z.vindex[selection] assert_array_equal(expect, actual) actual = z[selection] assert_array_equal(expect, actual) mask_selections_1d_bad = [ # slice not supported slice(5, 15), slice(None), Ellipsis, # bad stuff 2.3, "foo", b"xxx", None, (0, 0), (slice(None), slice(None)), ] # noinspection PyStatementEffect def test_get_mask_selection_1d(store: StorePath) -> None: # setup a = np.arange(1050, dtype=int) z = zarr_array_from_numpy_array(store, a, chunk_shape=(100,)) np.random.seed(42) # test with different degrees of sparseness for p in 0.5, 0.1, 0.01: ix = np.random.binomial(1, p, 
size=a.shape[0]).astype(bool) _test_get_mask_selection(a, z, ix) # test errors bad_selections = mask_selections_1d_bad + [ np.zeros(50, dtype=bool), # too short np.zeros(2000, dtype=bool), # too long [[True, False], [False, True]], # too many dimensions ] for selection in bad_selections: with pytest.raises(IndexError): z.get_mask_selection(selection) # type: ignore[arg-type] with pytest.raises(IndexError): z.vindex[selection] # type:ignore[index] # noinspection PyStatementEffect def test_get_mask_selection_2d(store: StorePath) -> None: # setup a = np.arange(10000, dtype=int).reshape(1000, 10) z = zarr_array_from_numpy_array(store, a, chunk_shape=(300, 3)) np.random.seed(42) # test with different degrees of sparseness for p in 0.5, 0.1, 0.01: ix = np.random.binomial(1, p, size=a.size).astype(bool).reshape(a.shape) _test_get_mask_selection(a, z, ix) # test errors with pytest.raises(IndexError): z.vindex[np.zeros((1000, 5), dtype=bool)] # too short with pytest.raises(IndexError): z.vindex[np.zeros((2000, 10), dtype=bool)] # too long with pytest.raises(IndexError): z.vindex[[True, False]] # wrong no. dimensions def _test_set_mask_selection( v: npt.NDArray, a: npt.NDArray, z: Array, selection: npt.NDArray ) -> None: a[:] = 0 z[:] = 0 a[selection] = v[selection] z.set_mask_selection(selection, v[selection]) assert_array_equal(a, z[:]) z[:] = 0 z.vindex[selection] = v[selection] assert_array_equal(a, z[:]) z[:] = 0 z[selection] = v[selection] assert_array_equal(a, z[:]) def test_set_mask_selection_1d(store: StorePath) -> None: # setup v = np.arange(1050, dtype=int) a = np.empty_like(v) z = zarr_array_from_numpy_array(store, a, chunk_shape=(100,)) np.random.seed(42) # test with different degrees of sparseness for p in 0.5, 0.1, 0.01: ix = np.random.binomial(1, p, size=a.shape[0]).astype(bool) _test_set_mask_selection(v, a, z, ix) for selection in mask_selections_1d_bad: with pytest.raises(IndexError): z.set_mask_selection(selection, 42) # type: ignore[arg-type] with pytest.raises(IndexError): z.vindex[selection] = 42 # type: ignore[index] def test_set_mask_selection_2d(store: StorePath) -> None: # setup v = np.arange(10000, dtype=int).reshape(1000, 10) a = np.empty_like(v) z = zarr_array_from_numpy_array(store, a, chunk_shape=(300, 3)) np.random.seed(42) # test with different degrees of sparseness for p in 0.5, 0.1, 0.01: ix = np.random.binomial(1, p, size=a.size).astype(bool).reshape(a.shape) _test_set_mask_selection(v, a, z, ix) def test_get_selection_out(store: StorePath) -> None: # basic selections a = np.arange(1050) z = zarr_array_from_numpy_array(store, a, chunk_shape=(100,)) selections = [ slice(50, 150), slice(0, 1050), slice(1, 2), ] for selection in selections: expect = a[selection] out = get_ndbuffer_class().from_numpy_array(np.empty(expect.shape)) z.get_basic_selection(selection, out=out) assert_array_equal(expect, out.as_numpy_array()[:]) with pytest.raises(TypeError): z.get_basic_selection(Ellipsis, out=[]) # type: ignore[arg-type] # orthogonal selections a = np.arange(10000, dtype=int).reshape(1000, 10) z = zarr_array_from_numpy_array(store, a, chunk_shape=(300, 3)) np.random.seed(42) # test with different degrees of sparseness for p in 0.5, 0.1, 0.01: ix0 = np.random.binomial(1, p, size=a.shape[0]).astype(bool) ix1 = np.random.binomial(1, 0.5, size=a.shape[1]).astype(bool) selections = [ # index both axes with array (ix0, ix1), # mixed indexing with array / slice (ix0, slice(1, 5)), (slice(250, 350), ix1), # mixed indexing with array / int (ix0, 4), (42, ix1), # mixed int array / 
bool array (ix0, np.nonzero(ix1)[0]), (np.nonzero(ix0)[0], ix1), ] for selection in selections: expect = oindex(a, selection) out = get_ndbuffer_class().from_numpy_array(np.zeros(expect.shape, dtype=expect.dtype)) z.get_orthogonal_selection(selection, out=out) assert_array_equal(expect, out.as_numpy_array()[:]) # coordinate selections a = np.arange(10000, dtype=int).reshape(1000, 10) z = zarr_array_from_numpy_array(store, a, chunk_shape=(300, 3)) np.random.seed(42) # test with different degrees of sparseness for p in 0.5, 0.1, 0.01: n = int(a.size * p) ix0 = np.random.choice(a.shape[0], size=n, replace=True) ix1 = np.random.choice(a.shape[1], size=n, replace=True) selections = [ # index both axes with array (ix0, ix1), # mixed indexing with array / int (ix0, 4), (42, ix1), ] for selection in selections: expect = a[selection] out = get_ndbuffer_class().from_numpy_array(np.zeros(expect.shape, dtype=expect.dtype)) z.get_coordinate_selection(selection, out=out) assert_array_equal(expect, out.as_numpy_array()[:]) @pytest.mark.xfail(reason="fields are not supported in v3") def test_get_selections_with_fields(store: StorePath) -> None: a = np.array( [("aaa", 1, 4.2), ("bbb", 2, 8.4), ("ccc", 3, 12.6)], dtype=[("foo", "S3"), ("bar", "i4"), ("baz", "f8")], ) z = zarr_array_from_numpy_array(store, a, chunk_shape=(2,)) fields_fixture: list[str | list[str]] = [ "foo", ["foo"], ["foo", "bar"], ["foo", "baz"], ["bar", "baz"], ["foo", "bar", "baz"], ["bar", "foo"], ["baz", "bar", "foo"], ] for fields in fields_fixture: # total selection expect = a[fields] actual = z.get_basic_selection(Ellipsis, fields=fields) assert_array_equal(expect, actual) # alternative API if isinstance(fields, str): actual = z[fields] assert_array_equal(expect, actual) elif len(fields) == 2: actual = z[fields[0], fields[1]] assert_array_equal(expect, actual) if isinstance(fields, str): actual = z[..., fields] assert_array_equal(expect, actual) elif len(fields) == 2: actual = z[..., fields[0], fields[1]] assert_array_equal(expect, actual) # basic selection with slice expect = a[fields][0:2] actual = z.get_basic_selection(slice(0, 2), fields=fields) assert_array_equal(expect, actual) # alternative API if isinstance(fields, str): actual = z[0:2, fields] assert_array_equal(expect, actual) elif len(fields) == 2: actual = z[0:2, fields[0], fields[1]] assert_array_equal(expect, actual) # basic selection with single item expect = a[fields][1] actual = z.get_basic_selection(1, fields=fields) assert_array_equal(expect, actual) # alternative API if isinstance(fields, str): actual = z[1, fields] assert_array_equal(expect, actual) elif len(fields) == 2: actual = z[1, fields[0], fields[1]] assert_array_equal(expect, actual) # orthogonal selection ix = [0, 2] expect = a[fields][ix] actual = z.get_orthogonal_selection(ix, fields=fields) assert_array_equal(expect, actual) # alternative API if isinstance(fields, str): actual = z.oindex[ix, fields] assert_array_equal(expect, actual) elif len(fields) == 2: actual = z.oindex[ix, fields[0], fields[1]] assert_array_equal(expect, actual) # coordinate selection ix = [0, 2] expect = a[fields][ix] actual = z.get_coordinate_selection(ix, fields=fields) assert_array_equal(expect, actual) # alternative API if isinstance(fields, str): actual = z.vindex[ix, fields] assert_array_equal(expect, actual) elif len(fields) == 2: actual = z.vindex[ix, fields[0], fields[1]] assert_array_equal(expect, actual) # mask selection ix = [True, False, True] expect = a[fields][ix] actual = z.get_mask_selection(ix, fields=fields) 
assert_array_equal(expect, actual) # alternative API if isinstance(fields, str): actual = z.vindex[ix, fields] assert_array_equal(expect, actual) elif len(fields) == 2: actual = z.vindex[ix, fields[0], fields[1]] assert_array_equal(expect, actual) # missing/bad fields with pytest.raises(IndexError): z.get_basic_selection(Ellipsis, fields=["notafield"]) with pytest.raises(IndexError): z.get_basic_selection(Ellipsis, fields=slice(None)) # type: ignore[arg-type] @pytest.mark.xfail(reason="fields are not supported in v3") def test_set_selections_with_fields(store: StorePath) -> None: v = np.array( [("aaa", 1, 4.2), ("bbb", 2, 8.4), ("ccc", 3, 12.6)], dtype=[("foo", "S3"), ("bar", "i4"), ("baz", "f8")], ) a = np.empty_like(v) z = zarr_array_from_numpy_array(store, v, chunk_shape=(2,)) fields_fixture: list[str | list[str]] = [ "foo", [], ["foo"], ["foo", "bar"], ["foo", "baz"], ["bar", "baz"], ["foo", "bar", "baz"], ["bar", "foo"], ["baz", "bar", "foo"], ] for fields in fields_fixture: # currently multi-field assignment is not supported in numpy, so we won't support # it either if isinstance(fields, list) and len(fields) > 1: with pytest.raises(IndexError): z.set_basic_selection(Ellipsis, v, fields=fields) with pytest.raises(IndexError): z.set_orthogonal_selection([0, 2], v, fields=fields) # type: ignore[arg-type] with pytest.raises(IndexError): z.set_coordinate_selection([0, 2], v, fields=fields) with pytest.raises(IndexError): z.set_mask_selection([True, False, True], v, fields=fields) # type: ignore[arg-type] else: if isinstance(fields, list) and len(fields) == 1: # work around numpy does not support multi-field assignment even if there # is only one field key = fields[0] elif isinstance(fields, list) and len(fields) == 0: # work around numpy ambiguity about what is a field selection key = Ellipsis else: key = fields # setup expectation a[:] = ("", 0, 0) z[:] = ("", 0, 0) assert_array_equal(a, z[:]) a[key] = v[key] # total selection z.set_basic_selection(Ellipsis, v[key], fields=fields) assert_array_equal(a, z[:]) # basic selection with slice a[:] = ("", 0, 0) z[:] = ("", 0, 0) a[key][0:2] = v[key][0:2] z.set_basic_selection(slice(0, 2), v[key][0:2], fields=fields) assert_array_equal(a, z[:]) # orthogonal selection a[:] = ("", 0, 0) z[:] = ("", 0, 0) ix = [0, 2] a[key][ix] = v[key][ix] z.set_orthogonal_selection(ix, v[key][ix], fields=fields) assert_array_equal(a, z[:]) # coordinate selection a[:] = ("", 0, 0) z[:] = ("", 0, 0) ix = [0, 2] a[key][ix] = v[key][ix] z.set_coordinate_selection(ix, v[key][ix], fields=fields) assert_array_equal(a, z[:]) # mask selection a[:] = ("", 0, 0) z[:] = ("", 0, 0) ix = [True, False, True] a[key][ix] = v[key][ix] z.set_mask_selection(ix, v[key][ix], fields=fields) assert_array_equal(a, z[:]) def test_slice_selection_uints() -> None: arr = np.arange(24).reshape((4, 6)) idx = np.uint64(3) slice_sel = make_slice_selection((idx,)) assert arr[tuple(slice_sel)].shape == (1, 6) def test_numpy_int_indexing(store: StorePath) -> None: a = np.arange(1050) z = zarr_array_from_numpy_array(store, a, chunk_shape=(100,)) assert a[42] == z[42] assert a[np.int64(42)] == z[np.int64(42)] @pytest.mark.parametrize( ("shape", "chunks", "ops"), [ # 1D test cases ((1070,), (50,), [("__getitem__", (slice(200, 400),))]), ((1070,), (50,), [("__getitem__", (slice(200, 400, 100),))]), ( (1070,), (50,), [ ("__getitem__", (slice(200, 400),)), ("__setitem__", (slice(200, 400, 100),)), ], ), # 2D test cases ( (40, 50), (5, 8), [ ("__getitem__", (slice(6, 37, 13), (slice(4, 10)))), 
("__setitem__", (slice(None), (slice(None)))), ], ), ], ) async def test_accessed_chunks( shape: tuple[int, ...], chunks: tuple[int, ...], ops: list[tuple[str, tuple[slice, ...]]] ) -> None: # Test that only the required chunks are accessed during basic selection operations # shape: array shape # chunks: chunk size # ops: list of tuples with (optype, tuple of slices) # optype = "__getitem__" or "__setitem__", tuple length must match number of dims # Use a counting dict as the backing store so we can track the items access store = await CountingDict.open() z = zarr_array_from_numpy_array(StorePath(store), np.zeros(shape), chunk_shape=chunks) for ii, (optype, slices) in enumerate(ops): # Resolve the slices into the accessed chunks for each dimension chunks_per_dim = [] for N, C, sl in zip(shape, chunks, slices, strict=True): chunk_ind = np.arange(N, dtype=int)[sl] // C chunks_per_dim.append(np.unique(chunk_ind)) # Combine and generate the cartesian product to determine the chunks keys that # will be accessed chunks_accessed = [".".join(map(str, comb)) for comb in itertools.product(*chunks_per_dim)] counts_before = store.counter.copy() # Perform the operation if optype == "__getitem__": z[slices] else: z[slices] = ii # Get the change in counts delta_counts = store.counter - counts_before # Check that the access counts for the operation have increased by one for all # the chunks we expect to be included for ci in chunks_accessed: assert delta_counts.pop((optype, ci)) == 1 # If the chunk was partially written to it will also have been read once. We # don't determine if the chunk was actually partial here, just that the # counts are consistent that this might have happened if optype == "__setitem__": assert ("__getitem__", ci) not in delta_counts or delta_counts.pop( ("__getitem__", ci) ) == 1 # Check that no other chunks were accessed assert len(delta_counts) == 0 @pytest.mark.parametrize( "selection", [ # basic selection [...], [1, ...], [slice(None)], [1, 3], [[1, 2, 3], 9], [np.arange(1000)], [slice(5, 15)], [slice(2, 4), 4], [[1, 3]], # mask selection [np.tile([True, False], (1000, 5))], [np.full((1000, 10), False)], # coordinate selection [[1, 2, 3, 4], [5, 6, 7, 8]], [[100, 200, 300], [4, 5, 6]], ], ) def test_indexing_equals_numpy(store: StorePath, selection: Selection) -> None: a = np.arange(10000, dtype=int).reshape(1000, 10) z = zarr_array_from_numpy_array(store, a, chunk_shape=(300, 3)) # note: in python 3.10 a[*selection] is not valid unpacking syntax expected = a[*selection,] actual = z[*selection,] assert_array_equal(expected, actual, err_msg=f"selection: {selection}") @pytest.mark.parametrize( "selection", [ [np.tile([True, False], 500), np.tile([True, False], 5)], [np.full(1000, False), np.tile([True, False], 5)], [np.full(1000, True), np.full(10, True)], [np.full(1000, True), [True, False] * 5], ], ) def test_orthogonal_bool_indexing_like_numpy_ix( store: StorePath, selection: list[npt.ArrayLike] ) -> None: a = np.arange(10000, dtype=int).reshape(1000, 10) z = zarr_array_from_numpy_array(store, a, chunk_shape=(300, 3)) expected = a[np.ix_(*selection)] # note: in python 3.10 z[*selection] is not valid unpacking syntax actual = z[*selection,] assert_array_equal(expected, actual, err_msg=f"{selection=}") @pytest.mark.parametrize("ndim", [1, 2, 3]) @pytest.mark.parametrize("origin_0d", [None, (0,), (1,)]) @pytest.mark.parametrize("selection_shape_0d", [None, (2,), (3,)]) def test_iter_grid( ndim: int, origin_0d: tuple[int] | None, selection_shape_0d: tuple[int] | None ) -> None: 
""" Test that iter_grid works as expected for 1, 2, and 3 dimensions. """ grid_shape = (10, 5, 7)[:ndim] if origin_0d is not None: origin_kwarg = origin_0d * ndim origin = origin_kwarg else: origin_kwarg = None origin = (0,) * ndim if selection_shape_0d is not None: selection_shape_kwarg = selection_shape_0d * ndim selection_shape = selection_shape_kwarg else: selection_shape_kwarg = None selection_shape = tuple(gs - o for gs, o in zip(grid_shape, origin, strict=False)) observed = tuple( _iter_grid(grid_shape, origin=origin_kwarg, selection_shape=selection_shape_kwarg) ) # generate a numpy array of indices, and index it coord_array = np.array(list(itertools.product(*[range(s) for s in grid_shape]))).reshape( (*grid_shape, ndim) ) coord_array_indexed = coord_array[ tuple(slice(o, o + s, 1) for o, s in zip(origin, selection_shape, strict=False)) + (range(ndim),) ] expected = tuple(map(tuple, coord_array_indexed.reshape(-1, ndim).tolist())) assert observed == expected def test_iter_grid_invalid() -> None: """ Ensure that a selection_shape that exceeds the grid_shape + origin produces an indexing error. """ with pytest.raises(IndexError): list(_iter_grid((5,), origin=(0,), selection_shape=(10,))) def test_indexing_with_zarr_array(store: StorePath) -> None: # regression test for https://github.com/zarr-developers/zarr-python/issues/2133 a = np.arange(10) za = zarr.array(a, chunks=2, store=store, path="a") ix = [False, True, False, True, False, True, False, True, False, True] ii = [0, 2, 4, 5] zix = zarr.array(ix, chunks=2, store=store, dtype="bool", path="ix") zii = zarr.array(ii, chunks=2, store=store, dtype="i4", path="ii") assert_array_equal(a[ix], za[zix]) assert_array_equal(a[ix], za.oindex[zix]) assert_array_equal(a[ix], za.vindex[zix]) assert_array_equal(a[ii], za[zii]) assert_array_equal(a[ii], za.oindex[zii]) @pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) @pytest.mark.parametrize("shape", [(0, 2, 3), (0), (3, 0)]) def test_zero_sized_chunks(store: StorePath, shape: list[int]) -> None: z = zarr.create_array(store=store, shape=shape, chunks=shape, zarr_format=3, dtype="f8") z[...] = 42 assert_array_equal(z[...], np.zeros(shape, dtype="f8")) @pytest.mark.parametrize("store", ["memory"], indirect=["store"]) def test_vectorized_indexing_incompatible_shape(store) -> None: # GH2469 shape = (4, 4) chunks = (2, 2) fill_value = 32767 arr = zarr.create( shape, store=store, chunks=chunks, dtype=np.int16, fill_value=fill_value, codecs=[zarr.codecs.BytesCodec(), zarr.codecs.BloscCodec()], ) with pytest.raises(ValueError, match="Attempting to set"): arr[np.array([1, 2]), np.array([1, 2])] = np.array([[-1, -2], [-3, -4]]) def test_iter_chunk_regions(): chunks = (2, 3) a = zarr.create((10, 10), chunks=chunks) a[:] = 1 for region in a._iter_chunk_regions(): assert_array_equal(a[region], np.ones_like(a[region])) a[region] = 0 assert_array_equal(a[region], np.zeros_like(a[region])) @pytest.mark.parametrize( ("domain_shape", "region_shape", "origin", "selection_shape"), [ ((9,), (1,), None, (9,)), ((9,), (1,), (0,), (9,)), ((3,), (2,), (0,), (1,)), ((9,), (2,), (2,), (2,)), ((9, 9), (2, 1), None, None), ((9, 9), (4, 1), None, None), ], ) @pytest.mark.parametrize("order", ["lexicographic"]) @pytest.mark.parametrize("trim_excess", [True, False]) def test_iter_regions( domain_shape: tuple[int, ...], region_shape: tuple[int, ...], origin: tuple[int, ...] | None, selection_shape: tuple[int, ...] 
| None, order: _ArrayIndexingOrder, trim_excess: bool, ) -> None: """ Test that iter_regions properly iterates over contiguous regions of a gridded domain. """ expected_slices_by_dim: list[list[slice]] = [] origin_parsed: tuple[int, ...] selection_shape_parsed: tuple[int, ...] if origin is None: origin_parsed = (0,) * len(domain_shape) else: origin_parsed = origin if selection_shape is None: selection_shape_parsed = tuple( ceildiv(ds, rs) - o for ds, o, rs in zip(domain_shape, origin_parsed, region_shape, strict=True) ) else: selection_shape_parsed = selection_shape for d_s, r_s, o, ss in zip( domain_shape, region_shape, origin_parsed, selection_shape_parsed, strict=True ): _expected_slices: list[slice] = [] start = o * r_s for incr in range(start, start + ss * r_s, r_s): if trim_excess: term = min(incr + r_s, d_s) else: term = incr + r_s _expected_slices.append(slice(incr, term, 1)) expected_slices_by_dim.append(_expected_slices) expected = tuple(itertools.product(*expected_slices_by_dim)) observed = tuple( _iter_regions( domain_shape, region_shape, origin=origin, selection_shape=selection_shape, order=order, trim_excess=trim_excess, ) ) assert observed == expected class TestAsync: @pytest.mark.parametrize( ("indexer", "expected"), [ # int ((0,), np.array([1, 2])), ((1,), np.array([3, 4])), ((0, 1), np.array(2)), # slice ((slice(None),), np.array([[1, 2], [3, 4]])), ((slice(0, 1),), np.array([[1, 2]])), ((slice(1, 2),), np.array([[3, 4]])), ((slice(0, 2),), np.array([[1, 2], [3, 4]])), ((slice(0, 0),), np.empty(shape=(0, 2), dtype="i8")), # ellipsis ((...,), np.array([[1, 2], [3, 4]])), ((0, ...), np.array([1, 2])), ((..., 0), np.array([1, 3])), ((0, 1, ...), np.array(2)), # combined ((0, slice(None)), np.array([1, 2])), ((slice(None), 0), np.array([1, 3])), ((slice(None), slice(None)), np.array([[1, 2], [3, 4]])), # array of ints (([0]), np.array([[1, 2]])), (([1]), np.array([[3, 4]])), (([0], [1]), np.array(2)), (([0, 1], [0]), np.array([[1], [3]])), (([0, 1], [0, 1]), np.array([[1, 2], [3, 4]])), # boolean array (np.array([True, True]), np.array([[1, 2], [3, 4]])), (np.array([True, False]), np.array([[1, 2]])), (np.array([False, True]), np.array([[3, 4]])), (np.array([False, False]), np.empty(shape=(0, 2), dtype="i8")), ], ) @pytest.mark.asyncio async def test_async_oindex(self, store, indexer, expected): z = zarr.create_array(store=store, shape=(2, 2), chunks=(1, 1), zarr_format=3, dtype="i8") z[...] = np.array([[1, 2], [3, 4]]) async_zarr = z._async_array result = await async_zarr.oindex.getitem(indexer) assert_array_equal(result, expected) @pytest.mark.asyncio async def test_async_oindex_with_zarr_array(self, store): group = zarr.create_group(store=store, zarr_format=3) z1 = group.create_array(name="z1", shape=(2, 2), chunks=(1, 1), dtype="i8") z1[...] = np.array([[1, 2], [3, 4]]) async_zarr = z1._async_array # create boolean zarr array to index with z2 = group.create_array(name="z2", shape=(2,), chunks=(1,), dtype="?") z2[...] = np.array([True, False]) result = await async_zarr.oindex.getitem(z2) expected = np.array([[1, 2]]) assert_array_equal(result, expected) @pytest.mark.parametrize( ("indexer", "expected"), [ (([0], [0]), np.array(1)), (([0, 1], [0, 1]), np.array([1, 4])), (np.array([[False, True], [False, True]]), np.array([2, 4])), ], ) @pytest.mark.asyncio async def test_async_vindex(self, store, indexer, expected): z = zarr.create_array(store=store, shape=(2, 2), chunks=(1, 1), zarr_format=3, dtype="i8") z[...] 
= np.array([[1, 2], [3, 4]]) async_zarr = z._async_array result = await async_zarr.vindex.getitem(indexer) assert_array_equal(result, expected) @pytest.mark.asyncio async def test_async_vindex_with_zarr_array(self, store): group = zarr.create_group(store=store, zarr_format=3) z1 = group.create_array(name="z1", shape=(2, 2), chunks=(1, 1), dtype="i8") z1[...] = np.array([[1, 2], [3, 4]]) async_zarr = z1._async_array # create boolean zarr array to index with z2 = group.create_array(name="z2", shape=(2, 2), chunks=(1, 1), dtype="?") z2[...] = np.array([[False, True], [False, True]]) result = await async_zarr.vindex.getitem(z2) expected = np.array([2, 4]) assert_array_equal(result, expected) @pytest.mark.asyncio async def test_async_invalid_indexer(self, store): z = zarr.create_array(store=store, shape=(2, 2), chunks=(1, 1), zarr_format=3, dtype="i8") z[...] = np.array([[1, 2], [3, 4]]) async_zarr = z._async_array with pytest.raises(IndexError): await async_zarr.vindex.getitem("invalid_indexer") with pytest.raises(IndexError): await async_zarr.oindex.getitem("invalid_indexer") zarr-python-3.1.5/tests/test_info.py000066400000000000000000000104471511007055700175320ustar00rootroot00000000000000import textwrap import pytest from zarr.codecs.bytes import BytesCodec from zarr.core._info import ArrayInfo, GroupInfo, human_readable_size from zarr.core.common import ZarrFormat from zarr.core.dtype.npy.int import Int32 ZARR_FORMATS = [2, 3] @pytest.mark.parametrize("zarr_format", ZARR_FORMATS) def test_group_info_repr(zarr_format: ZarrFormat) -> None: info = GroupInfo( _name="a", _store_type="MemoryStore", _read_only=False, _zarr_format=zarr_format ) result = repr(info) expected = textwrap.dedent(f"""\ Name : a Type : Group Zarr format : {zarr_format} Read-only : False Store type : MemoryStore""") assert result == expected @pytest.mark.parametrize("zarr_format", ZARR_FORMATS) def test_group_info_complete(zarr_format: ZarrFormat) -> None: info = GroupInfo( _name="a", _store_type="MemoryStore", _zarr_format=zarr_format, _read_only=False, _count_arrays=10, _count_groups=4, _count_members=14, ) result = repr(info) expected = textwrap.dedent(f"""\ Name : a Type : Group Zarr format : {zarr_format} Read-only : False Store type : MemoryStore No. members : 14 No. arrays : 10 No. 
groups : 4""") assert result == expected @pytest.mark.parametrize("zarr_format", ZARR_FORMATS) def test_array_info(zarr_format: ZarrFormat) -> None: info = ArrayInfo( _zarr_format=zarr_format, _data_type=Int32(), _fill_value=0, _shape=(100, 100), _chunk_shape=(10, 100), _order="C", _read_only=True, _store_type="MemoryStore", _serializer=BytesCodec(), ) result = repr(info) assert result == textwrap.dedent(f"""\ Type : Array Zarr format : {zarr_format} Data type : Int32(endianness='little') Fill value : 0 Shape : (100, 100) Chunk shape : (10, 100) Order : C Read-only : True Store type : MemoryStore Filters : () Serializer : BytesCodec(endian=) Compressors : ()""") @pytest.mark.parametrize("zarr_format", ZARR_FORMATS) @pytest.mark.parametrize("bytes_things", [(1_000_000, "976.6K", 500_000, "488.3K", "2.0", 5)]) def test_array_info_complete( zarr_format: ZarrFormat, bytes_things: tuple[int, str, int, str, str, int] ) -> None: ( count_bytes, count_bytes_formatted, count_bytes_stored, count_bytes_stored_formatted, storage_ratio_formatted, count_chunks_initialized, ) = bytes_things info = ArrayInfo( _zarr_format=zarr_format, _data_type=Int32(), _fill_value=0, _shape=(100, 100), _chunk_shape=(10, 100), _order="C", _read_only=True, _store_type="MemoryStore", _serializer=BytesCodec(), _count_bytes=count_bytes, _count_bytes_stored=count_bytes_stored, _count_chunks_initialized=count_chunks_initialized, ) result = repr(info) assert result == textwrap.dedent(f"""\ Type : Array Zarr format : {zarr_format} Data type : Int32(endianness='little') Fill value : 0 Shape : (100, 100) Chunk shape : (10, 100) Order : C Read-only : True Store type : MemoryStore Filters : () Serializer : BytesCodec(endian=) Compressors : () No. bytes : {count_bytes} ({count_bytes_formatted}) No. 
bytes stored : {count_bytes_stored} ({count_bytes_stored_formatted}) Storage ratio : {storage_ratio_formatted} Chunks Initialized : 5""") @pytest.mark.parametrize( ("size", "expected"), [ (1, "1"), (2**10, "1.0K"), (2**20, "1.0M"), (2**30, "1.0G"), (2**40, "1.0T"), (2**50, "1.0P"), ], ) def test_human_readable_size(size: int, expected: str) -> None: result = human_readable_size(size) assert result == expected zarr-python-3.1.5/tests/test_metadata/000077500000000000000000000000001511007055700177775ustar00rootroot00000000000000zarr-python-3.1.5/tests/test_metadata/__init__.py000066400000000000000000000000001511007055700220760ustar00rootroot00000000000000zarr-python-3.1.5/tests/test_metadata/test_consolidated.py000066400000000000000000001002451511007055700240620ustar00rootroot00000000000000from __future__ import annotations import json from typing import TYPE_CHECKING, Any import numpy as np import pytest from numcodecs import Blosc import zarr.api.asynchronous import zarr.api.synchronous import zarr.storage from zarr import AsyncGroup from zarr.api.asynchronous import ( consolidate_metadata, group, open, open_consolidated, ) from zarr.core.buffer import cpu, default_buffer_prototype from zarr.core.dtype import parse_dtype from zarr.core.group import ConsolidatedMetadata, GroupMetadata from zarr.core.metadata import ArrayV3Metadata from zarr.core.metadata.v2 import ArrayV2Metadata from zarr.errors import ZarrUserWarning from zarr.storage import StorePath if TYPE_CHECKING: from zarr.abc.store import Store from zarr.core.common import JSON, ZarrFormat @pytest.fixture async def memory_store_with_hierarchy(memory_store: Store) -> Store: g = await group(store=memory_store, attributes={"foo": "bar"}) dtype = "uint8" await g.create_array(name="air", shape=(1, 2, 3), dtype=dtype) await g.create_array(name="lat", shape=(1,), dtype=dtype) await g.create_array(name="lon", shape=(2,), dtype=dtype) await g.create_array(name="time", shape=(3,), dtype=dtype) child = await g.create_group("child", attributes={"key": "child"}) await child.create_array("array", shape=(4, 4), attributes={"key": "child"}, dtype=dtype) grandchild = await child.create_group("grandchild", attributes={"key": "grandchild"}) await grandchild.create_array( "array", shape=(4, 4), attributes={"key": "grandchild"}, dtype=dtype ) await grandchild.create_group("empty_group", attributes={"key": "empty"}) return memory_store class TestConsolidated: async def test_open_consolidated_false_raises(self) -> None: store = zarr.storage.MemoryStore() with pytest.raises(TypeError, match="use_consolidated"): await zarr.api.asynchronous.open_consolidated(store, use_consolidated=False) # type: ignore[arg-type] def test_open_consolidated_false_raises_sync(self) -> None: store = zarr.storage.MemoryStore() with pytest.raises(TypeError, match="use_consolidated"): zarr.open_consolidated(store, use_consolidated=False) # type: ignore[arg-type] async def test_consolidated(self, memory_store_with_hierarchy: Store) -> None: # TODO: Figure out desired keys in # TODO: variety in the hierarchies # More nesting # arrays under arrays # single array # etc. 
with pytest.warns( ZarrUserWarning, match="Consolidated metadata is currently not part in the Zarr format 3 specification.", ): await consolidate_metadata(memory_store_with_hierarchy) group2 = await AsyncGroup.open(memory_store_with_hierarchy) array_metadata: dict[str, JSON] = { "attributes": {}, "chunk_key_encoding": { "configuration": {"separator": "/"}, "name": "default", }, "codecs": ( {"configuration": {"endian": "little"}, "name": "bytes"}, {"configuration": {"level": 0, "checksum": False}, "name": "zstd"}, ), "data_type": "uint8", "fill_value": 0, "node_type": "array", # "shape": (1, 2, 3), "zarr_format": 3, } expected = GroupMetadata( attributes={"foo": "bar"}, consolidated_metadata=ConsolidatedMetadata( kind="inline", must_understand=False, metadata={ "air": ArrayV3Metadata.from_dict( { "shape": (1, 2, 3), "chunk_grid": { "configuration": {"chunk_shape": (1, 2, 3)}, "name": "regular", }, **array_metadata, } ), "lat": ArrayV3Metadata.from_dict( { "shape": (1,), "chunk_grid": { "configuration": {"chunk_shape": (1,)}, "name": "regular", }, **array_metadata, } ), "lon": ArrayV3Metadata.from_dict( { "shape": (2,), "chunk_grid": { "configuration": {"chunk_shape": (2,)}, "name": "regular", }, **array_metadata, } ), "time": ArrayV3Metadata.from_dict( { "shape": (3,), "chunk_grid": { "configuration": {"chunk_shape": (3,)}, "name": "regular", }, **array_metadata, } ), "child": GroupMetadata( attributes={"key": "child"}, consolidated_metadata=ConsolidatedMetadata( metadata={ "array": ArrayV3Metadata.from_dict( { **array_metadata, "attributes": {"key": "child"}, "shape": (4, 4), "chunk_grid": { "configuration": {"chunk_shape": (4, 4)}, "name": "regular", }, } ), "grandchild": GroupMetadata( attributes={"key": "grandchild"}, consolidated_metadata=ConsolidatedMetadata( metadata={ # known to be empty child group "empty_group": GroupMetadata( consolidated_metadata=ConsolidatedMetadata( metadata={} ), attributes={"key": "empty"}, ), "array": ArrayV3Metadata.from_dict( { **array_metadata, "attributes": {"key": "grandchild"}, "shape": (4, 4), "chunk_grid": { "configuration": {"chunk_shape": (4, 4)}, "name": "regular", }, } ), } ), ), }, ), ), }, ), ) assert group2.metadata == expected group3 = await open(store=memory_store_with_hierarchy) assert group3.metadata == expected group4 = await open_consolidated(store=memory_store_with_hierarchy) assert group4.metadata == expected buf = await memory_store_with_hierarchy.get( "zarr.json", prototype=default_buffer_prototype() ) assert buf is not None result_raw = json.loads(buf.to_bytes())["consolidated_metadata"] assert result_raw["kind"] == "inline" assert sorted(result_raw["metadata"]) == [ "air", "child", "child/array", "child/grandchild", "child/grandchild/array", "child/grandchild/empty_group", "lat", "lon", "time", ] def test_consolidated_sync(self, memory_store: Store) -> None: g = zarr.api.synchronous.group(store=memory_store, attributes={"foo": "bar"}) dtype = "uint8" g.create_array(name="air", shape=(1, 2, 3), dtype=dtype) g.create_array(name="lat", shape=(1,), dtype=dtype) g.create_array(name="lon", shape=(2,), dtype=dtype) g.create_array(name="time", shape=(3,), dtype=dtype) with pytest.warns( ZarrUserWarning, match="Consolidated metadata is currently not part in the Zarr format 3 specification.", ): zarr.api.synchronous.consolidate_metadata(memory_store) group2 = zarr.Group.open(memory_store) array_metadata: dict[str, JSON] = { "attributes": {}, "chunk_key_encoding": { "configuration": {"separator": "/"}, "name": "default", }, "codecs": ( 
{"configuration": {"endian": "little"}, "name": "bytes"}, {"configuration": {"level": 0, "checksum": False}, "name": "zstd"}, ), "data_type": dtype, "fill_value": 0, "node_type": "array", # "shape": (1, 2, 3), "zarr_format": 3, } expected = GroupMetadata( attributes={"foo": "bar"}, consolidated_metadata=ConsolidatedMetadata( kind="inline", must_understand=False, metadata={ "air": ArrayV3Metadata.from_dict( { "shape": (1, 2, 3), "chunk_grid": { "configuration": {"chunk_shape": (1, 2, 3)}, "name": "regular", }, **array_metadata, } ), "lat": ArrayV3Metadata.from_dict( { "shape": (1,), "chunk_grid": { "configuration": {"chunk_shape": (1,)}, "name": "regular", }, **array_metadata, } ), "lon": ArrayV3Metadata.from_dict( { "shape": (2,), "chunk_grid": { "configuration": {"chunk_shape": (2,)}, "name": "regular", }, **array_metadata, } ), "time": ArrayV3Metadata.from_dict( { "shape": (3,), "chunk_grid": { "configuration": {"chunk_shape": (3,)}, "name": "regular", }, **array_metadata, } ), }, ), ) assert group2.metadata == expected group3 = zarr.api.synchronous.open(store=memory_store) assert group3.metadata == expected group4 = zarr.api.synchronous.open_consolidated(store=memory_store) assert group4.metadata == expected async def test_not_writable_raises(self, memory_store: zarr.storage.MemoryStore) -> None: await group(store=memory_store, attributes={"foo": "bar"}) read_store = zarr.storage.MemoryStore(store_dict=memory_store._store_dict, read_only=True) with pytest.raises(ValueError, match="does not support writing"): await consolidate_metadata(read_store) async def test_non_root_node(self, memory_store_with_hierarchy: Store) -> None: with pytest.warns( ZarrUserWarning, match="Consolidated metadata is currently not part in the Zarr format 3 specification.", ): await consolidate_metadata(memory_store_with_hierarchy, path="child") root = await AsyncGroup.open(memory_store_with_hierarchy) child = await AsyncGroup.open(StorePath(memory_store_with_hierarchy) / "child") assert root.metadata.consolidated_metadata is None assert child.metadata.consolidated_metadata is not None assert "air" not in child.metadata.consolidated_metadata.metadata assert "grandchild" in child.metadata.consolidated_metadata.metadata def test_consolidated_metadata_from_dict(self) -> None: data: dict[str, JSON] = {"must_understand": False} # missing kind with pytest.raises(ValueError, match="kind='None'"): ConsolidatedMetadata.from_dict(data) # invalid kind data["kind"] = "invalid" with pytest.raises(ValueError, match="kind='invalid'"): ConsolidatedMetadata.from_dict(data) # missing metadata data["kind"] = "inline" with pytest.raises(TypeError, match="Unexpected type for 'metadata'"): ConsolidatedMetadata.from_dict(data) data["kind"] = "inline" # empty is fine data["metadata"] = {} ConsolidatedMetadata.from_dict(data) def test_flatten(self) -> None: array_metadata: dict[str, Any] = { "attributes": {}, "chunk_key_encoding": { "configuration": {"separator": "/"}, "name": "default", }, "codecs": ({"configuration": {"endian": "little"}, "name": "bytes"},), "data_type": "float64", "fill_value": np.float64(0.0), "node_type": "array", # "shape": (1, 2, 3), "zarr_format": 3, } metadata = ConsolidatedMetadata( kind="inline", must_understand=False, metadata={ "air": ArrayV3Metadata.from_dict( { "shape": (1, 2, 3), "chunk_grid": { "configuration": {"chunk_shape": (1, 2, 3)}, "name": "regular", }, **array_metadata, } ), "lat": ArrayV3Metadata.from_dict( { "shape": (1,), "chunk_grid": { "configuration": {"chunk_shape": (1,)}, "name": 
"regular", }, **array_metadata, } ), "child": GroupMetadata( attributes={"key": "child"}, consolidated_metadata=ConsolidatedMetadata( metadata={ "array": ArrayV3Metadata.from_dict( { **array_metadata, "attributes": {"key": "child"}, "shape": (4, 4), "chunk_grid": { "configuration": {"chunk_shape": (4, 4)}, "name": "regular", }, } ), "grandchild": GroupMetadata( attributes={"key": "grandchild"}, consolidated_metadata=ConsolidatedMetadata( metadata={ "array": ArrayV3Metadata.from_dict( { **array_metadata, "attributes": {"key": "grandchild"}, "shape": (4, 4), "chunk_grid": { "configuration": {"chunk_shape": (4, 4)}, "name": "regular", }, } ) } ), ), }, ), ), }, ) result = metadata.flattened_metadata expected = { "air": metadata.metadata["air"], "lat": metadata.metadata["lat"], "child": GroupMetadata( attributes={"key": "child"}, consolidated_metadata=ConsolidatedMetadata(metadata={}) ), "child/array": metadata.metadata["child"].consolidated_metadata.metadata["array"], # type: ignore[union-attr] "child/grandchild": GroupMetadata( attributes={"key": "grandchild"}, consolidated_metadata=ConsolidatedMetadata(metadata={}), ), "child/grandchild/array": ( metadata.metadata["child"] .consolidated_metadata.metadata["grandchild"] # type: ignore[union-attr] .consolidated_metadata.metadata["array"] ), } assert result == expected def test_invalid_metadata_raises(self) -> None: payload: dict[str, JSON] = { "kind": "inline", "must_understand": False, "metadata": { "foo": [1, 2, 3] # invalid }, } with pytest.raises(TypeError, match="key='foo', type='list'"): ConsolidatedMetadata.from_dict(payload) def test_to_dict_empty(self) -> None: meta = ConsolidatedMetadata( metadata={ "empty": GroupMetadata( attributes={"key": "empty"}, consolidated_metadata=ConsolidatedMetadata(metadata={}), ) } ) result = meta.to_dict() expected = { "kind": "inline", "must_understand": False, "metadata": { "empty": { "attributes": {"key": "empty"}, "consolidated_metadata": { "kind": "inline", "must_understand": False, "metadata": {}, }, "node_type": "group", "zarr_format": 3, } }, } assert result == expected @pytest.mark.parametrize("zarr_format", [2, 3]) async def test_to_dict_order( self, memory_store: zarr.storage.MemoryStore, zarr_format: ZarrFormat ) -> None: with zarr.config.set(default_zarr_format=zarr_format): g = await group(store=memory_store) # Create groups in non-lexicographix order dtype = "float32" await g.create_array(name="b", shape=(1,), dtype=dtype) child = await g.create_group("c", attributes={"key": "child"}) await g.create_array(name="a", shape=(1,), dtype=dtype) await child.create_array("e", shape=(1,), dtype=dtype) await child.create_array("d", shape=(1,), dtype=dtype) # Consolidate metadata and re-open store if zarr_format == 3: with pytest.warns( ZarrUserWarning, match="Consolidated metadata is currently not part in the Zarr format 3 specification.", ): await zarr.api.asynchronous.consolidate_metadata(memory_store) else: await zarr.api.asynchronous.consolidate_metadata(memory_store) g2 = await zarr.api.asynchronous.open_group(store=memory_store) assert g2.metadata.consolidated_metadata is not None assert list(g2.metadata.consolidated_metadata.metadata) == ["a", "b", "c"] assert list(g2.metadata.consolidated_metadata.flattened_metadata) == [ "a", "b", "c", "c/d", "c/e", ] @pytest.mark.parametrize("zarr_format", [2, 3]) async def test_open_consolidated_raises_async(self, zarr_format: ZarrFormat) -> None: store = zarr.storage.MemoryStore() await AsyncGroup.from_store(store, zarr_format=zarr_format) with 
pytest.raises(ValueError): await zarr.api.asynchronous.open_consolidated(store, zarr_format=zarr_format) with pytest.raises(ValueError): await zarr.api.asynchronous.open_consolidated(store, zarr_format=None) @pytest.fixture async def v2_consolidated_metadata_empty_dataset( self, memory_store: zarr.storage.MemoryStore ) -> AsyncGroup: zgroup_bytes = cpu.Buffer.from_bytes(json.dumps({"zarr_format": 2}).encode()) zmetadata_bytes = cpu.Buffer.from_bytes( b'{"metadata":{".zgroup":{"zarr_format":2}},"zarr_consolidated_format":1}' ) return AsyncGroup._from_bytes_v2( StorePath(memory_store, path=""), zgroup_bytes, zattrs_bytes=None, consolidated_metadata_bytes=zmetadata_bytes, ) async def test_consolidated_metadata_backwards_compatibility( self, v2_consolidated_metadata_empty_dataset: AsyncGroup ) -> None: """ Test that consolidated metadata handles a missing .zattrs key. This is necessary for backwards compatibility with zarr-python 2.x. See https://github.com/zarr-developers/zarr-python/issues/2694 """ store = zarr.storage.MemoryStore() await zarr.api.asynchronous.open(store=store, zarr_format=2) await zarr.api.asynchronous.consolidate_metadata(store) result = await zarr.api.asynchronous.open_consolidated(store, zarr_format=2) assert result.metadata == v2_consolidated_metadata_empty_dataset.metadata async def test_consolidated_metadata_v2(self) -> None: store = zarr.storage.MemoryStore() g = await AsyncGroup.from_store(store, attributes={"key": "root"}, zarr_format=2) dtype = parse_dtype("uint8", zarr_format=2) await g.create_array(name="a", shape=(1,), attributes={"key": "a"}, dtype=dtype) g1 = await g.create_group(name="g1", attributes={"key": "g1"}) await g1.create_group(name="g2", attributes={"key": "g2"}) await zarr.api.asynchronous.consolidate_metadata(store) result = await zarr.api.asynchronous.open_consolidated(store, zarr_format=2) expected = GroupMetadata( attributes={"key": "root"}, zarr_format=2, consolidated_metadata=ConsolidatedMetadata( metadata={ "a": ArrayV2Metadata( shape=(1,), dtype=dtype, attributes={"key": "a"}, chunks=(1,), fill_value=0, compressor=Blosc(), order="C", ), "g1": GroupMetadata( attributes={"key": "g1"}, zarr_format=2, consolidated_metadata=ConsolidatedMetadata( metadata={ "g2": GroupMetadata( attributes={"key": "g2"}, zarr_format=2, consolidated_metadata=ConsolidatedMetadata(metadata={}), ) } ), ), } ), ) assert result.metadata == expected @pytest.mark.parametrize("zarr_format", [2, 3]) async def test_use_consolidated_false( self, memory_store: zarr.storage.MemoryStore, zarr_format: ZarrFormat ) -> None: with zarr.config.set(default_zarr_format=zarr_format): g = await group(store=memory_store, attributes={"foo": "bar"}) await g.create_group(name="a") # test a stale read if zarr_format == 3: with pytest.warns( ZarrUserWarning, match="Consolidated metadata is currently not part in the Zarr format 3 specification.", ): await zarr.api.asynchronous.consolidate_metadata(memory_store) else: await zarr.api.asynchronous.consolidate_metadata(memory_store) await g.create_group(name="b") stale = await zarr.api.asynchronous.open_group(store=memory_store) assert len([x async for x in stale.members()]) == 1 assert stale.metadata.consolidated_metadata assert list(stale.metadata.consolidated_metadata.metadata) == ["a"] # bypass stale data good = await zarr.api.asynchronous.open_group( store=memory_store, use_consolidated=False ) assert len([x async for x in good.members()]) == 2 # reconsolidate if zarr_format == 3: with pytest.warns( ZarrUserWarning, match="Consolidated 
metadata is currently not part in the Zarr format 3 specification.", ): await zarr.api.asynchronous.consolidate_metadata(memory_store) else: await zarr.api.asynchronous.consolidate_metadata(memory_store) good = await zarr.api.asynchronous.open_group(store=memory_store) assert len([x async for x in good.members()]) == 2 assert good.metadata.consolidated_metadata assert sorted(good.metadata.consolidated_metadata.metadata) == ["a", "b"] async def test_stale_child_metadata_ignored( self, memory_store: zarr.storage.MemoryStore ) -> None: # https://github.com/zarr-developers/zarr-python/issues/2921 # When consolidating metadata, we should ignore any (possibly stale) metadata # from previous consolidations, *including at child nodes*. root = await zarr.api.asynchronous.group(store=memory_store, zarr_format=3) await root.create_group("foo") await zarr.api.asynchronous.consolidate_metadata(memory_store, path="foo") await root.create_group("foo/bar/spam") with pytest.warns( ZarrUserWarning, match="Consolidated metadata is currently not part in the Zarr format 3 specification.", ): await zarr.api.asynchronous.consolidate_metadata(memory_store) reopened = await zarr.api.asynchronous.open_consolidated(store=memory_store, zarr_format=3) result = [x[0] async for x in reopened.members(max_depth=None)] expected = ["foo", "foo/bar", "foo/bar/spam"] assert result == expected async def test_use_consolidated_for_children_members( self, memory_store: zarr.storage.MemoryStore ) -> None: # A test that has *unconsolidated* metadata at the root group, but discovers # a child group with consolidated metadata. root = await zarr.api.asynchronous.create_group(store=memory_store) await root.create_group("a/b") # Consolidate metadata at "a/b" await zarr.api.asynchronous.consolidate_metadata(memory_store, path="a/b") # Add a new group a/b/c, that's not present in the CM at "a/b" await root.create_group("a/b/c") # Now according to the consolidated metadata, "a" has children ["b"] # but according to the unconsolidated metadata, "a" has children ["b", "c"] group = await zarr.api.asynchronous.open_group(store=memory_store, path="a") with pytest.warns(ZarrUserWarning, match="Object at 'c' not found"): result = sorted([x[0] async for x in group.members(max_depth=None)]) expected = ["b"] assert result == expected result = sorted( [x[0] async for x in group.members(max_depth=None, use_consolidated_for_children=False)] ) expected = ["b", "b/c"] assert result == expected async def test_absolute_path_for_subgroup(self, memory_store: zarr.storage.MemoryStore) -> None: root = await zarr.api.asynchronous.create_group(store=memory_store) await root.create_group("a/b") with pytest.warns( ZarrUserWarning, match="Consolidated metadata is currently not part in the Zarr format 3 specification.", ): await zarr.api.asynchronous.consolidate_metadata(memory_store) group = await zarr.api.asynchronous.open_group(store=memory_store) subgroup = await group.getitem("/a") assert isinstance(subgroup, AsyncGroup) members = [x async for x in subgroup.keys()] # noqa: SIM118 assert members == ["b"] @pytest.mark.parametrize("fill_value", [np.nan, np.inf, -np.inf]) async def test_consolidated_metadata_encodes_special_chars( memory_store: Store, zarr_format: ZarrFormat, fill_value: float ) -> None: root = await group(store=memory_store, zarr_format=zarr_format) _time = await root.create_array("time", shape=(12,), dtype=np.float64, fill_value=fill_value) if zarr_format == 3: with pytest.warns( ZarrUserWarning, match="Consolidated metadata is currently not part 
in the Zarr format 3 specification.", ): await zarr.api.asynchronous.consolidate_metadata(memory_store) else: await zarr.api.asynchronous.consolidate_metadata(memory_store) root = await group(store=memory_store, zarr_format=zarr_format) root_buffer = root.metadata.to_buffer_dict(default_buffer_prototype()) if zarr_format == 2: root_metadata = json.loads(root_buffer[".zmetadata"].to_bytes().decode("utf-8"))["metadata"] elif zarr_format == 3: root_metadata = json.loads(root_buffer["zarr.json"].to_bytes().decode("utf-8"))[ "consolidated_metadata" ]["metadata"] expected_fill_value = _time._zdtype.to_json_scalar(fill_value, zarr_format=2) if zarr_format == 2: assert root_metadata["time/.zarray"]["fill_value"] == expected_fill_value elif zarr_format == 3: assert root_metadata["time"]["fill_value"] == expected_fill_value class NonConsolidatedStore(zarr.storage.MemoryStore): """A store that doesn't support consolidated metadata""" @property def supports_consolidated_metadata(self) -> bool: return False async def test_consolidate_metadata_raises_for_self_consolidating_stores() -> None: """Verify calling consolidate_metadata on a non supporting stores raises an error.""" memory_store = NonConsolidatedStore() root = await zarr.api.asynchronous.create_group(store=memory_store) await root.create_group("a/b") with pytest.raises(TypeError, match="doesn't support consolidated metadata"): await zarr.api.asynchronous.consolidate_metadata(memory_store) async def test_open_group_in_non_consolidating_stores() -> None: memory_store = NonConsolidatedStore() root = await zarr.api.asynchronous.create_group(store=memory_store) await root.create_group("a/b") # Opening a group without consolidatedion works as expected await AsyncGroup.open(memory_store, use_consolidated=False) # let the Store opt out of consolidation await AsyncGroup.open(memory_store, use_consolidated=None) # Opening a group with use_consolidated=True should fail with pytest.raises(ValueError, match="doesn't support consolidated metadata"): await AsyncGroup.open(memory_store, use_consolidated=True) zarr-python-3.1.5/tests/test_metadata/test_v2.py000066400000000000000000000275121511007055700217460ustar00rootroot00000000000000from __future__ import annotations import json from typing import TYPE_CHECKING, Literal import numpy as np import pytest import zarr.api.asynchronous import zarr.storage from zarr.core.buffer import cpu from zarr.core.buffer.core import default_buffer_prototype from zarr.core.dtype.npy.float import Float32, Float64 from zarr.core.dtype.npy.int import Int16 from zarr.core.group import ConsolidatedMetadata, GroupMetadata from zarr.core.metadata import ArrayV2Metadata from zarr.core.metadata.v2 import parse_zarr_format from zarr.errors import ZarrUserWarning if TYPE_CHECKING: from pathlib import Path from typing import Any from zarr.abc.codec import Codec from zarr.core.common import JSON def test_parse_zarr_format_valid() -> None: assert parse_zarr_format(2) == 2 @pytest.mark.parametrize("data", [None, 1, 3, 4, 5, "3"]) def test_parse_zarr_format_invalid(data: Any) -> None: with pytest.raises(ValueError, match=f"Invalid value. Expected 2. 
Got {data}"): parse_zarr_format(data) @pytest.mark.parametrize("attributes", [None, {"foo": "bar"}]) @pytest.mark.parametrize("filters", [None, [{"id": "gzip", "level": 1}]]) @pytest.mark.parametrize("compressor", [None, {"id": "gzip", "level": 1}]) @pytest.mark.parametrize("fill_value", [None, 0, 1]) @pytest.mark.parametrize("order", ["C", "F"]) @pytest.mark.parametrize("dimension_separator", [".", "/", None]) def test_metadata_to_dict( compressor: Codec | None, filters: tuple[Codec] | None, fill_value: Any, order: Literal["C", "F"], dimension_separator: Literal[".", "/"] | None, attributes: dict[str, Any] | None, ) -> None: shape = (1, 2, 3) chunks = (1,) * len(shape) data_type = "|u1" metadata_dict = { "zarr_format": 2, "shape": shape, "chunks": chunks, "dtype": data_type, "order": order, "compressor": compressor, "filters": filters, "fill_value": fill_value, } if attributes is not None: metadata_dict["attributes"] = attributes if dimension_separator is not None: metadata_dict["dimension_separator"] = dimension_separator metadata = ArrayV2Metadata.from_dict(metadata_dict) observed = metadata.to_dict() expected = metadata_dict.copy() if attributes is None: assert observed["attributes"] == {} observed.pop("attributes") if dimension_separator is None: expected_dimension_sep = "." assert observed["dimension_separator"] == expected_dimension_sep observed.pop("dimension_separator") assert observed == expected def test_filters_empty_tuple_warns() -> None: metadata_dict = { "zarr_format": 2, "shape": (1,), "chunks": (1,), "dtype": "|u1", "order": "C", "compressor": None, "filters": (), "fill_value": 0, } with pytest.warns( ZarrUserWarning, match="Found an empty list of filters in the array metadata document." ): meta = ArrayV2Metadata.from_dict(metadata_dict) assert meta.filters is None class TestConsolidated: @pytest.fixture async def v2_consolidated_metadata( self, memory_store: zarr.storage.MemoryStore ) -> zarr.storage.MemoryStore: zmetadata: dict[str, JSON] = { "metadata": { ".zattrs": { "Conventions": "COARDS", }, ".zgroup": {"zarr_format": 2}, "air/.zarray": { "chunks": [730], "compressor": None, "dtype": " None: # .zgroup, .zattrs, .metadata store = v2_consolidated_metadata group = zarr.open_consolidated(store=store, zarr_format=2) assert group.metadata.consolidated_metadata is not None expected = ConsolidatedMetadata( metadata={ "air": ArrayV2Metadata( shape=(730,), fill_value=0, chunks=(730,), attributes={"_ARRAY_DIMENSIONS": ["time"], "dataset": "NMC Reanalysis"}, dtype=Int16(), order="C", filters=None, dimension_separator=".", compressor=None, ), "time": ArrayV2Metadata( shape=(730,), fill_value=0.0, chunks=(730,), attributes={ "_ARRAY_DIMENSIONS": ["time"], "calendar": "standard", "long_name": "Time", "standard_name": "time", "units": "hours since 1800-01-01", }, dtype=Float32(), order="C", filters=None, dimension_separator=".", compressor=None, ), "nested": GroupMetadata( attributes={"key": "value"}, zarr_format=2, consolidated_metadata=ConsolidatedMetadata( metadata={ "array": ArrayV2Metadata( shape=(730,), fill_value=0.0, chunks=(730,), attributes={ "calendar": "standard", }, dtype=Float32(), order="C", filters=None, dimension_separator=".", compressor=None, ) } ), ), }, kind="inline", must_understand=False, ) result = group.metadata.consolidated_metadata assert result == expected async def test_getitem_consolidated( self, v2_consolidated_metadata: zarr.storage.MemoryStore ) -> None: store = v2_consolidated_metadata group = await 
zarr.api.asynchronous.open_consolidated(store=store, zarr_format=2) air = await group.getitem("air") assert isinstance(air, zarr.AsyncArray) assert air.metadata.shape == (730,) def test_from_dict_extra_fields() -> None: data = { "_nczarr_array": {"dimrefs": ["/dim1", "/dim2"], "storage": "chunked"}, "attributes": {"key": "value"}, "chunks": [8], "compressor": None, "dtype": " None: arr = zarr.create_array( {}, shape=(10,), chunks=(10,), dtype="int32", compressors={"id": "zstd", "level": 5, "checksum": False}, zarr_format=2, ) metadata = json.loads( arr.metadata.to_buffer_dict(default_buffer_prototype())[".zarray"].to_bytes() ) assert "checksum" not in metadata["compressor"] @pytest.mark.parametrize("fill_value", [np.void((0, 0), np.dtype([("foo", "i4"), ("bar", "i4")]))]) def test_structured_dtype_fill_value_serialization( tmp_path: Path, fill_value: np.void | np.dtype[Any] ) -> None: zarr_format: Literal[2] = 2 group_path = tmp_path / "test.zarr" root_group = zarr.open_group(group_path, mode="w", zarr_format=zarr_format) dtype = np.dtype([("foo", "i4"), ("bar", "i4")]) root_group.create_array( name="structured_dtype", shape=(100, 100), chunks=(100, 100), dtype=dtype, fill_value=fill_value, ) zarr.consolidate_metadata(root_group.store, zarr_format=zarr_format) root_group = zarr.open_group(group_path, mode="r") observed = root_group.metadata.consolidated_metadata.metadata["structured_dtype"].fill_value # type: ignore[union-attr] assert observed == fill_value zarr-python-3.1.5/tests/test_metadata/test_v3.py000066400000000000000000000375571511007055700217610ustar00rootroot00000000000000from __future__ import annotations import json import re from typing import TYPE_CHECKING, Literal import numpy as np import pytest from zarr import consolidate_metadata, create_group from zarr.codecs.bytes import BytesCodec from zarr.core.buffer import default_buffer_prototype from zarr.core.chunk_key_encodings import DefaultChunkKeyEncoding, V2ChunkKeyEncoding from zarr.core.config import config from zarr.core.dtype import UInt8, get_data_type_from_native_dtype from zarr.core.dtype.npy.string import _NUMPY_SUPPORTS_VLEN_STRING from zarr.core.dtype.npy.time import DateTime64 from zarr.core.group import GroupMetadata, parse_node_type from zarr.core.metadata.v3 import ( ArrayMetadataJSON_V3, ArrayV3Metadata, parse_codecs, parse_dimension_names, parse_zarr_format, ) from zarr.errors import ( MetadataValidationError, NodeTypeValidationError, UnknownCodecError, ZarrUserWarning, ) if TYPE_CHECKING: from collections.abc import Sequence from typing import Any from zarr.core.types import JSON from zarr.abc.codec import Codec from zarr.core.metadata.v3 import ( parse_node_type_array, ) bool_dtypes = ("bool",) int_dtypes = ( "int8", "int16", "int32", "int64", "uint8", "uint16", "uint32", "uint64", ) float_dtypes = ( "float16", "float32", "float64", ) complex_dtypes = ("complex64", "complex128") flexible_dtypes = ("str", "bytes", "void") if _NUMPY_SUPPORTS_VLEN_STRING: vlen_string_dtypes = ("T",) else: vlen_string_dtypes = ("O",) dtypes = ( *bool_dtypes, *int_dtypes, *float_dtypes, *complex_dtypes, *flexible_dtypes, *vlen_string_dtypes, ) @pytest.mark.parametrize("data", [None, 1, 2, 4, 5, "3"]) def test_parse_zarr_format_invalid(data: Any) -> None: with pytest.raises( MetadataValidationError, match=f"Invalid value for 'zarr_format'. Expected '3'. 
Got '{data}'.", ): parse_zarr_format(data) def test_parse_zarr_format_valid() -> None: assert parse_zarr_format(3) == 3 def test_parse_node_type_valid() -> None: assert parse_node_type("array") == "array" assert parse_node_type("group") == "group" @pytest.mark.parametrize("node_type", [None, 2, "other"]) def test_parse_node_type_invalid(node_type: Any) -> None: with pytest.raises( MetadataValidationError, match=f"Invalid value for 'node_type'. Expected 'array' or 'group'. Got '{node_type}'.", ): parse_node_type(node_type) @pytest.mark.parametrize("data", [None, "group"]) def test_parse_node_type_array_invalid(data: Any) -> None: with pytest.raises( NodeTypeValidationError, match=f"Invalid value for 'node_type'. Expected 'array'. Got '{data}'.", ): parse_node_type_array(data) def test_parse_node_type_array_valid() -> None: assert parse_node_type_array("array") == "array" @pytest.mark.parametrize("data", [(), [1, 2, "a"], {"foo": 10}]) def parse_dimension_names_invalid(data: Any) -> None: with pytest.raises(TypeError, match="Expected either None or iterable of str,"): parse_dimension_names(data) @pytest.mark.parametrize("data", [None, ("a", "b", "c"), ["a", "a", "a"]]) def parse_dimension_names_valid(data: Sequence[str] | None) -> None: assert parse_dimension_names(data) == data @pytest.mark.parametrize("fill_value", [[1.0, 0.0], [0, 1]]) @pytest.mark.parametrize("dtype_str", [*complex_dtypes]) def test_jsonify_fill_value_complex(fill_value: Any, dtype_str: str) -> None: """ Test that parse_fill_value(fill_value, dtype) correctly handles complex values represented as length-2 sequences """ zarr_format: Literal[3] = 3 dtype = get_data_type_from_native_dtype(dtype_str) expected = dtype.to_native_dtype().type(complex(*fill_value)) observed = dtype.from_json_scalar(fill_value, zarr_format=zarr_format) assert observed == expected assert dtype.to_json_scalar(observed, zarr_format=zarr_format) == tuple(fill_value) @pytest.mark.parametrize("fill_value", [{"foo": 10}]) @pytest.mark.parametrize("dtype_str", [*int_dtypes, *float_dtypes, *complex_dtypes]) def test_parse_fill_value_invalid_type(fill_value: Any, dtype_str: str) -> None: """ Test that parse_fill_value(fill_value, dtype) raises TypeError for invalid non-sequence types. This test excludes bool because the bool constructor takes anything. """ dtype_instance = get_data_type_from_native_dtype(dtype_str) with pytest.raises(TypeError, match=f"Invalid type: {fill_value}"): dtype_instance.from_json_scalar(fill_value, zarr_format=3) @pytest.mark.parametrize( "fill_value", [ [ 1, ], (1, 23, 4), ], ) @pytest.mark.parametrize("dtype_str", [*int_dtypes, *float_dtypes]) def test_parse_fill_value_invalid_type_sequence(fill_value: Any, dtype_str: str) -> None: """ Test that parse_fill_value(fill_value, dtype) raises TypeError for invalid sequential types. This test excludes bool because the bool constructor takes anything, and complex because complex values can be created from length-2 sequences.
""" dtype_instance = get_data_type_from_native_dtype(dtype_str) with pytest.raises(TypeError, match=re.escape(f"Invalid type: {fill_value}")): dtype_instance.from_json_scalar(fill_value, zarr_format=3) @pytest.mark.parametrize("chunk_grid", ["regular"]) @pytest.mark.parametrize("attributes", [None, {"foo": "bar"}]) @pytest.mark.parametrize("codecs", [[BytesCodec(endian=None)]]) @pytest.mark.parametrize("fill_value", [0, 1]) @pytest.mark.parametrize("chunk_key_encoding", ["v2", "default"]) @pytest.mark.parametrize("dimension_separator", [".", "/", None]) @pytest.mark.parametrize("dimension_names", ["nones", "strings", "missing"]) @pytest.mark.parametrize("storage_transformers", [None, ()]) def test_metadata_to_dict( chunk_grid: str, codecs: list[Codec], fill_value: Any, chunk_key_encoding: Literal["v2", "default"], dimension_separator: Literal[".", "/"] | None, dimension_names: Literal["nones", "strings", "missing"], attributes: dict[str, Any] | None, storage_transformers: tuple[dict[str, JSON]] | None, ) -> None: shape = (1, 2, 3) data_type_str = "uint8" if chunk_grid == "regular": cgrid = {"name": "regular", "configuration": {"chunk_shape": (1, 1, 1)}} cke: dict[str, Any] cke_name_dict = {"name": chunk_key_encoding} if dimension_separator is not None: cke = cke_name_dict | {"configuration": {"separator": dimension_separator}} else: cke = cke_name_dict dnames: tuple[str | None, ...] | None if dimension_names == "strings": dnames = tuple(map(str, range(len(shape)))) elif dimension_names == "missing": dnames = None elif dimension_names == "nones": dnames = (None,) * len(shape) metadata_dict = { "zarr_format": 3, "node_type": "array", "shape": shape, "chunk_grid": cgrid, "data_type": data_type_str, "chunk_key_encoding": cke, "codecs": tuple(c.to_dict() for c in codecs), "fill_value": fill_value, "storage_transformers": storage_transformers, } if attributes is not None: metadata_dict["attributes"] = attributes if dnames is not None: metadata_dict["dimension_names"] = dnames metadata = ArrayV3Metadata.from_dict(metadata_dict) observed = metadata.to_dict() expected = metadata_dict.copy() # if unset or None or (), storage_transformers gets normalized to () assert observed["storage_transformers"] == () observed.pop("storage_transformers") expected.pop("storage_transformers") if attributes is None: assert observed["attributes"] == {} observed.pop("attributes") if dimension_separator is None: if chunk_key_encoding == "default": expected_cke_dict = DefaultChunkKeyEncoding(separator="/").to_dict() else: expected_cke_dict = V2ChunkKeyEncoding(separator=".").to_dict() assert observed["chunk_key_encoding"] == expected_cke_dict observed.pop("chunk_key_encoding") expected.pop("chunk_key_encoding") assert observed == expected @pytest.mark.parametrize("indent", [2, 4, None]) def test_json_indent(indent: int) -> None: with config.set({"json_indent": indent}): m = GroupMetadata() d = m.to_buffer_dict(default_buffer_prototype())["zarr.json"].to_bytes() assert d == json.dumps(json.loads(d), indent=indent).encode() @pytest.mark.parametrize("fill_value", [-1, 0, 1, 2932897]) @pytest.mark.parametrize("precision", ["ns", "D"]) async def test_datetime_metadata(fill_value: int, precision: Literal["ns", "D"]) -> None: dtype = DateTime64(unit=precision) metadata_dict: dict[str, Any] = { "zarr_format": 3, "node_type": "array", "shape": (1,), "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}}, "data_type": dtype.to_json(zarr_format=3), "chunk_key_encoding": {"name": "default", "separator": "."}, 
"codecs": (BytesCodec(),), "fill_value": dtype.to_json_scalar( dtype.to_native_dtype().type(fill_value, dtype.unit), zarr_format=3 ), } metadata = ArrayV3Metadata.from_dict(metadata_dict) # ensure there isn't a TypeError here. d = metadata.to_buffer_dict(default_buffer_prototype()) result = json.loads(d["zarr.json"].to_bytes()) assert result["fill_value"] == fill_value @pytest.mark.parametrize( ("data_type", "fill_value"), [("uint8", {}), ("int32", [0, 1]), ("float32", "foo")] ) async def test_invalid_fill_value_raises(data_type: str, fill_value: float) -> None: metadata_dict: dict[str, Any] = { "zarr_format": 3, "node_type": "array", "shape": (1,), "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}}, "data_type": data_type, "chunk_key_encoding": {"name": "default", "separator": "."}, "codecs": ({"name": "bytes"},), "fill_value": fill_value, # this is not a valid fill value for uint8 } # multiple things can go wrong here, so we don't match on the error message. with pytest.raises(TypeError): ArrayV3Metadata.from_dict(metadata_dict) @pytest.mark.parametrize("fill_value", [("NaN"), "Infinity", "-Infinity"]) async def test_special_float_fill_values(fill_value: str) -> None: metadata_dict: dict[str, Any] = { "zarr_format": 3, "node_type": "array", "shape": (1,), "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}}, "data_type": "float64", "chunk_key_encoding": {"name": "default", "separator": "."}, "codecs": [{"name": "bytes"}], "fill_value": fill_value, # this is not a valid fill value for uint8 } m = ArrayV3Metadata.from_dict(metadata_dict) d = json.loads(m.to_buffer_dict(default_buffer_prototype())["zarr.json"].to_bytes()) assert m.fill_value is not None if fill_value == "NaN": assert np.isnan(m.fill_value) assert d["fill_value"] == "NaN" elif fill_value == "Infinity": assert np.isposinf(m.fill_value) assert d["fill_value"] == "Infinity" elif fill_value == "-Infinity": assert np.isneginf(m.fill_value) assert d["fill_value"] == "-Infinity" def test_parse_codecs_unknown_codec_raises(monkeypatch: pytest.MonkeyPatch) -> None: from collections import defaultdict import zarr.registry from zarr.registry import Registry # to make sure the codec is always unknown (not sure if that's necessary) monkeypatch.setattr(zarr.registry, "__codec_registries", defaultdict(Registry)) codecs = [{"name": "unknown"}] with pytest.raises(UnknownCodecError): parse_codecs(codecs) @pytest.mark.parametrize( "extra_value", [ {"must_understand": False, "param": 10}, {"must_understand": True}, 10, ], ) def test_from_dict_extra_fields(extra_value: dict[str, object] | int) -> None: """ Test that from_dict accepts extra fields if they have are a JSON object with "must_understand": false, and raises an exception otherwise. 
""" metadata_dict: ArrayMetadataJSON_V3 = { # type: ignore[typeddict-unknown-key] "zarr_format": 3, "node_type": "array", "shape": (1,), "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}}, "data_type": "uint8", "chunk_key_encoding": {"name": "default", "configuration": {"separator": "."}}, "codecs": ({"name": "bytes"},), "fill_value": 0, "storage_transformers": (), "attributes": {}, "foo": extra_value, } if isinstance(extra_value, dict) and extra_value.get("must_understand") is False: # should be accepted metadata = ArrayV3Metadata.from_dict(metadata_dict) # type: ignore[arg-type] assert isinstance(metadata, ArrayV3Metadata) assert metadata.to_dict() == metadata_dict else: # should raise an exception with pytest.raises(MetadataValidationError, match="Got a Zarr V3 metadata document"): metadata = ArrayV3Metadata.from_dict(metadata_dict) # type: ignore[arg-type] def test_init_invalid_extra_fields() -> None: """ Test that initializing ArrayV3Metadata with extra fields fails when those fields shadow the array metadata fields. """ extra_fields: dict[str, object] = {"shape": (10,), "data_type": "uint8"} conflict_keys = set(extra_fields.keys()) msg = ( "Invalid extra fields. " "The following keys: " f"{sorted(conflict_keys)} " "are invalid because they collide with keys reserved for use by the " "array metadata document." ) with pytest.raises(ValueError, match=re.escape(msg)): ArrayV3Metadata( shape=(10,), data_type=UInt8(), chunk_grid={"name": "regular", "configuration": {"chunk_shape": (10,)}}, chunk_key_encoding={"name": "default", "configuration": {"separator": "/"}}, fill_value=0, codecs=({"name": "bytes", "configuration": {"endian": "little"}},), attributes={}, dimension_names=None, extra_fields=extra_fields, # type: ignore[arg-type] ) @pytest.mark.parametrize("use_consolidated", [True, False]) @pytest.mark.parametrize("attributes", [None, {"foo": "bar"}]) def test_group_to_dict(use_consolidated: bool, attributes: None | dict[str, Any]) -> None: """ Test that the output of GroupMetadata.to_dict() is what we expect """ store: dict[str, object] = {} if attributes is None: expect_attributes = {} else: expect_attributes = attributes group = create_group(store, attributes=attributes, zarr_format=3) group.create_group("foo") if use_consolidated: with pytest.warns( ZarrUserWarning, match="Consolidated metadata is currently not part in the Zarr format 3 specification.", ): group = consolidate_metadata(store) meta = group.metadata expect = { "node_type": "group", "zarr_format": 3, "consolidated_metadata": { "kind": "inline", "must_understand": False, "metadata": { "foo": { "attributes": {}, "zarr_format": 3, "node_type": "group", "consolidated_metadata": { "kind": "inline", "metadata": {}, "must_understand": False, }, } }, }, "attributes": expect_attributes, } else: meta = group.metadata expect = {"node_type": "group", "zarr_format": 3, "attributes": expect_attributes} assert meta.to_dict() == expect zarr-python-3.1.5/tests/test_properties.py000066400000000000000000000330131511007055700207650ustar00rootroot00000000000000import json import numbers from typing import Any import numpy as np import pytest from numpy.testing import assert_array_equal from zarr.core.buffer import default_buffer_prototype pytest.importorskip("hypothesis") import hypothesis.extra.numpy as npst import hypothesis.strategies as st from hypothesis import assume, given, settings from zarr.abc.store import Store from zarr.core.common import ZARR_JSON, ZARRAY_JSON, ZATTRS_JSON from zarr.core.metadata import 
ArrayV2Metadata, ArrayV3Metadata from zarr.core.sync import sync from zarr.testing.strategies import ( array_metadata, arrays, basic_indices, numpy_arrays, orthogonal_indices, simple_arrays, stores, zarr_formats, ) def deep_equal(a: Any, b: Any) -> bool: """Deep equality check with handling of special cases for array metadata classes""" if isinstance(a, (complex, np.complexfloating)) and isinstance( b, (complex, np.complexfloating) ): a_real, a_imag = float(a.real), float(a.imag) b_real, b_imag = float(b.real), float(b.imag) if np.isnan(a_real) and np.isnan(b_real): real_eq = True else: real_eq = a_real == b_real if np.isnan(a_imag) and np.isnan(b_imag): imag_eq = True else: imag_eq = a_imag == b_imag return real_eq and imag_eq if isinstance(a, (float, np.floating)) and isinstance(b, (float, np.floating)): if np.isnan(a) and np.isnan(b): return True return a == b if isinstance(a, np.datetime64) and isinstance(b, np.datetime64): if np.isnat(a) and np.isnat(b): return True return a == b if isinstance(a, np.ndarray) and isinstance(b, np.ndarray): if a.shape != b.shape: return False return all(deep_equal(x, y) for x, y in zip(a.flat, b.flat, strict=False)) if isinstance(a, dict) and isinstance(b, dict): if set(a.keys()) != set(b.keys()): return False return all(deep_equal(a[k], b[k]) for k in a) if isinstance(a, (list, tuple)) and isinstance(b, (list, tuple)): if len(a) != len(b): return False return all(deep_equal(x, y) for x, y in zip(a, b, strict=False)) return a == b @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") @given(data=st.data()) def test_array_roundtrip(data: st.DataObject) -> None: nparray = data.draw(numpy_arrays()) zarray = data.draw(arrays(arrays=st.just(nparray))) assert_array_equal(nparray, zarray[:]) @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") @given(array=arrays()) def test_array_creates_implicit_groups(array): path = array.path ancestry = path.split("/")[:-1] for i in range(len(ancestry)): parent = "/".join(ancestry[: i + 1]) if array.metadata.zarr_format == 2: assert ( sync(array.store.get(f"{parent}/.zgroup", prototype=default_buffer_prototype())) is not None ) elif array.metadata.zarr_format == 3: assert ( sync(array.store.get(f"{parent}/zarr.json", prototype=default_buffer_prototype())) is not None ) # this decorator removes timeout; not ideal but it should avoid intermittent CI failures @pytest.mark.asyncio @settings(deadline=None) @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") @given(data=st.data()) async def test_basic_indexing(data: st.DataObject) -> None: zarray = data.draw(simple_arrays()) nparray = zarray[:] indexer = data.draw(basic_indices(shape=nparray.shape)) # sync get actual = zarray[indexer] assert_array_equal(nparray[indexer], actual) # async get async_zarray = zarray._async_array actual = await async_zarray.getitem(indexer) assert_array_equal(nparray[indexer], actual) # sync set new_data = data.draw(numpy_arrays(shapes=st.just(actual.shape), dtype=nparray.dtype)) zarray[indexer] = new_data nparray[indexer] = new_data assert_array_equal(nparray, zarray[:]) # TODO test async setitem? @pytest.mark.asyncio @given(data=st.data()) @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") async def test_oindex(data: st.DataObject) -> None: # integer_array_indices can't handle 0-size dimensions. 
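# Orthogonal ("outer") indexing selects along each axis independently; the orthogonal_indices strategy draws a paired zarr-style and numpy-style indexer so the zarr.oindex result can be checked against plain numpy fancy indexing below.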
zarray = data.draw(simple_arrays(shapes=npst.array_shapes(max_dims=4, min_side=1))) nparray = zarray[:] zindexer, npindexer = data.draw(orthogonal_indices(shape=nparray.shape)) # sync get actual = zarray.oindex[zindexer] assert_array_equal(nparray[npindexer], actual) # async get async_zarray = zarray._async_array actual = await async_zarray.oindex.getitem(zindexer) assert_array_equal(nparray[npindexer], actual) # sync get assume(zarray.shards is None) # GH2834 for idxr in npindexer: if isinstance(idxr, np.ndarray) and idxr.size != np.unique(idxr).size: # behaviour of setitem with repeated indices is not guaranteed in practice assume(False) new_data = data.draw(numpy_arrays(shapes=st.just(actual.shape), dtype=nparray.dtype)) nparray[npindexer] = new_data zarray.oindex[zindexer] = new_data assert_array_equal(nparray, zarray[:]) # note: async oindex setitem not yet implemented @pytest.mark.asyncio @given(data=st.data()) @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") async def test_vindex(data: st.DataObject) -> None: # integer_array_indices can't handle 0-size dimensions. zarray = data.draw(simple_arrays(shapes=npst.array_shapes(max_dims=4, min_side=1))) nparray = zarray[:] indexer = data.draw( npst.integer_array_indices( shape=nparray.shape, result_shape=npst.array_shapes(min_side=1, max_dims=None) ) ) # sync get actual = zarray.vindex[indexer] assert_array_equal(nparray[indexer], actual) # async get async_zarray = zarray._async_array actual = await async_zarray.vindex.getitem(indexer) assert_array_equal(nparray[indexer], actual) # sync set # FIXME! # when the indexer is such that a value gets overwritten multiple times, # I think the output depends on chunking. # new_data = data.draw(npst.arrays(shape=st.just(actual.shape), dtype=nparray.dtype)) # nparray[indexer] = new_data # zarray.vindex[indexer] = new_data # assert_array_equal(nparray, zarray[:]) # note: async vindex setitem not yet implemented @given(store=stores, meta=array_metadata()) # type: ignore[misc] @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") async def test_roundtrip_array_metadata_from_store( store: Store, meta: ArrayV2Metadata | ArrayV3Metadata ) -> None: """ Verify that the I/O for metadata in a store are lossless. This test serializes an ArrayV2Metadata or ArrayV3Metadata object to a dict of buffers via `to_buffer_dict`, writes each buffer to a store under keys prefixed with "0/", and then reads them back. The test asserts that each retrieved buffer exactly matches the original buffer. """ asdict = meta.to_buffer_dict(prototype=default_buffer_prototype()) for key, expected in asdict.items(): await store.set(f"0/{key}", expected) actual = await store.get(f"0/{key}", prototype=default_buffer_prototype()) assert actual == expected @given(data=st.data(), zarr_format=zarr_formats) @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") def test_roundtrip_array_metadata_from_json(data: st.DataObject, zarr_format: int) -> None: """ Verify that JSON serialization and deserialization of metadata is lossless. For Zarr v2: - The metadata is split into two JSON documents (one for array data and one for attributes). The test merges the attributes back before deserialization. For Zarr v3: - All metadata is stored in a single JSON document. No manual merger is necessary. 
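(The split exists because Zarr v2 keeps user attributes in a separate ".zattrs" document next to ".zarray", whereas v3 stores all metadata in a single "zarr.json".)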
The test then converts both the original and round-tripped metadata objects into dictionaries using `dataclasses.asdict` and uses a deep equality check to verify that the roundtrip has preserved all fields (including special cases like NaN, Infinity, complex numbers, and datetime values). """ metadata = data.draw(array_metadata(zarr_formats=st.just(zarr_format))) buffer_dict = metadata.to_buffer_dict(prototype=default_buffer_prototype()) if zarr_format == 2: zarray_dict = json.loads(buffer_dict[ZARRAY_JSON].to_bytes().decode()) zattrs_dict = json.loads(buffer_dict[ZATTRS_JSON].to_bytes().decode()) # zattrs and zarray are separate in v2, we have to add attributes back prior to `from_dict` zarray_dict["attributes"] = zattrs_dict metadata_roundtripped = ArrayV2Metadata.from_dict(zarray_dict) else: zarray_dict = json.loads(buffer_dict[ZARR_JSON].to_bytes().decode()) metadata_roundtripped = ArrayV3Metadata.from_dict(zarray_dict) orig = metadata.to_dict() rt = metadata_roundtripped.to_dict() assert deep_equal(orig, rt), f"Roundtrip mismatch:\nOriginal: {orig}\nRoundtripped: {rt}" # @st.composite # def advanced_indices(draw, *, shape): # basic_idxr = draw( # basic_indices( # shape=shape, min_dims=len(shape), max_dims=len(shape), allow_ellipsis=False # ).filter(lambda x: isinstance(x, tuple)) # ) # int_idxr = draw( # npst.integer_array_indices(shape=shape, result_shape=npst.array_shapes(max_dims=1)) # ) # args = tuple( # st.sampled_from((l, r)) for l, r in zip_longest(basic_idxr, int_idxr, fillvalue=slice(None)) # ) # return draw(st.tuples(*args)) # @given(st.data()) # def test_roundtrip_object_array(data): # nparray = data.draw(np_arrays) # zarray = data.draw(arrays(arrays=st.just(nparray))) # assert_array_equal(nparray, zarray[:]) def serialized_complex_float_is_valid( serialized: tuple[numbers.Real | str, numbers.Real | str], ) -> bool: """ Validate that the serialized representation of a complex float conforms to the spec. The specification requires that a serialized complex float must be either: - A JSON number, or - One of the strings "NaN", "Infinity", or "-Infinity". Args: serialized: The value produced by JSON serialization for a complex floating point number. Returns: bool: True if the serialized value is valid according to the spec, False otherwise. """ return ( isinstance(serialized, tuple) and len(serialized) == 2 and all(serialized_float_is_valid(x) for x in serialized) ) def serialized_float_is_valid(serialized: numbers.Real | str) -> bool: """ Validate that the serialized representation of a float conforms to the spec. The specification requires that a serialized float must be either: - A JSON number, or - One of the strings "NaN", "Infinity", or "-Infinity". Args: serialized: The value produced by JSON serialization for a floating point number. Returns: bool: True if the serialized value is valid according to the spec, False otherwise. """ if isinstance(serialized, numbers.Real): return True return serialized in ("NaN", "Infinity", "-Infinity") @given(meta=array_metadata()) # type: ignore[misc] @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") def test_array_metadata_meets_spec(meta: ArrayV2Metadata | ArrayV3Metadata) -> None: """ Validate that the array metadata produced by the library conforms to the relevant spec (V2 vs V3). For ArrayV2Metadata: - Ensures that 'zarr_format' is 2. - Verifies that 'filters' is either None or a tuple (and not an empty tuple). For ArrayV3Metadata: - Ensures that 'zarr_format' is 3. 
For both versions: - If the dtype is a floating point of some kind, verifies of fill values: * NaN is serialized as the string "NaN" * Positive Infinity is serialized as the string "Infinity" * Negative Infinity is serialized as the string "-Infinity" * Other fill values are preserved as-is. - If the dtype is a complex number of some kind, verifies that each component of the fill value (real and imaginary) satisfies the serialization rules for floating point numbers. - If the dtype is a datetime of some kind, verifies that `NaT` values are serialized as "NaT". Note: This test validates spec-compliance for array metadata serialization. It is a work-in-progress and should be expanded as further edge cases are identified. """ asdict_dict = meta.to_dict() # version-specific validations if isinstance(meta, ArrayV2Metadata): assert asdict_dict["filters"] != () assert asdict_dict["filters"] is None or isinstance(asdict_dict["filters"], tuple) assert asdict_dict["zarr_format"] == 2 else: assert asdict_dict["zarr_format"] == 3 # version-agnostic validations dtype_native = meta.dtype.to_native_dtype() if dtype_native.kind == "f": assert serialized_float_is_valid(asdict_dict["fill_value"]) elif dtype_native.kind == "c": # fill_value should be a two-element array [real, imag]. assert serialized_complex_float_is_valid(asdict_dict["fill_value"]) elif dtype_native.kind in ("M", "m") and np.isnat(meta.fill_value): assert asdict_dict["fill_value"] == -9223372036854775808 zarr-python-3.1.5/tests/test_regression/000077500000000000000000000000001511007055700203775ustar00rootroot00000000000000zarr-python-3.1.5/tests/test_regression/__init__.py000066400000000000000000000000001511007055700224760ustar00rootroot00000000000000zarr-python-3.1.5/tests/test_regression/scripts/000077500000000000000000000000001511007055700220665ustar00rootroot00000000000000zarr-python-3.1.5/tests/test_regression/scripts/__init__.py000066400000000000000000000000001511007055700241650ustar00rootroot00000000000000zarr-python-3.1.5/tests/test_regression/scripts/v2.18.py000066400000000000000000000047361511007055700232300ustar00rootroot00000000000000# /// script # requires-python = ">=3.11" # dependencies = [ # "zarr==2.18", # "numcodecs==0.15" # ] # /// import argparse import zarr from zarr._storage.store import BaseStore def copy_group( *, node: zarr.hierarchy.Group, store: zarr.storage.BaseStore, path: str, overwrite: bool ) -> zarr.hierarchy.Group: result = zarr.group(store=store, path=path, overwrite=overwrite) result.attrs.put(node.attrs.asdict()) for key, child in node.items(): child_path = f"{path}/{key}" if isinstance(child, zarr.hierarchy.Group): copy_group(node=child, store=store, path=child_path, overwrite=overwrite) elif isinstance(child, zarr.core.Array): copy_array(node=child, store=store, overwrite=overwrite, path=child_path) return result def copy_array( *, node: zarr.core.Array, store: BaseStore, path: str, overwrite: bool ) -> zarr.core.Array: result = zarr.create( shape=node.shape, dtype=node.dtype, fill_value=node.fill_value, chunks=node.chunks, compressor=node.compressor, filters=node.filters, order=node.order, dimension_separator=node._dimension_separator, store=store, path=path, overwrite=overwrite, ) result.attrs.put(node.attrs.asdict()) result[:] = node[:] return result def copy_node( node: zarr.hierarchy.Group | zarr.core.Array, store: BaseStore, path: str, overwrite: bool ) -> zarr.hierarchy.Group | zarr.core.Array: if isinstance(node, zarr.hierarchy.Group): return copy_group(node=node, store=store, path=path, 
overwrite=overwrite) elif isinstance(node, zarr.core.Array): return copy_array(node=node, store=store, path=path, overwrite=overwrite) else: raise TypeError(f"Unexpected node type: {type(node)}") # pragma: no cover def cli() -> None: parser = argparse.ArgumentParser( description="Copy a zarr hierarchy from one location to another" ) parser.add_argument("source", type=str, help="Path to the source zarr hierarchy") parser.add_argument("destination", type=str, help="Path to the destination zarr hierarchy") args = parser.parse_args() src, dst = args.source, args.destination root_src = zarr.open(src, mode="r") result = copy_node(node=root_src, store=zarr.NestedDirectoryStore(dst), path="", overwrite=True) print(f"successfully created {result} at {dst}") def main() -> None: cli() if __name__ == "__main__": main() zarr-python-3.1.5/tests/test_regression/scripts/v3.0.8.py000066400000000000000000000036361511007055700233040ustar00rootroot00000000000000# /// script # requires-python = "==3.12" # dependencies = [ # "zarr==3.0.8", # "numcodecs==0.16.3" # ] # /// import argparse import zarr from zarr.abc.store import Store def copy_group( *, node: zarr.Group, store: Store, path: str, overwrite: bool ) -> zarr.Group: result = zarr.create_group( store=store, path=path, overwrite=overwrite, attributes=node.attrs.asdict(), zarr_format=node.metadata.zarr_format) for key, child in node.members(): child_path = f"{path}/{key}" if isinstance(child, zarr.Group): copy_group(node=child, store=store, path=child_path, overwrite=overwrite) else: copy_array(node=child, store=store, overwrite=overwrite, path=child_path) return result def copy_array( *, node: zarr.Array, store: Store, path: str, overwrite: bool ) -> zarr.Array: result = zarr.from_array(store, name=path, data=node, write_data=True) return result def copy_node( node: zarr.Group | zarr.Array, store: Store, path: str, overwrite: bool ) -> zarr.Group | zarr.Array: if isinstance(node, zarr.Group): return copy_group(node=node, store=store, path=path, overwrite=overwrite) else: return copy_array(node=node, store=store, path=path, overwrite=overwrite) def cli() -> None: parser = argparse.ArgumentParser( description="Copy a zarr hierarchy from one location to another" ) parser.add_argument("source", type=str, help="Path to the source zarr hierarchy") parser.add_argument("destination", type=str, help="Path to the destination zarr hierarchy") args = parser.parse_args() src, dst = args.source, args.destination root_src = zarr.open(src, mode="r") result = copy_node(node=root_src, store=dst, path="", overwrite=True) print(f"successfully created {result} at {dst}") def main() -> None: cli() if __name__ == "__main__": main() zarr-python-3.1.5/tests/test_regression/test_v2_dtype_regression.py000066400000000000000000000176331511007055700260160ustar00rootroot00000000000000import subprocess from dataclasses import dataclass from itertools import product from pathlib import Path from typing import TYPE_CHECKING, Literal import numpy as np import pytest from numcodecs import LZ4, LZMA, Blosc, GZip, VLenBytes, VLenUTF8, Zstd import zarr import zarr.abc import zarr.abc.codec import zarr.codecs as zarrcodecs from zarr.abc.numcodec import Numcodec from zarr.core.chunk_key_encodings import V2ChunkKeyEncoding from zarr.core.dtype.npy.bytes import VariableLengthBytes from zarr.core.dtype.npy.string import VariableLengthUTF8 from zarr.storage import LocalStore from zarr.types import ArrayV2, ArrayV3 if TYPE_CHECKING: from zarr.core.dtype import ZDTypeLike ZarrPythonVersion = 
Literal["2.18", "3.0.8"] def runner_installed() -> bool: """ Check if a PEP-723 compliant python script runner is installed. """ try: subprocess.check_output(["uv", "--version"]) return True # noqa: TRY300 except FileNotFoundError: return False @dataclass(kw_only=True) class ArrayParams: values: np.ndarray[tuple[int], np.dtype[np.generic]] fill_value: np.generic | str | int | bytes filters: tuple[Numcodec, ...] = () serializer: str | None = None compressor: Numcodec basic_codecs: tuple[Numcodec, ...] = GZip(), Blosc(), LZ4(), LZMA(), Zstd() basic_dtypes = "|b", ">i2", ">i4", ">f4", ">f8", "c8", "c16", "M8[10us]", "m8[4ps]" string_dtypes = "U4" bytes_dtypes = ">S1", "V10", " ArrayV2: """ Writes a zarr array to a temporary directory based on the provided ArrayParams. The array is returned. """ dest = tmp_path / "in" store = LocalStore(dest) array_params: ArrayParams = request.param compressor = array_params.compressor chunk_key_encoding = V2ChunkKeyEncoding(separator="/") dtype: ZDTypeLike if array_params.values.dtype == np.dtype("|O") and array_params.serializer == "vlen-utf8": dtype = VariableLengthUTF8() # type: ignore[assignment] filters = array_params.filters + (VLenUTF8(),) elif array_params.values.dtype == np.dtype("|O") and array_params.serializer == "vlen-bytes": dtype = VariableLengthBytes() filters = array_params.filters + (VLenBytes(),) else: dtype = array_params.values.dtype filters = array_params.filters z = zarr.create_array( store, shape=array_params.values.shape, dtype=dtype, chunks=array_params.values.shape, compressors=compressor, filters=filters, fill_value=array_params.fill_value, order="C", chunk_key_encoding=chunk_key_encoding, write_data=True, zarr_format=2, ) z[:] = array_params.values return z @pytest.fixture def source_array_v3(tmp_path: Path, request: pytest.FixtureRequest) -> ArrayV3: """ Writes a zarr array to a temporary directory based on the provided ArrayParams. The array is returned. """ dest = tmp_path / "in" store = LocalStore(dest) array_params: ArrayParams = request.param chunk_key_encoding = V2ChunkKeyEncoding(separator="/") dtype: ZDTypeLike serializer: Literal["auto"] | zarr.abc.codec.Codec if array_params.values.dtype == np.dtype("|O") and array_params.serializer == "vlen-utf8": dtype = VariableLengthUTF8() # type: ignore[assignment] serializer = zarrcodecs.VLenUTF8Codec() elif array_params.values.dtype == np.dtype("|O") and array_params.serializer == "vlen-bytes": dtype = VariableLengthBytes() serializer = zarrcodecs.VLenBytesCodec() else: dtype = array_params.values.dtype serializer = "auto" if array_params.compressor == GZip(): compressor = zarrcodecs.GzipCodec() else: msg = ( "This test is only compatible with gzip compression at the moment, because the author" "did not want to implement a complete abstraction layer for v2 and v3 codecs in this test." 
) raise ValueError(msg) z = zarr.create_array( store, shape=array_params.values.shape, dtype=dtype, chunks=array_params.values.shape, compressors=compressor, filters=array_params.filters, serializer=serializer, fill_value=array_params.fill_value, chunk_key_encoding=chunk_key_encoding, write_data=True, zarr_format=3, ) z[:] = array_params.values return z # TODO: make this dynamic based on the installed scripts script_paths = [Path(__file__).resolve().parent / "scripts" / "v2.18.py"] @pytest.mark.skipif(not runner_installed(), reason="no python script runner installed") @pytest.mark.parametrize( "source_array_v2", array_cases_v2_18, indirect=True, ids=tuple(map(str, array_cases_v2_18)) ) @pytest.mark.parametrize("script_path", script_paths) def test_roundtrip_v2(source_array_v2: ArrayV2, tmp_path: Path, script_path: Path) -> None: out_path = tmp_path / "out" copy_op = subprocess.run( [ "uv", "run", str(script_path), str(source_array_v2.store).removeprefix("file://"), str(out_path), ], capture_output=True, text=True, ) assert copy_op.returncode == 0 out_array = zarr.open_array(store=out_path, mode="r", zarr_format=2) assert source_array_v2.metadata.to_dict() == out_array.metadata.to_dict() assert np.array_equal(source_array_v2[:], out_array[:]) @pytest.mark.skipif(not runner_installed(), reason="no python script runner installed") @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") @pytest.mark.parametrize( "source_array_v3", array_cases_v3_08, indirect=True, ids=tuple(map(str, array_cases_v3_08)) ) def test_roundtrip_v3(source_array_v3: ArrayV3, tmp_path: Path) -> None: script_path = Path(__file__).resolve().parent / "scripts" / "v3.0.8.py" out_path = tmp_path / "out" copy_op = subprocess.run( [ "uv", "run", str(script_path), str(source_array_v3.store).removeprefix("file://"), str(out_path), ], capture_output=True, text=True, ) assert copy_op.returncode == 0 out_array = zarr.open_array(store=out_path, mode="r", zarr_format=3) assert source_array_v3.metadata.to_dict() == out_array.metadata.to_dict() assert np.array_equal(source_array_v3[:], out_array[:]) zarr-python-3.1.5/tests/test_store/000077500000000000000000000000001511007055700173535ustar00rootroot00000000000000zarr-python-3.1.5/tests/test_store/__init__.py000066400000000000000000000000001511007055700214520ustar00rootroot00000000000000zarr-python-3.1.5/tests/test_store/test_core.py000066400000000000000000000236761511007055700217320ustar00rootroot00000000000000import tempfile from collections.abc import Callable, Generator from pathlib import Path from typing import Any, Literal import pytest from _pytest.compat import LEGACY_PATH import zarr from zarr import Group from zarr.core.common import AccessModeLiteral, ZarrFormat from zarr.storage import FsspecStore, LocalStore, MemoryStore, StoreLike, StorePath, ZipStore from zarr.storage._common import contains_array, contains_group, make_store_path from zarr.storage._utils import ( _join_paths, _normalize_path_keys, _normalize_paths, _relativize_path, normalize_path, ) @pytest.fixture( params=["none", "temp_dir_str", "temp_dir_path", "store_path", "memory_store", "dict"] ) def store_like( request: pytest.FixtureRequest, ) -> Generator[None | str | Path | StorePath | MemoryStore | dict[Any, Any], None, None]: if request.param == "none": yield None elif request.param == "temp_dir_str": with tempfile.TemporaryDirectory() as temp_dir: yield temp_dir elif request.param == "temp_dir_path": with tempfile.TemporaryDirectory() as temp_dir: yield Path(temp_dir) 
elif request.param == "store_path": yield StorePath(store=MemoryStore(store_dict={}), path="/") elif request.param == "memory_store": yield MemoryStore(store_dict={}) elif request.param == "dict": yield {} @pytest.mark.parametrize("path", ["foo", "foo/bar"]) @pytest.mark.parametrize("write_group", [True, False]) @pytest.mark.parametrize("zarr_format", [2, 3]) async def test_contains_group( local_store: LocalStore, path: str, write_group: bool, zarr_format: ZarrFormat ) -> None: """ Test that the contains_group method correctly reports the existence of a group. """ root = Group.from_store(store=local_store, zarr_format=zarr_format) if write_group: root.create_group(path) store_path = StorePath(local_store, path=path) assert await contains_group(store_path, zarr_format=zarr_format) == write_group @pytest.mark.parametrize("path", ["foo", "foo/bar"]) @pytest.mark.parametrize("write_array", [True, False]) @pytest.mark.parametrize("zarr_format", [2, 3]) async def test_contains_array( local_store: LocalStore, path: str, write_array: bool, zarr_format: ZarrFormat ) -> None: """ Test that the contains array method correctly reports the existence of an array. """ root = Group.from_store(store=local_store, zarr_format=zarr_format) if write_array: root.create_array(path, shape=(100,), chunks=(10,), dtype="i4") store_path = StorePath(local_store, path=path) assert await contains_array(store_path, zarr_format=zarr_format) == write_array @pytest.mark.parametrize("func", [contains_array, contains_group]) async def test_contains_invalid_format_raises( local_store: LocalStore, func: Callable[[Any], Any] ) -> None: """ Test contains_group and contains_array raise errors for invalid zarr_formats """ store_path = StorePath(local_store) with pytest.raises(ValueError): assert await func(store_path, zarr_format="3.0") # type: ignore[call-arg] @pytest.mark.parametrize("path", [None, "", "bar"]) async def test_make_store_path_none(path: str) -> None: """ Test that creating a store_path with None creates a memorystore """ store_path = await make_store_path(None, path=path) assert isinstance(store_path.store, MemoryStore) assert store_path.path == normalize_path(path) @pytest.mark.parametrize("path", [None, "", "bar"]) @pytest.mark.parametrize("store_type", [str, Path]) @pytest.mark.parametrize("mode", ["r", "w"]) async def test_make_store_path_local( tmpdir: LEGACY_PATH, store_type: type[str] | type[Path] | type[LocalStore], path: str, mode: AccessModeLiteral, ) -> None: """ Test the various ways of invoking make_store_path that create a LocalStore """ store_like = store_type(str(tmpdir)) store_path = await make_store_path(store_like, path=path, mode=mode) assert isinstance(store_path.store, LocalStore) assert Path(store_path.store.root) == Path(tmpdir) assert store_path.path == normalize_path(path) assert store_path.read_only == (mode == "r") @pytest.mark.parametrize("path", [None, "", "bar"]) @pytest.mark.parametrize("mode", ["r", "w"]) async def test_make_store_path_store_path( tmp_path: Path, path: str, mode: AccessModeLiteral ) -> None: """ Test invoking make_store_path when the input is another store_path. In particular we want to ensure that a new path is handled correctly. 
""" ro = mode == "r" store_like = await StorePath.open( LocalStore(str(tmp_path), read_only=ro), path="root", mode=mode ) store_path = await make_store_path(store_like, path=path, mode=mode) assert isinstance(store_path.store, LocalStore) assert Path(store_path.store.root) == tmp_path path_normalized = normalize_path(path) assert store_path.path == (store_like / path_normalized).path assert store_path.read_only == ro @pytest.mark.parametrize("modes", [(True, "w"), (False, "x")]) async def test_store_path_invalid_mode_raises( tmp_path: Path, modes: tuple[bool, Literal["w", "x"]] ) -> None: """ Test that ValueErrors are raise for invalid mode. """ with pytest.raises(ValueError): await StorePath.open( LocalStore(str(tmp_path), read_only=modes[0]), path="", mode=modes[1], # type:ignore[arg-type] ) async def test_make_store_path_invalid() -> None: """ Test that invalid types raise TypeError """ with pytest.raises(TypeError): await make_store_path(1) async def test_make_store_path_fsspec() -> None: pytest.importorskip("fsspec") pytest.importorskip("requests") pytest.importorskip("aiohttp") store_path = await make_store_path("http://foo.com/bar") assert isinstance(store_path.store, FsspecStore) async def test_make_store_path_storage_options_raises(store_like: StoreLike) -> None: with pytest.raises(TypeError, match="storage_options"): await make_store_path(store_like, storage_options={"foo": "bar"}) async def test_unsupported() -> None: with pytest.raises(TypeError, match="Unsupported type for store_like: 'int'"): await make_store_path(1) @pytest.mark.parametrize( "path", [ "/foo/bar", "//foo/bar", "foo///bar", "foo/bar///", Path("foo/bar"), b"foo/bar", ], ) def test_normalize_path_valid(path: str | bytes | Path) -> None: assert normalize_path(path) == "foo/bar" def test_normalize_path_upath() -> None: upath = pytest.importorskip("upath") assert normalize_path(upath.UPath("foo/bar")) == "foo/bar" def test_normalize_path_none() -> None: assert normalize_path(None) == "" @pytest.mark.parametrize("path", [".", ".."]) def test_normalize_path_invalid(path: str) -> None: with pytest.raises(ValueError): normalize_path(path) @pytest.mark.parametrize("paths", [("", "foo"), ("foo", "bar")]) def test_join_paths(paths: tuple[str, str]) -> None: """ Test that _join_paths joins paths in a way that is robust to an empty string """ observed = _join_paths(paths) if paths[0] == "": assert observed == paths[1] else: assert observed == "/".join(paths) class TestNormalizePaths: @staticmethod def test_valid() -> None: """ Test that path normalization works as expected """ paths = ["a", "b", "c", "d", "", "//a///b//"] assert _normalize_paths(paths) == tuple(normalize_path(p) for p in paths) @staticmethod @pytest.mark.parametrize("paths", [("", "/"), ("///a", "a")]) def test_invalid(paths: tuple[str, str]) -> None: """ Test that name collisions after normalization raise a ``ValueError`` """ msg = ( f"After normalization, the value '{paths[1]}' collides with '{paths[0]}'. " f"Both '{paths[1]}' and '{paths[0]}' normalize to the same value: '{normalize_path(paths[0])}'. " f"You should use either '{paths[1]}' or '{paths[0]}', but not both." 
) with pytest.raises(ValueError, match=msg): _normalize_paths(paths) def test_normalize_path_keys() -> None: """ Test that ``_normalize_path_keys`` just applies the normalize_path function to each key of its input """ data = {"a": 10, "//b": 10} assert _normalize_path_keys(data) == {normalize_path(k): v for k, v in data.items()} @pytest.mark.parametrize( ("path", "prefix", "expected"), [ ("a", "", "a"), ("a/b/c", "a/b", "c"), ("a/b/c", "a", "b/c"), ], ) def test_relativize_path_valid(path: str, prefix: str, expected: str) -> None: """ Test the normal behavior of the _relativize_path function. Prefixes should be removed from the path argument. """ assert _relativize_path(path=path, prefix=prefix) == expected def test_relativize_path_invalid() -> None: path = "a/b/c" prefix = "b" msg = f"The first component of {path} does not start with {prefix}." with pytest.raises(ValueError, match=msg): _relativize_path(path="a/b/c", prefix="b") def test_different_open_mode(tmp_path: LEGACY_PATH) -> None: # Test with a store that implements .with_read_only() store = MemoryStore() zarr.create((100,), store=store, zarr_format=2, path="a") arr = zarr.open_array(store=store, path="a", zarr_format=2, mode="r") assert arr.store.read_only # Test with a store that doesn't implement .with_read_only() zarr_path = tmp_path / "foo.zarr" zip_store = ZipStore(zarr_path, mode="w") zarr.create((100,), store=zip_store, zarr_format=2, path="a") with pytest.raises( ValueError, match="Store is not read-only but mode is 'r'. Unable to create a read-only copy of the store. Please use a read-only store or a storage class that implements .with_read_only().", ): zarr.open_array(store=zip_store, path="a", zarr_format=2, mode="r") zarr-python-3.1.5/tests/test_store/test_fsspec.py000066400000000000000000000370411511007055700222540ustar00rootroot00000000000000from __future__ import annotations import json import os import re from typing import TYPE_CHECKING, Any import numpy as np import pytest from packaging.version import parse as parse_version import zarr.api.asynchronous from zarr import Array from zarr.abc.store import OffsetByteRequest from zarr.core.buffer import Buffer, cpu, default_buffer_prototype from zarr.core.sync import _collect_aiterator, sync from zarr.errors import ZarrUserWarning from zarr.storage import FsspecStore from zarr.storage._fsspec import _make_async from zarr.testing.store import StoreTests if TYPE_CHECKING: import pathlib from collections.abc import Generator from pathlib import Path import botocore.client import s3fs from zarr.core.common import JSON # Warning filter due to https://github.com/boto/boto3/issues/3889 pytestmark = [ pytest.mark.filterwarnings( re.escape("ignore:datetime.datetime.utcnow() is deprecated:DeprecationWarning") ), # TODO: fix these warnings pytest.mark.filterwarnings("ignore:Unclosed client session:ResourceWarning"), pytest.mark.filterwarnings( "ignore:coroutine 'ClientCreatorContext.__aexit__' was never awaited:RuntimeWarning" ), ] fsspec = pytest.importorskip("fsspec") s3fs = pytest.importorskip("s3fs") requests = pytest.importorskip("requests") moto_server = pytest.importorskip("moto.moto_server.threaded_moto_server") moto = pytest.importorskip("moto") botocore = pytest.importorskip("botocore") # ### amended from s3fs ### # test_bucket_name = "test" secure_bucket_name = "test-secure" port = 5555 endpoint_url = f"http://127.0.0.1:{port}/" @pytest.fixture(scope="module") def s3_base() -> Generator[None, None, None]: # writable local S3 system # This fixture is module-scoped, 
meaning that we can reuse the MotoServer across all tests server = moto_server.ThreadedMotoServer(ip_address="127.0.0.1", port=port) server.start() if "AWS_SECRET_ACCESS_KEY" not in os.environ: os.environ["AWS_SECRET_ACCESS_KEY"] = "foo" if "AWS_ACCESS_KEY_ID" not in os.environ: os.environ["AWS_ACCESS_KEY_ID"] = "foo" yield server.stop() def get_boto3_client() -> botocore.client.BaseClient: # NB: we use the sync botocore client for setup session = botocore.session.Session() return session.create_client("s3", endpoint_url=endpoint_url) @pytest.fixture(autouse=True) def s3(s3_base: None) -> Generator[s3fs.S3FileSystem, None, None]: """ Quoting Martin Durant: pytest-asyncio creates a new event loop for each async test. When an async-mode s3fs instance is made from async, it will be assigned to the loop from which it is made. That means that if you use s3fs again from a subsequent test, you will have the same identical instance, but be running on a different loop - which fails. For the rest: it's very convenient to clean up the state of the store between tests, make sure we start off blank each time. https://github.com/zarr-developers/zarr-python/pull/1785#discussion_r1634856207 """ client = get_boto3_client() client.create_bucket(Bucket=test_bucket_name, ACL="public-read") s3fs.S3FileSystem.clear_instance_cache() s3 = s3fs.S3FileSystem(anon=False, client_kwargs={"endpoint_url": endpoint_url}) session = sync(s3.set_session()) s3.invalidate_cache() yield s3 requests.post(f"{endpoint_url}/moto-api/reset") client.close() sync(session.close()) # ### end from s3fs ### # async def test_basic() -> None: store = FsspecStore.from_url( f"s3://{test_bucket_name}/foo/spam/", storage_options={"endpoint_url": endpoint_url, "anon": False}, ) assert store.fs.asynchronous assert store.path == f"{test_bucket_name}/foo/spam" assert await _collect_aiterator(store.list()) == () assert not await store.exists("foo") data = b"hello" await store.set("foo", cpu.Buffer.from_bytes(data)) assert await store.exists("foo") buf = await store.get("foo", prototype=default_buffer_prototype()) assert buf is not None assert buf.to_bytes() == data out = await store.get_partial_values( prototype=default_buffer_prototype(), key_ranges=[("foo", OffsetByteRequest(1))] ) assert out[0] is not None assert out[0].to_bytes() == data[1:] class TestFsspecStoreS3(StoreTests[FsspecStore, cpu.Buffer]): store_cls = FsspecStore buffer_cls = cpu.Buffer @pytest.fixture def store_kwargs(self) -> dict[str, str | bool]: try: from fsspec import url_to_fs except ImportError: # before fsspec==2024.3.1 from fsspec.core import url_to_fs fs, path = url_to_fs( f"s3://{test_bucket_name}", endpoint_url=endpoint_url, anon=False, asynchronous=True ) return {"fs": fs, "path": path} @pytest.fixture async def store(self, store_kwargs: dict[str, Any]) -> FsspecStore: return self.store_cls(**store_kwargs) async def get(self, store: FsspecStore, key: str) -> Buffer: # make a new, synchronous instance of the filesystem because this test is run in sync code new_fs = fsspec.filesystem( "s3", endpoint_url=store.fs.endpoint_url, anon=store.fs.anon, asynchronous=False ) return self.buffer_cls.from_bytes(new_fs.cat(f"{store.path}/{key}")) async def set(self, store: FsspecStore, key: str, value: Buffer) -> None: # make a new, synchronous instance of the filesystem because this test is run in sync code new_fs = fsspec.filesystem( "s3", endpoint_url=store.fs.endpoint_url, anon=store.fs.anon, asynchronous=False ) new_fs.write_bytes(f"{store.path}/{key}", value.to_bytes()) def 
test_store_repr(self, store: FsspecStore) -> None: assert str(store) == "" def test_store_supports_writes(self, store: FsspecStore) -> None: assert store.supports_writes def test_store_supports_listing(self, store: FsspecStore) -> None: assert store.supports_listing async def test_fsspec_store_from_uri(self, store: FsspecStore) -> None: storage_options = { "endpoint_url": endpoint_url, "anon": False, } meta: dict[str, JSON] = { "attributes": {"key": "value"}, "zarr_format": 3, "node_type": "group", } await store.set( "zarr.json", self.buffer_cls.from_bytes(json.dumps(meta).encode()), ) group = await zarr.api.asynchronous.open_group( store=f"s3://{test_bucket_name}", storage_options=storage_options ) assert dict(group.attrs) == {"key": "value"} meta = { "attributes": {"key": "value-2"}, "zarr_format": 3, "node_type": "group", } await store.set( "directory-2/zarr.json", self.buffer_cls.from_bytes(json.dumps(meta).encode()), ) group = await zarr.api.asynchronous.open_group( store=f"s3://{test_bucket_name}/directory-2", storage_options=storage_options ) assert dict(group.attrs) == {"key": "value-2"} meta = { "attributes": {"key": "value-3"}, "zarr_format": 3, "node_type": "group", } await store.set( "directory-3/zarr.json", self.buffer_cls.from_bytes(json.dumps(meta).encode()), ) group = await zarr.api.asynchronous.open_group( store=f"s3://{test_bucket_name}", path="directory-3", storage_options=storage_options ) assert dict(group.attrs) == {"key": "value-3"} @pytest.mark.skipif( parse_version(fsspec.__version__) < parse_version("2024.03.01"), reason="Prior bug in from_upath", ) def test_from_upath(self) -> None: upath = pytest.importorskip("upath") path = upath.UPath( f"s3://{test_bucket_name}/foo/bar/", endpoint_url=endpoint_url, anon=False, asynchronous=True, ) result = FsspecStore.from_upath(path) assert result.fs.endpoint_url == endpoint_url assert result.fs.asynchronous assert result.path == f"{test_bucket_name}/foo/bar" def test_init_warns_if_fs_asynchronous_is_false(self) -> None: try: from fsspec import url_to_fs except ImportError: # before fsspec==2024.3.1 from fsspec.core import url_to_fs fs, path = url_to_fs( f"s3://{test_bucket_name}", endpoint_url=endpoint_url, anon=False, asynchronous=False ) store_kwargs = {"fs": fs, "path": path} with pytest.warns(ZarrUserWarning, match=r".* was not created with `asynchronous=True`.*"): self.store_cls(**store_kwargs) async def test_empty_nonexistent_path(self, store_kwargs: dict[str, Any]) -> None: # regression test for https://github.com/zarr-developers/zarr-python/pull/2343 store_kwargs["path"] += "/abc" store = await self.store_cls.open(**store_kwargs) assert await store.is_empty("") async def test_delete_dir_unsupported_deletes(self, store: FsspecStore) -> None: store.supports_deletes = False with pytest.raises( NotImplementedError, match="This method is only available for stores that support deletes.", ): await store.delete_dir("test_prefix") def array_roundtrip(store: FsspecStore) -> None: """ Round trip an array using a Zarr store Args: store: FsspecStore """ data = np.ones((3, 3)) arr = zarr.create_array(store=store, overwrite=True, data=data) assert isinstance(arr, Array) # Read set values arr2 = zarr.open_array(store=store) assert isinstance(arr2, Array) np.testing.assert_array_equal(arr[:], data) @pytest.mark.skipif( parse_version(fsspec.__version__) < parse_version("2024.12.0"), reason="No AsyncFileSystemWrapper", ) def test_wrap_sync_filesystem(tmp_path: pathlib.Path) -> None: """The local fs is not async so we should expect it 
to be wrapped automatically""" from fsspec.implementations.asyn_wrapper import AsyncFileSystemWrapper store = FsspecStore.from_url(f"file://{tmp_path}", storage_options={"auto_mkdir": True}) assert isinstance(store.fs, AsyncFileSystemWrapper) assert store.fs.async_impl array_roundtrip(store) @pytest.mark.skipif( parse_version(fsspec.__version__) >= parse_version("2024.12.0"), reason="No AsyncFileSystemWrapper", ) def test_wrap_sync_filesystem_raises(tmp_path: pathlib.Path) -> None: """The local fs is not async so we should expect it to be wrapped automatically""" with pytest.raises(ImportError, match="The filesystem .*"): FsspecStore.from_url(f"file://{tmp_path}", storage_options={"auto_mkdir": True}) @pytest.mark.skipif( parse_version(fsspec.__version__) < parse_version("2024.12.0"), reason="No AsyncFileSystemWrapper", ) def test_no_wrap_async_filesystem() -> None: """An async fs should not be wrapped automatically; fsspec's s3 filesystem is such an fs""" from fsspec.implementations.asyn_wrapper import AsyncFileSystemWrapper store = FsspecStore.from_url( f"s3://{test_bucket_name}/foo/spam/", storage_options={"endpoint_url": endpoint_url, "anon": False, "asynchronous": True}, read_only=False, ) assert not isinstance(store.fs, AsyncFileSystemWrapper) assert store.fs.async_impl array_roundtrip(store) @pytest.mark.skipif( parse_version(fsspec.__version__) < parse_version("2024.12.0"), reason="No AsyncFileSystemWrapper", ) def test_open_fsmap_file(tmp_path: pathlib.Path) -> None: min_fsspec_with_async_wrapper = parse_version("2024.12.0") current_version = parse_version(fsspec.__version__) fs = fsspec.filesystem("file", auto_mkdir=True) mapper = fs.get_mapper(tmp_path) if current_version < min_fsspec_with_async_wrapper: # Expect ImportError for older versions with pytest.raises( ImportError, match=r"The filesystem .* is synchronous, and the required AsyncFileSystemWrapper is not available.*", ): array_roundtrip(mapper) else: # Newer versions should work array_roundtrip(mapper) @pytest.mark.skipif( parse_version(fsspec.__version__) < parse_version("2024.12.0"), reason="No AsyncFileSystemWrapper", ) def test_open_fsmap_file_raises(tmp_path: pathlib.Path) -> None: fsspec = pytest.importorskip("fsspec.implementations.local") fs = fsspec.LocalFileSystem(auto_mkdir=False) mapper = fs.get_mapper(tmp_path) with pytest.raises(FileNotFoundError, match="No such file or directory: .*"): array_roundtrip(mapper) @pytest.mark.parametrize("asynchronous", [True, False]) def test_open_fsmap_s3(asynchronous: bool) -> None: s3_filesystem = s3fs.S3FileSystem( asynchronous=asynchronous, endpoint_url=endpoint_url, anon=False ) mapper = s3_filesystem.get_mapper(f"s3://{test_bucket_name}/map/foo/") array_roundtrip(mapper) def test_open_s3map_raises() -> None: with pytest.raises(TypeError, match="Unsupported type for store_like:.*"): zarr.open(store=0, mode="w", shape=(3, 3)) s3_filesystem = s3fs.S3FileSystem(asynchronous=True, endpoint_url=endpoint_url, anon=False) mapper = s3_filesystem.get_mapper(f"s3://{test_bucket_name}/map/foo/") with pytest.raises( ValueError, match="'path' was provided but is not used for FSMap store_like objects" ): zarr.open(store=mapper, path="bar", mode="w", shape=(3, 3)) with pytest.raises( TypeError, match="'storage_options' is only used when the store is passed as a FSSpec URI string.", ): zarr.open(store=mapper, storage_options={"anon": True}, mode="w", shape=(3, 3)) @pytest.mark.parametrize("asynchronous", [True, False]) def test_make_async(asynchronous: bool) -> None: s3_filesystem = 
s3fs.S3FileSystem( asynchronous=asynchronous, endpoint_url=endpoint_url, anon=False ) fs = _make_async(s3_filesystem) assert fs.asynchronous @pytest.mark.skipif( parse_version(fsspec.__version__) < parse_version("2024.12.0"), reason="No AsyncFileSystemWrapper", ) async def test_delete_dir_wrapped_filesystem(tmp_path: Path) -> None: from fsspec.implementations.asyn_wrapper import AsyncFileSystemWrapper from fsspec.implementations.local import LocalFileSystem wrapped_fs = AsyncFileSystemWrapper(LocalFileSystem(auto_mkdir=True)) store = FsspecStore(wrapped_fs, read_only=False, path=f"{tmp_path}/test/path") assert isinstance(store.fs, AsyncFileSystemWrapper) assert store.fs.asynchronous await store.set("zarr.json", cpu.Buffer.from_bytes(b"root")) await store.set("foo-bar/zarr.json", cpu.Buffer.from_bytes(b"root")) await store.set("foo/zarr.json", cpu.Buffer.from_bytes(b"bar")) await store.set("foo/c/0", cpu.Buffer.from_bytes(b"chunk")) await store.delete_dir("foo") assert await store.exists("zarr.json") assert await store.exists("foo-bar/zarr.json") assert not await store.exists("foo/zarr.json") assert not await store.exists("foo/c/0") @pytest.mark.skipif( parse_version(fsspec.__version__) < parse_version("2024.12.0"), reason="No AsyncFileSystemWrapper", ) async def test_with_read_only_auto_mkdir(tmp_path: Path) -> None: """ Test that creating a read-only copy of a store backed by the local file system does not error if auto_mkdir is False. """ store_w = FsspecStore.from_url(f"file://{tmp_path}", storage_options={"auto_mkdir": False}) _ = store_w.with_read_only() zarr-python-3.1.5/tests/test_store/test_local.py000066400000000000000000000126051511007055700220620ustar00rootroot00000000000000from __future__ import annotations import pathlib import re import numpy as np import pytest import zarr from zarr import create_array from zarr.core.buffer import Buffer, cpu from zarr.storage import LocalStore from zarr.storage._local import _atomic_write from zarr.testing.store import StoreTests from zarr.testing.utils import assert_bytes_equal class TestLocalStore(StoreTests[LocalStore, cpu.Buffer]): store_cls = LocalStore buffer_cls = cpu.Buffer async def get(self, store: LocalStore, key: str) -> Buffer: return self.buffer_cls.from_bytes((store.root / key).read_bytes()) async def set(self, store: LocalStore, key: str, value: Buffer) -> None: parent = (store.root / key).parent if not parent.exists(): parent.mkdir(parents=True) (store.root / key).write_bytes(value.to_bytes()) @pytest.fixture def store_kwargs(self, tmpdir: str) -> dict[str, str]: return {"root": str(tmpdir)} def test_store_repr(self, store: LocalStore) -> None: assert str(store) == f"file://{store.root.as_posix()}" def test_store_supports_writes(self, store: LocalStore) -> None: assert store.supports_writes def test_store_supports_listing(self, store: LocalStore) -> None: assert store.supports_listing async def test_empty_with_empty_subdir(self, store: LocalStore) -> None: assert await store.is_empty("") (store.root / "foo/bar").mkdir(parents=True) assert await store.is_empty("") def test_creates_new_directory(self, tmp_path: pathlib.Path) -> None: target = tmp_path.joinpath("a", "b", "c") assert not target.exists() store = self.store_cls(root=target) zarr.group(store=store) def test_invalid_root_raises(self) -> None: """ Test that a TypeError is raised when a non-str/Path type is used for the `root` argument """ with pytest.raises( TypeError, match=r"'root' must be a string or Path instance. 
Got an instance of instead.", ): LocalStore(root=0) # type: ignore[arg-type] async def test_get_with_prototype_default(self, store: LocalStore) -> None: """ Ensure that data can be read via ``store.get`` if the prototype keyword argument is unspecified, i.e. set to ``None``. """ data_buf = self.buffer_cls.from_bytes(b"\x01\x02\x03\x04") key = "c/0" await self.set(store, key, data_buf) observed = await store.get(key, prototype=None) assert_bytes_equal(observed, data_buf) @pytest.mark.parametrize("ndim", [0, 1, 3]) @pytest.mark.parametrize( "destination", ["destination", "foo/bar/destintion", pathlib.Path("foo/bar/destintion")] ) async def test_move( self, tmp_path: pathlib.Path, ndim: int, destination: pathlib.Path | str ) -> None: origin = tmp_path / "origin" if isinstance(destination, str): destination = str(tmp_path / destination) else: destination = tmp_path / destination print(type(destination)) store = await LocalStore.open(root=origin) shape = (4,) * ndim chunks = (2,) * ndim data = np.arange(4**ndim) if ndim > 0: data = data.reshape(*shape) array = create_array(store, data=data, chunks=chunks or "auto") await store.move(destination) assert store.root == pathlib.Path(destination) assert pathlib.Path(destination).exists() assert not origin.exists() assert np.array_equal(array[...], data) store2 = await LocalStore.open(root=origin) with pytest.raises( FileExistsError, match=re.escape(f"Destination root {destination} already exists") ): await store2.move(destination) @pytest.mark.parametrize("exclusive", [True, False]) def test_atomic_write_successful(tmp_path: pathlib.Path, exclusive: bool) -> None: path = tmp_path / "data" with _atomic_write(path, "wb", exclusive=exclusive) as f: f.write(b"abc") assert path.read_bytes() == b"abc" assert list(path.parent.iterdir()) == [path] # no temp files @pytest.mark.parametrize("exclusive", [True, False]) def test_atomic_write_incomplete(tmp_path: pathlib.Path, exclusive: bool) -> None: path = tmp_path / "data" with pytest.raises(RuntimeError): # noqa: PT012 with _atomic_write(path, "wb", exclusive=exclusive) as f: f.write(b"a") raise RuntimeError assert not path.exists() assert list(path.parent.iterdir()) == [] # no temp files def test_atomic_write_non_exclusive_preexisting(tmp_path: pathlib.Path) -> None: path = tmp_path / "data" with path.open("wb") as f: f.write(b"xyz") assert path.read_bytes() == b"xyz" with _atomic_write(path, "wb", exclusive=False) as f: f.write(b"abc") assert path.read_bytes() == b"abc" assert list(path.parent.iterdir()) == [path] # no temp files def test_atomic_write_exclusive_preexisting(tmp_path: pathlib.Path) -> None: path = tmp_path / "data" with path.open("wb") as f: f.write(b"xyz") assert path.read_bytes() == b"xyz" with pytest.raises(FileExistsError): with _atomic_write(path, "wb", exclusive=True) as f: f.write(b"abc") assert path.read_bytes() == b"xyz" assert list(path.parent.iterdir()) == [path] # no temp files zarr-python-3.1.5/tests/test_store/test_logging.py000066400000000000000000000121521511007055700224130ustar00rootroot00000000000000from __future__ import annotations import logging from typing import TYPE_CHECKING, TypedDict import pytest import zarr from zarr.core.buffer import Buffer, cpu, default_buffer_prototype from zarr.storage import LocalStore, LoggingStore from zarr.testing.store import StoreTests if TYPE_CHECKING: from pathlib import Path from zarr.abc.store import Store class StoreKwargs(TypedDict): store: LocalStore log_level: str class TestLoggingStore(StoreTests[LoggingStore[LocalStore], 
cpu.Buffer]): # store_cls is needed to do an isintsance check, so can't be a subscripted generic store_cls = LoggingStore # type: ignore[assignment] buffer_cls = cpu.Buffer async def get(self, store: LoggingStore[LocalStore], key: str) -> Buffer: return self.buffer_cls.from_bytes((store._store.root / key).read_bytes()) async def set(self, store: LoggingStore[LocalStore], key: str, value: Buffer) -> None: parent = (store._store.root / key).parent if not parent.exists(): parent.mkdir(parents=True) (store._store.root / key).write_bytes(value.to_bytes()) @pytest.fixture def store_kwargs(self, tmp_path: Path) -> StoreKwargs: return {"store": LocalStore(str(tmp_path)), "log_level": "DEBUG"} @pytest.fixture def open_kwargs(self, tmp_path: Path) -> dict[str, type[LocalStore] | str]: return {"store_cls": LocalStore, "root": str(tmp_path), "log_level": "DEBUG"} @pytest.fixture def store(self, store_kwargs: StoreKwargs) -> LoggingStore[LocalStore]: return self.store_cls(**store_kwargs) def test_store_supports_writes(self, store: LoggingStore[LocalStore]) -> None: assert store.supports_writes def test_store_supports_listing(self, store: LoggingStore[LocalStore]) -> None: assert store.supports_listing def test_store_repr(self, store: LoggingStore[LocalStore]) -> None: assert f"{store!r}" == f"LoggingStore(LocalStore, 'file://{store._store.root.as_posix()}')" def test_store_str(self, store: LoggingStore[LocalStore]) -> None: assert str(store) == f"logging-file://{store._store.root.as_posix()}" async def test_default_handler( self, local_store: LocalStore, capsys: pytest.CaptureFixture[str] ) -> None: # Store and then remove existing handlers to enter default handler code path handlers = logging.getLogger().handlers[:] for h in handlers: logging.getLogger().removeHandler(h) # Test logs are sent to stdout wrapped = LoggingStore(store=local_store) buffer = default_buffer_prototype().buffer res = await wrapped.set("foo/bar/c/0", buffer.from_bytes(b"\x01\x02\x03\x04")) # type: ignore[func-returns-value] assert res is None captured = capsys.readouterr() assert len(captured) == 2 assert "Calling LocalStore.set" in captured.out assert "Finished LocalStore.set" in captured.out # Restore handlers for h in handlers: logging.getLogger().addHandler(h) def test_is_open_setter_raises(self, store: LoggingStore[LocalStore]) -> None: "Test that a user cannot change `_is_open` without opening the underlying store." 
with pytest.raises( NotImplementedError, match="LoggingStore must be opened via the `_open` method" ): store._is_open = True @pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) async def test_logging_store(store: Store, caplog: pytest.LogCaptureFixture) -> None: wrapped = LoggingStore(store=store, log_level="DEBUG") buffer = default_buffer_prototype().buffer caplog.clear() res = await wrapped.set("foo/bar/c/0", buffer.from_bytes(b"\x01\x02\x03\x04")) # type: ignore[func-returns-value] assert res is None assert len(caplog.record_tuples) == 2 for tup in caplog.record_tuples: assert str(store) in tup[0] assert f"Calling {type(store).__name__}.set" in caplog.record_tuples[0][2] assert f"Finished {type(store).__name__}.set" in caplog.record_tuples[1][2] caplog.clear() keys = [k async for k in wrapped.list()] assert keys == ["foo/bar/c/0"] assert len(caplog.record_tuples) == 2 for tup in caplog.record_tuples: assert str(store) in tup[0] assert f"Calling {type(store).__name__}.list" in caplog.record_tuples[0][2] assert f"Finished {type(store).__name__}.list" in caplog.record_tuples[1][2] @pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) async def test_logging_store_counter(store: Store) -> None: wrapped = LoggingStore(store=store, log_level="DEBUG") arr = zarr.create(shape=(10,), store=wrapped, overwrite=True) arr[:] = 1 assert wrapped.counter["set"] == 2 assert wrapped.counter["list"] == 0 assert wrapped.counter["list_dir"] == 0 assert wrapped.counter["list_prefix"] == 0 if store.supports_deletes: assert wrapped.counter["get"] == 0 # 1 if overwrite=False assert wrapped.counter["delete_dir"] == 1 else: assert wrapped.counter["get"] == 1 assert wrapped.counter["delete_dir"] == 0 zarr-python-3.1.5/tests/test_store/test_memory.py000066400000000000000000000107131511007055700222760ustar00rootroot00000000000000from __future__ import annotations import re from typing import TYPE_CHECKING, Any import numpy as np import numpy.typing as npt import pytest import zarr from zarr.core.buffer import Buffer, cpu, gpu from zarr.errors import ZarrUserWarning from zarr.storage import GpuMemoryStore, MemoryStore from zarr.testing.store import StoreTests from zarr.testing.utils import gpu_test if TYPE_CHECKING: from zarr.core.common import ZarrFormat # TODO: work out where this warning is coming from and fix it @pytest.mark.filterwarnings( re.escape("ignore:coroutine 'ClientCreatorContext.__aexit__' was never awaited") ) class TestMemoryStore(StoreTests[MemoryStore, cpu.Buffer]): store_cls = MemoryStore buffer_cls = cpu.Buffer async def set(self, store: MemoryStore, key: str, value: Buffer) -> None: store._store_dict[key] = value async def get(self, store: MemoryStore, key: str) -> Buffer: return store._store_dict[key] @pytest.fixture(params=[None, True]) def store_kwargs(self, request: pytest.FixtureRequest) -> dict[str, Any]: kwargs: dict[str, Any] if request.param is True: kwargs = {"store_dict": {}} else: kwargs = {"store_dict": None} return kwargs @pytest.fixture async def store(self, store_kwargs: dict[str, Any]) -> MemoryStore: return self.store_cls(**store_kwargs) def test_store_repr(self, store: MemoryStore) -> None: assert str(store) == f"memory://{id(store._store_dict)}" def test_store_supports_writes(self, store: MemoryStore) -> None: assert store.supports_writes def test_store_supports_listing(self, store: MemoryStore) -> None: assert store.supports_listing async def test_list_prefix(self, store: MemoryStore) -> None: assert True 
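    # NOTE: ``test_list_prefix`` above deliberately overrides the inherited
    # ``StoreTests.test_list_prefix`` with a no-op, so the generic listing check is
    # skipped for this store. The parametrized test below checks that after resizing,
    # the newly exposed region of the array reads back deterministically as zeros
    # while the previously written values are preserved.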
@pytest.mark.parametrize("dtype", ["uint8", "float32", "int64"]) @pytest.mark.parametrize("zarr_format", [2, 3]) async def test_deterministic_size( self, store: MemoryStore, dtype: npt.DTypeLike, zarr_format: ZarrFormat ) -> None: a = zarr.empty( store=store, shape=(3,), chunks=(1000,), dtype=dtype, zarr_format=zarr_format, overwrite=True, ) a[...] = 1 a.resize((1000,)) np.testing.assert_array_equal(a[:3], 1) np.testing.assert_array_equal(a[3:], 0) # TODO: fix this warning @pytest.mark.filterwarnings("ignore:Unclosed client session:ResourceWarning") @gpu_test class TestGpuMemoryStore(StoreTests[GpuMemoryStore, gpu.Buffer]): store_cls = GpuMemoryStore buffer_cls = gpu.Buffer async def set(self, store: GpuMemoryStore, key: str, value: gpu.Buffer) -> None: # type: ignore[override] store._store_dict[key] = value async def get(self, store: MemoryStore, key: str) -> Buffer: return store._store_dict[key] @pytest.fixture(params=[None, True]) def store_kwargs(self, request: pytest.FixtureRequest) -> dict[str, Any]: kwargs: dict[str, Any] if request.param is True: kwargs = {"store_dict": {}} else: kwargs = {"store_dict": None} return kwargs @pytest.fixture async def store(self, store_kwargs: dict[str, Any]) -> GpuMemoryStore: return self.store_cls(**store_kwargs) def test_store_repr(self, store: GpuMemoryStore) -> None: assert str(store) == f"gpumemory://{id(store._store_dict)}" def test_store_supports_writes(self, store: GpuMemoryStore) -> None: assert store.supports_writes def test_store_supports_listing(self, store: GpuMemoryStore) -> None: assert store.supports_listing async def test_list_prefix(self, store: GpuMemoryStore) -> None: assert True def test_dict_reference(self, store: GpuMemoryStore) -> None: store_dict: dict[str, Any] = {} result = GpuMemoryStore(store_dict=store_dict) assert result._store_dict is store_dict def test_from_dict(self) -> None: d = { "a": gpu.Buffer.from_bytes(b"aaaa"), "b": cpu.Buffer.from_bytes(b"bbbb"), } msg = "Creating a zarr.buffer.gpu.Buffer with an array that does not support the __cuda_array_interface__ for zero-copy transfers, falling back to slow copy based path" with pytest.warns(ZarrUserWarning, match=msg): result = GpuMemoryStore.from_dict(d) for v in result._store_dict.values(): assert type(v) is gpu.Buffer zarr-python-3.1.5/tests/test_store/test_object.py000066400000000000000000000101001511007055700222220ustar00rootroot00000000000000# ruff: noqa: E402 from pathlib import Path from typing import TypedDict import pytest obstore = pytest.importorskip("obstore") from hypothesis.stateful import ( run_state_machine_as_test, ) from obstore.store import LocalStore, MemoryStore from zarr.core.buffer import Buffer, cpu from zarr.storage import ObjectStore from zarr.testing.stateful import ZarrHierarchyStateMachine from zarr.testing.store import StoreTests class StoreKwargs(TypedDict): store: LocalStore read_only: bool class TestObjectStore(StoreTests[ObjectStore[LocalStore], cpu.Buffer]): # store_cls is needed to do an isintsance check, so can't be a subscripted generic store_cls = ObjectStore # type: ignore[assignment] buffer_cls = cpu.Buffer @pytest.fixture def store_kwargs(self, tmp_path: Path) -> StoreKwargs: store = LocalStore(prefix=tmp_path) return {"store": store, "read_only": False} @pytest.fixture def store(self, store_kwargs: StoreKwargs) -> ObjectStore[LocalStore]: return self.store_cls(**store_kwargs) async def get(self, store: ObjectStore[LocalStore], key: str) -> Buffer: assert isinstance(store.store, LocalStore) new_local_store = 
LocalStore(prefix=store.store.prefix) return self.buffer_cls.from_bytes(obstore.get(new_local_store, key).bytes()) async def set(self, store: ObjectStore[LocalStore], key: str, value: Buffer) -> None: assert isinstance(store.store, LocalStore) new_local_store = LocalStore(prefix=store.store.prefix) obstore.put(new_local_store, key, value.to_bytes()) def test_store_repr(self, store: ObjectStore[LocalStore]) -> None: from fnmatch import fnmatch pattern = "ObjectStore(object_store://LocalStore(*))" assert fnmatch(f"{store!r}", pattern) def test_store_supports_writes(self, store: ObjectStore[LocalStore]) -> None: assert store.supports_writes def test_store_supports_partial_writes(self, store: ObjectStore[LocalStore]) -> None: assert not store.supports_partial_writes def test_store_supports_listing(self, store: ObjectStore[LocalStore]) -> None: assert store.supports_listing def test_store_equal(self, store: ObjectStore[LocalStore]) -> None: """Test store equality""" # Test equality against a different instance type assert store != 0 # Test equality against a different store type new_memory_store = ObjectStore(MemoryStore()) assert store != new_memory_store # Test equality against a read only store assert isinstance(store.store, LocalStore) new_local_store = ObjectStore(LocalStore(prefix=store.store.prefix), read_only=True) assert store != new_local_store # Test two memory stores cannot be equal second_memory_store = ObjectStore(MemoryStore()) assert new_memory_store != second_memory_store def test_store_init_raises(self) -> None: """Test __init__ raises appropriate error for improper store type""" with pytest.raises(TypeError): ObjectStore("path/to/store") # type: ignore[type-var] async def test_store_getsize(self, store: ObjectStore[LocalStore]) -> None: buf = cpu.Buffer.from_bytes(b"\x01\x02\x03\x04") await self.set(store, "key", buf) size = await store.getsize("key") assert size == len(buf) async def test_store_getsize_prefix(self, store: ObjectStore[LocalStore]) -> None: buf = cpu.Buffer.from_bytes(b"\x01\x02\x03\x04") await self.set(store, "c/key1/0", buf) await self.set(store, "c/key2/0", buf) size = await store.getsize_prefix("c/key1") assert size == len(buf) total_size = await store.getsize_prefix("c") assert total_size == len(buf) * 2 @pytest.mark.slow_hypothesis def test_zarr_hierarchy() -> None: sync_store = ObjectStore(MemoryStore()) def mk_test_instance_sync() -> ZarrHierarchyStateMachine: return ZarrHierarchyStateMachine(sync_store) run_state_machine_as_test(mk_test_instance_sync) # type: ignore[no-untyped-call] zarr-python-3.1.5/tests/test_store/test_stateful.py000066400000000000000000000031631511007055700226160ustar00rootroot00000000000000# Stateful tests for arbitrary Zarr stores. 
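# The state machines exercised below are defined in ``zarr.testing.stateful``:
# Hypothesis drives them by generating random sequences of their rules (e.g. creating
# groups and arrays, setting and deleting keys) and re-checking the store's state
# after each step via ``run_state_machine_as_test``. A minimal sketch of that pattern,
# assuming an in-memory store (illustrative only, not part of this module):
#
#     from hypothesis.stateful import run_state_machine_as_test
#     from zarr.storage import MemoryStore
#     from zarr.testing.stateful import ZarrHierarchyStateMachine
#
#     def make_machine() -> ZarrHierarchyStateMachine:
#         return ZarrHierarchyStateMachine(MemoryStore())
#
#     run_state_machine_as_test(make_machine)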
import pytest from hypothesis.stateful import ( run_state_machine_as_test, ) from zarr.abc.store import Store from zarr.storage import LocalStore, ZipStore from zarr.testing.stateful import ZarrHierarchyStateMachine, ZarrStoreStateMachine pytestmark = [ pytest.mark.slow_hypothesis, # TODO: work out where this warning is coming from and fix pytest.mark.filterwarnings("ignore:Unclosed client session:ResourceWarning"), ] @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") def test_zarr_hierarchy(sync_store: Store) -> None: def mk_test_instance_sync() -> ZarrHierarchyStateMachine: return ZarrHierarchyStateMachine(sync_store) if isinstance(sync_store, ZipStore): pytest.skip(reason="ZipStore does not support delete") run_state_machine_as_test(mk_test_instance_sync) # type: ignore[no-untyped-call] def test_zarr_store(sync_store: Store) -> None: def mk_test_instance_sync() -> ZarrStoreStateMachine: return ZarrStoreStateMachine(sync_store) if isinstance(sync_store, ZipStore): pytest.skip(reason="ZipStore does not support delete") if isinstance(sync_store, LocalStore): # This test uses arbitrary keys, which are passed to `set` and `delete`. # It assumes that `set` and `delete` are the only two operations that modify state. # But LocalStore, directories can hang around even after a key is delete-d. pytest.skip(reason="Test isn't suitable for LocalStore.") run_state_machine_as_test(mk_test_instance_sync) # type: ignore[no-untyped-call] zarr-python-3.1.5/tests/test_store/test_wrapper.py000066400000000000000000000107261511007055700224520ustar00rootroot00000000000000from __future__ import annotations from typing import TYPE_CHECKING, Any, TypedDict import pytest from zarr.abc.store import ByteRequest, Store from zarr.core.buffer import Buffer from zarr.core.buffer.cpu import Buffer as CPUBuffer from zarr.core.buffer.cpu import buffer_prototype from zarr.storage import LocalStore, WrapperStore from zarr.testing.store import StoreTests if TYPE_CHECKING: from pathlib import Path from zarr.core.buffer.core import BufferPrototype class StoreKwargs(TypedDict): store: LocalStore class OpenKwargs(TypedDict): store_cls: type[LocalStore] root: str # TODO: fix this warning @pytest.mark.filterwarnings( "ignore:coroutine 'ClientCreatorContext.__aexit__' was never awaited:RuntimeWarning" ) class TestWrapperStore(StoreTests[WrapperStore[Any], Buffer]): store_cls = WrapperStore buffer_cls = CPUBuffer async def get(self, store: WrapperStore[LocalStore], key: str) -> Buffer: return self.buffer_cls.from_bytes((store._store.root / key).read_bytes()) async def set(self, store: WrapperStore[LocalStore], key: str, value: Buffer) -> None: parent = (store._store.root / key).parent if not parent.exists(): parent.mkdir(parents=True) (store._store.root / key).write_bytes(value.to_bytes()) @pytest.fixture def store_kwargs(self, tmp_path: Path) -> StoreKwargs: return {"store": LocalStore(str(tmp_path))} @pytest.fixture def open_kwargs(self, tmp_path: Path) -> OpenKwargs: return {"store_cls": LocalStore, "root": str(tmp_path)} def test_store_supports_writes(self, store: WrapperStore[LocalStore]) -> None: assert store.supports_writes def test_store_supports_listing(self, store: WrapperStore[LocalStore]) -> None: assert store.supports_listing def test_store_repr(self, store: WrapperStore[LocalStore]) -> None: assert f"{store!r}" == f"WrapperStore(LocalStore, 'file://{store._store.root.as_posix()}')" def test_store_str(self, store: WrapperStore[LocalStore]) -> None: assert str(store) == 
f"wrapping-file://{store._store.root.as_posix()}" def test_check_writeable(self, store: WrapperStore[LocalStore]) -> None: """ Test _check_writeable() runs without errors. """ store._check_writable() def test_close(self, store: WrapperStore[LocalStore]) -> None: "Test store can be closed" store.close() assert not store._is_open def test_is_open_setter_raises(self, store: WrapperStore[LocalStore]) -> None: """ Test that a user cannot change `_is_open` without opening the underlying store. """ with pytest.raises( NotImplementedError, match="WrapperStore must be opened via the `_open` method" ): store._is_open = True # TODO: work out where warning is coming from and fix @pytest.mark.filterwarnings( "ignore:coroutine 'ClientCreatorContext.__aexit__' was never awaited:RuntimeWarning" ) @pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=True) async def test_wrapped_set(store: Store, capsys: pytest.CaptureFixture[str]) -> None: # define a class that prints when it sets class NoisySetter(WrapperStore[Store]): async def set(self, key: str, value: Buffer) -> None: print(f"setting {key}") await super().set(key, value) key = "foo" value = CPUBuffer.from_bytes(b"bar") store_wrapped = NoisySetter(store) await store_wrapped.set(key, value) captured = capsys.readouterr() assert f"setting {key}" in captured.out assert await store_wrapped.get(key, buffer_prototype) == value @pytest.mark.filterwarnings("ignore:Unclosed client session:ResourceWarning") @pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=True) async def test_wrapped_get(store: Store, capsys: pytest.CaptureFixture[str]) -> None: # define a class that prints when it sets class NoisyGetter(WrapperStore[Any]): async def get( self, key: str, prototype: BufferPrototype, byte_range: ByteRequest | None = None ) -> None: print(f"getting {key}") await super().get(key, prototype=prototype, byte_range=byte_range) key = "foo" value = CPUBuffer.from_bytes(b"bar") store_wrapped = NoisyGetter(store) await store_wrapped.set(key, value) await store_wrapped.get(key, buffer_prototype) captured = capsys.readouterr() assert f"getting {key}" in captured.out zarr-python-3.1.5/tests/test_store/test_zip.py000066400000000000000000000124521511007055700215720ustar00rootroot00000000000000from __future__ import annotations import os import shutil import tempfile import zipfile from typing import TYPE_CHECKING import numpy as np import pytest import zarr from zarr import create_array from zarr.core.buffer import Buffer, cpu, default_buffer_prototype from zarr.core.group import Group from zarr.storage import ZipStore from zarr.testing.store import StoreTests if TYPE_CHECKING: from pathlib import Path from typing import Any # TODO: work out where this is coming from and fix pytestmark = [ pytest.mark.filterwarnings( "ignore:coroutine method 'aclose' of 'ZipStore.list' was never awaited:RuntimeWarning" ) ] class TestZipStore(StoreTests[ZipStore, cpu.Buffer]): store_cls = ZipStore buffer_cls = cpu.Buffer @pytest.fixture def store_kwargs(self) -> dict[str, str | bool]: fd, temp_path = tempfile.mkstemp() os.close(fd) os.unlink(temp_path) return {"path": temp_path, "mode": "w", "read_only": False} async def get(self, store: ZipStore, key: str) -> Buffer: buf = store._get(key, prototype=default_buffer_prototype()) assert buf is not None return buf async def set(self, store: ZipStore, key: str, value: Buffer) -> None: return store._set(key, value) def test_store_read_only(self, store: ZipStore) -> None: assert not store.read_only async def 
test_read_only_store_raises(self, store_kwargs: dict[str, Any]) -> None: # we need to create the zipfile in write mode before switching to read mode store = await self.store_cls.open(**store_kwargs) store.close() kwargs = {**store_kwargs, "mode": "a", "read_only": True} store = await self.store_cls.open(**kwargs) assert store._zmode == "a" assert store.read_only # set with pytest.raises(ValueError): await store.set("foo", cpu.Buffer.from_bytes(b"bar")) def test_store_repr(self, store: ZipStore) -> None: assert str(store) == f"zip://{store.path}" def test_store_supports_writes(self, store: ZipStore) -> None: assert store.supports_writes def test_store_supports_listing(self, store: ZipStore) -> None: assert store.supports_listing # TODO: fix this warning @pytest.mark.filterwarnings("ignore:Unclosed client session:ResourceWarning") def test_api_integration(self, store: ZipStore) -> None: root = zarr.open_group(store=store, mode="a") data = np.arange(10000, dtype=np.uint16).reshape(100, 100) z = root.create_array( shape=data.shape, chunks=(10, 10), name="foo", dtype=np.uint16, fill_value=99 ) z[:] = data assert np.array_equal(data, z[:]) # you can overwrite existing chunks but zipfile will issue a warning with pytest.warns(UserWarning, match="Duplicate name: 'foo/c/0/0'"): z[0, 0] = 100 # TODO: assigning an entire chunk to fill value ends up deleting the chunk which is not supported # a work around will be needed here. with pytest.raises(NotImplementedError): z[0:10, 0:10] = 99 bar = root.create_group("bar", attributes={"hello": "world"}) assert "hello" in dict(bar.attrs) # keys cannot be deleted with pytest.raises(NotImplementedError): del root["bar"] store.close() @pytest.mark.parametrize("read_only", [True, False]) async def test_store_open_read_only( self, store_kwargs: dict[str, Any], read_only: bool ) -> None: if read_only: # create an empty zipfile with zipfile.ZipFile(store_kwargs["path"], mode="w"): pass await super().test_store_open_read_only(store_kwargs, read_only) @pytest.mark.parametrize(("zip_mode", "read_only"), [("w", False), ("a", False), ("x", False)]) async def test_zip_open_mode_translation( self, store_kwargs: dict[str, Any], zip_mode: str, read_only: bool ) -> None: kws = {**store_kwargs, "mode": zip_mode} store = await self.store_cls.open(**kws) assert store.read_only == read_only def test_externally_zipped_store(self, tmp_path: Path) -> None: # See: https://github.com/zarr-developers/zarr-python/issues/2757 zarr_path = tmp_path / "foo.zarr" root = zarr.open_group(store=zarr_path, mode="w") root.require_group("foo") assert isinstance(foo := root["foo"], Group) # noqa: RUF018 foo["bar"] = np.array([1]) shutil.make_archive(str(zarr_path), "zip", zarr_path) zip_path = tmp_path / "foo.zarr.zip" zipped = zarr.open_group(ZipStore(zip_path, mode="r"), mode="r") assert list(zipped.keys()) == list(root.keys()) assert isinstance(group := zipped["foo"], Group) assert list(group.keys()) == list(group.keys()) async def test_move(self, tmp_path: Path) -> None: origin = tmp_path / "origin.zip" destination = tmp_path / "some_folder" / "destination.zip" store = await ZipStore.open(path=origin, mode="a") array = create_array(store, data=np.arange(10)) await store.move(str(destination)) assert store.path == destination assert destination.exists() assert not origin.exists() assert np.array_equal(array[...], np.arange(10)) zarr-python-3.1.5/tests/test_sync.py000066400000000000000000000114611511007055700175500ustar00rootroot00000000000000import asyncio from collections.abc import 
AsyncGenerator from unittest.mock import AsyncMock, patch import pytest import zarr from zarr.core.sync import ( SyncError, SyncMixin, _get_executor, _get_lock, _get_loop, cleanup_resources, loop, sync, ) @pytest.fixture(params=[True, False]) def sync_loop(request: pytest.FixtureRequest) -> asyncio.AbstractEventLoop | None: if request.param is True: return _get_loop() else: return None @pytest.fixture def clean_state(): # use this fixture to make sure no existing threads/loops exist in zarr.core.sync cleanup_resources() yield cleanup_resources() def test_get_loop() -> None: # test that calling _get_loop() twice returns the same loop loop = _get_loop() loop2 = _get_loop() assert loop is loop2 def test_get_lock() -> None: # test that calling _get_lock() twice returns the same lock lock = _get_lock() lock2 = _get_lock() assert lock is lock2 def test_sync(sync_loop: asyncio.AbstractEventLoop | None) -> None: foo = AsyncMock(return_value="foo") assert sync(foo(), loop=sync_loop) == "foo" foo.assert_awaited_once() def test_sync_raises(sync_loop: asyncio.AbstractEventLoop | None) -> None: foo = AsyncMock(side_effect=ValueError("foo-bar")) with pytest.raises(ValueError, match="foo-bar"): sync(foo(), loop=sync_loop) foo.assert_awaited_once() def test_sync_timeout() -> None: duration = 0.02 async def foo() -> None: await asyncio.sleep(duration) with pytest.raises(asyncio.TimeoutError): sync(foo(), timeout=duration / 10) def test_sync_raises_if_no_coroutine(sync_loop: asyncio.AbstractEventLoop | None) -> None: def foo() -> str: return "foo" with pytest.raises(TypeError): sync(foo(), loop=sync_loop) # type: ignore[arg-type] @pytest.mark.filterwarnings("ignore:coroutine.*was never awaited") def test_sync_raises_if_loop_is_closed() -> None: loop = _get_loop() foo = AsyncMock(return_value="foo") with patch.object(loop, "is_closed", return_value=True): with pytest.raises(RuntimeError): sync(foo(), loop=loop) foo.assert_not_awaited() @pytest.mark.filterwarnings("ignore:Unclosed client session:ResourceWarning") @pytest.mark.filterwarnings("ignore:coroutine.*was never awaited") def test_sync_raises_if_calling_sync_from_within_a_running_loop( sync_loop: asyncio.AbstractEventLoop | None, ) -> None: def foo() -> str: # technically, this should be an async function but doing that # yields a warning because it is never awaited by the inner function return "foo" async def bar() -> str: return sync(foo(), loop=sync_loop) # type: ignore[arg-type] with pytest.raises(SyncError): sync(bar(), loop=sync_loop) @pytest.mark.filterwarnings("ignore:coroutine.*was never awaited") def test_sync_raises_if_loop_is_invalid_type() -> None: foo = AsyncMock(return_value="foo") with pytest.raises(TypeError): sync(foo(), loop=1) # type: ignore[arg-type] foo.assert_not_awaited() def test_sync_mixin(sync_loop) -> None: class AsyncFoo: def __init__(self) -> None: pass async def foo(self) -> str: return "foo" async def bar(self) -> AsyncGenerator: for i in range(10): yield i class SyncFoo(SyncMixin): def __init__(self, async_foo: AsyncFoo) -> None: self._async_foo = async_foo def foo(self) -> str: return self._sync(self._async_foo.foo()) def bar(self) -> list[int]: return self._sync_iter(self._async_foo.bar()) async_foo = AsyncFoo() foo = SyncFoo(async_foo) assert foo.foo() == "foo" assert foo.bar() == list(range(10)) @pytest.mark.parametrize("workers", [None, 1, 2]) def test_threadpool_executor(clean_state, workers: int | None) -> None: with zarr.config.set({"threading.max_workers": workers}): _ = zarr.zeros(shape=(1,)) # trigger 
executor creation assert loop != [None] # confirm loop was created if workers is None: # confirm no executor was created if no workers were specified # (this is the default behavior) assert loop[0]._default_executor is None else: # confirm executor was created and attached to loop as the default executor # note: python doesn't have a direct way to get the default executor so we # use the private attribute assert _get_executor() is loop[0]._default_executor assert _get_executor()._max_workers == workers def test_cleanup_resources_idempotent() -> None: _get_executor() # trigger resource creation (iothread, loop, thread-pool) cleanup_resources() cleanup_resources() zarr-python-3.1.5/tests/test_tree.py000066400000000000000000000032371511007055700175350ustar00rootroot00000000000000import os import textwrap from typing import Any import pytest import zarr pytest.importorskip("rich") @pytest.mark.parametrize("root_name", [None, "root"]) def test_tree(root_name: Any) -> None: os.environ["OVERRIDE_COLOR_SYSTEM"] = "truecolor" g = zarr.group(path=root_name) A = g.create_group("A") B = g.create_group("B") C = B.create_group("C") D = C.create_group("C") A.create_array(name="x", shape=(2), dtype="float64") A.create_array(name="y", shape=(0,), dtype="int8") B.create_array(name="x", shape=(0,), dtype="float64") C.create_array(name="x", shape=(0,), dtype="float64") D.create_array(name="x", shape=(0,), dtype="float64") result = repr(g.tree()) root = root_name or "" BOPEN = "\x1b[1m" BCLOSE = "\x1b[0m" expected = textwrap.dedent(f"""\ {BOPEN}/{root}{BCLOSE} ├── {BOPEN}A{BCLOSE} │ ├── {BOPEN}x{BCLOSE} (2,) float64 │ └── {BOPEN}y{BCLOSE} (0,) int8 └── {BOPEN}B{BCLOSE} ├── {BOPEN}C{BCLOSE} │ ├── {BOPEN}C{BCLOSE} │ │ └── {BOPEN}x{BCLOSE} (0,) float64 │ └── {BOPEN}x{BCLOSE} (0,) float64 └── {BOPEN}x{BCLOSE} (0,) float64 """) assert result == expected result = repr(g.tree(level=0)) expected = textwrap.dedent(f"""\ {BOPEN}/{root}{BCLOSE} ├── {BOPEN}A{BCLOSE} └── {BOPEN}B{BCLOSE} """) assert result == expected def test_expand_not_implemented() -> None: g = zarr.group() with pytest.raises(NotImplementedError): g.tree(expand=True) zarr-python-3.1.5/tests/test_v2.py000066400000000000000000000233421511007055700171240ustar00rootroot00000000000000import json from pathlib import Path from typing import Any, Literal import numpy as np import pytest from numcodecs import Delta, Zlib from numcodecs.blosc import Blosc from numcodecs.zstd import Zstd import zarr import zarr.core.buffer import zarr.storage from zarr import config from zarr.abc.store import Store from zarr.core.buffer.core import default_buffer_prototype from zarr.core.dtype import FixedLengthUTF32, Structured, VariableLengthUTF8 from zarr.core.dtype.npy.bytes import NullTerminatedBytes from zarr.core.dtype.wrapper import ZDType from zarr.core.group import Group from zarr.core.sync import sync from zarr.errors import ZarrDeprecationWarning from zarr.storage import MemoryStore, StorePath @pytest.fixture async def store() -> StorePath: return StorePath(await MemoryStore.open()) def test_simple(store: StorePath) -> None: data = np.arange(0, 256, dtype="uint16").reshape((16, 16)) a = zarr.create_array( store / "simple_v2", zarr_format=2, shape=data.shape, chunks=(16, 16), dtype=data.dtype, fill_value=0, ) a[:, :] = data assert np.array_equal(data, a[:, :]) def test_codec_pipeline() -> None: # https://github.com/zarr-developers/zarr-python/issues/2243 store = MemoryStore() array = zarr.create( store=store, shape=(1,), dtype="i4", zarr_format=2, 
filters=[Delta(dtype="i4").get_config()], compressor=Blosc().get_config(), ) array[:] = 1 result = array[:] expected = np.ones(1) np.testing.assert_array_equal(result, expected) @pytest.mark.parametrize( ("dtype", "expected_dtype", "fill_value", "fill_value_json"), [ ("|S1", "|S1", b"X", "WA=="), ("|V1", "|V1", b"X", "WA=="), ("|V10", "|V10", b"X", "WAAAAAAAAAAAAA=="), ], ) async def test_v2_encode_decode( dtype: str, expected_dtype: str, fill_value: bytes, fill_value_json: str ) -> None: store = zarr.storage.MemoryStore() g = zarr.group(store=store, zarr_format=2) g.create_array( name="foo", shape=(3,), chunks=(3,), dtype=dtype, fill_value=fill_value, compressor=None ) result = await store.get("foo/.zarray", zarr.core.buffer.default_buffer_prototype()) assert result is not None serialized = json.loads(result.to_bytes()) expected = { "chunks": [3], "compressor": None, "dtype": expected_dtype, "fill_value": fill_value_json, "filters": None, "order": "C", "shape": [3], "zarr_format": 2, "dimension_separator": ".", } assert serialized == expected data = zarr.open_array(store=store, path="foo")[:] np.testing.assert_equal(data, np.full((3,), b"X", dtype=dtype)) data = zarr.open_array(store=store, path="foo")[:] np.testing.assert_equal(data, np.full((3,), b"X", dtype=dtype)) @pytest.mark.parametrize( ("dtype", "value"), [ (NullTerminatedBytes(length=1), b"Y"), (FixedLengthUTF32(length=1), "Y"), (VariableLengthUTF8(), "Y"), ], ) def test_v2_encode_decode_with_data(dtype: ZDType[Any, Any], value: str) -> None: expected = np.full((3,), value, dtype=dtype.to_native_dtype()) a = zarr.create( shape=(3,), zarr_format=2, dtype=dtype, ) a[:] = expected data = a[:] np.testing.assert_equal(data, expected) @pytest.mark.parametrize("filters", [[], [Delta(dtype=" None: array_fixture = [42] with config.set({"array.order": order}): arr = zarr.create(shape=1, dtype=" None: """ Test that passing compressor=None results in no compressor. Also test that the default value of the compressor parameter does produce a compressor. """ g = zarr.open(store, mode="w", zarr_format=2) assert isinstance(g, Group) arr = g.create_array("one", dtype="i8", shape=(1,), chunks=(1,), compressor=None) assert arr.async_array.compressor is None assert not (arr.filters) arr = g.create_array("two", dtype="i8", shape=(1,), chunks=(1,)) assert arr.async_array.compressor is not None assert not (arr.filters) arr = g.create_array("three", dtype="i8", shape=(1,), chunks=(1,), compressor=Zstd()) assert arr.async_array.compressor is not None assert not (arr.filters) with pytest.raises(ValueError): g.create_array( "four", dtype="i8", shape=(1,), chunks=(1,), compressor=None, compressors=None ) @pytest.mark.parametrize("numpy_order", ["C", "F"]) @pytest.mark.parametrize("zarr_order", ["C", "F"]) def test_v2_non_contiguous(numpy_order: Literal["C", "F"], zarr_order: Literal["C", "F"]) -> None: """ Make sure zarr v2 arrays save data using the memory order given to the zarr array, not the memory order of the original numpy array. 
""" store = MemoryStore() arr = zarr.create_array( store, shape=(10, 8), chunks=(3, 3), fill_value=np.nan, dtype="float64", zarr_format=2, filters=None, compressors=None, overwrite=True, order=zarr_order, ) # Non-contiguous write, using numpy memory order a = np.arange(arr.shape[0] * arr.shape[1]).reshape(arr.shape, order=numpy_order) arr[6:9, 3:6] = a[6:9, 3:6] # The slice on the RHS is important np.testing.assert_array_equal(arr[6:9, 3:6], a[6:9, 3:6]) buf = sync(store.get("2.1", default_buffer_prototype())) assert buf is not None np.testing.assert_array_equal( a[6:9, 3:6], np.frombuffer(buf.to_bytes(), dtype="float64").reshape((3, 3), order=zarr_order), ) # After writing and reading from zarr array, order should be same as zarr order sub_arr = arr[6:9, 3:6] assert isinstance(sub_arr, np.ndarray) if zarr_order == "F": assert (sub_arr).flags.f_contiguous else: assert (sub_arr).flags.c_contiguous # Contiguous write store = MemoryStore() arr = zarr.create_array( store, shape=(10, 8), chunks=(3, 3), fill_value=np.nan, dtype="float64", zarr_format=2, compressors=None, filters=None, overwrite=True, order=zarr_order, ) a = np.arange(9).reshape((3, 3), order=numpy_order) arr[6:9, 3:6] = a np.testing.assert_array_equal(arr[6:9, 3:6], a) # After writing and reading from zarr array, order should be same as zarr order sub_arr = arr[6:9, 3:6] assert isinstance(sub_arr, np.ndarray) if zarr_order == "F": assert (sub_arr).flags.f_contiguous else: assert (sub_arr).flags.c_contiguous def test_default_compressor_deprecation_warning() -> None: with pytest.warns(ZarrDeprecationWarning, match="default_compressor is deprecated"): zarr.storage.default_compressor = "zarr.codecs.zstd.ZstdCodec()" # type: ignore[attr-defined] @pytest.mark.parametrize("fill_value", [None, (b"", 0, 0.0)], ids=["no_fill", "fill"]) def test_structured_dtype_roundtrip(fill_value: float | bytes, tmp_path: Path) -> None: a = np.array( [(b"aaa", 1, 4.2), (b"bbb", 2, 8.4), (b"ccc", 3, 12.6)], dtype=[("foo", "S3"), ("bar", "i4"), ("baz", "f8")], ) array_path = tmp_path / "data.zarr" za = zarr.create( shape=(3,), store=array_path, chunks=(2,), fill_value=fill_value, zarr_format=2, dtype=a.dtype, ) if fill_value is not None: assert (np.array([fill_value] * a.shape[0], dtype=a.dtype) == za[:]).all() za[...] 
= a za = zarr.open_array(store=array_path) assert (a == za[:]).all() @pytest.mark.parametrize( ( "fill_value", "dtype", "expected_result", ), [ ( ("Alice", 30), np.dtype([("name", "U10"), ("age", "i4")]), np.array([("Alice", 30)], dtype=[("name", "U10"), ("age", "i4")])[0], ), ( ["Bob", 25], np.dtype([("name", "U10"), ("age", "i4")]), np.array([("Bob", 25)], dtype=[("name", "U10"), ("age", "i4")])[0], ), ( b"\x01\x00\x00\x00\x02\x00\x00\x00", np.dtype([("x", "i4"), ("y", "i4")]), np.array([(1, 2)], dtype=[("x", "i4"), ("y", "i4")])[0], ), ], ids=[ "tuple_input", "list_input", "bytes_input", ], ) def test_parse_structured_fill_value_valid( fill_value: Any, dtype: np.dtype[Any], expected_result: Any ) -> None: zdtype = Structured.from_native_dtype(dtype) result = zdtype.cast_scalar(fill_value) assert result.dtype == expected_result.dtype assert result == expected_result if isinstance(expected_result, np.void): for name in expected_result.dtype.names or []: assert result[name] == expected_result[name] @pytest.mark.parametrize("fill_value", [None, b"x"], ids=["no_fill", "fill"]) def test_other_dtype_roundtrip(fill_value: None | bytes, tmp_path: Path) -> None: a = np.array([b"a\0\0", b"bb", b"ccc"], dtype="V7") array_path = tmp_path / "data.zarr" za = zarr.create( shape=(3,), store=array_path, chunks=(2,), fill_value=fill_value, zarr_format=2, dtype=a.dtype, ) if fill_value is not None: assert (np.array([fill_value] * a.shape[0], dtype=a.dtype) == za[:]).all() za[...] = a za = zarr.open_array(store=array_path) assert (a == za[:]).all() zarr-python-3.1.5/tests/test_zarr.py000066400000000000000000000012621511007055700175500ustar00rootroot00000000000000import pytest import zarr def test_exports() -> None: """ Ensure that everything in __all__ can be imported. """ from zarr import __all__ for export in __all__: getattr(zarr, export) def test_print_debug_info(capsys: pytest.CaptureFixture[str]) -> None: """ Ensure that print_debug_info does not raise an error """ from importlib.metadata import version from zarr import __version__, print_debug_info print_debug_info() captured = capsys.readouterr() # test that at least some of what we expect is # printed out assert f"zarr: {__version__}" in captured.out assert f"numpy: {version('numpy')}" in captured.out