pax_global_header00006660000000000000000000000064150305262260014513gustar00rootroot0000000000000052 comment=dce4b945b655d211ed214fa20bed52c84049edb3 conda-conda-package-streaming-dce4b94/000077500000000000000000000000001503052622600177415ustar00rootroot00000000000000conda-conda-package-streaming-dce4b94/.github/000077500000000000000000000000001503052622600213015ustar00rootroot00000000000000conda-conda-package-streaming-dce4b94/.github/CODEOWNERS000066400000000000000000000001551503052622600226750ustar00rootroot00000000000000# Syntax for this file at https://help.github.com/articles/about-codeowners/ * @conda/packaging-tools conda-conda-package-streaming-dce4b94/.github/ISSUE_TEMPLATE/000077500000000000000000000000001503052622600234645ustar00rootroot00000000000000conda-conda-package-streaming-dce4b94/.github/ISSUE_TEMPLATE/0_bug.yml000066400000000000000000000033101503052622600252000ustar00rootroot00000000000000# edit this in https://github.com/conda/infrastructure name: Bug Report description: Create a bug report. labels: - type::bug body: - type: markdown attributes: value: | Because processing new bug reports is time-consuming, we would like to ask you to fill out the following form to the best of your ability and as completely as possible. > [!NOTE] > Bug reports that are incomplete or missing information may be closed as inactionable. Since there are already a lot of open issues, please also take a moment to search existing ones to see if your bug has already been reported. If you find something related, please upvote that issue and provide additional details as necessary. 💐 Thank you for helping to make `conda/conda-package-streaming` better. We would be unable to improve `conda/conda-package-streaming` without our community! - type: checkboxes id: checks attributes: label: Checklist description: Please confirm and check all of the following options. 
options: - label: I added a descriptive title required: true - label: I searched open reports and couldn't find a duplicate required: true - type: textarea id: what attributes: label: What happened? description: What should have happened instead? Please provide as many details as possible. The more information provided, the more likely we are able to replicate your problem and offer a solution. validations: required: true - type: textarea id: context attributes: label: Additional Context description: Include any additional information (or screenshots) that you think would be valuable. conda-conda-package-streaming-dce4b94/.github/ISSUE_TEMPLATE/1_feature.yml000066400000000000000000000037671503052622600260770ustar00rootroot00000000000000# edit this in https://github.com/conda/infrastructure name: Feature Request description: Create a feature request. labels: - type::feature body: - type: markdown attributes: value: | Because processing new feature requests is time-consuming, we would like to ask you to fill out the following form to the best of your ability and as completely as possible. > [!NOTE] > Feature requests that are incomplete or missing information may be closed as inactionable. Since there are already a lot of open issues, please also take a moment to search existing ones to see if your feature request has already been submitted. If you find something related, please upvote that issue and provide additional details as necessary. 💐 Thank you for helping to make `conda/conda-package-streaming` better. We would be unable to improve `conda/conda-package-streaming` without our community! - type: checkboxes id: checks attributes: label: Checklist description: Please confirm and check all of the following options. options: - label: I added a descriptive title required: true - label: I searched open requests and couldn't find a duplicate required: true - type: textarea id: idea attributes: label: What is the idea? 
description: Describe what the feature is and the desired state. validations: required: true - type: textarea id: why attributes: label: Why is this needed? description: Who would benefit from this feature? Why would this add value to them? What problem does this solve? - type: textarea id: what attributes: label: What should happen? description: What should be the user experience with the feature? Describe from a user perspective what they would do and see. - type: textarea id: context attributes: label: Additional Context description: Include any additional information that you think would be valuable. conda-conda-package-streaming-dce4b94/.github/ISSUE_TEMPLATE/2_documentation.yml000066400000000000000000000027411503052622600273050ustar00rootroot00000000000000# edit this in https://github.com/conda/infrastructure name: Documentation description: Create a documentation related issue. labels: - type::documentation body: - type: markdown attributes: value: | > [!NOTE] > Documentation requests that are incomplete or missing information may be closed as inactionable. Since there are already a lot of open issues, please also take a moment to search existing ones to see if your bug has already been reported. If you find something related, please upvote that issue and provide additional details as necessary. 💐 Thank you for helping to make conda better. We would be unable to improve conda without our community! - type: checkboxes id: checks attributes: label: Checklist description: Please confirm and check all of the following options. options: - label: I added a descriptive title required: true - label: I searched open reports and couldn't find a duplicate required: true - type: textarea id: what attributes: label: What happened? description: Mention here any typos, broken links, or missing, incomplete, or outdated information, etc. that you have noticed in the conda docs or CLI help. 
validations: required: true - type: textarea id: context attributes: label: Additional Context description: Include any additional information (or screenshots) that you think would be valuable. conda-conda-package-streaming-dce4b94/.github/ISSUE_TEMPLATE/epic.yml000066400000000000000000000062511503052622600251330ustar00rootroot00000000000000# edit this in https://github.com/conda/infrastructure name: Epic description: A collection of related tickets. labels: - epic body: - type: markdown attributes: value: | This form is intended for grouping and collecting together related tickets to better gauge the scope of a problem/feature. If you are attempting to report a bug, propose a new feature, or some other code change please use one of the other forms available. > [!NOTE] > Epics that are incomplete or missing information may be closed as inactionable. Since there are already a lot of open issues, please also take a moment to search existing ones to see if a similar epic has already been opened. If you find something related, please upvote that issue and provide additional details as necessary. 💐 Thank you for helping to make `conda/conda-package-streaming` better. We would be unable to improve `conda/conda-package-streaming` without our community! - type: checkboxes id: checks attributes: label: Checklist description: Please confirm and check all of the following options. options: - label: I added a descriptive title required: true - label: I searched open issues and couldn't find a duplicate required: true - type: textarea id: what attributes: label: What? description: >- What feature or problem will be addressed in this epic? placeholder: Please describe here. validations: required: true - type: textarea id: why attributes: label: Why? description: >- Why is the reported issue(s) a problem, or why is the proposed feature needed? (Research and spike issues can be linked here.) 
value: | - [ ] placeholder: Please describe here and/or link to relevant supporting issues. validations: required: true - type: textarea id: user_impact attributes: label: User impact description: >- In what specific way(s) will users benefit from this change? (e.g. use cases or performance improvements) placeholder: Please describe here. validations: required: true - type: textarea id: goals attributes: label: Goals description: >- What goal(s) should this epic accomplish? value: | - [ ] validations: required: true - type: textarea id: tasks attributes: label: Tasks description: >- What needs to be done to implement this change? value: | - [ ] validations: required: false - type: textarea id: blocked_by attributes: label: 'This epic is blocked by:' description: >- Epics and issues that block this epic. value: | - [ ] validations: required: false - type: textarea id: blocks attributes: label: 'This epic blocks:' description: >- Epics and issues that are blocked by this epic. value: | - [ ] validations: required: false conda-conda-package-streaming-dce4b94/.github/PULL_REQUEST_TEMPLATE.md000066400000000000000000000016301503052622600251020ustar00rootroot00000000000000 ### Description conda-conda-package-streaming-dce4b94/.github/dependabot.yml000066400000000000000000000007221503052622600241320ustar00rootroot00000000000000version: 2 updates: - package-ecosystem: pip directory: /docs/ schedule: interval: monthly allow: # Allow only production updates for Sphinx - dependency-name: sphinx dependency-type: production groups: docs: patterns: - '*' - package-ecosystem: github-actions directory: /.github/workflows schedule: interval: monthly groups: workflows: patterns: - '*' 
conda-conda-package-streaming-dce4b94/.github/template-files/000077500000000000000000000000001503052622600242145ustar00rootroot00000000000000conda-conda-package-streaming-dce4b94/.github/template-files/config.yml000066400000000000000000000027051503052622600262100ustar00rootroot00000000000000conda/governance: # [required] community files - CODE_OF_CONDUCT.md conda/infrastructure: # [required] general workflows - .github/workflows/cla.yml - .github/workflows/update.yml # [optional] to include repo in https://github.com/orgs/conda/projects/2 - .github/workflows/issues.yml - .github/workflows/labels.yml - .github/workflows/project.yml # [optional] stale bot workflows - .github/workflows/stale.yml - .github/workflows/lock.yml # [optional] general processes for the conda org - src: templates/HOW_WE_USE_GITHUB.md dst: HOW_WE_USE_GITHUB.md # [optional] standard issue templates - src: templates/issues/bug.yml dst: .github/ISSUE_TEMPLATE/0_bug.yml - src: templates/issues/feature.yml dst: .github/ISSUE_TEMPLATE/1_feature.yml - src: templates/issues/documentation.yml dst: .github/ISSUE_TEMPLATE/2_documentation.yml - src: templates/issues/epic.yml dst: .github/ISSUE_TEMPLATE/epic.yml # [optional] standard PR template # - src: templates/pull_requests/news_tests_docs.md # dst: .github/template-files/templates/pull_request_template_details.md - src: templates/pull_requests/base.md dst: .github/PULL_REQUEST_TEMPLATE.md # [optional] rever release files # - src: templates/releases/RELEASE.md # dst: RELEASE.md # with: # placeholder: YY.M # - src: templates/releases/rever.xsh # dst: rever.xsh # - src: templates/releases/TEMPLATE # dst: news/TEMPLATE conda-conda-package-streaming-dce4b94/.github/workflows/000077500000000000000000000000001503052622600233365ustar00rootroot00000000000000conda-conda-package-streaming-dce4b94/.github/workflows/cla.yml000066400000000000000000000021611503052622600246200ustar00rootroot00000000000000name: CLA on: issue_comment: types: - created 
pull_request_target: jobs: check: if: >- !github.event.repository.fork && ( github.event.issue.pull_request && github.event.comment.body == '@conda-bot check' || github.event_name == 'pull_request_target' ) runs-on: ubuntu-latest steps: - name: Check CLA uses: conda/actions/check-cla@eb545bb8ab48d499b31c057a6df3cf46753fdbcb # v25.3.1 with: # [required] # A token with ability to comment, label, and modify the commit status # (`pull_request: write` and `statuses: write` for fine-grained PAT; `repo` for classic PAT) # (default: secrets.GITHUB_TOKEN) token: ${{ secrets.CLA_ACTION_TOKEN }} # [required] # Label to apply to contributor's PR once CLA is signed label: cla-signed # [required] # Token for opening signee PR in the provided `cla_repo` # (`pull_request: write` for fine-grained PAT; `repo` and `workflow` for classic PAT) cla_token: ${{ secrets.CLA_FORK_TOKEN }} conda-conda-package-streaming-dce4b94/.github/workflows/issues.yml000066400000000000000000000024651503052622600254030ustar00rootroot00000000000000name: Automate Issues on: # NOTE: github.event is issue_comment payload: # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#issue_comment issue_comment: types: [created] env: FEEDBACK_LBL: pending::feedback SUPPORT_LBL: pending::support jobs: # NOTE: will update label if anyone responds, not just the author/reporter # TODO: create conda-issue-sorting team and modify this to toggle label based on # whether a non-issue-sorting engineer commented pending_support: # if [pending::feedback] and anyone responds if: >- !github.event.repository.fork && !github.event.issue.pull_request && contains(github.event.issue.labels.*.name, 'pending::feedback') runs-on: ubuntu-latest steps: # remove [pending::feedback] - uses: actions-ecosystem/action-remove-labels@2ce5d41b4b6aa8503e285553f75ed56e0a40bae0 # v1.3.0 with: labels: ${{ env.FEEDBACK_LBL }} github_token: ${{ secrets.PROJECT_TOKEN }} # add [pending::support], if still open - 
uses: actions-ecosystem/action-add-labels@18f1af5e3544586314bbe15c0273249c770b2daf # v1.1.3 if: github.event.issue.state == 'open' with: labels: ${{ env.SUPPORT_LBL }} github_token: ${{ secrets.PROJECT_TOKEN }} conda-conda-package-streaming-dce4b94/.github/workflows/labels.yml000066400000000000000000000032361503052622600253270ustar00rootroot00000000000000name: Sync Labels on: # NOTE: github.event is workflow_dispatch payload: # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#workflow_dispatch workflow_dispatch: inputs: delete-unmapped: description: Delete labels not mapped in either global or local label configurations. default: false type: boolean dry-run: description: Run label synchronization workflow without making any changes. default: false type: boolean jobs: sync: if: '!github.event.repository.fork' runs-on: ubuntu-latest env: GLOBAL: https://raw.githubusercontent.com/conda/infra/main/.github/global.yml LOCAL: .github/labels.yml steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - id: has_local uses: andstor/file-existence-action@076e0072799f4942c8bc574a82233e1e4d13e9d6 # v3.0.0 with: files: ${{ env.LOCAL }} - name: Global Only uses: EndBug/label-sync@52074158190acb45f3077f9099fea818aa43f97a # v2.3.3 if: steps.has_local.outputs.files_exists == 'false' with: config-file: ${{ env.GLOBAL }} delete-other-labels: ${{ inputs.delete-unmapped }} dry-run: ${{ inputs.dry-run }} - name: Global & Local uses: EndBug/label-sync@52074158190acb45f3077f9099fea818aa43f97a # v2.3.3 if: steps.has_local.outputs.files_exists == 'true' with: config-file: | ${{ env.GLOBAL }} ${{ env.LOCAL }} delete-other-labels: ${{ inputs.delete-unmapped }} dry-run: ${{ inputs.dry-run }} conda-conda-package-streaming-dce4b94/.github/workflows/lock.yml000066400000000000000000000037401503052622600250150ustar00rootroot00000000000000name: Lock on: # NOTE: github.event is workflow_dispatch payload: # 
https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#workflow_dispatch workflow_dispatch: schedule: - cron: 0 6 * * * permissions: issues: write pull-requests: write jobs: lock: if: '!github.event.repository.fork' runs-on: ubuntu-latest steps: - uses: dessant/lock-threads@1bf7ec25051fe7c00bdd17e6a7cf3d7bfb7dc771 # v5.0.1 with: # Number of days of inactivity before a closed issue is locked issue-inactive-days: 180 # Do not lock issues created before a given timestamp, value must follow ISO 8601 exclude-issue-created-before: '' # Do not lock issues with these labels, value must be a comma separated list of labels or '' exclude-any-issue-labels: '' # Labels to add before locking an issue, value must be a comma separated list of labels or '' add-issue-labels: locked # Reason for locking an issue, value must be one of resolved, off-topic, too heated, spam or '' issue-lock-reason: resolved # Number of days of inactivity before a closed pull request is locked pr-inactive-days: 365 # Do not lock pull requests created before a given timestamp, value must follow ISO 8601 exclude-pr-created-before: '' # Do not lock pull requests with these labels, value must be a comma separated list of labels or '' exclude-any-pr-labels: '' # Labels to add before locking a pull request, value must be a comma separated list of labels or '' add-pr-labels: locked # Reason for locking a pull request, value must be one of resolved, off-topic, too heated, spam or '' pr-lock-reason: resolved # Limit locking to issues, pull requests or discussions, value must be a comma separated list of issues, prs, discussions or '' process-only: issues, prs conda-conda-package-streaming-dce4b94/.github/workflows/project.yml000066400000000000000000000011031503052622600255220ustar00rootroot00000000000000name: Add to Project on: issues: types: - opened pull_request_target: types: - opened jobs: add_to_project: if: '!github.event.repository.fork' runs-on: ubuntu-latest steps: 
- uses: actions/add-to-project@244f685bbc3b7adfa8466e08b698b5577571133e # v1.0.2 with: # issues are added to the Planning project # PRs are added to the Review project project-url: https://github.com/orgs/conda/projects/${{ github.event_name == 'issues' && 2 || 16 }} github-token: ${{ secrets.PROJECT_TOKEN }} conda-conda-package-streaming-dce4b94/.github/workflows/sphinx.yml000066400000000000000000000023211503052622600253700ustar00rootroot00000000000000name: Sphinx on: push: branches: - main pull_request: branches: - main jobs: sphinx: runs-on: ubuntu-latest steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - uses: actions/setup-python@8d9ed9ac5c53483de85588cdf95a591a75ab9f55 # v5.5.0 with: python-version: "3.x" architecture: "x64" cache: "pip" - name: Build Documentation run: | pip install -e .[docs] make html - name: Upload artifact uses: actions/upload-pages-artifact@56afc609e74202658d3ffba0e8f6dda462b719fa # v3.0.1 with: # Upload entire repository path: 'build/html' pages: runs-on: ubuntu-latest if: github.ref == 'refs/heads/main' needs: [sphinx] # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages permissions: contents: read pages: write id-token: write environment: name: github-pages url: ${{ steps.deployment.outputs.page_url }} steps: - name: Deploy to GitHub Pages id: deployment uses: actions/deploy-pages@d6db90164ac5ed86f2b6aed7e0febac5b3c0c03e # v4.0.5 conda-conda-package-streaming-dce4b94/.github/workflows/stale.yml000066400000000000000000000072741503052622600252030ustar00rootroot00000000000000name: Stale on: # NOTE: github.event is workflow_dispatch payload: # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#workflow_dispatch workflow_dispatch: inputs: dryrun: description: 'dryrun: Preview stale issues/prs without marking them (true|false)' required: true type: boolean default: true schedule: - cron: 0 4 * * * permissions: issues: write pull-requests: 
write jobs: stale: if: '!github.event.repository.fork' runs-on: ubuntu-latest strategy: matrix: include: - only-issue-labels: '' days-before-issue-stale: 365 days-before-issue-close: 30 # [type::support] issues have a more aggressive stale/close timeline - only-issue-labels: type::support days-before-issue-stale: 90 days-before-issue-close: 21 steps: - uses: conda/actions/read-yaml@eb545bb8ab48d499b31c057a6df3cf46753fdbcb # v25.3.1 id: read_yaml with: path: https://raw.githubusercontent.com/conda/infra/main/.github/messages.yml - uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9.1.0 id: stale with: # Only issues with these labels are checked whether they are stale only-issue-labels: ${{ matrix.only-issue-labels }} # Idle number of days before marking issues stale days-before-issue-stale: ${{ matrix.days-before-issue-stale }} # Idle number of days before closing stale issues/PRs days-before-issue-close: ${{ matrix.days-before-issue-close }} # Idle number of days before marking PRs stale days-before-pr-stale: 365 # Idle number of days before closing stale PRs days-before-pr-close: 30 # Comment on the staled issues stale-issue-message: ${{ fromJSON(steps.read_yaml.outputs.value)['stale-issue'] }} # Label to apply on staled issues stale-issue-label: stale # Label to apply on closed issues close-issue-label: stale::closed # Reason to use when closing issues close-issue-reason: not_planned # Comment on the staled PRs stale-pr-message: ${{ fromJSON(steps.read_yaml.outputs.value)['stale-pr'] }} # Label to apply on staled PRs stale-pr-label: stale # Label to apply on closed PRs close-pr-label: stale::closed # Remove stale label from issues/PRs on updates/comments remove-stale-when-updated: true # Add specified labels to issues/PRs when they become unstale labels-to-add-when-unstale: stale::recovered # Remove specified labels to issues/PRs when they become unstale labels-to-remove-when-unstale: stale,stale::closed # Max number of operations per run 
operations-per-run: ${{ secrets.STALE_OPERATIONS_PER_RUN || 100 }} # Dry-run debug-only: ${{ github.event.inputs.dryrun || false }} # Order to get issues/PRs ascending: true # Delete branch after closing a stale PR delete-branch: false # Issues with these labels will never be considered stale exempt-issue-labels: stale::recovered,epic # Issues with these labels will never be considered stale exempt-pr-labels: stale::recovered,epic # Exempt all issues/PRs with milestones from stale exempt-all-milestones: true # Assignees on issues/PRs exempted from stale exempt-assignees: mingwandroid - name: Print outputs run: echo ${{ join(steps.stale.outputs.*, ',') }} conda-conda-package-streaming-dce4b94/.github/workflows/tests.yml000066400000000000000000000061221503052622600252240ustar00rootroot00000000000000name: Tests on: # NOTE: github.event context is push payload: # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#push push: branches: - main - feature/** # NOTE: github.event context is pull_request payload: # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#pull_request pull_request: concurrency: # Concurrency group that uses the workflow name and PR number if available # or commit SHA as a fallback. If a new build is triggered under that # concurrency group while a previous build is running it will be canceled. # Repeated pushes to a PR will cancel all previous builds, while multiple # merges to main will not cancel. 
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }} cancel-in-progress: true jobs: linux: runs-on: ubuntu-latest defaults: run: shell: bash -l {0} strategy: fail-fast: false matrix: python-version: ['3.9', '3.10', '3.11', '3.12'] steps: - name: Checkout repository uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: fetch-depth: 0 - name: Setup Miniconda uses: conda-incubator/setup-miniconda@505e6394dae86d6a5c7fbb6e3fb8938e3e863830 # v3.1.1 with: python-version: ${{ matrix.python-version }} channels: defaults activate-environment: test_env auto-update-conda: false auto-activate-base: false show-channel-urls: true - name: Source Scripts run: | set -x # conda is our test dependency but can't be pip installed conda install --quiet conda pip pip install -e .[test] conda info --json echo "condarc" cat ~/.condarc echo "conda_pkgs_dir" ls /home/runner/conda_pkgs_dir echo "miniconda/pkgs" ls /usr/share/miniconda/pkgs echo "test_env" ls /usr/share/miniconda/envs/test_env pytest analyze: name: Analyze test results needs: [linux] if: always() runs-on: ubuntu-latest steps: - name: Download test results uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 - name: Upload combined test results # provides one downloadable archive of all .coverage/test-report.xml files # of all matrix runs for further analysis. 
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 with: name: test-results-${{ github.sha }}-all path: test-results-${{ github.sha }}-* retention-days: 90 # default: 90 - name: Test Summary uses: test-summary/action@31493c76ec9e7aa675f1585d3ed6f1da69269a86 # v2.4 with: paths: ./test-results-${{ github.sha }}-**/test-report*.xml - name: Decide whether the needed jobs succeeded or failed uses: re-actors/alls-green@release/v1 with: jobs: ${{ toJSON(needs) }} conda-conda-package-streaming-dce4b94/.github/workflows/update.yml000066400000000000000000000106731503052622600253520ustar00rootroot00000000000000name: Update Repository on: # every Sunday at 00:36 UTC # https://crontab.guru/#36_2_*_*_0 schedule: - cron: 36 2 * * 0 workflow_dispatch: issue_comment: types: - created jobs: update: if: >- !github.event.repository.fork && ( github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || ( github.event_name == 'issue_comment' && github.event.issue.pull_request && ( github.event.comment.body == '@conda-bot render' || github.event.comment.body == '@conda-bot recreate' ) ) ) runs-on: ubuntu-latest steps: - if: github.event_name == 'issue_comment' uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4.0.0 with: comment-id: ${{ github.event.comment.id }} reactions: eyes reactions-edit-mode: replace token: ${{ secrets.SYNC_TOKEN }} - if: github.event.comment.body == '@conda-bot render' name: Configure git origin run: | echo REPOSITORY=$(curl --silent ${{ github.event.issue.pull_request.url }} | jq --raw-output '.head.repo.full_name') >> $GITHUB_ENV echo REF=$(curl --silent ${{ github.event.issue.pull_request.url }} | jq --raw-output '.head.ref') >> $GITHUB_ENV - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: ${{ env.REPOSITORY || github.repository }} ref: ${{ env.REF || '' }} token: ${{ secrets.SYNC_TOKEN }} - name: Configure git user run: | git config 
--global user.name 'Conda Bot' git config --global user.email '18747875+conda-bot@users.noreply.github.com' - uses: conda/actions/combine-durations@eb545bb8ab48d499b31c057a6df3cf46753fdbcb # v25.3.1 id: durations continue-on-error: true - uses: conda/actions/template-files@eb545bb8ab48d499b31c057a6df3cf46753fdbcb # v25.3.1 id: templates continue-on-error: true - name: Commit changes # no-op if there are no updates continue-on-error: true run: | git add . git commit --message "🤖 updated file(s)" - if: github.event.comment.body != '@conda-bot render' name: Create fork # no-op if the repository is already forked run: echo FORK=$(gh repo fork --clone=false --default-branch-only 2>&1 | awk '{print $1}') >> $GITHUB_ENV env: GH_TOKEN: ${{ secrets.SYNC_TOKEN }} - if: github.event.comment.body != '@conda-bot render' id: create # no-op if no commits were made uses: peter-evans/create-pull-request@271a8d0340265f705b14b6d32b9829c1cb33d45e # v7.0.8 with: push-to-fork: ${{ env.FORK }} token: ${{ secrets.SYNC_TOKEN }} branch: update delete-branch: true title: 🤖 Update infrastructure file(s) body: | [update.yml]: ${{ github.server_url }}/${{ github.repository }}/blob/main/.github/workflows/update.yml Your friendly repository updater. ${{ steps.durations.outputs.summary }} ${{ steps.templates.outputs.summary }} This PR was triggered by @${{ github.triggering_actor }} via ${{ github.event_name }}.
Commands Trigger actions by commenting on this PR: - `@conda-bot render` will run rendering workflows and commit and push any changes to this PR - `@conda-bot recreate` will recreate this PR, overwriting any edits that have been made to it
###### Auto-generated by the [`update.yml`][update.yml] workflow, see ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}. - if: github.event.comment.body == '@conda-bot render' id: update name: Push changes run: git push --force-with-lease - if: always() && github.event_name == 'issue_comment' uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4.0.0 with: comment-id: ${{ github.event.comment.id }} reactions: ${{ (steps.create.conclusion == 'success' || steps.update.conclusion == 'success') && 'hooray' || 'confused' }} reactions-edit-mode: replace token: ${{ secrets.SYNC_TOKEN }} conda-conda-package-streaming-dce4b94/.gitignore000066400000000000000000000001151503052622600217260ustar00rootroot00000000000000.coverage* .vscode .nox __pycache__ PKG-INFO build dist deploy/metadata.json conda-conda-package-streaming-dce4b94/.pre-commit-config.yaml000066400000000000000000000025371503052622600242310ustar00rootroot00000000000000# disable autofixing PRs, commenting "pre-commit.ci autofix" on a pull request triggers a autofix ci: autofix_prs: false repos: - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.12.0 hooks: - id: ruff args: [ --fix ] - id: ruff-format - repo: https://github.com/pre-commit/pre-commit-hooks rev: v5.0.0 hooks: # ensure syntaxes are valid - id: check-toml - id: check-yaml exclude: ^(conda\.)?recipe/meta.yaml # catch git merge/rebase problems - id: check-merge-conflict # sort requirements files - id: file-contents-sorter files: | (?x)^( docs/requirements.txt | tests/requirements.*\.txt ) args: [--unique] # other - id: check-added-large-files - id: check-ast - id: fix-byte-order-marker - id: check-case-conflict - id: check-merge-conflict - id: check-shebang-scripts-are-executable - id: debug-statements - id: detect-private-key - repo: meta # see https://pre-commit.com/#meta-hooks hooks: - id: check-hooks-apply - id: check-useless-excludes - repo: local hooks: - id: git-diff 
name: git diff entry: git diff --exit-code language: system pass_filenames: false always_run: true conda-conda-package-streaming-dce4b94/CHANGELOG.md000066400000000000000000000033561503052622600215610ustar00rootroot00000000000000[//]: # (current developments) ## 0.12.0 (2025-06) * Skip setting permissions if `tarinfo.mode` is `None`. (#140) * Set minimum Python version to 3.9. (#142) * Add flag to deal with package servers that reply `416 Range Not Satisfiable` if requested range is larger than entire file, when using lazy [`conda_reader_for_url`](https://conda.github.io/conda-package-streaming/url.html#conda_package_streaming.url.conda_reader_for_url). (#132) * Format with Ruff (#133) ## 0.11.0 (2024-10) * Add Python 3.12 to test matrix. * Pass Python `tarfile.extractall(filter="fully_trusted")` in addition to internal filtering, when available, to avoid Python 3.12+ `DeprecationWarning` (#87) * Improve umask handling. (#106) * Add `transmute_stream(...)` to create `.conda` from `(TarFile, TarInfo)`. (#90) iterators, allowing more creative data sources than just `.tar.bz2` inputs. * Add `create` module with `TarFile` interface for creating `.conda` archives, also used by `transmute`. (#90) * Pass `encoding="utf-8"` to `TarFile` instead of the system default, avoiding rare potential issues with non-ASCII filenames. (#93) ## 0.10.0 (2024-06) * Use zip64 extensions when converting .tar.bz2 to .conda, if uncompressed size is close to the 2GB ZIP64_LIMIT. (#79) ## 0.9.0 (2023-07) * Respect umask when extracting files. [#65](https://github.com/conda/conda-package-streaming/pulls/65); [conda issue #12829](https://github.com/conda/conda/issues/12829). ## 0.8.0 (2023-05) * Update transmute to use SpooledTemporaryFile instead of streaming directly to zip [(#57)](https://github.com/conda/conda-package-streaming/issues/57). This can reduce zstd memory usage during decompression. * `transmute` returns Path to transmuted package instead of `None`. 
conda-conda-package-streaming-dce4b94/CODE_OF_CONDUCT.md000066400000000000000000000621121503052622600225420ustar00rootroot00000000000000# Conda Organization Code of Conduct # The Short Version Be kind to others. Do not insult or put down others. Behave professionally. Remember that harassment and sexist, racist, or exclusionary jokes are not appropriate for the conda Organization. All communication should be appropriate for a professional audience including people of many different backgrounds. Sexual language and imagery is not appropriate. The conda Organization is dedicated to providing a harassment-free community for everyone, regardless of gender, sexual orientation, gender identity and expression, disability, physical appearance, body size, race, or religion. We do not tolerate harassment of community members in any form. Thank you for helping make this a welcoming, friendly community for all. ## Report an Incident * Report a code of conduct incident [using a form](https://form.jotform.com/221527028480048). * Report a code of conduct incident via email: [conduct@conda.org](mailto:conduct@conda.org). * Contact [an individual committee member](#committee-membership) or [CoC event representative](#coc-representatives) to report an incident in confidence.   And now the longer version... # Conda Organization Diversity Statement The conda Organization welcomes and encourages participation in our community by people of all backgrounds and identities. We are committed to promoting and sustaining a culture that values mutual respect, tolerance, and learning, and we work together as a community to help each other live out these values. We have created this diversity statement because we believe that a diverse community is stronger, more vibrant, and produces better software and better science. 
A diverse community where people treat each other with respect has more potential contributors, more sources for ideas, and fewer shared assumptions that might hinder development or research. Although we have phrased the formal diversity statement generically to make it all-inclusive, we recognize that there are specific identities that are impacted by systemic discrimination and marginalization. We welcome all people to participate in the conda Organization community regardless of their identity or background. # Conda Organization Code of Conduct: Introduction & Scope This code of conduct should be honored by everyone who participates in the conda Organization community. It should be honored in any conda Organization-related activities, by anyone claiming affiliation with the conda Organization, and especially when someone is representing the conda Organization in any role (including as an event volunteer or speaker). This code of conduct applies to all spaces managed by the conda Organization, including all public and private mailing lists, issue trackers, wikis, forums, and any other communication channel used by our community. The code of conduct equally applies at conda Organization events and governs standards of behavior for attendees, speakers, volunteers, booth staff, and event sponsors. This code is not exhaustive or complete. It serves to distill our understanding of a collaborative, inclusive community culture. Please try to follow this code in spirit as much as in letter, to create a friendly and productive environment that enriches the conda Organization community. The conda Organization Code of Conduct follows below. # Standards for Behavior The conda Organization is a worldwide community. All communication should be appropriate for a professional audience including people of many different backgrounds. **Please always be kind and courteous. 
There's never a need to be mean or rude or disrespectful.** Thank you for helping make this a welcoming, friendly community for all. We strive to: **Be empathetic, welcoming, friendly, and patient.** We remember that the conda Organization is crafted by human beings who deserve to be treated with kindness and empathy. We work together to resolve conflict and assume good intentions. We may all experience some frustration from time to time, but we do not allow frustration to turn into a personal attack. A community where people feel uncomfortable or threatened is not a productive one. **Be collaborative.** Our work depends on the participation of many people, and in turn others depend on our work. Open source communities depend on effective and friendly collaboration to achieve their goals. **Be inquisitive.** Nobody knows everything! Asking questions early avoids many problems later, so we encourage questions, although we may direct them to the appropriate forum. We will try hard to be responsive and helpful. **Be careful in the words that we choose.** We are careful and respectful in our communication and we take responsibility for our own speech. Be kind to others. Do not insult or put down other members of the community. ## Unacceptable Behavior We are committed to making participation in this community a harassment-free experience. 
We will not accept harassment or other exclusionary behaviors, such as: - The use of sexualized language or imagery - Excessive profanity (please avoid curse words; people differ greatly in their sensitivity to swearing) - Posting sexually explicit or violent material - Violent or intimidating threats or language directed against another person - Inappropriate physical contact and/or unwelcome sexual attention or sexual comments - Sexist, racist, or otherwise discriminatory jokes and language - Trolling or insulting and derogatory comments - Written or verbal comments which have the effect of excluding people on the basis of membership in a specific group, including level of experience, gender, gender identity and expression, sexual orientation, disability, neurotype, personal appearance, body size, race, ethnicity, age, religion, or nationality - Public or private harassment - Sharing private content, such as emails sent privately or non-publicly, or direct message history, without the sender's consent - Continuing to initiate interaction (such as photography, recording, messaging, or conversation) with someone after being asked to stop - Sustained disruption of talks, events, or communications, such as heckling of a speaker - Publishing (or threatening to post) other people's personally identifying information ("doxing"), such as physical or electronic addresses, without explicit permission - Other unethical or unprofessional conduct - Advocating for, or encouraging, any of the above behaviors The conda Organization prioritizes marginalized people’s safety over privileged people’s comfort. The conda CoC Committee reserves the right not to act on complaints including, but not limited to: * ‘Reverse’ -isms, including ‘reverse racism,’ ‘reverse sexism,’ and ‘cisphobia’. * Reasonable communication of boundaries, such as “leave me alone,” “go away,” or “I’m not discussing this with you.” * Communicating in a ‘tone’ you don’t find congenial. 
* Criticizing racist, sexist, cissexist, or otherwise oppressive behavior or assumptions. ## Behavior Outside of conda Organization Spaces The CoC Committee does not influence behavior and membership in spaces outside the conda Organization. However, if you are being harassed by a member of the conda community outside our spaces, you may still report it to the CoC Committee. We will take all good-faith reports of harassment by conda community members seriously. This includes harassment outside our spaces and harassment that took place at any point in time. The CoC Committee reserves the right to exclude people from conda Organization spaces based on their past behavior, including behavior outside conda Organization spaces and behavior towards people who are not in the conda community. # Confidentiality and Public Statements to the Community The CoC Committee will keep the identity of the reporter confidential. Whenever possible, CoC cases will be reported to the community. The level of detail in reports will vary from case to case. Reports will describe at least the type of infraction that was reported, and the Committee's decision and any action taken. In most cases, the report will not include personally identifiable information. # Live Events > **If you feel your safety is in jeopardy or the situation is an emergency, we urge you to contact local law enforcement before making a report to the event's Code of Conduct committee members, [representatives](#coc-representatives), or other staff.** (In the U.S., call 911.) Live events present particular challenges: **Code of conduct reports, and consequences that stem from them, merit a thoughtful and deliberative process. Decisions and consequences matter for the reporter, the reported, and for the community at large. 
However, many reports, especially at live events, require rapid action to quickly address the behavior being reported.** To better support situations where immediate action may be required, these guidelines are used *during* live events: * All conda Organization events will have specific, named Code of Conduct contacts for the events. * The names and contact mechanisms for the Code of Conduct representatives will be clearly and frequently communicated to event participants. ## CoC Representatives Every conda Organization associated event will have named CoC Committee members or *CoC representatives* that are the first point of contact for that event. Who these people are will be clearly and frequently communicated to event participants. CoC approved representatives are used when there are no committee members participating in the event. ## Live Events: Reporting and Actions At conda Organization events, Code of Conduct committee members or representatives will attempt to gather and write down [information](#what-to-include-in-a-report) from anyone making a verbal report at a live event. Recording the details in writing is exceedingly important in order for us to effectively respond to reports. If event staff write down a report taken verbally, then the person making the report will be asked to review the written report for accuracy. For reports made during live events, or in any situation where urgent action is needed: * Any two (or more) event organizers, event staff, CoC Committee members or CoC representatives can decide if immediate action is to be taken and what that action is. In exceptionally dangerous situations, this decision can be made by a single person. * These rapid decisions can be reconsidered during the event as more information becomes available. * The scope of any rapid decision is limited to the current event / situation. 
* The report, any related information, and any decisions and consequences will be reported to the full Code of Conduct Committee as soon as possible. The full Code of Conduct Committee will then consider the report using the full timeline and processes defined below. The Committee may decide to apply consequences in other spaces beyond the space where the behavior was reported. Potential *immediate* consequences for violating the conda Organization Code of Conduct at a live event include, but are not limited to: - Warning the person to cease their behavior and that any further reports will result in sanctions - Requiring that the person avoid any interaction with, and physical proximity to, the person they are harassing for the remainder of the event - Ending a talk that violates the policy early - Not publishing the video or slides of a talk that violated the policy - Not allowing a speaker who violated the policy to give (further) talks at the event now or in the future - Immediately ending any event volunteer responsibilities and privileges the reported person holds - Expelling the person from the event without a refund - Requiring that the person immediately leave the event and not return - Any other response that the CoC members, representatives, or event staff deem necessary and appropriate to the situation # Reporting Guidelines If you believe someone is violating the code of conduct, please report this in a timely manner. Code of conduct violations reduce the value of the community for everyone. The conda Code of Conduct (CoC) Committee and the conda Organization take reports of misconduct very seriously and are committed to preserving and maintaining the welcoming nature of our community. > [!NOTE] > You are also encouraged to reach out to the conda Code of Conduct (CoC) Committee if you want clarification on something, if you notice some borderline behavior, or just have a concern. Send us a note at [conduct@conda.org](mailto:conduct@conda.org). 
All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The conda CoC Committee commits to maintaining confidentiality with regard to the reporter of an incident. For possibly unintentional breaches of the code of conduct, you may want to respond to the person and point out this code of conduct (either in public or in private, whatever is most appropriate). If you would prefer not to do that, please report the issue to the conda CoC Committee directly. Take care of each other. Alert someone if you notice a dangerous situation, someone in distress, or violations of this code of conduct, even if they seem inconsequential. ## How to Submit a Report The CoC Committee is committed to promptly addressing any reported issues. If you have experienced or witnessed behavior that violates the conda Organization Code of Conduct, please let us know. You can report an incident * via the **[Incident Reporting Form](https://form.jotform.com/221527028480048)** * via email: [conduct@conda.org](mailto:conduct@conda.org) * contact [an individual committee member](#committee-membership) or [CoC event representative](#coc-representatives) to report an incident in confidence. Reports submitted via the form or committee email address are sent to the [full conda Code of Conduct Committee](#committee-membership). ## What to Include in a Report Our ability to address any code of conduct breaches in a timely and effective manner is impacted by the amount of information you can provide, so we ask you to include as much of the following information as you can: - **Your contact info** (so we can get in touch with you if we need to follow up). This will be kept confidential. You can also file a report [anonymously](#anonymous-reporting). - The **approximate time and location of the incident** (please be as specific as possible). - **Identifying information** (e.g.
name, nickname, screen name, physical description) of the individual whose behavior is being reported. - **Description of the behavior** (if reporting harassing language, please be specific about the words used), **your account of what happened**, and any available **supporting records** (e.g. email, GitHub issue, screenshots, etc.). - **Description of the circumstances/context** surrounding the incident. - Let us know **if the incident is ongoing**, and/or if this is part of an ongoing pattern of behavior. - Names and contact info, if possible, of **anyone else who witnessed** or was involved in this incident. (Did anyone else observe the incident?) - **Any other relevant information** you believe we should have. ## Anonymous Reporting The reporting form supports anonymous incident reporting. Anonymous reporting works best when the behavior happened in a public space and was witnessed by many. If an incident is reported anonymously and was not witnessed by others, then the committee may be limited in what actions it can take and what it can report to the larger community. Nevertheless, the CoC Committee is still interested in receiving these reports. They are helpful when determining what we need to address as a community, and when looking for evidence of repeated behavior. ## Conflicts of Interest Committee members are expected to recuse themselves if they have a conflict of interest, and are required to recuse themselves if they are the accused or the target of the reported behavior. In addition, the CoC Committee can [vote](#voting-and-decision-making) to remove a committee member from a case, if the committee feels that the member has a conflict of interest. This [vote](#voting-and-decision-making) requires a simple majority. If you are concerned about making a report that will be read by all committee members, you are strongly encouraged to contact [individual committee members](#committee-membership) directly. 
# Enforcement: What Happens After a Report is Filed? ## Acknowledgment and Responding to Immediate Needs CoC Committee members and/or event staff will attempt to ensure your safety and help with any immediate needs. The CoC Committee will make every effort to **acknowledge receipt within 24 hours** (and we'll aim for much more quickly than that). ## Reviewing the Report The CoC Committee will make all efforts to **review the incident within three days** and determine: - Whether this is an ongoing situation, or if there is a threat to anyone's physical safety - What happened - Whether this event constitutes a code of conduct violation - Who the bad actor was, if any ## Contacting the Person Reported After the CoC Committee has had time to review and discuss the report, someone will attempt to contact the person who is the subject of the report to inform them of what has been reported about them. We will then ask that person for their account of what happened. ## Response and Potential Consequences Once the CoC Committee has completed our investigation of the report, we will make a decision as to how to respond. The person making a report will not normally be consulted as to the proposed resolution of the issue, except insofar as we need to understand how to help them feel safe. Potential consequences for violating the conda Organization code of conduct include: - Nothing (if we determine that no violation occurred) - Private feedback or reprimand from the CoC Committee to the individual(s) involved - Warning the person to cease their behavior and that any further reports will result in sanctions - A public announcement that an incident occurred - Mediation (only if both reporter and reportee agree) - An imposed vacation (e.g. asking someone to "take a week off" from a mailing list) - A permanent or temporary ban from some or all the conda Organization spaces (mailing lists, GitHub repos, in-person events, etc.) 
- Assistance to the complainant with a report to other bodies, for example, institutional offices or appropriate law enforcement agencies - Removing a person from the conda Organization membership or other formal affiliation - Publishing an account of the harassment and calling for the resignation of the alleged harasser from their responsibilities (may be called for if the person is an event leader, or refuses to stand aside from the conflict of interest, or similar) - Any other response that the CoC Committee deems necessary and appropriate to the situation No one espousing views or values contrary to the standards of our code of conduct will be permitted to hold any position representing the conda Organization, including volunteer positions. The CoC Committee has the right and responsibility to remove, edit, or reject comments, commits, code, website edits, issues, and other contributions that are not aligned with this code of conduct. We aim to **respond within one week** to the original reporter with either a resolution or an explanation of why the situation is not yet resolved. We will contact the person who is the subject of the report to let them know what actions will be taken as a result of the report, if any. Our policy is to make sure that everyone aware of the initial incident is also made aware that official action has been taken, while still respecting the privacy of individuals. In addition, we will also usually [notify the community](#confidentiality-and-public-statements-to-the-community) that an incident has been reported, what type of incident it was, and what the response was, again respecting the privacy of individuals. ## Appealing a Decision To appeal a decision of the CoC Committee, contact the [Committee Co-Chairs](#committee-membership), with your appeal. Please include as much detail as possible about why you are appealing the decision. 
The Co-Chairs will review the appeal, possibly consulting with the full Committee, and then issue a decision. # Timeline Summary: | Time | Event | Details | | ---- | ---- | ---- | | Within 24 Hours | Acknowledge | The CoC Committee will make every effort to **acknowledge receipt of a report within 24 hours**. | | Within 3 Days | Review | The CoC Committee aims to **review the incident within three days**. | | Within 1 Week | Resolve | We will **respond within one week** to the original reporter with either a resolution or an explanation of why the situation is not yet resolved. | # Voting and Decision Making Committee votes and decisions require both a minimum quorum size for the vote to be counted, and then a minimum percentage of cast affirmative votes to pass. Except where otherwise noted, votes require a quorum and a simple majority to pass: * Minimum Quorum: * More than 50% of eligible committee members must vote. * Eligible members do not include those excluded because of [conflicts of interest](#conflicts-of-interest). * Affirmative vote threshold: * More than 50% of the votes cast need to be affirmative to take action. # Committee Membership You can reach the entire CoC Committee by emailing [conduct@conda.org](mailto:conduct@conda.org).
| Name | Employer / Funding | Steering Council Member | Current Term Ends | | ---- | ---- | ---- | --- | | [Eric Dill](https://github.com/ericdill) | [Anaconda](https://anaconda.com/) | | 2026-07-01 | | [Dasha Gurova](https://github.com/dashagurova) | [Anaconda](https://anaconda.com/) | | 2026-07-01 | | [Bianca Henderson](https://github.com/beeankha) | [Red Hat](https://redhat.com/) | | 2026-07-01 | | [Katherine Kinnaman](https://github.com/kathatherine) | [Anaconda](https://anaconda.com/) | | 2026-07-01 | | [Mahe Iram Khan](https://github.com/ForgottenProgramme) | [Anaconda](https://anaconda.com/) | | 2025-07-01 | | [Ken Odegard](https://github.com/kenodegard) | [Anaconda](https://anaconda.com/) | | 2025-07-01 | | [Crystal Soja](https://github.com/csoja), Co-Chair | [Anaconda](https://anaconda.com/) | | 2025-07-01 | | [Jaime Rodríguez-Guerra](https://github.com/jaimergp), Co-Chair | [Quansight](https://quansight.com/) | yes | 2025-07-01 | # Terms and New Members * Committee members are appointed for two-year terms. Committee members can choose to renew their memberships. * Committee members can resign before their term ends. * Committee members can also be removed by a [simple majority vote](#voting-and-decision-making) from their fellow committee members. * New committee members are added by a simple majority vote as well. # Eligibility Anyone from the community who is interested and able to do CoC Committee work is eligible to be nominated for the committee. New committee members can be nominated by any community member, including nominating themselves. ## Shared Funding Unlike the Steering Council, we are not limiting the number of Committee members who share a common source of funding. However, if a report involves someone who shares funding with CoC Committee members, then the remainder of the committee may vote to exclude some or all Committee members with that same funding, even if that excludes a majority of the CoC Committee.
This should be done only if the separately funded Committee members feel that the common funding is interfering with decision making. Note: This requires tracking the funding sources of CoC Committee members. ## Overlap with Steering Council Membership Committee members can also be on the conda Organization Steering Council. However, Steering Council members have to make up less than 50% of the Code of Conduct Committee. # Updating this Code of Conduct The conda Organization's Code of Conduct can be updated by a [simple majority vote](#voting-and-decision-making) of the CoC Committee. # License This code of conduct is based on the [NumFOCUS code of conduct template](https://github.com/numfocus/numfocus/blob/8759e21481552f213489e3718979ccecf68e9ead/manual/numfocus-coc.md) as it existed on 2022/03/08 (which is the 2019/11/20 version). Several added sections are based on the [Galaxy Community Code of Conduct](https://galaxyproject.org/community/coc/). The NumFOCUS code of conduct template was itself adapted from numerous sources, including the [*Geek Feminism wiki, created by the Ada Initiative and other volunteers, which is under a Creative Commons Zero license*](http://geekfeminism.wikia.com/wiki/Conference_anti-harassment/Policy), the [*Contributor Covenant version 1.2.0*](http://contributor-covenant.org/version/1/2/0/), the [*Bokeh Code of Conduct*](https://github.com/bokeh/bokeh/blob/master/CODE_OF_CONDUCT.md), the [*SciPy Code of Conduct*](https://github.com/jupyter/governance/blob/master/conduct/enforcement.md), the [*Carpentries Code of Conduct*](https://docs.carpentries.org/topic_folders/policies/code-of-conduct.html#enforcement-manual), and the [*NeurIPS Code of Conduct*](https://neurips.cc/public/CodeOfConduct). 
**The conda Organization Code of Conduct is licensed under the [Creative Commons Attribution 3.0 Unported License](https://creativecommons.org/licenses/by/3.0/).** conda-conda-package-streaming-dce4b94/HOW_WE_USE_GITHUB.md000066400000000000000000000550051503052622600230760ustar00rootroot00000000000000 [conda-org]: https://github.com/conda [sub-team]: https://github.com/conda-incubator/governance#sub-teams [project-planning]: https://github.com/orgs/conda/projects/2/views/11 [project-sorting]: https://github.com/orgs/conda/projects/2/views/11 [project-support]: https://github.com/orgs/conda/projects/2/views/12 [project-backlog]: https://github.com/orgs/conda/projects/2/views/13 [project-in-progress]: https://github.com/orgs/conda/projects/2/views/14 [docs-toc]: https://github.blog/changelog/2021-04-13-table-of-contents-support-in-markdown-files/ [docs-actions]: https://docs.github.com/en/actions [docs-saved-reply]: https://docs.github.com/en/get-started/writing-on-github/working-with-saved-replies/creating-a-saved-reply [docs-commit-signing]: https://docs.github.com/en/authentication/managing-commit-signature-verification/signing-commits [infrastructure]: https://github.com/conda/infrastructure [workflow-sync]: https://github.com/conda/infrastructure/blob/main/.github/workflows/sync.yml [workflow-update]: https://github.com/conda/conda-package-streaming/blob/main/.github/workflows/update.yml [labels-global]: https://github.com/conda/infrastructure/blob/main/.github/global.yml [workflow-cla]: https://github.com/conda/conda-package-streaming/blob/main/.github/workflows/cla.yml [workflow-issues]: https://github.com/conda/conda-package-streaming/blob/main/.github/workflows/issues.yml [workflow-labels]: https://github.com/conda/conda-package-streaming/blob/main/.github/workflows/labels.yml [workflow-lock]: https://github.com/conda/conda-package-streaming/blob/main/.github/workflows/lock.yml [workflow-project]: 
https://github.com/conda/conda-package-streaming/blob/main/.github/workflows/project.yml [workflow-stale]: https://github.com/conda/conda-package-streaming/blob/main/.github/workflows/stale.yml [labels-local]: https://github.com/conda/conda-package-streaming/blob/main/.github/labels.yml [labels-page]: https://github.com/conda/conda-package-streaming/labels # How We Use GitHub This document seeks to outline how we as a community use GitHub Issues to track bugs and feature requests while still catering to development practices & project management (_e.g._, release cycles, feature planning, priority sorting, etc.). **Topics:** - [What is "Issue Sorting"?](#what-is-issue-sorting) - [Issue Sorting Procedures](#issue-sorting-procedures) - [Commit Signing](#commit-signing) - [Types of Issues](#types-of-issues) - [Standard Issue](#standard-issue) - [Epics](#epics) - [Spikes](#spikes) - [Working on Issues](#working-on-issues) > [!NOTE] > This document is written in the style of an FAQ. For easier navigation, use [GitHub's table of contents feature][docs-toc]. ## What is "Issue Sorting"? > [!NOTE] > "Issue sorting" is similar to "triaging", but we've chosen to use different terminology because "triaging" is a word related to very weighty topics (_e.g._, injuries and war) and we would like to be sensitive to those connotations. Additionally, we are taking a more "fuzzy" approach to sorting (_e.g._, severities may not be assigned, etc.). "Issue Sorting" refers to the process of assessing the priority of incoming issues.
Below is a high-level diagram of the flow of issues: ```mermaid flowchart LR subgraph flow_sorting [Issue Sorting] board_sorting{{Sorting}} board_support{{Support}} board_sorting<-->board_support end subgraph flow_refinement [Refinement] board_backlog{{Backlog}} board_backlog-- refine -->board_backlog end subgraph flow_progress [In Progress] board_progress{{In Progress}} end state_new(New Issues) state_closed(Closed) state_new-->board_sorting board_sorting-- investigated -->board_backlog board_sorting-- duplicates, off-topic -->state_closed board_support-- resolved, unresponsive -->state_closed board_backlog-- pending work -->board_progress board_backlog-- resolved, irrelevant -->state_closed board_progress-- resolved -->state_closed ``` ### Why sort issues? At the most basic "bird's eye view" level, sorted issues will fall into the category of four main priority levels: - Do now - Do sometime - Provide user support - Never do (_i.e._, close) At its core, sorting enables new issues to be placed into these four categories, which helps to ensure that they will be processed at a velocity similar to or exceeding the rate at which new issues are coming in. One of the benefits of actively sorting issues is to avoid engineer burnout and to make necessary work sustainable; this is done by eliminating a never-ending backlog that has not been reviewed by any maintainers. There will always be broad-scope design and architecture implementations that the maintainers will be interested in pursuing; by actively organizing issues, the sorting engineers will be able to more easily track and tackle both specific and big-picture goals. ### Who does the sorting? Sorting engineers are a conda governance [sub-team][sub-team]; they are a group of community members who are responsible for making decisions regarding closing issues and setting feature work priorities, among other sorting-related tasks. ### How do items show up for sorting? 
New issues that are opened in any of the repositories in the [conda GitHub organization][conda-org] will show up in the "Sorting" tab of the [Planning project][project-planning]. There are two [GitHub Actions][docs-actions] workflows utilized for this purpose; [`.github/workflows/issues.yml`][workflow-issues] and [`.github/workflows/project.yml`][workflow-project]. The GitHub workflows in the [`conda/infrastructure`][infrastructure] repository are viewed as canonical; the [`.github/workflows/sync.yml` workflow][workflow-sync] pushes any modifications to other repositories from there and individual repositories can pull additional files using the [`.github/workflows/update.yml`][workflow-update] workflow. ### What is done about the issues in the "Sorting" tab? Issues in the ["Sorting" tab of the project board][project-sorting] are considered ready for the following procedures: - Mitigation via short-term workarounds and fixes - Redirection to the correct project - Determining if support can be provided for errors and questions - Closing out of any duplicate/off-topic issues The sorting engineers on rotation are not seeking to _resolve_ issues that arise. Instead, the goal is to understand the issue and to determine whether it is legitimate, and then to collect as much relevant information as possible so that the maintainers can make an informed decision about the appropriate resolution schedule. Issues will remain in the ["Sorting" tab][project-sorting] as long as the issue is in an investigatory phase (_e.g._, querying the user for more details, asking the user to attempt other workarounds, other debugging efforts, etc.) and are likely to remain in this state the longest, but should still be progressing over the course of 1-2 weeks. For more information on the sorting process, see [Issue Sorting Procedures](#issue-sorting-procedures). ### When do items move out of the "Sorting" tab? 
Items move out of the ["Sorting" tab][project-sorting] once the investigatory phase described in [What is done about the issues in the "Sorting" tab?](#what-is-done-about-the-issues-in-the-sorting-tab) has concluded and the sorting engineer has enough information to make a decision about the appropriate resolution schedule for the issue. The additional tabs in the project board that the issues can be moved to include the following: - **"Support"** - Any issue in the ["Support" tab of the Planning board][project-support] is a request for support and is not a feature request or a bug report. Add the https://github.com/conda/conda-package-streaming/labels/type%3A%3Asupport label to move an issue to this tab. - **"Backlog"** - The issue has revealed a bug or feature request. We have collected enough details to understand the problem/request and to reproduce it on our own. These issues have been moved into the [Backlog tab of the Planning board][project-backlog] at the end of the sorting rotation during Refinement. Add the https://github.com/conda/conda-package-streaming/labels/backlog label to move an issue to this tab. - **"Closed"** - The issue was closed due to being a duplicate, being redirected to a different project, was a user error, a question that has been resolved, etc. ### Where do work issues go after being sorted? Once issues are deemed ready to be worked on, they will be moved to the ["Backlog" tab of the Planning board][project-backlog]. Once actively in progress, the issues will be moved to the ["In Progress" tab of the Planning board][project-in-progress] and then closed out once the work is complete. ### What is the purpose of having a "Backlog"? Issues are "backlogged" when they have been sorted but not yet earmarked for an upcoming release. ### What automation procedures are currently in place? 
Global automation procedures synced out from the [`conda/infrastructure`][infrastructure] repo include: - [Marking of issues and pull requests as stale][workflow-stale], resulting in: - issues marked as https://github.com/conda/conda-package-streaming/labels/type%3A%3Asupport being labeled stale after 21 days of inactivity and being closed after 7 further days of inactivity (that is, closed after 30 inactive days total) - all other inactive issues (not labeled as https://github.com/conda/conda-package-streaming/labels/type%3A%3Asupport) being labeled stale after 365 days of inactivity and being closed after 30 further days of inactivity (that is, closed after an approximate total of 1 year and 1 month of inactivity) - all inactive pull requests being labeled stale after 365 days of inactivity and being closed after 30 further days of inactivity (that is, closed after an approximate total of 1 year and 1 month of inactivity) - [Locking of closed issues and pull requests with no further activity][workflow-lock] after 365 days - [Adding new issues and pull requests to the respective project boards][workflow-project] - [Indicating an issue is ready for the sorting engineer's attention][workflow-issues] by toggling https://github.com/conda/conda-package-streaming/labels/pending%3A%3Afeedback with https://github.com/conda/conda-package-streaming/labels/pending%3A%3Asupport after a contributor leaves a comment - [Verifying that contributors have signed the CLA][workflow-cla] before allowing pull requests to be merged; if the contributor hasn't signed the CLA previously, merging is blocked until a manual review can be done - [Syncing out templates, labels, workflows, and documentation][workflow-sync] from [`conda/infrastructure`][infrastructure] to the other repositories ## Issue Sorting Procedures ### How are issues sorted? Issues in the ["Sorting" tab of the Planning board][project-sorting] are reviewed by issue sorting engineers, who take rotational sorting shifts.
In the process of sorting issues, engineers label the issues and move them to the other tabs of the project board for further action. Issues that require input from multiple members of the sorting team will be brought up during refinement meetings in order to understand how those particular issues fit into the short- and long-term roadmap. These meetings enable the sorting engineers to get together to collectively prioritize issues, earmark feature requests for specific future releases (versus a more open-ended backlog), tag issues as ideal for first-time contributors, as well as whether or not to close/reject specific feature requests. ### How does labeling work? Labeling is a very important means for sorting engineers to keep track of the current state of an issue with regards to the asynchronous nature of communicating with users. Utilizing the proper labels helps to identify the severity of the issue as well as to quickly understand the current state of a discussion. Each label has an associated description that clarifies how the label should be used. Hover on the label to see its description. Label colors are used to distinguish labels by category. Generally speaking, labels with the same category are considered mutually exclusive, but in some cases labels sharing the same category can occur concurrently, as they indicate qualifiers as opposed to types. For example, we may have the following types, https://github.com/conda/conda-package-streaming/labels/type%3A%3Abug, https://github.com/conda/conda-package-streaming/labels/type%3A%3Afeature, and https://github.com/conda/conda-package-streaming/labels/type%3A%3Adocumentation, where for any one issue there would be _at most_ **one** of these to be defined (_i.e._ an issue should not be a bug _and_ a feature request at the same time). 
Alternatively, with issues involving specific operating systems (_i.e._, https://github.com/conda/conda-package-streaming/labels/os%3A%3Alinux, https://github.com/conda/conda-package-streaming/labels/os%3A%3Amacos, and https://github.com/conda/conda-package-streaming/labels/os%3A%3Awindows), an issue could be labeled with one or more, depending on the system(s) the issue occurs on. Please note that there are also automation policies in place that are affected by labeling. For example, if an issue is labeled as https://github.com/conda/conda-package-streaming/labels/type%3A%3Asupport, that issue will be marked https://github.com/conda/conda-package-streaming/labels/stale after 21 days of inactivity and auto-closed after seven more days without activity (30 inactive days total), which is earlier than issues without this label. See [What automation procedures are currently in place?](#what-automation-procedures-are-currently-in-place) for more details. ### What labels are required for each issue? At minimum, both `type` and `source` labels should be specified on each issue before moving it from the "Sorting" tab to the "Backlog" tab. All issues that are bugs should also be tagged with a `severity` label. The `type` labels are exclusive of each other: each sorted issue should have exactly one `type` label. These labels give high-level information on the issue's classification (_e.g._, bug, feature, tech debt, etc.) The `source` labels are exclusive of each other: each sorted issue should have exactly one `source` label. These labels give information on the sub-group to which the issue's author belongs (_e.g._, a partner, a frequent contributor, the wider community, etc.). Through these labels, maintainers gain insight into how well we're meeting the needs of various groups. 
The `severity` labels are exclusive of each other and, while required for the https://github.com/conda/conda-package-streaming/labels/type%3A%3Abug label, they can also be applied to other types to indicate demand or need. These labels help us to prioritize our work. Severity is not the only factor for work prioritization, but it is an important consideration. Please review the descriptions of the `type`, `source`, and `severity` labels on the [labels page][labels-page] prior to use. ### How are new labels defined? Labels are defined using a scoped syntax with an optional high-level category (_e.g._, `source`, `tag`, `type`, etc.) and a specific topic, much like the following: - `[topic]` - `[category::topic]` - `[category::topic-phrase]` This syntax helps with issue sorting enforcement, as it helps to ensure that sorted issues are, at minimum, categorized by type and source. There are a number of labels that have been defined for the different repositories. In order to create a streamlined sorting process, label terminologies are standardized using similar (if not the same) labels. ### How are new labels added? New **global** labels (_i.e._, labels that apply equally to all repositories within the conda GitHub organization) are added to [`conda/infrastructure`][infrastructure]'s [`.github/global.yml` file][labels-global]; new **local** labels (_i.e._, labels specific to particular repositories) are added to each repository's [`.github/labels.yml` file][labels-local]. All new labels should follow the labeling syntax described in ["How are new labels defined?"](#how-are-new-labels-defined). Global labels are combined with any local labels and these aggregated labels are used by the [`.github/workflows/labels.yml` workflow][workflow-labels] to synchronize the labels available for the repository. ### Are there any templates to use as responses for commonly-seen issues?
Some of the same types of issues appear regularly (_e.g._, issues that are duplicates of others, issues that should be filed in the Anaconda issue tracker, errors that are due to a user's specific setup/environment, etc.). Below are some boilerplate responses for the most commonly-seen issues to be sorted:
Duplicate Issue

This is a duplicate of [link to primary issue]; please feel free to continue the discussion there.
> **Warning**
> Apply the https://github.com/conda/conda-package-streaming/labels/duplicate label to the issue being closed and https://github.com/conda/conda-package-streaming/labels/duplicate%3A%3Aprimary to the original issue.
Anaconda Products
Thank you for filing this issue! Unfortunately, this is off-topic for this repo because it is related to an Anaconda product.
If you are encountering issues with Anaconda products or services, you have several options for receiving community
support:

- [Anaconda community forums](https://community.anaconda.cloud)
- [Anaconda issue tracker on GitHub](https://github.com/ContinuumIO/anaconda-issues/issues)
> **Warning**
> Apply the https://github.com/conda/conda-package-streaming/labels/off-topic label to these issues before closing them out.
General Off Topic
Unfortunately, this issue is outside the scope of support we offer via GitHub or is not directly related to this project.
Community support can be found elsewhere, though, and we encourage you to explore the following options:

- [Conda discourse forum](https://conda.discourse.group/)
- [Community chat channels](https://conda.org/community#chat)
- [Stack Overflow posts tagged "conda"](https://stackoverflow.com/questions/tagged/conda)
> **Warning**
> Apply the https://github.com/conda/conda-package-streaming/labels/off-topic label to these issues before closing them out.
In order to not have to manually type or copy/paste the above repeatedly, note that it's possible to add text for the most commonly-used responses via [GitHub's "Add Saved Reply" option][docs-saved-reply]. ## Commit Signing For all maintainers, we require commit signing and strongly recommend it for all others wishing to contribute. More information about how to set this up within GitHub can be found here: - [GitHub's signing commits docs][docs-commit-signing] ## Types of Issues ### Standard Issue TODO ### Epics TODO ### Spikes #### What is a spike? "Spike" is a term that is borrowed from extreme programming and agile development. Spikes are used when the **outcome of an issue is unknown or even optional**. For example, when first coming across a problem that has not been solved before, a project may choose to either research the problem or create a prototype in order to better understand it. Additionally, spikes represent work that **may or may not actually be completed or implemented**. Examples of this are prototypes created to explore possible solutions. Not all prototypes are implemented and the purpose of creating a prototype is often to explore the problem space more. For research-oriented tasks, the end result of this research may be that a feature request simply is not viable at the moment and would result in putting a stop to that work. Finally, spikes are usually **timeboxed**. However, given the open source/volunteer nature of our contributions, we do not enforce this for our contributors. When a timebox is set, this means that we are limiting how long we want someone to work on said spike. We do this to prevent contributors from falling into a rabbit hole they may never return from. Instead, we set a time limit to perform work on the spike and then have the assignee report back. If the tasks defined in the spike have not yet been completed, a decision is made on whether it makes sense to perform further work on the spike. #### When do I create a spike?
A spike should be created when we do not have enough information to move forward with solving a problem. That simply means that, whenever we are dealing with unknowns or processes the project team has never encountered before, it may be useful for us to create a spike. In day-to-day work, this kind of situation may appear when new bug reports or feature requests come in that deal with problems or technologies that the project team is unfamiliar with. All issues that the project team has sufficient knowledge of should instead proceed as regular issues. #### When do I not create a spike? Below are some common scenarios where creating a spike is not appropriate: - Writing a technical specification for a feature we know how to implement - Design work that would go into drafting how an API is going to look and function - Any work that must be completed or is not optional ## Working on Issues ### How do I assign myself to an issue I am actively reviewing? If you do **not** have permissions, please indicate that you are working on an issue by leaving a comment. Someone who has permissions will assign you to the issue. If two weeks have passed without a pull request or an additional comment requesting information, you may be removed from the issue and the issue reassigned. If you are assigned to an issue but will not be able to continue work on it, please comment to indicate that you will no longer be working on it and press `unassign me` next to your username in the `Assignees` section of the issue page (top right). If you **do** have permissions, please assign yourself to the issue by pressing `assign myself` under the `Assignees` section of the issue page (top right). conda-conda-package-streaming-dce4b94/LICENSE000066400000000000000000000051561503052622600207550ustar00rootroot00000000000000BSD 3-Clause License Copyright (c) 2022, Anaconda, Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
lazy_wheel.py: Copyright (c) 2008-present The pip developers (see https://github.com/pypa/pip/blob/main/AUTHORS.txt file) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. conda-conda-package-streaming-dce4b94/Makefile000066400000000000000000000011741503052622600214040ustar00rootroot00000000000000# Minimal makefile for Sphinx documentation # # You can set these variables from the command line, and also # from the environment for the first two. SPHINXOPTS ?= SPHINXBUILD ?= sphinx-build SOURCEDIR = docs BUILDDIR = build # Put it first so that "make" without argument is like "make help". help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) .PHONY: help Makefile # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
%: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) conda-conda-package-streaming-dce4b94/README.md000066400000000000000000000061351503052622600212250ustar00rootroot00000000000000# conda-package-streaming [![pre-commit.ci status](https://results.pre-commit.ci/badge/github/conda/conda-package-streaming/main.svg)](https://results.pre-commit.ci/latest/github/conda/conda-package-streaming/main) An efficient library to read from new and old format .conda and .tar.bz2 conda packages. Download conda metadata from packages without transferring entire file. Get metadata from local `.tar.bz2` packages without reading entire files. Uses enhanced pip `lazy_wheel` to fetch a file out of `.conda` with no more than 3 range requests, but usually 2. Uses `tar = tarfile.open(fileobj=...)` to stream remote `.tar.bz2`. Closes the HTTP request once desired files have been seen. # Quickstart The basic API yields (tarfile, member) tuples from conda files as tarfile is needed to extract member. Note the `.tar.bz2` format yields all members, not just `info/`, from `stream_conda_info` / `stream_conda_component`, while the `.conda` format yields members from the requested inner archive — allowing the caller to decide when to stop reading. 
From a url, ```python from conda_package_streaming.url import stream_conda_info # url = (ends with .conda or .tar.bz2) for tar, member in stream_conda_info(url): if member.name == "info/index.json": index_json = json.load(tar.extractfile(member)) break ``` From s3, ```python client = boto3.client("s3") from conda_package_streaming.s3 import stream_conda_info # key = (ends with .conda or .tar.bz2) for tar, member in stream_conda_info(client, bucket, key): if member.name == "info/index.json": index_json = json.load(tar.extractfile(member)) break ``` From a filename, ```python from conda_package_streaming import package_streaming # filename = (ends with .conda or .tar.bz2) for tar, member in package_streaming.stream_conda_info(filename): if member.name == "info/index.json": index_json = json.load(tar.extractfile(member)) break ``` From a file-like object, ```python from contextlib import closing from conda_package_streaming.url import conda_reader_for_url from conda_package_streaming.package_streaming import stream_conda_component filename, conda = conda_reader_for_url(url) # file object must be seekable for `.conda` format, but merely readable for `.tar.bz2` with closing(conda): for tar, member in stream_conda_component(filename, conda, component="info"): if member.name == "info/index.json": index_json = json.load(tar.extractfile(member)) break ``` If you need the entire package, download it first and use the file-based APIs. The URL-based APIs are more efficient if you only need to access package metadata. # Package goals * Extract conda packages (both formats) * Easy to install from pypi or conda * Do the least amount of I/O possible (no temporary files, transfer partial packages) * Open files from the network / standard HTTP / s3 * Continue using conda-package-handling to create .conda packages # Generating documentation Uses markdown, furo theme. Requires newer mdit-py-plugins. 
`pip install conda-package-streaming[docs]` One time: `sphinx-apidoc -o docs .` conda-conda-package-streaming-dce4b94/conda.recipe/000077500000000000000000000000001503052622600222735ustar00rootroot00000000000000conda-conda-package-streaming-dce4b94/conda.recipe/meta.yaml000066400000000000000000000023301503052622600241030ustar00rootroot00000000000000{% set name = "conda-package-streaming" %} {% set version_match = load_file_regex( load_file="conda_package_streaming/__init__.py", regex_pattern='^__version__ = "(.+)"') %} {% set version = version_match[1] %} package: name: {{ name|lower }} version: {{ version }} source: git_url: ../ # url: https://github.com/conda/conda-package-streaming/archive/refs/tags/v{{ version }}.tar.gz # sha256: 60a064dcb6adf775362339ffc8728320d89115c0f3870f2bb43fd368240a3205 build: script: {{ PYTHON }} -m pip install --no-build-isolation . -vv number: 0 noarch: python requirements: host: - flit-core - python >=3.7 - pip run: - zstandard >=0.15 - python >=3.7 # allow optional 'requests' test: imports: - conda_package_streaming.url commands: - pip check requires: - pip - requests about: home: https://github.com/conda/conda-package-streaming summary: An efficient library to read from new and old format .conda and .tar.bz2 conda packages. 
license: BSD-3-Clause license_family: BSD license_file: LICENSE doc_url: https://conda.github.io/conda-package-streaming/ dev_url: https://github.com/conda/conda-package-streaming extra: recipe-maintainers: - dholth conda-conda-package-streaming-dce4b94/conda_package_streaming/000077500000000000000000000000001503052622600245515ustar00rootroot00000000000000conda-conda-package-streaming-dce4b94/conda_package_streaming/__init__.py000066400000000000000000000000271503052622600266610ustar00rootroot00000000000000__version__ = "0.12.0" conda-conda-package-streaming-dce4b94/conda_package_streaming/create.py000066400000000000000000000137761503052622600264040ustar00rootroot00000000000000""" Tools for creating ``.conda``-format archives. Uses ``tempfile.SpooledTemporaryFile`` to buffer ``pkg-*.tar`` and ``info-*.tar``, then compress directly into an open `ZipFile` at the end. `SpooledTemporaryFile` buffers the first 10MB of the package and its metadata in memory, but writes out to disk for larger packages. Uses more disk space than ``conda-package-handling`` (temporary uncompressed tarballs of the package contents) but accepts streams instead of just files-on-the-filesystem. NOTE(review): ``conda_builder`` creates ``SpooledTemporaryFile()`` without a ``max_size`` argument; confirm that the 10MB in-memory threshold described here is actually in effect. """ from __future__ import annotations import json import shutil import tarfile import tempfile import zipfile from collections.abc import Iterator from contextlib import contextmanager from pathlib import Path from typing import Callable import zstandard # increase to reduce speed and increase compression (levels above 19 use much # more memory) ZSTD_COMPRESS_LEVEL = 19 # increase to reduce compression and increase speed ZSTD_COMPRESS_THREADS = 1 CONDA_PACKAGE_FORMAT_VERSION = 2 # Account for growth from "2 GB of /dev/urandom" to not exceed ZIP64_LIMIT after # compression CONDA_ZIP64_LIMIT = zipfile.ZIP64_LIMIT - (1 << 18) - 1 def anonymize(tarinfo: tarfile.TarInfo): """ Pass to ``tarfile.add(..., filter=anonymize)`` to anonymize uid/gid (``uname``/``gname`` are blanked as well). Does not anonymize mtime or any other field. Returns the same ``tarinfo`` object after modifying it in place.
""" tarinfo.uid = tarinfo.gid = 0 tarinfo.uname = tarinfo.gname = "" return tarinfo class CondaTarFile(tarfile.TarFile): """ Subclass of :external+python:py:class:`tarfile.TarFile` that adds members to a second ``info`` tar if they match ``is_info(name)``. Create this with ``conda_builder(...)`` which sets up the component archives, then wraps them into a ``.conda`` on exit. Only useful for creating, not extracting ``.conda``. """ info_tar: tarfile.TarFile is_info: Callable def __init__( self, *args, info_tar: tarfile.TarFile, is_info=lambda name: name.startswith("info/"), **kwargs, ): """ Initialize the underlying ``tarfile.TarFile`` with ``*args`` and ``**kwargs``, and remember ``info_tar``, the archive that receives every member for which ``is_info(name)`` is true (by default, names starting with ``info/``). """ super().__init__(*args, **kwargs) self.info_tar = info_tar self.is_info = is_info def addfile(self, tarinfo, fileobj=None): """ Add the TarInfo object ``tarinfo`` to the archive. If ``fileobj`` is given, it should be a binary file, and tarinfo.size bytes are read from it and added to the archive. You can create TarInfo objects directly, or by using ``gettarinfo()``. If ``self.is_info(tarinfo.name)`` returns ``True``, add ``tarinfo`` to ``self.info_tar`` instead. """ if self.is_info(tarinfo.name): return self.info_tar.addfile(tarinfo, fileobj=fileobj) else: return super().addfile(tarinfo, fileobj) @contextmanager def conda_builder( stem, path, *, compressor: Callable[ [], zstandard.ZstdCompressor ] = lambda: zstandard.ZstdCompressor( level=ZSTD_COMPRESS_LEVEL, threads=ZSTD_COMPRESS_THREADS ), is_info: Callable[[str], bool] = lambda filename: filename.startswith("info/"), encoding="utf-8", ) -> Iterator[CondaTarFile]: """ Produce a ``TarFile`` subclass used to build a ``.conda`` package. The subclass delegates ``addfile()`` to the ``info-`` component when ``is_info`` returns True. When the context manager exits, ``{path}/{stem}.conda`` is written with the component tar archives. Args: stem: output filename without extension path: destination path for transmuted .conda package compressor: A function that creates instances of ``zstandard.ZstdCompressor()``. is_info: A function that takes a member name and returns True if the member belongs in the ``info-`` component; by default, names starting with ``info/``.
encoding: passed to TarFile constructor. Keep default "utf-8" for valid .conda. Yields: ``CondaTarFile`` """ output_path = Path(path, f"{stem}.conda") with ( tempfile.SpooledTemporaryFile() as info_file, tempfile.SpooledTemporaryFile() as pkg_file, ): with ( tarfile.TarFile(fileobj=info_file, mode="w", encoding=encoding) as info_tar, CondaTarFile( fileobj=pkg_file, mode="w", info_tar=info_tar, is_info=is_info, encoding=encoding, ) as pkg_tar, ): # If we wanted to compress these at a low setting to save temporary # space, we could insert a file object that counts bytes written in # front of a zstd (level between 1..3) compressor. yield pkg_tar info_tar.close() pkg_tar.close() info_size = info_file.tell() pkg_size = pkg_file.tell() info_file.seek(0) pkg_file.seek(0) with zipfile.ZipFile( output_path, "x", # x to not append to existing compresslevel=zipfile.ZIP_STORED, ) as conda_file: # Use a maximum of one Zstd compressor, stream_writer at a time to save # memory. data_compress = compressor() pkg_metadata = {"conda_pkg_format_version": CONDA_PACKAGE_FORMAT_VERSION} conda_file.writestr("metadata.json", json.dumps(pkg_metadata)) with ( conda_file.open( f"pkg-{stem}.tar.zst", "w", force_zip64=(pkg_size > CONDA_ZIP64_LIMIT), ) as pkg_file_zip, data_compress.stream_writer( pkg_file_zip, size=pkg_size, closefd=False ) as pkg_stream, ): shutil.copyfileobj(pkg_file._file, pkg_stream) with ( conda_file.open( f"info-{stem}.tar.zst", "w", force_zip64=(info_size > CONDA_ZIP64_LIMIT), ) as info_file_zip, data_compress.stream_writer( info_file_zip, size=info_size, closefd=False, ) as info_stream, ): shutil.copyfileobj(info_file._file, info_stream) conda-conda-package-streaming-dce4b94/conda_package_streaming/exceptions.py000066400000000000000000000012451503052622600273060ustar00rootroot00000000000000import tarfile class SafetyError(tarfile.TarError): """ Tar error whose message is prefixed with "Error with archive."; ``extract_stream`` raises it for members whose path falls outside the destination directory. Extra positional and keyword arguments are accepted but not used. """ def __init__(self, msg, *args, **kw): msg = f"Error with archive. 
{msg}" super().__init__(msg) class CaseInsensitiveFileSystemError(OSError): """ ``OSError`` with a fixed explanatory message; ``extract_stream`` raises it when extraction fails with ``ELOOP``. """ def __init__(self): message = """\ Cannot extract package to a case-insensitive file system. Your install destination does not differentiate between upper and lowercase characters, and this breaks things. Try installing to a location that is case-sensitive. Windows drives are usually the culprit here - can you install to a native Unix drive, or turn on case sensitivity for this (Windows) location? """ super().__init__(message) conda-conda-package-streaming-dce4b94/conda_package_streaming/extract.py000066400000000000000000000055671503052622600266120ustar00rootroot00000000000000""" Extract package to directory, with checks against tar members extracting outside the target directory. """ from __future__ import annotations import os import tarfile from collections.abc import Generator from errno import ELOOP from pathlib import Path from . import exceptions, package_streaming __all__ = ["extract_stream", "extract"] HAS_TAR_FILTER = hasattr(tarfile, "tar_filter") def extract_stream( stream: Generator[tuple[tarfile.TarFile, tarfile.TarInfo]], dest_dir: Path | str, tar_filter: str | None = None, ): """ Pipe ``stream_conda_component`` output here to extract every member into dest_dir. For ``.conda`` will need to be called twice (for info and pkg components); for ``.tar.bz2`` every member is extracted. ``tar_filter`` is passed to ``extractall(filter=...)`` only when ``tarfile`` provides ``tar_filter`` (``HAS_TAR_FILTER``); if it is not given, ``"fully_trusted"`` is used. Raises ``SafetyError`` if a member name resolves outside ``dest_dir`` and ``CaseInsensitiveFileSystemError`` if extraction fails with ``ELOOP``. ``stream`` is closed explicitly with ``stream.close()``.
""" dest_dir = os.path.realpath(dest_dir) def is_within_dest_dir(name): abs_target = os.path.realpath(os.path.join(dest_dir, name)) prefix = os.path.commonpath((dest_dir, abs_target)) return prefix == dest_dir for tar_file, _ in stream: # careful not to seek backwards def checked_members(): # from conda_package_handling for member in tar_file: if not is_within_dest_dir(member.name): raise exceptions.SafetyError(f"contains unsafe path: {member.name}") yield member try: # Drop checked_members() when HAS_TAR_FILTER once we are 100% # certain the stdlib filter maintains same permissions as # checked_members(). tar_args = {"path": dest_dir, "members": checked_members()} if HAS_TAR_FILTER: tar_args["filter"] = tar_filter or "fully_trusted" tar_file.extractall(**tar_args) except OSError as e: if e.errno == ELOOP: raise exceptions.CaseInsensitiveFileSystemError() from e raise # next iteration of for loop raises GeneratorExit in stream stream.close() def extract(filename, dest_dir=None, fileobj=None): """ Extract all components of conda package to dest_dir. fileobj: must be seekable if provided, if a ``.conda`` package. dest_dir: required despite its ``None`` default (checked with ``assert``). If ``fileobj`` is not given, ``filename`` is opened here and closed again afterwards; a caller-supplied ``fileobj`` is left open.
""" assert dest_dir, "dest_dir is required" if str(filename).endswith(".conda"): components = [ package_streaming.CondaComponent.pkg, package_streaming.CondaComponent.info, ] else: # .tar.bz2 doesn't filter by component components = [package_streaming.CondaComponent.pkg] closefd = False if not fileobj: fileobj = open(filename, "rb") closefd = True try: for component in components: stream = package_streaming.stream_conda_component( filename, fileobj, component=component ) extract_stream(stream, dest_dir) finally: if closefd: fileobj.close() conda-conda-package-streaming-dce4b94/conda_package_streaming/lazy_wheel.py000066400000000000000000000276141503052622600273000ustar00rootroot00000000000000"""Lazy ZIP over HTTP""" from __future__ import annotations import logging import zipfile from bisect import bisect_left, bisect_right from collections.abc import Iterator from contextlib import contextmanager from tempfile import NamedTemporaryFile from typing import Any from zipfile import BadZipfile, ZipFile from requests import HTTPError, Session from requests.models import CONTENT_CHUNK_SIZE, Response # from pip 22.0.3 with fixes & remove imports from pip log = logging.getLogger(__name__) # If-Match (etag) to detect file changed during fetch would also be nice HEADERS = {"Accept-Encoding": "identity"} class HTTPRangeRequestUnsupported(Exception): """Raised by ``LazyZipOverHTTP`` when its initial range request is not answered with status 206.""" pass class LazyZipOverHTTP: """File-like object mapped to a ZIP file over HTTP. This uses HTTP range requests to lazily fetch the file's content, which is supposed to be fed to ZipFile. If such requests are not supported by the server, raise HTTPRangeRequestUnsupported during initialization. """ def __init__( self, url: str, session: Session, chunk_size: int = CONTENT_CHUNK_SIZE, fall_back_to_full_download: bool = False, ) -> None: """ Initialize a LazyZipOverHTTP object. :param url: The URL of the ZIP file to fetch. :param session: The session to use for web requests. :param chunk_size: The chunk size to use for downloading.
:param fall_back_to_full_download: If true, we fall back to downloading the whole file if the server incorrectly responds with 416 (Range Not Satisfiable) to an HTTP range request. """ # if CONTENT_CHUNK_SIZE is bigger than the file: # In [8]: response.headers["Content-Range"] # Out[8]: 'bytes 0-3133374/3133375' self._request_count = 0 self._session, self._url, self._chunk_size = session, url, chunk_size self._fall_back_to_full_download: bool = fall_back_to_full_download self._has_streaming_support: bool = True # If the server returns 416 (Range Not Satisfiable) and the fallback is # enabled, we request the whole file and set this to False. Some package # servers incorrectly respond with 416 (Range Not Satisfiable) if the # file is smaller than the range requested. # initial range request for the end of the file # if the server does not support range requests, this sets # _has_streaming_support to False tail = self._stream_response(start="", end=CONTENT_CHUNK_SIZE) # e.g. {'accept-ranges': 'bytes', 'content-length': '10240', # 'content-range': 'bytes 12824-23063/23064', 'last-modified': 'Sat, 16 # Apr 2022 13:03:02 GMT', 'date': 'Thu, 21 Apr 2022 11:34:04 GMT'} if self._has_streaming_support and tail.status_code != 206: raise HTTPRangeRequestUnsupported("range request is not supported") if self._has_streaming_support: # lowercase content-range to support s3 self._length = int(tail.headers["content-range"].partition("/")[-1]) else: # the file is already downloaded self._length = len(tail.content) self._file = NamedTemporaryFile() self.truncate(self._length) # length is also in Content-Length and Content-Range header with self._stay(): if self._has_streaming_support: content_length = int(tail.headers["content-length"]) if hasattr(tail, "content"): assert content_length == len(tail.content) else: content_length = len(tail.content) self.seek(self._length - content_length) for chunk in tail.iter_content(self._chunk_size): self._file.write(chunk) self._left: list[int] = 
def read(self, size: int = -1) -> bytes:
    """Read up to ``size`` bytes from the current position and return them.

    The byte span about to be read is passed to ``_download`` first, then
    the read itself is served from the backing file.

    :param size: maximum number of bytes to return. If negative (the
        default), everything from the current position to the end of the
        file is returned.
    :return: the bytes read; fewer than ``size`` (possibly none) when the
        end of the file is reached.
    """
    start = self.tell()
    # A negative size means "read to EOF"; otherwise clamp the span to the
    # known file length so we never request bytes past the end.
    stop = self._length if size < 0 else min(start + size, self._length)
    # Nothing to fetch for an empty span (size == 0, or already at EOF).
    # _download() takes an inclusive [start, end] interval, so an empty span
    # must not be passed to it.
    if start < stop:
        self._download(start, stop - 1)
    return self._file.read(size)
""" return self._file.truncate(size) def writable(self) -> bool: """Return False.""" return False def __enter__(self) -> LazyZipOverHTTP: self._file.__enter__() return self def __exit__(self, *exc: Any) -> bool | None: return self._file.__exit__(*exc) @contextmanager def _stay(self) -> Iterator[None]: """Return a context manager keeping the position. At the end of the block, seek back to original position. """ pos = self.tell() try: yield finally: self.seek(pos) def _check_zip(self) -> None: """Check and download until the file is a valid ZIP.""" end = self._length - 1 for start in reversed(range(0, end, self._chunk_size)): self._download(start, end) with self._stay(): try: # For read-only ZIP files, ZipFile only needs # methods read, seek, seekable and tell. ZipFile(self) # type: ignore except BadZipfile: pass else: break def _stream_response( self, start: int | str, end: int, base_headers: dict[str, str] = HEADERS ) -> Response: """ Return HTTP response to a range request from start to end. If the does not support range requests, the whole file is requested. :param start: if "", request `end` bytes from end of file. """ headers = base_headers.copy() headers["Range"] = f"bytes={start}-{end}" log.debug("%s", headers["Range"]) # TODO: Get range requests to be correctly cached headers["Cache-Control"] = "no-cache" self._request_count += 1 if self._has_streaming_support: response = self._session.get(self._url, headers=headers, stream=True) if response.status_code == 416 and self._fall_back_to_full_download: # Range Not Satisfiable -> enable fallback self._has_streaming_support = False elif response.status_code == 416: # fallback disabled, print helpful error message raise HTTPError( "The server returned 416 (Range Not Satisfiable). " "This can occur on a buggy server if the file is smaller " "than the range requested. 
Set the fall_back_to_full_download flag " "to work around this issue.", response=response, ) else: response.raise_for_status() return response # no streaming support, try to get the whole file del headers["Range"] response = self._session.get(self._url, headers=headers, stream=True) response.raise_for_status() return response def _merge( self, start: int, end: int, left: int, right: int ) -> Iterator[tuple[int, int]]: """Return an iterator of intervals to be fetched. Args: start (int): Start of needed interval end (int): End of needed interval left (int): Index of first overlapping downloaded data right (int): Index after last overlapping downloaded data """ lslice, rslice = self._left[left:right], self._right[left:right] i = start = min([start] + lslice[:1]) end = max([end] + rslice[-1:]) for j, k in zip(lslice, rslice): if j > i: yield i, j - 1 i = k + 1 if i <= end: yield i, end self._left[left:right], self._right[left:right] = [start], [end] def _download(self, start: int, end: int) -> None: """ Download bytes from start to end inclusively. If the server does not support streaming for this file, this does nothing as the entire file is already downloaded. """ # these assertions should hold, but read() violates them # assert 0 <= start <= end # assert end < self._length with self._stay(): left = bisect_left(self._right, start) right = bisect_right(self._left, end) for start, end in self._merge(start, end, left, right): response = self._stream_response(start, end) self.seek(start) for chunk in response.iter_content(self._chunk_size): self._file.write(chunk) class LazyConda(LazyZipOverHTTP): def prefetch(self, conda_file_id): """ Conda fork specific. Prefetch the `.info` range from the remote archive. Reduces number of Range requests to 2 or 3 (1 or 2 for the directory, 1 for the file). 
class TarfileNoSameOwner(tarfile.TarFile):
    """``tarfile.TarFile`` that skips ownership changes and masks file modes."""

    def __init__(self, *args, umask: int | None = None, **kwargs):
        """Create the archive object; other arguments go to ``tarfile.TarFile``.

        Args:
            umask: permission bits cleared by ``chmod``. When ``None``, the
                module-level ``UMASK`` value is used instead.
        """
        super().__init__(*args, **kwargs)
        if umask is None:
            umask = UMASK
        self.umask = umask

    def chown(self, tarinfo, targetpath, numeric_owner):
        """
        Do nothing: ownership recorded in the archive is deliberately not
        applied. (tarfile.TarFile only lets us toggle all of chown, chmod
        and mtime together, hence the override.)
        """
        return None

    def chmod(self, tarinfo, targetpath):
        """
        Apply ``tarinfo.mode`` to ``targetpath`` with the bits in
        ``self.umask`` cleared. A member whose mode is ``None`` is left
        untouched.

        Raises:
            tarfile.ExtractError: if ``os.chmod`` fails with ``OSError``.
        """
        mode = tarinfo.mode
        if mode is None:
            return
        try:
            os.chmod(targetpath, mode & ~self.umask)
        except OSError as err:
            raise tarfile.ExtractError("could not change mode") from err
def stream_conda_component(
    filename,
    fileobj=None,
    component: CondaComponent | str = CondaComponent.pkg,
    *,
    encoding="utf-8",
) -> Generator[tuple[tarfile.TarFile, tarfile.TarInfo]]:
    """
    Stream ``(tar, member)`` tuples out of a conda package.

    For a ``.conda`` file, the single ZIP member whose name starts with
    ``{component}-{stem}`` is zstd-decompressed and read as a tar stream.
    For a ``.tar.bz2`` file, the whole archive is streamed and ``component``
    is not consulted.

    Only use the member currently yielded; going back to earlier members
    would force tar seeks and scans. To extract to disk, it is possible to
    call ``tar.extractall(path)`` on the first result and ignore the rest
    of the generator.

    Args:
        filename: package name or path; its extension selects the format.
        fileobj: optional open file-like object to read instead of opening
            ``filename``.
        component: "info" or "pkg" (only meaningful for ``.conda``).
        encoding: passed through to ``tar_generator``.

    Raises:
        RuntimeError: ``.conda`` requested but ``zstandard`` is unavailable.
        LookupError: the requested component is not in the ``.conda`` file.
        ValueError: the extension is neither ``.conda`` nor ``.tar.bz2``.
    """
    name = str(filename)
    if name.endswith(".conda"):
        if zstandard is None:
            raise RuntimeError("Cannot unpack `.conda` without zstandard")
        archive = zipfile.ZipFile(fileobj or filename)
        # Strip only the final extension: "pkg-1.0-0.conda" -> "pkg-1.0-0".
        stem = os.path.basename(filename).rpartition(".")[0]
        component_name = f"{component}-{stem}"
        matches = []
        for info in archive.infolist():
            if info.filename.startswith(component_name):
                matches.append(info)
        if not matches:
            raise LookupError(f"didn't find {component_name} component in {filename}")
        assert len(matches) == 1
        decompressor = zstandard.ZstdDecompressor()
        reader = decompressor.stream_reader(archive.open(matches[0]))
    elif name.endswith(".tar.bz2"):
        reader = bz2.open(fileobj or filename, mode="rb")
    else:
        raise ValueError("unsupported file extension")
    # The reader is closed by tar_generator only when we opened the source
    # ourselves, i.e. when the caller did not pass a fileobj.
    return tar_generator(reader, closefd=fileobj is None, encoding=encoding)
class ResponseFacade:
    """
    Wrap an s3 ``get_object`` result so it offers ``raw``, ``status_code``,
    ``headers``, ``raise_for_status()`` and ``iter_content()``.
    """

    def __init__(self, response: GetObjectOutputTypeDef):
        self.response = response
        # The "Body" entry is the readable stream of object data.
        self.raw: Any = response["Body"]

    def raise_for_status(self):
        """No-op; s3 get_object raises automatically?"""
        return None

    @property
    def status_code(self):
        """HTTP status code taken from the response metadata."""
        metadata = self.response["ResponseMetadata"]
        return metadata["HTTPStatusCode"]

    @property
    def headers(self):
        """HTTP headers taken from the response metadata."""
        # a case-sensitive dict; keys may be lowercased always?
        metadata = self.response["ResponseMetadata"]
        return metadata["HTTPHeaders"]

    def iter_content(self, n: int):
        """Return an iterator of chunks of up to ``n`` bytes read from ``raw``.

        Iteration stops at the first read that returns ``b""``.
        """

        def chunks():
            while True:
                block = self.raw.read(n)
                if block == b"":
                    return
                yield block

        return chunks()
def transmute(
    package,
    path,
    *,
    compressor: Callable[
        [], zstandard.ZstdCompressor
    ] = lambda: zstandard.ZstdCompressor(
        level=ZSTD_COMPRESS_LEVEL, threads=ZSTD_COMPRESS_THREADS
    ),
    is_info: Callable[[str], bool] = lambda filename: filename.startswith("info/"),
) -> Path:
    """
    Convert a .tar.bz2 conda package into a .conda package inside ``path``.

    The output name is the input's base name with ``.tar.bz2`` removed; the
    actual conversion is delegated to ``transmute_stream``.

    :param package: path to .tar.bz2 conda package
    :param path: existing destination directory for the .conda package
    :param compressor: A function that creates instances of
        ``zstandard.ZstdCompressor()`` to override defaults.
    :param is_info: A function that returns True if a file belongs in the
        ``info`` component of a `.conda` package. By default, every name
        starting with ``info/`` does.

    :return: Path to transmuted package.
    """
    suffix = ".tar.bz2"
    assert package.endswith(suffix), "can only convert .tar.bz2 to .conda"
    assert os.path.isdir(path)
    basename = os.path.basename(package)
    stem = basename[: -len(suffix)]
    return transmute_stream(
        stem,
        path,
        compressor=compressor,
        is_info=is_info,
        package_stream=stream_conda_component(package),
    )
def transmute_tar_bz2(
    package: str,
    path,
) -> Path:
    """
    Write ``package`` out again as a .tar.bz2 archive inside ``path``.

    Accepts either format as input, so it can also recompress an existing
    .tar.bz2 package. The output file is opened in exclusive-create mode.

    Args:
        package: path to .conda or .tar.bz2 package.
        path: existing destination directory for the transmuted package.

    Returns:
        Path to transmuted package.
    """
    assert package.endswith((".tar.bz2", ".conda")), "Unknown extension"
    assert os.path.isdir(path)

    if package.endswith(".conda"):
        incoming_format = ".conda"
        # .tar.bz2 MUST place info/ first.
        components = [CondaComponent.info, CondaComponent.pkg]
    else:
        incoming_format = ".tar.bz2"
        # .tar.bz2 doesn't filter by component
        components = [CondaComponent.pkg]

    file_id = os.path.basename(package)[: -len(incoming_format)]
    output_path = Path(path, f"{file_id}.tar.bz2")

    with open(package, "rb") as fileobj, tarfile.open(output_path, "x:bz2") as pkg_tar:
        for component in components:
            members = stream_conda_component(package, fileobj, component=component)
            for tar, member in members:
                # Only regular files carry a payload; other member types
                # are added as header-only entries.
                payload = tar.extractfile(member) if member.isfile() else None
                pkg_tar.addfile(member, payload)

    return output_path
def conda_reader_for_url(
    url, session: requests.Session = session, fall_back_to_full_download: bool = False
):
    """
    Return (name, file_like) suitable for package_streaming APIs

    For ``.conda`` the file-like object is a ``LazyConda`` whose info
    component has been prefetched; for ``.tar.bz2`` it is the raw body of a
    streaming GET response.

    :param url: URL to conda package
    :param session: The session to use for web requests.
    :param fall_back_to_full_download: If true, fall back to downloading the
        entire package instead of streaming it in the case of a specific
        issue where the server incorrectly responds with 416 (Range Not
        Satisfiable). See LazyZipOverHTTP for more details.
    :raises ValueError: if the URL path ends in neither ``.conda`` nor
        ``.tar.bz2``.
    """
    parsed_url = urllib.parse.urlparse(url)
    # Last path segment; the query string and fragment are not part of .path.
    *_, filename = parsed_url.path.rsplit("/", 1)
    if filename.endswith(".conda"):
        file_id = filename[: -len(".conda")]
        conda = LazyConda(
            url, session, fall_back_to_full_download=fall_back_to_full_download
        )
        try:
            conda.prefetch(file_id)
        except BaseException:
            # The caller never receives the object when prefetch fails, so
            # close it here instead of leaking its backing temporary file.
            conda.close()
            raise
    elif filename.endswith(".tar.bz2"):
        response = session.get(url, stream=True, headers={"Connection": "close"})
        conda = response.raw
    else:
        # Exceptions do not interpolate %-style arguments the way logging
        # calls do, so the message has to be formatted explicitly.
        raise ValueError(f"Unsupported extension {url}")
    return filename, conda
# -- General configuration --------------------------------------------------- # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ "myst_parser", "sphinx.ext.autodoc", "sphinx.ext.intersphinx", "sphinx.ext.napoleon", ] intersphinx_mapping = {"python": ("https://docs.python.org/3", None)} # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. exclude_patterns = [] # -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # html_theme = "furo" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ["_static"] conda-conda-package-streaming-dce4b94/docs/create.md000066400000000000000000000002241503052622600224540ustar00rootroot00000000000000create module ============= ```{eval-rst} .. automodule:: conda_package_streaming.create :members: :undoc-members: :show-inheritance: ``` conda-conda-package-streaming-dce4b94/docs/extract.rst000066400000000000000000000002601503052622600230730ustar00rootroot00000000000000extract module ============== Extract conda packages to the filesystem. .. automodule:: conda_package_streaming.extract :members: :undoc-members: :show-inheritance: conda-conda-package-streaming-dce4b94/docs/index.md000066400000000000000000000025421503052622600223250ustar00rootroot00000000000000% conda-package-streaming documentation master file, created by % sphinx-quickstart on Fri Jun 17 14:43:38 2022. 
% You can adapt this file completely to your liking, but it should at least % contain the root `toctree` directive. # Welcome to conda-package-streaming's documentation! `conda-package-streaming` strives to be the most efficient way to read from new and old format `.conda` and `.tar.bz2` [conda packages](https://docs.conda.io/projects/conda/en/latest/user-guide/concepts/packages.html). `conda-package-streaming` can read from conda packages without ever writing to disk, unlike [conda-package-handling](https://github.com/conda/conda-package-handling) `< 2.0.0`'s temporary directories. [conda-package-handling](https://github.com/conda/conda-package-handling) `>= 2.0.0` uses `conda-package-streaming`. This library can also read a package from a URL or a stream without transferring the entire archive. `conda-package-streaming` uses the standard library [`zipfile`](https://docs.python.org/3/library/zipfile.html) and [`tarfile`](https://docs.python.org/3/library/tarfile.html), and [`zstandard`](https://github.com/indygreg/python-zstandard) to handle zstd-compressed streams. ```{include} ../README.md ``` ```{toctree} :caption: 'Contents:' :maxdepth: 2 modules changelog ``` # Indices and tables - {ref}`genindex` - {ref}`modindex` - {ref}`search` conda-conda-package-streaming-dce4b94/docs/lazy_wheel.md000066400000000000000000000014361503052622600233620ustar00rootroot00000000000000# lazy_wheel module `lazy_wheel` is derived from pip's wheel download code. It is really a seekable file-like based on HTTP range requests, backed by a sparse temporary file. Each `read()` issues one or more HTTP range requests to the URL depending on how much of the file has already been downloaded, while read()\`s from already-fetched portions of the file are fulfilled by the backing file. ZIP archives have a directory at the end of the file giving the offset to each compressed member. 
We fetch the directory, and then the portion of the file containing the member or members of interest, for a maximum of 3 requests to retrieve any individual file in the archive. ```{eval-rst} .. automodule:: conda_package_streaming.lazy_wheel :members: :undoc-members: :show-inheritance: ``` conda-conda-package-streaming-dce4b94/docs/modules.md000066400000000000000000000052531503052622600226700ustar00rootroot00000000000000# conda_package_streaming Fetch metadata from remote .conda or .tar.bz2 package. Try to fetch less than the whole file if possible. Zip (.conda) is made for this: ``` $ python -m conda_package_streaming.url https://repo.anaconda.com/pkgs/main/osx-64/sqlalchemy-1.4.32-py310hca72f7f_0.conda /tmp/ DEBUG:conda_package_streaming.lazy_wheel:bytes=-10240 DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): repo.anaconda.com:443 DEBUG:urllib3.connectionpool:https://repo.anaconda.com:443 "GET /pkgs/main/osx-64/sqlalchemy-1.4.32-py310hca72f7f_0.conda HTTP/1.1" 206 10240 DEBUG:conda_package_streaming.lazy_wheel:bytes=43-38176 DEBUG:urllib3.connectionpool:https://repo.anaconda.com:443 "GET /pkgs/main/osx-64/sqlalchemy-1.4.32-py310hca72f7f_0.conda HTTP/1.1" 206 38134 DEBUG:conda_package_streaming.lazy_wheel:prefetch 43-38177 $ curl -s -I https://repo.anaconda.com/pkgs/main/osx-64/sqlalchemy-1.4.32-py310hca72f7f_0.conda | grep content-length content-length: 1984926 ``` We fetch 10240 + 38134 = 48374 bytes in two requests of this 1984926-byte package. ## Older format bzip2 has a very large block size, and we don't know if the info/ directory is finished before reading the entire archive. However if we only want certain files from info/ we can stop after we've seen them all. 
Fetching repodata and calling response.raw.tell() after each tar member: ``` $ python -m metayaml.fetch_metadata \ https://repo.anaconda.com/pkgs/main/linux-64/absl-py-0.1.10-py27_0.tar.bz2 128948 info/hash_input.json 128948 info/index.json 128948 info/files 128948 info/about.json 128948 info/paths.json 128948 info/LICENSE.txt 128948 info/git 128948 lib/python2.7/site-packages/absl_py-0.1.10-py2.7.egg-info/dependency_links.txt 128948 lib/python2.7/site-packages/absl_py-0.1.10-py2.7.egg-info/requires.txt 128948 lib/python2.7/site-packages/absl_py-0.1.10-py2.7.egg-info/top_level.txt 128948 lib/python2.7/site-packages/absl/__init__.pyc 128948 lib/python2.7/site-packages/absl/testing/__init__.pyc 128948 info/test/run_test.py ... ``` A larger package: ``` # Fetch https://repo.anaconda.com/pkgs/main/linux-64/airflow-1.10.10-py36_0.tar.bz2 # Printing bytes transferred after each archive member, 286720 info/hash_input.json 286720 info/has_prefix 286720 info/index.json 286720 info/about.json 286720 info/git 286720 info/files 286720 info/paths.json 286720 lib/python3.6/site-packages/airflow/alembic.ini 286720 lib/python3.6/site-packages/airflow/www/templates/airflow/variables/README.md ... 286720 info/test/test_time_dependencies.json ... 634880 lib/python3.6/site-packages/airflow/www/static/ace.js 634880 bin/airflow ``` ```{toctree} :maxdepth: 4 url s3 lazy_wheel package_streaming extract transmute create ``` conda-conda-package-streaming-dce4b94/docs/package_streaming.rst000066400000000000000000000002451503052622600250700ustar00rootroot00000000000000package\_streaming module ========================= .. 
automodule:: conda_package_streaming.package_streaming :members: :undoc-members: :show-inheritance: conda-conda-package-streaming-dce4b94/docs/requirements.txt000066400000000000000000000000571503052622600241570ustar00rootroot00000000000000furo mdit-py-plugins>=0.3.0 myst-parser sphinx conda-conda-package-streaming-dce4b94/docs/s3.md000066400000000000000000000005261503052622600215430ustar00rootroot00000000000000s3 module ====================== conda_package_streaming.s3 adapts a s3 client, bucket name, and key to `LazyConda`, or, for `.tar.bz2`, a normal streaming `GET` request that can be closed before transferring the whole file. ```{eval-rst} .. automodule:: conda_package_streaming.s3 :members: :undoc-members: :show-inheritance: ``` conda-conda-package-streaming-dce4b94/docs/transmute.rst000066400000000000000000000002131503052622600234410ustar00rootroot00000000000000transmute module ================ .. automodule:: conda_package_streaming.transmute :members: :undoc-members: :show-inheritance: conda-conda-package-streaming-dce4b94/docs/url.rst000066400000000000000000000002051503052622600222220ustar00rootroot00000000000000url module ====================== .. automodule:: conda_package_streaming.url :members: :undoc-members: :show-inheritance: conda-conda-package-streaming-dce4b94/make.bat000066400000000000000000000014441503052622600213510ustar00rootroot00000000000000@ECHO OFF pushd %~dp0 REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=sphinx-build ) set SOURCEDIR=source set BUILDDIR=build %SPHINXBUILD% >NUL 2>NUL if errorlevel 9009 ( echo. echo.The 'sphinx-build' command was not found. Make sure you have Sphinx echo.installed, then set the SPHINXBUILD environment variable to point echo.to the full path of the 'sphinx-build' executable. Alternatively you echo.may add the Sphinx directory to PATH. echo. 
echo.If you don't have Sphinx installed, grab it from echo.https://www.sphinx-doc.org/ exit /b 1 ) if "%1" == "" goto help %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% goto end :help %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% :end popd conda-conda-package-streaming-dce4b94/noxfile.py000066400000000000000000000003411503052622600217550ustar00rootroot00000000000000import nox @nox.session(venv_backend="conda") @nox.parametrize( "python", [(python) for python in ("3.7", "3.8", "3.9", "3.10")], ) def tests(session): session.install("-e", ".[test]") session.run("pytest") conda-conda-package-streaming-dce4b94/pyproject.toml000066400000000000000000000030551503052622600226600ustar00rootroot00000000000000[build-system] requires = ["flit_core >=3.2,<4"] build-backend = "flit_core.buildapi" [project] name = "conda_package_streaming" authors = [ { name = "Anaconda, Inc. & Contributors", email = "conda@continuum.io" }, ] description = "An efficient library to read from new and old format .conda and .tar.bz2 conda packages." 
license = { file = "LICENSE" } readme = "README.md" classifiers = [ "License :: OSI Approved :: BSD License", "Programming Language :: Python :: 3", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", ] dynamic = ["version"] requires-python = ">=3.9" dependencies = ["requests", "zstandard >=0.15"] [project.optional-dependencies] test = [ "pytest >=7", "pytest-cov", "pytest-mock", "boto3", "boto3-stubs[essential]", "bottle", "conda", "conda-package-handling >=2", "responses" ] docs = ["furo", "sphinx", "myst-parser", "mdit-py-plugins>=0.3.0"] [project.urls] Home = "https://github.com/conda/conda-package-streaming" Documentation = "https://conda.github.io/conda-package-streaming/" # pyproject.toml [tool.pytest.ini_options] minversion = "7.0" addopts = "--cov=conda_package_streaming" testpaths = ["tests"] [tool.coverage.report] exclude_lines = ["pragma: no cover", "if TYPE_CHECKING:"] [tool.coverage.run] source = ["conda_package_streaming/", "tests/"] [tool.ruff.lint] select = [ "F", # pyflakes "E", # pycodestyle (errors) "W", # pycodestyle (warnings) "I", # isort "UP" # pyupgrade ] [tool.ruff.format] line-ending = "auto" conda-conda-package-streaming-dce4b94/tests/000077500000000000000000000000001503052622600211035ustar00rootroot00000000000000conda-conda-package-streaming-dce4b94/tests/conftest.py000066400000000000000000000052101503052622600233000ustar00rootroot00000000000000import json import logging import os.path import shutil import subprocess from pathlib import Path import pytest import server from conda_package_streaming.transmute import transmute_tar_bz2 log = logging.getLogger(__name__) LIMIT_TEST_PACKAGES = 16 def find_packages_dirs() -> Path: """ Ask conda for package directories. 
""" conda_info = json.loads( subprocess.run( [os.environ["CONDA_EXE"], "info", "--json"], stdout=subprocess.PIPE, check=True, ).stdout ) # XXX can run individual environment's conda (base conda is more likely to # have useful cached packages) pkgs_dirs = conda_info["pkgs_dirs"] + [os.path.expanduser("~/miniconda3/pkgs")] log.debug("search %s", pkgs_dirs) first_pkg_dir = next(path for path in pkgs_dirs if os.path.exists(path)) return Path(first_pkg_dir) @pytest.fixture(scope="session") def pkgs_dir(tmp_path_factory): """ Dedicated test package directory. """ return tmp_path_factory.mktemp("pkgs") @pytest.fixture(scope="session") def package_server(pkgs_dir, conda_paths): thread = server.get_server_thread(pkgs_dir) thread.start() return thread @pytest.fixture(scope="session") def conda_paths(pkgs_dir: Path): found_packages = find_packages_dirs() conda_paths = [] for path in found_packages.iterdir(): if path.name.endswith((".tar.bz2", ".conda")): conda_paths.append(path) return add_tar_bz2s(conda_paths, pkgs_dir) def add_tar_bz2s(paths: list[Path], pkgs_dir: Path): """ If there aren't enough .tar.bz2's available, create some from available .conda's. Return paths. 
""" conda_paths: list[Path] = [] tarbz2_paths: list[Path] = [] output_paths: list[Path] = [] assert isinstance(pkgs_dir, Path) for path in paths: if path.name.endswith(".tar.bz2"): tarbz2_paths.append(path) elif path.name.endswith(".conda"): conda_paths.append(path) tarbz2_path: Path = pkgs_dir medium_conda_paths = [] for path in conda_paths: if 1 << 20 < path.stat().st_size < 1 << 22: medium_conda_paths.append(path) medium_conda_paths = medium_conda_paths[:LIMIT_TEST_PACKAGES] # this ignores existing .tar.bz2 for simplicity (.tar.bz2 is missing in CI) for conda in set(medium_conda_paths + conda_paths[:10]): shutil.copy(conda, tarbz2_path) transmute_tar_bz2(str(conda), tarbz2_path) output_paths.extend(tarbz2_path.glob("*.tar.bz2")) output_paths.extend(tarbz2_path.glob("*.conda")) return sorted(output_paths) # sort interleaves .tar.bz2 and .conda conda-conda-package-streaming-dce4b94/tests/requirements.txt000066400000000000000000000001461503052622600243700ustar00rootroot00000000000000boto3 boto3-stubs[essential] bottle conda pytest >=7 pytest-cov pytest-mock requests zstandard >=0.15 conda-conda-package-streaming-dce4b94/tests/server.py000066400000000000000000000035111503052622600227630ustar00rootroot00000000000000""" Test web server. """ import logging import threading import wsgiref.simple_server from pathlib import Path from typing import Any import bottle import conftest log = logging.getLogger(__name__) def get_app(pkgs_dir): """ Bottle conveniently supports Range requests. Server may block if browser etc. keeps connection open. 
""" app = bottle.Bottle() app.pkgs_dir = pkgs_dir def serve_file(filename): mimetype = "auto" # from https://repo.anaconda.com/ behavior: if filename.endswith(".tar.bz2"): mimetype = "application/x-tar" elif filename.endswith(".conda"): mimetype = "binary/octet-stream" return bottle.static_file(filename, root=pkgs_dir, mimetype=mimetype) app.route("/pkgs/", "GET", serve_file) return app def selftest(): """ Run server in a thread that will die when the application exits. """ t = get_server_thread(conftest.find_packages_dirs()) t.start() import time time.sleep(300) class ServerThread(threading.Thread): server: wsgiref.simple_server.WSGIServer app: Any def get_server_thread(pkgs_dir: Path): """ Return test server thread with additional .server, .app properties. Call .start() to serve in the background. """ app = get_app(pkgs_dir) server = wsgiref.simple_server.make_server("127.0.0.1", 0, app) log.info(f"serving {app.pkgs_dir} on {server.server_address}/pkgs") t = ServerThread(daemon=True, target=server.serve_forever) t.app = app t.server = server # server.application == app return t if __name__ == "__main__": import logging logging.basicConfig( level=logging.INFO, format="%(asctime)s %(message)s", datefmt="%Y-%m-%dT%H:%M:%S", ) log.setLevel(logging.DEBUG) selftest() conda-conda-package-streaming-dce4b94/tests/test_degraded.py000066400000000000000000000030511503052622600242520ustar00rootroot00000000000000""" Allow conda_package_streaming to work in .tar.bz2-only mode if zstandard is not available (please immediately install zstandard if this is the case). 
""" import importlib import sys import tarfile import zipfile from pathlib import Path import pytest def test_degraded(tmpdir): try: sys.modules["zstandard"] = None # type: ignore import conda_package_streaming.extract import conda_package_streaming.package_streaming importlib.reload(conda_package_streaming.package_streaming) testconda = Path(tmpdir, "testconda.conda") with zipfile.ZipFile(testconda, "w"): pass testtar = Path(tmpdir, "test.tar.bz2") with tarfile.open(testtar, "w:bz2") as tar: tar.addfile(tarfile.TarInfo(name="jim")) for ( tar, _, ) in conda_package_streaming.package_streaming.stream_conda_component(testtar): pass with pytest.raises(RuntimeError): for ( tar, _, ) in conda_package_streaming.package_streaming.stream_conda_component( testconda ): pass # pragma: no cover with pytest.raises(RuntimeError): conda_package_streaming.extract.extract(testconda, tmpdir) finally: sys.modules.pop("zstandard", None) import conda_package_streaming.package_streaming importlib.reload(conda_package_streaming.package_streaming) assert conda_package_streaming.package_streaming.zstandard conda-conda-package-streaming-dce4b94/tests/test_extract.py000066400000000000000000000166041503052622600241750ustar00rootroot00000000000000import io import os import stat import tarfile from errno import ELOOP from pathlib import Path import pytest from conda_package_streaming import exceptions, extract, package_streaming HAS_TAR_FILTER = hasattr(tarfile, "tar_filter") MAX_CONDAS = 8 def test_extract_stream(conda_paths, tmp_path): for i, package in enumerate(conda_paths): print(package) with open(package, "rb") as fileobj: stream = package_streaming.stream_conda_component( package, fileobj, component=package_streaming.CondaComponent.pkg ) dest_dir = tmp_path / package.name extract.extract_stream(stream, dest_dir) if i >= MAX_CONDAS: break def test_extract_all(conda_paths, tmp_path): for i, package in enumerate(conda_paths): print(package) dest_dir = tmp_path / package.name 
extract.extract(package, dest_dir=dest_dir) if i >= MAX_CONDAS: break def empty_tarfile(name, mode=0o644, tar_mode="w", create_subdir=False): """ Return BytesIO containing a tarfile with one empty file named :name """ tar = io.BytesIO() t = tarfile.open(mode=tar_mode, fileobj=tar) if create_subdir: tarinfo = tarfile.TarInfo(name=name) # Add execute bit for directory tarinfo.mode = mode | 0o111 tarinfo.type = tarfile.DIRTYPE t.addfile(tarinfo, io.BytesIO()) tarinfo = tarfile.TarInfo(name=str(Path(name, name))) tarinfo.mode = mode t.addfile(tarinfo, io.BytesIO()) else: tarinfo = tarfile.TarInfo(name=name) tarinfo.mode = mode t.addfile(tarinfo, io.BytesIO()) t.close() tar.seek(0) return tar def not_unicode_tarbz2( name=b"\x80\x81".decode("utf-8", errors="surrogateescape"), mode=0o644 ): """ Return BytesIO containing a tarfile with one empty file named :name """ return empty_tarfile(name=name, tar_mode="w:bz2") def test_oserror(tmp_path): """ Fail if tarfile raises OSError (formerly known as IOError) """ tar = empty_tarfile("empty-test") class TarELOOP(tarfile.TarFile): def extractall(self, path=None, members=None, filter=None): raise OSError(ELOOP, "case sensitivity") class TarOSError(tarfile.TarFile): def extractall(self, path=None, members=None, filter=None): raise OSError("not eloop") def stream(cls): yield (cls(fileobj=tar), tarfile.TarInfo()) with pytest.raises(exceptions.CaseInsensitiveFileSystemError): extract.extract_stream(stream(TarELOOP), tmp_path) with pytest.raises(OSError): extract.extract_stream(stream(TarOSError), tmp_path) def stream(fileobj): """ Like the tuples produced by part of conda-package-streaming. """ yield (package_streaming.TarfileNoSameOwner(fileobj=fileobj), tarfile.TarInfo()) def stream_stdlib(fileobj): """ Like the tuples produced by part of conda-package-streaming. 
""" yield (tarfile.TarFile(fileobj=fileobj), tarfile.TarInfo()) def test_slip(tmp_path): """ Fail if tarfile tries to put files outside its dest_dir (tmp_path) """ tar = empty_tarfile(name="../slip") with pytest.raises(exceptions.SafetyError): extract.extract_stream(stream(tar), tmp_path) # If we are using tarfile.filter, the leading / will be stripped instead. tar2 = empty_tarfile(name="/absolute") with pytest.raises(exceptions.SafetyError): extract.extract_stream(stream(tar2), tmp_path) def test_chown(conda_paths, tmp_path, mocker): for package in conda_paths[:2]: print(package) with open(package, "rb") as fileobj: stream = package_streaming.stream_conda_component( package, fileobj, component=package_streaming.CondaComponent.pkg ) for tar, member in stream: assert isinstance(tar, package_streaming.TarfileNoSameOwner), tar break @pytest.mark.parametrize( "tar_filter", (pytest.param(None, id="no tar filter"), pytest.param("data", id="data_filter")), ) def test_umask(tmp_path, mocker, tar_filter): """ Demonstrate that umask-respecting tar implementation works. Mock umask in case it is different on your system. 
""" if tar_filter is not None and not HAS_TAR_FILTER: pytest.skip("Requires tar_filter") try: MOCK_UMASK = 0o022 current_umask = os.umask(MOCK_UMASK) mocker.patch("conda_package_streaming.package_streaming.UMASK", new=MOCK_UMASK) assert ( package_streaming.TarfileNoSameOwner( fileobj=empty_tarfile("file.txt") ).umask == MOCK_UMASK ) # [ # ('S_IFREG', 32768), # ('UF_HIDDEN', 32768), # ('FILE_ATTRIBUTE_INTEGRITY_STREAM', 32768) # ] # Of the high bits 100755 highest bit 1 can mean just "is regular file" name = "naughty_umask" tar3 = empty_tarfile(name=name, mode=0o777, create_subdir=True) stat_check = stat.S_IRGRP stat_name = "S_IRGRP" root_path = tmp_path / "stdlib" root_path.mkdir() files_to_check = [root_path / name, root_path / name / name] extract.extract_stream(stream_stdlib(tar3), root_path, tar_filter=tar_filter) for file in files_to_check: mode = file.stat().st_mode # is the new .extractall(filter=) erasing "stat_name"? assert mode & stat_check, f"{file} has {stat_name}? %o != %o" % ( mode, mode & stat_check, ) # specifically forbid that stat bit MOCK_UMASK |= stat_check mocker.patch("conda_package_streaming.package_streaming.UMASK", new=MOCK_UMASK) os.umask(MOCK_UMASK) root_path = tmp_path / "cps" root_path.mkdir() files_to_check = [root_path / name, root_path / name / name] tar3.seek(0) extract.extract_stream(stream(tar3), root_path, tar_filter=tar_filter) for file in files_to_check: mode = file.stat().st_mode if mode & stat_check: assert not (mode & stat_check), ( f"{file}: No {stat_name} due to umask? %o != %o" % ( mode, mode & stat_check, ) ) finally: os.umask(current_umask) def test_encoding(): """ Some users do not have "utf-8" as the default sys.getfilesystemencoding() or sys.getdefaultencoding(). Instead of trying to change the system encoding, we prove that stream_conda_component honors the new passed-in encoding which is now "utf-8" by default. """ tar = not_unicode_tarbz2() # Use new default encoding of "utf-8" regardless of what the system says. 
stream = package_streaming.stream_conda_component( "package.tar.bz2", tar, component="pkg" ) with pytest.raises(UnicodeEncodeError): for t, member in stream: member.name.encode("utf-8") print(t, member) tar.seek(0) # Prove that we are passing encoding all the way down to the TarFile() used # for extraction. stream = package_streaming.stream_conda_component( "package.tar.bz2", tar, component="pkg", encoding="latin-1" ) for t, member in stream: member.name.encode("utf-8") print(t, member) conda-conda-package-streaming-dce4b94/tests/test_lazy_wheel.py000066400000000000000000000106501503052622600246610ustar00rootroot00000000000000import re import pytest import requests import responses from requests import HTTPError from requests.models import PreparedRequest from responses import matchers from conda_package_streaming.lazy_wheel import LazyZipOverHTTP HTTP_FULL_RANGE_PATTERN = re.compile(r"bytes=(\d+)-(\d+)$") HTTP_END_RANGE_PATTERN = re.compile(r"bytes=-(\d+)$") class TestLazyZipOverHTTP: @staticmethod def generate_zero_bytes(length_bytes: int) -> bytes: return bytes(length_bytes) @staticmethod def successful_http_stream_callback_wrapper(file: bytes): # https://datatracker.ietf.org/doc/html/rfc7233 def _callback(request: PreparedRequest): full_pattern_match = HTTP_FULL_RANGE_PATTERN.match(request.headers["Range"]) end_pattern_match = HTTP_END_RANGE_PATTERN.match(request.headers["Range"]) if full_pattern_match: start_range = int(full_pattern_match.group(1)) end_range = int(full_pattern_match.group(2)) assert start_range >= 0 assert end_range >= 0 assert start_range < end_range if start_range >= len(file): # Range Not Satisfiable return 416, {}, b"" # truncate to file length end_range = min(end_range, len(file) - 1) content_length = end_range - start_range + 1 # the bounds are inclusive else: assert end_pattern_match content_length_requested = int(end_pattern_match.group(1)) assert content_length_requested >= 0 content_length = min(content_length_requested, len(file)) 
start_range = len(file) - content_length end_range = len(file) - 1 headers = { "Content-Length": str(content_length), "Content-Range": f"bytes {start_range}-{end_range}/{len(file)}", } # this is not inlined because black and flake8 disagree on how to format it end_file_range = end_range + 1 return 206, headers, file[start_range:end_file_range] return _callback @pytest.mark.parametrize("fall_back_to_full_download", [True, False]) @responses.activate def test_init_stream_successful(self, fall_back_to_full_download: bool): responses.add_callback( responses.GET, "https://example.com/test.zip", callback=self.successful_http_stream_callback_wrapper( self.generate_zero_bytes(10000) ), content_type="application/zip", ) session = requests.Session() lazy_zip = LazyZipOverHTTP( "https://example.com/test.zip", session, fall_back_to_full_download=fall_back_to_full_download, ) lazy_zip.read() @pytest.mark.parametrize("fall_back_to_full_download", [True, False]) @responses.activate def test_init_stream_retry_without_range_with_fallback( self, fall_back_to_full_download: bool ): """ Some package servers respond with 416 (Range Not Satisfiable) when the file is smaller than the range requested. This violates RFC 7233, but we cope with it by retrying without Range, requesting the full file if fall_back_to_full_download is set. 
""" responses.add( responses.GET, url="https://example.com/test.zip", status=416, match=[matchers.header_matcher({"Range": re.compile(r".*")})], ) responses.add( responses.GET, url="https://example.com/test.zip", body=self.generate_zero_bytes(10000), content_type="application/zip", ) session = requests.Session() if fall_back_to_full_download: # this should work lazy_zip = LazyZipOverHTTP( "https://example.com/test.zip", session, fall_back_to_full_download=True ) lazy_zip.read() return # otherwise, the constructor should raise an exception with pytest.raises( HTTPError, match="Set the fall_back_to_full_download flag to work around this issue.", ): LazyZipOverHTTP( "https://example.com/test.zip", session, fall_back_to_full_download=False, ) conda-conda-package-streaming-dce4b94/tests/test_s3.py000066400000000000000000000022001503052622600230330ustar00rootroot00000000000000import boto3 import pytest from conda_package_streaming import s3 LIMIT = 16 @pytest.fixture def s3_client(package_server): host, port = package_server.server.server_address client = boto3.client( "s3", aws_access_key_id="test_id", aws_secret_access_key="test_key", endpoint_url=f"http://{host}:{port}", use_ssl=False, verify=False, ) return client def test_head_objects(s3_client, conda_paths): bucket = "pkgs" # independent of filesystem path for path in conda_paths[:LIMIT]: s3_client.head_object(Bucket=bucket, Key=path.name) def test_stream_s3(s3_client, conda_paths): with pytest.raises(ValueError): next(s3.stream_conda_info(s3_client, "pkgs", "notaconda.rar")) for path in conda_paths[:LIMIT]: members = s3.stream_conda_info(s3_client, "pkgs", path.name) print("stream s3", path.name) for tar, member in members: if member.name == "info/index.json": members.close() # faster than waiting for gc? 
break else: pytest.fail("info/index.json not found") conda-conda-package-streaming-dce4b94/tests/test_streaming.py000066400000000000000000000032771503052622600245160ustar00rootroot00000000000000import io import json import tarfile import pytest from conda_package_streaming import package_streaming def test_package_streaming(conda_paths): for path in conda_paths: if str(path).endswith(".conda"): with pytest.raises(LookupError): package_streaming.stream_conda_component(path, component="notfound") with pytest.raises(ValueError): package_streaming.stream_conda_component("notapackage.rar") def test_early_exit(conda_paths): for package in conda_paths: print(package) stream = iter(package_streaming.stream_conda_info(package)) found = False for tar, member in stream: assert not found, "early exit did not work" if member.name == "info/index.json": reader = tar.extractfile(member) if reader: json.load(reader) found = True stream.close() # PEP 342 close() # stream_conda_info doesn't close a passed-in fileobj, but a # filename should be closed. assert found, f"index.json not found in {package}" def test_chmod_error(tmp_path, mocker): """ Coverage for os.chmod() error handling. 
""" with package_streaming.TarfileNoSameOwner(tmp_path / "test.tar", mode="w") as tar: member = tarfile.TarInfo(name="file") tar.addfile(member, io.BytesIO()) mocker.patch("os.chmod", side_effect=OSError) with pytest.raises(tarfile.ExtractError): # only logs a debug message if errorlevel<=1 with package_streaming.TarfileNoSameOwner( tmp_path / "test.tar", errorlevel=2 ) as tar: tar.extractall(tmp_path) conda-conda-package-streaming-dce4b94/tests/test_transmute.py000066400000000000000000000150251503052622600245410ustar00rootroot00000000000000import contextlib import io import itertools import os import tarfile import time from pathlib import Path from zipfile import ZipFile import pytest import zstandard from conda_package_handling.validate import validate_converted_files_match_streaming from conda_package_streaming.create import anonymize from conda_package_streaming.package_streaming import ( CondaComponent, stream_conda_component, ) from conda_package_streaming.transmute import ( transmute, transmute_stream, transmute_tar_bz2, ) @pytest.fixture def testtar_bytes(): buffer = io.BytesIO() with tarfile.open("test.tar.bz2", "w:bz2", fileobj=buffer) as tar: symlink = tarfile.TarInfo(name="symlink") symlink.type = tarfile.LNKTYPE symlink.linkname = "target" tar.addfile(symlink) expected = tarfile.TarInfo(name="info/expected") tar.addfile(expected, io.BytesIO()) unexpected = tarfile.TarInfo(name="info/unexpected") tar.addfile(unexpected, io.BytesIO()) return buffer.getbuffer() @contextlib.contextmanager def timeme(message: str = ""): begin = time.time() yield end = time.time() print(f"{message}{end - begin:0.2f}s") def test_transmute(conda_paths: list[Path], tmpdir): tarbz_packages = [] for path in conda_paths: path = str(path) if path.endswith(".tar.bz2") and (1 << 20 < os.stat(path).st_size < 1 << 22): tarbz_packages = [path] conda_packages = [] # not supported assert tarbz_packages, "no medium-sized .tar.bz2 packages found" metadata_checks = 0 for packages in 
(conda_packages, tarbz_packages): for package in packages: with timeme(f"{package} took "): out = transmute(package, tmpdir) _, missing, mismatched = validate_converted_files_match_streaming( out, package, strict=True ) assert missing == mismatched == [] if out.name.endswith(".conda"): with ZipFile(out) as zf: metadata_checks += 1 assert "metadata.json" in zf.namelist() assert metadata_checks > 0 def test_transmute_symlink(tmpdir, testtar_bytes): testtar = Path(tmpdir, "test.tar.bz2") testtar.write_bytes(testtar_bytes) out = transmute(str(testtar), tmpdir) _, missing, mismatched = validate_converted_files_match_streaming( out, testtar, strict=True ) assert missing == mismatched == [] def test_transmute_info_filter(tmpdir, testtar_bytes): testtar = Path(tmpdir, "test.tar.bz2") testtar.write_bytes(testtar_bytes) transmute( str(testtar), tmpdir, is_info=lambda filename: filename == "info/expected" ) with open(Path(tmpdir, "test.conda"), "rb") as fileobj: for component, expected in ( (CondaComponent.info, {"info/expected"}), ( CondaComponent.pkg, { "info/unexpected", "symlink", }, ), ): items = stream_conda_component("test.conda", fileobj, component) assert {member.name for tar, member in items} == expected, items def test_transmute_backwards(tmpdir, conda_paths): tarbz_packages = [] for path in conda_paths: path = str(path) if path.endswith(".conda") and (1 << 20 < os.stat(path).st_size < 1 << 22): tarbz_packages = [path] conda_packages = [] # not supported assert tarbz_packages, "no medium-sized .conda packages found" for packages in (conda_packages, tarbz_packages): for package in packages: with timeme(f"{package} took "): out = transmute_tar_bz2(package, tmpdir) _, missing, mismatched = validate_converted_files_match_streaming( out, package, strict=True ) assert missing == mismatched == [] def test_transmute_tarbz2_to_tarbz2(tmpdir, testtar_bytes): testtar = Path(tmpdir, "test.tar.bz2") testtar.write_bytes(testtar_bytes) outdir = Path(tmpdir, "output") 
outdir.mkdir() out = transmute_tar_bz2(str(testtar), outdir) _, missing, mismatched = validate_converted_files_match_streaming( out, testtar, strict=True ) assert missing == mismatched == [] def test_transmute_conditional_zip64(tmp_path, mocker): """ Test that zip64 is used in transmute after a threshold. """ LIMIT = 16384 for test_size, extra_expected in (LIMIT // 2, False), (LIMIT * 2, True): mocker.patch("conda_package_streaming.create.CONDA_ZIP64_LIMIT", new=LIMIT) mocker.patch("zipfile.ZIP64_LIMIT", new=LIMIT) tmp_tar = tmp_path / f"{test_size}.tar.bz2" with tarfile.open(tmp_tar, "w:bz2") as tar: pkg = tarfile.TarInfo(name="packagedata") data = io.BytesIO(os.urandom(test_size)) pkg.size = len(data.getbuffer()) tar.addfile(pkg, data) info = tarfile.TarInfo(name="info/data") data = io.BytesIO(os.urandom(test_size)) info.size = len(data.getbuffer()) tar.addfile(info, data) out = transmute(str(tmp_tar), tmp_path) with ZipFile(out) as e: assert e.filelist[0].extra == b"" # when zip64 extension is used, extra contains zip64 headers assert bool(e.filelist[1].extra) == extra_expected assert bool(e.filelist[2].extra) == extra_expected def test_transmute_stream(tmpdir, conda_paths): """ Test example from transmute_stream documentation. 
Recompress .conda using transmute_stream() """ conda_packages = [] for path in conda_paths: if path.name.endswith(".conda") and (1 << 20 < os.stat(path).st_size < 1 << 22): conda_packages.append(path) for package in conda_packages[:3]: file_id = package.name transmute_stream( file_id, tmpdir, compressor=lambda: zstandard.ZstdCompressor(), package_stream=itertools.chain( stream_conda_component(package, component=CondaComponent.pkg), stream_conda_component(package, component=CondaComponent.info), ), ) def test_anonymize_helper(): ti = tarfile.TarInfo(name="info") ti.uid = ti.gid = 500 ti.uname = ti.gname = "somebody" anon = anonymize(ti) assert anon.name == ti.name # they are also the same object assert anon.uid == anon.gid == 0 assert anon.uname == anon.gname == "" conda-conda-package-streaming-dce4b94/tests/test_url.py000066400000000000000000000130701503052622600233170ustar00rootroot00000000000000import io import tempfile from contextlib import closing, contextmanager from pathlib import Path from unittest.mock import MagicMock, patch from zipfile import ZipFile import pytest from requests import HTTPError, Session from conda_package_streaming import lazy_wheel from conda_package_streaming.lazy_wheel import LazyConda from conda_package_streaming.url import ( conda_reader_for_url, extract_conda_info, stream_conda_info, ) LIMIT = 16 @pytest.fixture def package_url(package_server): """ Base url for all test packages. 
""" host, port = package_server.server.server_address return f"http://{host}:{port}/pkgs" @pytest.fixture def package_urls(package_server, package_url): pkgs_dir = Path(package_server.app.pkgs_dir) conda = [] tar_bz2 = [] for path in pkgs_dir.iterdir(): if len(conda) > LIMIT and len(tar_bz2) > LIMIT: break url = f"{package_url}/{path.name}" if path.name.endswith(".tar.bz2") and len(tar_bz2) < LIMIT: tar_bz2.append(url) elif path.name.endswith(".conda") and len(conda) < LIMIT: conda.append(url) # interleave urls = [] for pair in zip(conda, tar_bz2): urls.extend(pair) return urls def test_stream_url(package_urls): with pytest.raises(ValueError): next(stream_conda_info("https://localhost/notaconda.rar")) for url in package_urls: with closing(stream_conda_info(url)) as members: print("stream_url", url) for tar, member in members: if member.name == "info/index.json": break else: pytest.fail("info/index.json not found") def test_fetch_meta(package_urls): for url in package_urls: with tempfile.TemporaryDirectory() as destdir: extract_conda_info(url, destdir) def test_lazy_wheel(package_urls): lazy_tests = 7 for url in package_urls: if url.endswith(".conda"): # API works with `.tar.bz2` but only returns LazyConda for `.conda` filename, conda = conda_reader_for_url(url) assert filename == url.rsplit("/")[-1] with conda: assert isinstance(conda, LazyConda) assert conda.mode == "rb" assert conda.readable() assert not conda.writable() assert not conda.closed request_count = conda._request_count # did we really prefetch the info? zf = ZipFile(conda) # type: ignore filename = filename[: -len(".conda")] zf.open(f"info-{filename}.tar.zst").read() assert conda._request_count == request_count, ( "info required extra GET request" ) assert conda._request_count <= 3 conda.prefetch("not-appearing-in-archive.txt") # zip will figure this out naturally; delete method? 
conda._check_zip() lazy_tests -= 1 if lazy_tests <= 0: break else: raise LookupError( f"not enough .conda packages found {lazy_tests} {package_urls}" ) with pytest.raises(HTTPError): conda_reader_for_url(package_urls[0] + ".404.conda") class Session200(Session): def get(self, *args, **kwargs): response = super().get(*args, **kwargs) response.status_code = 200 return response with pytest.raises(lazy_wheel.HTTPRangeRequestUnsupported): LazyConda(package_urls[0], Session200()) for url in package_urls: if url.endswith(".tar.bz2"): LazyConda(url, Session())._check_zip() break else: raise LookupError("no .tar.bz2 packages found") @pytest.mark.parametrize("fall_back_to_full_download", [True, False]) @patch("conda_package_streaming.url.LazyConda") def test_conda_reader_for_url_passes_to_lazy_conda_correctly( lazy_conda_mock: MagicMock, fall_back_to_full_download: bool ): url = "https://example.com/package.conda" session = Session() filename, conda = conda_reader_for_url( url, session, fall_back_to_full_download=fall_back_to_full_download ) assert filename == "package.conda" lazy_conda_mock.assert_called_once_with( url, session, fall_back_to_full_download=fall_back_to_full_download, ) def test_no_file_after_info(): """ If info is the last file, LazyConda must fetch (start of info file .. start of zip directory) instead of to the next file in the zip. 
""" class MockBytesIO(io.BytesIO): prefetch = LazyConda.prefetch @contextmanager def _stay(self): yield zip = MockBytesIO() zf = ZipFile(zip, "w") zf.writestr("info-test.tar.zst", b"00000000") # a short file zf.close() zip.prefetch("test") @pytest.mark.skip() def test_obsolete_lazy_wheel_selftest(): import logging import requests logging.basicConfig(level=logging.DEBUG) session = requests.Session() lzoh = lazy_wheel.LazyZipOverHTTP( "https://repodata.fly.dev/repo.anaconda.com/pkgs/main/win-32/current_repodata.jlap", session, ) lzoh.seek(1024) lzoh.read(768) lzoh.seek(0) # compare against regular fetch with open("outfile.txt", "wb+") as out: buf = b" " while buf: buf = lzoh.read(1024 * 10) print(list(zip(lzoh._left, lzoh._right)), lzoh._length) if not buf: break out.write(buf)