pax_global_header00006660000000000000000000000064150514244750014521gustar00rootroot0000000000000052 comment=4ad73170f75108db7eb62805309267d37c7f5035 uhi-1.0.0/000077500000000000000000000000001505142447500123045ustar00rootroot00000000000000uhi-1.0.0/.git_archival.txt000066400000000000000000000002171505142447500155570ustar00rootroot00000000000000node: 4ad73170f75108db7eb62805309267d37c7f5035 node-date: 2025-08-20T15:59:57-04:00 describe-name: v1.0.0 ref-names: HEAD -> main, tag: v1.0.0 uhi-1.0.0/.gitattributes000066400000000000000000000000401505142447500151710ustar00rootroot00000000000000.git_archival.txt export-subst uhi-1.0.0/.github/000077500000000000000000000000001505142447500136445ustar00rootroot00000000000000uhi-1.0.0/.github/CONTRIBUTING.md000066400000000000000000000014771505142447500161060ustar00rootroot00000000000000See the [Scikit-HEP Developer introduction][skhep-dev-intro] for a detailed description of best practices for developing Scikit-HEP packages. [skhep-dev-intro]: https://scikit-hep.org/developer/intro # Quick run with Nox If you have nox, this project supports nox. These are the supplied sessions: ```console nox -s lint nox -s tests nox -s docs nox -s build ``` # Post setup You should prepare pre-commit, which will help you by checking that commits pass required checks: ```bash pip install pre-commit # or brew install pre-commit on macOS pre-commit install # Will install a pre-commit hook into the git repo ``` You can also/alternatively run `pre-commit run` (changes only) or `pre-commit run --all-files` to check even without installing the hook. # Testing Use pytest to run the unit checks: ```bash pytest ``` uhi-1.0.0/.github/dependabot.yml000066400000000000000000000003411505142447500164720ustar00rootroot00000000000000version: 2 updates: # Maintain dependencies for GitHub Actions - package-ecosystem: "github-actions" directory: "/" schedule: interval: "monthly" groups: actions: patterns: - "*" uhi-1.0.0/.github/release.yml000066400000000000000000000001261505142447500160060ustar00rootroot00000000000000changelog: exclude: authors: - dependabot[bot] - pre-commit-ci[bot] uhi-1.0.0/.github/workflows/000077500000000000000000000000001505142447500157015ustar00rootroot00000000000000uhi-1.0.0/.github/workflows/cd.yml000066400000000000000000000032301505142447500170100ustar00rootroot00000000000000name: CD on: workflow_dispatch: push: branches: - main release: types: - published schedule: - cron: "34 5 * * *" permissions: actions: read concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true jobs: dist: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 with: fetch-depth: 0 - uses: hynek/build-and-inspect-python-package@v2 publish: needs: [dist] runs-on: ubuntu-latest if: github.event_name == 'release' && github.event.action == 'published' environment: name: pypi url: https://pypi.org/p/uhi permissions: id-token: write attestations: write steps: - uses: actions/download-artifact@v4 with: name: Packages path: dist - uses: actions/attest-build-provenance@v2 with: subject-path: "dist/*" - uses: pypa/gh-action-pypi-publish@release/v1 upload_nightly_wheels: name: Upload nightly wheels to Anaconda Cloud if: | (github.event_name == 'workflow_dispatch' || github.event_name == 'schedule') && !github.event.repository.fork && github.ref == 'refs/heads/main' needs: [dist] runs-on: ubuntu-latest steps: - uses: actions/download-artifact@v4 with: name: Packages path: dist - name: List all files run: ls -lh dist - name: Upload wheel to Anaconda Cloud as nightly 
uses: scientific-python/upload-nightly-action@b36e8c0c10dbcfd2e05bf95f17ef8c14fd708dbf # 0.6.2 with: artifacts_path: dist anaconda_nightly_upload_token: ${{ secrets.ANACONDA_ORG_UPLOAD_TOKEN }} uhi-1.0.0/.github/workflows/ci.yml000066400000000000000000000031371505142447500170230ustar00rootroot00000000000000name: CI on: workflow_dispatch: pull_request: push: branches: - main concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true jobs: checks: name: Check Python ${{ matrix.python-version }} on ${{ matrix.runs-on }} runs-on: ${{ matrix.runs-on }} strategy: fail-fast: false matrix: python-version: ["3.9", "3.13"] runs-on: [ubuntu-latest, macos-latest, windows-latest] include: - python-version: pypy-3.10 runs-on: ubuntu-latest - python-version: "3.10" runs-on: ubuntu-latest - python-version: "3.11" runs-on: ubuntu-latest - python-version: "3.12" runs-on: ubuntu-latest - python-version: "3.14" runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 with: fetch-depth: 0 - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} allow-prereleases: true - uses: astral-sh/setup-uv@v6 - name: Install package run: uv pip install --system -e.[schema] --group test - name: Test run: python -m pytest -ra root: name: ROOT test runs-on: ubuntu-latest defaults: run: shell: bash -l {0} steps: - uses: actions/checkout@v4 with: fetch-depth: 0 - uses: conda-incubator/setup-miniconda@v3 with: use-mamba: true environment-file: environment.yml - name: Install package run: pip install . - name: Test root run: pytest -ra tests/test_root.py uhi-1.0.0/.gitignore000066400000000000000000000040321505142447500142730ustar00rootroot00000000000000# Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover *.py,cover .hypothesis/ .pytest_cache/ cover/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder .pybuilder/ target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py # pyenv # For a library or package, you might want to ignore these files since the code is # intended to run in multiple environments; otherwise, check them in: # .python-version # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. # However, in case of collaboration, if having platform-specific dependencies or dependencies # having no cross-platform support, pipenv may install dependencies that don't work, or not # install all needed dependencies. #Pipfile.lock # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow __pypackages__/ # Celery stuff celerybeat-schedule celerybeat.pid # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ # pytype static type analyzer .pytype/ # Cython debug symbols cython_debug/ .pdm.toml # Version file _version.py uhi-1.0.0/.pre-commit-config.yaml000066400000000000000000000042141505142447500165660ustar00rootroot00000000000000ci: autoupdate_commit_msg: 'chore: update pre-commit hooks' autoupdate_schedule: 'monthly' repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v5.0.0 hooks: - id: check-added-large-files - id: check-case-conflict - id: check-merge-conflict - id: check-symlinks - id: check-yaml - id: debug-statements - id: end-of-file-fixer - id: mixed-line-ending - id: requirements-txt-fixer - id: trailing-whitespace - repo: https://github.com/astral-sh/ruff-pre-commit rev: "v0.12.7" hooks: - id: ruff-check args: ["--fix", "--show-fixes"] - id: ruff-format - repo: https://github.com/pre-commit/mirrors-mypy rev: v1.17.1 hooks: - id: mypy files: ^(src|tests) args: [] additional_dependencies: - numpy - pytest - importlib_resources - boost-histogram>=1.6.1 - hist>=2.9.0 - repo: https://github.com/pre-commit/pygrep-hooks rev: v1.10.0 hooks: - id: rst-backticks - id: rst-directive-colons - id: rst-inline-touching-normal - repo: https://github.com/codespell-project/codespell rev: v2.4.1 hooks: - id: codespell args: ["-L", "hist,thist,ans,nd,gaus"] - repo: local hooks: - id: disallow-caps name: Disallow improper capitalization language: pygrep entry: PyBind|Numpy|Cmake|CCache|Github|PyTest|Weighed exclude: .pre-commit-config.yaml - repo: https://github.com/rbubley/mirrors-prettier rev: "v3.6.2" hooks: - id: prettier types_or: [json] - repo: https://github.com/python-jsonschema/check-jsonschema rev: 0.33.2 hooks: - id: check-readthedocs - id: check-github-workflows - id: check-metaschema files: ^src/uhi/resources/histogram.schema.json$ - id: check-jsonschema name: Validate Histogram examples args: [--schemafile, src/uhi/resources/histogram.schema.json] files: ^tests/resources/valid/.*\.json - repo: https://github.com/henryiii/validate-pyproject-schema-store rev: 2025.07.28 hooks: - id: validate-pyproject uhi-1.0.0/.readthedocs.yml000066400000000000000000000011071505142447500153710ustar00rootroot00000000000000# Read the Docs configuration file # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details # Required version: 2 sphinx: configuration: docs/conf.py build: os: "ubuntu-24.04" tools: python: "3.13" jobs: post_checkout: - git fetch --unshallow || true pre_create_environment: - asdf plugin add uv - asdf install uv latest - asdf global uv latest create_environment: - uv venv "${READTHEDOCS_VIRTUALENV_PATH}" install: - UV_PROJECT_ENVIRONMENT="${READTHEDOCS_VIRTUALENV_PATH}" uv sync --group docs uhi-1.0.0/CITATION.cff000066400000000000000000000016701505142447500142020ustar00rootroot00000000000000cff-version: 1.2.0 message: "Please cite the following works when using this software." type: software title: "uhi" abstract: "UHI is a library that helps connect other Histogramming libraries." 
authors: - family-names: "Schreiner" given-names: "Henry" orcid: "https://orcid.org/0000-0002-7833-783X" affiliation: "Princeton University" - given-names: Hans family-names: Dembinski email: hans.dembinski@gmail.com affiliation: TU Dortmund orcid: 'https://orcid.org/0000-0003-3337-3850' - family-names: "Pivarski" given-names: "Jim" affiliation: "Princeton University" orcid: "https://orcid.org/0000-0002-6649-343X" email: "jpivarski@gmail.com" - family-names: "Taider" given-names: "Silia" email: "siliataider@gmail.com" doi: https://doi.org/10.5281/zenodo.13883674 repository-code: "https://github.com/scikit-hep/uhi" url: "https://uhi.readthedocs.io/" keywords: - python - histogram - scikit-hep license: "BSD-3-Clause" uhi-1.0.0/LICENSE000066400000000000000000000027651505142447500133230ustar00rootroot00000000000000BSD 3-Clause License Copyright (c) 2021, Henry Schreiner. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the vector package developers nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. uhi-1.0.0/README.md000066400000000000000000000077161505142447500135760ustar00rootroot00000000000000# UHI: Universal Histogram Interface [![Actions Status][actions-badge]][actions-link] [![Documentation Status][rtd-badge]][rtd-link] [![pre-commit.ci Status][pre-commit-badge]][pre-commit-link] [![Code style: black][black-badge]][black-link] [![PyPI version][pypi-version]][pypi-link] [![Conda-forge version][conda-badge]][conda-link] [![PyPI platforms][pypi-platforms]][pypi-link] [![GitHub Discussion][github-discussions-badge]][github-discussions-link] [![Gitter][gitter-badge]][gitter-link] [![Zenodo][zenodo-badge]][zenodo-link] [![Scikit-HEP][sk-badge]](https://scikit-hep.org/) [![SPEC 4 — Using and Creating Nightly Wheels][spec4-badge]][spec4-link] This is a package meant primarily for [documenting][rtd-link] histogram indexing and the PlottableHistogram Protocol and any future cross-library standards. It also contains the code for the PlottableHistogram Protocol, to be used in type checking libraries wanting to conform to the protocol. Eventually, it might gain a set of tools for testing conformance to UHI indexing, as well. 
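For example, a plotting function can statically require a conforming histogram by annotating its argument with the protocol. This is a minimal sketch; the function name and body are illustrative, not part of UHI:

```python
from uhi.typing.plottable import PlottableHistogram


def plot1d(h: PlottableHistogram) -> None:
    # Works with any conforming 1D histogram (boost-histogram, hist, uproot, ...).
    # Axes support len() and iteration; values() matches the axis length.
    for bin, value in zip(h.axes[0], h.values()):
        print(bin, value)
```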
It is not usually a runtime dependency, but only a type checking, testing, and/or docs dependency in support of other libraries (such as [boost-histogram][] 0.13+, [hist][] 2.1+, [mplhep][] 0.2.15+, [uproot][] 4+, and [histoprint][] 2+). There are a few useful runtime usable components (listed below). Older versions are available for Python 3.6+. [See what's new](https://github.com/scikit-hep/uhi/releases).

To assist plotting libraries in accepting Histograms from classic sources, see `uhi.numpy_plottable.ensure_plottable_histogram`, which will adapt NumPy style tuples into a simple PlottableHistogram.

The Protocols provided do support runtime checking, so `isinstance(h, uhi.typing.plottable.PlottableHistogram)` is valid at runtime and might be simpler than manually checking for the expected methods.

[actions-badge]: https://github.com/Scikit-HEP/uhi/workflows/CI/badge.svg
[actions-link]: https://github.com/Scikit-HEP/uhi/actions
[black-badge]: https://img.shields.io/badge/code%20style-black-000000.svg
[black-link]: https://github.com/psf/black
[conda-badge]: https://img.shields.io/conda/vn/conda-forge/uhi
[conda-link]: https://github.com/conda-forge/uhi-feedstock
[github-discussions-badge]: https://img.shields.io/static/v1?label=Discussions&message=Ask&color=blue&logo=github
[github-discussions-link]: https://github.com/Scikit-HEP/uhi/discussions
[gitter-badge]: https://badges.gitter.im/https://github.com/Scikit-HEP/uhi/community.svg
[gitter-link]: https://gitter.im/https://github.com/Scikit-HEP/uhi/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge
[zenodo-badge]: https://zenodo.org/badge/DOI/10.5281/zenodo.13883674.svg
[zenodo-link]: https://doi.org/10.5281/zenodo.13883674
[pre-commit-badge]: https://results.pre-commit.ci/badge/github/scikit-hep/uhi/main.svg
[pre-commit-link]: https://results.pre-commit.ci/repo/github/309772485
[pypi-link]: https://pypi.org/project/uhi/
[pypi-platforms]: https://img.shields.io/pypi/pyversions/uhi
[pypi-version]: https://badge.fury.io/py/uhi.svg
[rtd-badge]: https://readthedocs.org/projects/uhi/badge/?version=latest
[rtd-link]: https://uhi.readthedocs.io/en/latest/?badge=latest
[sk-badge]: https://scikit-hep.org/assets/images/Scikit--HEP-Project-blue.svg
[spec4-badge]: https://img.shields.io/badge/SPEC-4-green?labelColor=%23004811&color=%235CA038
[spec4-link]: https://scientific-python.org/specs/spec-0004/
[boost-histogram]: https://github.com/scikit-hep/boost-histogram
[hist]: https://github.com/scikit-hep/hist
[mplhep]: https://github.com/scikit-hep/mplhep
[uproot]: https://github.com/scikit-hep/uproot4
[histoprint]: https://github.com/scikit-hep/histoprint
uhi-1.0.0/docs/000077500000000000000000000000001505142447500132345ustar00rootroot00000000000000uhi-1.0.0/docs/Makefile000066400000000000000000000011721505142447500146750ustar00rootroot00000000000000# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS    ?=
SPHINXBUILD   ?= sphinx-build
SOURCEDIR     = .
BUILDDIR      = _build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
uhi-1.0.0/docs/changelog.md000066400000000000000000000002631505142447500155060ustar00rootroot00000000000000# Changelog

```{changelog}
:changelog-url: https://github.com/scikit-hep/uhi/releases
:github: https://github.com/scikit-hep/uhi/releases
:pypi: https://pypi.org/project/uhi
```
uhi-1.0.0/docs/conf.py000066400000000000000000000036331505142447500145400ustar00rootroot00000000000000# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html

from __future__ import annotations

import importlib.metadata
import os

# -- Project information -----------------------------------------------------

project = "uhi"
copyright = "2021, Henry Schreiner, Hans Dembinski, Jim Pivarski"
author = "Henry Schreiner, Hans Dembinski, Jim Pivarski"

# The full version, including alpha/beta/rc tags
version = release = importlib.metadata.version("uhi")

# -- General configuration ---------------------------------------------------

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    "myst_parser",
    "sphinx-jsonschema",
    "sphinx.ext.napoleon",
    "sphinx_copybutton",
    "sphinx_github_changelog",
]

source_suffix = [".rst", ".md"]

# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]

myst_enable_extensions = [
    "colon_fence",
]

# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.

html_theme = "furo"

# -- Changelog builder -------------------------------------------------------

if "GITHUB_API_TOKEN" in os.environ:
    sphinx_github_changelog_token = os.environ["GITHUB_API_TOKEN"]

commit = os.environ.get("READTHEDOCS_GIT_COMMIT_HASH", "main")
code_url = "https://github.com/scikit-hep/uhi/blob"
uhi-1.0.0/docs/index.rst000066400000000000000000000023541505142447500151010ustar00rootroot00000000000000.. uhi documentation master file, created by
   sphinx-quickstart on Tue Jan 19 16:19:27 2021.
   You can adapt this file completely to your liking, but it should at least
   contain the root ``toctree`` directive.

UHI: Unified Histogram Interface
=================================

UHI is a library that helps connect other Histogramming libraries. It is primarily intended to be a guide and static type check helper; you do not need a runtime dependency on UHI.

It currently does so with the following components:

UHI Indexing, which describes a powerful indexing system for histograms, designed to extend standard Array indexing for Histogram operations.

UHI Indexing+ (referred to as UHI+ for short), which describes a set of extensions to the standard indexing that make it easier to use on the command line.

The PlottableProtocol, which describes the minimal and complete set of requirements for a source library to produce and a plotting library to consume to plot a histogram, including error bars.

..
toctree:: :maxdepth: 2 :caption: Contents: indexing.rst indexing+.rst plotting.rst serialization.md testing.md changelog.md Indices and tables ================== * :ref:`genindex` * :ref:`modindex` * :ref:`search` uhi-1.0.0/docs/indexing+.rst000066400000000000000000000035671505142447500156610ustar00rootroot00000000000000.. _usage-indexing+: Indexing+ ========= This is an extended version of UHI, called UHI+. This is not implemented in boost-histogram, but is implemented in Hist. Syntax extensions ----------------- UHI+ avoids using the standard tags found in UHI by using more advanced Python syntax. Location based slicing/access: numeric axes ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ You can replace location based indexing ``loc(1.23) → 1.23j`` (a "j" suffix on a number literal). You can shift by an integer, just like with loc: ``2.3j + 1`` will be one bin past the one containing the location "2.3". .. code:: python3 v = h[2j] # Returns the bin containing "2.0" v = h[2j + 1] # Returns the bin above the one containing "2.0" h2 = h[2j:] # Slices starting with the bin containing "2.0" Location based slicing/access: string axis ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ If you have a string based axis, you can use a string directly ``loc("label") → "label"``. .. code:: python3 v = h["a"] # Returns the "a" bin (string category axis) Rebinning ^^^^^^^^^ You can replace ``rebin(2) → 2j`` in the third slot of a slice. .. code:: python3 h2 = h[::2j] # Modification operations (rebin) h2 = h[a:b:2j] # Modifications can combine with slices Named based indexing ^^^^^^^^^^^^^^^^^^^^ An optional extension to indexing is expected for histogram implementations that support names. If named axes are supported, any expression that refers to an axis by an integer can also refer to it by a name string. ``.project(*axis: int | str)`` is probably the most common place to see this, but you can also use strings in the UHI dict access, such as: .. code:: python3 h[{"a": np.s_[::2j]}] # rebin axis "a" by two h[{"x": np.s_[0:3.5j]}] # slice axis "x" from 0 to the data coordinate 3.5 h[{"other": np.s_[0:2:4j]}] # slice and rebin axis "other" uhi-1.0.0/docs/indexing.rst000066400000000000000000000251141505142447500155760ustar00rootroot00000000000000.. _usage-indexing: Indexing ======== This is the design document for Unified Histogram Indexing (UHI). Much of the original plan is now implemented in boost-histogram. Other histogramming libraries can implement support for this as well, and the "tag" functors, like ``sum`` and ``loc`` can be used between libraries. Syntax ------ The following examples assume you have imported ``loc``, ``rebin``, ``underflow``, and ``overflow`` from boost-histogram or any other library that implements UHI. Access: ^^^^^^^ .. code:: python3 v = h[b] # Returns bin contents, indexed by bin number v = h[loc(b)] # Returns the bin containing the value v = h[loc(b) + 1] # Returns the bin above the one containing the value v = h[underflow] # Underflow and overflow can be accessed with special tags Indexing works like Python, with ``IndexError`` thrown if you are out of range. Slicing: ^^^^^^^^ .. 
code:: python3

    h == h[:]             # Slice over everything
    h2 = h[a:b]           # Slice of histogram (includes flow bins)
    h2 = h[:b]            # Leaving out endpoints is okay
    h2 = h[loc(v):]       # Slices can be in data coordinates, too
    h2 = h[::rebin(2)]    # Modification operations (rebin)
    h2 = h[a:b:rebin(2)]  # Modifications can combine with slices
    v2 = h[::sum]         # Projection operations
                          # (name may change)
    v2 = h[a:b:sum]       # Adding endpoints to projection operations
    v2 = h[0:len:sum]     # removes under or overflow from the calculation
    h2 = h[v, a:b]        # A single value v is like v:v+1:sum
    h2 = h[a:b, ...]      # Ellipsis work just like normal numpy

Setting
^^^^^^^

.. code:: python3

    # Single values
    h[b] = v             # Returns bin contents, indexed by bin number
    h[loc(b)] = v        # Returns the bin containing the value
    h[underflow] = v     # Underflow and overflow can be accessed with special tags
    h[...] = array(...)  # Setting with an array or histogram sets the contents if the sizes match
                         # Overflow can optionally be included if endpoints are left out
                         # The number of dimensions for non-scalars should match (broadcasting works normally otherwise)

All of this generalizes to multiple dimensions. ``loc(v)`` could return categorical bins, but slicing on categories would (currently) not be allowed.

These all return histograms, so flow bins are always preserved - the one exception is projection; since this removes an axis, the only use for the slice edges is to be explicit on what part you are interested in for the projection. So an explicit (non-empty) slice here will cause the relevant flow bin to be excluded.

``loc`` and ``rebin`` live inside the histogramming package (like boost-histogram), but are completely general and can be created by a user using an explicit API (below). ``underflow`` and ``overflow`` also follow a general API. ``sum`` is just the Python built-in sum function.

One drawback of the syntax listed above is that it is hard to select an action to run on an axis or a few axes out of many. For this use case, you can pass a dictionary to the index, and that has the syntax ``{axis:action}``. The actions are slices, and follow the rules listed above. All axes that are unmentioned are left alone, just like ``...`` or explicit ``:`` would do. This looks like:

.. code:: python3

    h[{0: slice(None, None, bh.rebin(2))}]  # rebin axis 0 by two
    h[{1: slice(0, bh.loc(3.5))}]           # slice axis 1 from 0 to the data coordinate 3.5
    h[{7: slice(0, 2, bh.rebin(4))}]        # slice and rebin axis 7

If you don't like manually building slices, you can use the ``np.s_`` utility to recover the original slicing syntax inside the dict:

.. code:: python3

    h[{0: np.s_[::rebin(2)]}]   # rebin axis 0 by two
    h[{1: np.s_[0:loc(3.5)]}]   # slice axis 1 from 0 to the data coordinate 3.5
    h[{7: np.s_[0:2:rebin(4)]}] # slice and rebin axis 7

Invalid syntax:
^^^^^^^^^^^^^^^

.. code:: python3

    h[1.0]        # Floats are not allowed, just like numpy
    h[::2]        # Skipping is not (currently) supported
    h[..., None]  # None == np.newaxis is not supported

Reordering axes
^^^^^^^^^^^^^^^

It is not possible to reorder axes with this syntax; libraries are expected to provide a ``.project(*axis: int)`` method which provides a way to reorder, as well as fast access to a small subset of a large histogram in a complementary way to the above indexing.

Rejected proposals or proposals for future consideration, maybe ``hist``-only:
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

..
code:: python3 h2 = h[1.0j:2.5j + 1] # Adding a j suffix to a number could be used in place of ``loc(x)`` h2 = h[1.0] # Floats in place of ``loc(x)``: too easy to make a mistake Examples -------- For a histogram, the slice should be thought of like this: .. code:: python3 histogram[start:stop:action] The start and stop can be either a bin number (following Python rules), or a callable; the callable will get the axis being acted on and should return an extended bin number (``-1`` and ``len(ax)`` are flow bins). A provided callable is ``bh.loc``, which converts from axis data coordinates into bin number. The final argument, ``action``, is special. A general API is being worked on, but for now, ``bh.sum`` will “project out” or “integrate over” an axes, and ``bh.rebin(n)`` will rebin by an integral factor. Both work correctly with limits; ``bh.sum`` will remove flow bins if given a range. ``h[0:len:bh.sum]`` will sum without the flow bins. Here are a few examples that highlight the functionality of UHI: Example 1: ^^^^^^^^^^ You want to slice axis 0 from 0 to 20, axis 1 from .5 to 1.5 in data coordinates, axis 2 needs to have double size bins (rebin by 2), and axis 3 should be summed over. You have a 4D histogram. Solution: .. code:: python3 ans = h[:20, bh.loc(-.5):bh.loc(1.5), ::bh.rebin(2), ::bh.sum] Example 2: ^^^^^^^^^^ You want to set all bins above 4.0 in data coordinates to 0 on a 1D histogram. Solution: .. code:: python3 h[bh.loc(4.0):] = 0 You can set with an array, as well. The array can either be the same length as the range you give, or the same length as the range + under/overflows if the range is open ended (no limit given). For example: .. code:: python3 h = bh.Histogram(bh.axis.Regular(10, 0, 1)) h[:] = np.ones(10) # underflow/overflow still 0 h[:] = np.ones(12) # underflow/overflow now set too Note that for clarity, while basic NumPy broadcasting is supported, axis-adding broadcasting is not supported; you must set a 2D histogram with a 2D array or a scalar, not a 1D array. Example 3: ^^^^^^^^^^ You want to sum from -infinity to 2.4 in data coordinates in axis 1, leaving all other axes alone. You have an ND histogram, with N >= 2. Solution: .. code:: python3 ans = h[:, :bh.loc(2.4):bh.sum, ...] Notice that last example could be hard to write if the axis number, 1 in this case, was large or programmatically defined. In these cases, you can pass a dictionary of ``{axis:slice}`` into the indexing operation. A shortcut to quickly generate slices is provided, as well: .. code:: python3 ans = h[{1: slice(None,bh.loc(2.4),bh.sum)}] # Identical: ans = h[{1: np.s_[:bh.loc(2.4):bh.sum]}] Example 4: ^^^^^^^^^^ You want the underflow bin of a 1D histogram. Solution: .. code:: python3 val = h1[bh.underflow] -------------- Extensions ---------- Boost-histogram currently implements a few extra additions to this that are not yet required: * Passing ``sum`` directly acts as if it was given as the third argument (the action). * The ``rebin`` tag can be passed directly, as well. The inner workings of ``rebin`` are being worked on, and will be updated here when they are finalized. Details ------- Implementation notes ^^^^^^^^^^^^^^^^^^^^ loc, rebin, and sum are *not* unique tags, or special types, but rather APIs for classes. New versions of these could be added, and implementations could be shared among Histogram libraries. For clarity, the following code is written in Python 3.6+. `Prototype here `__. `Extra doc here `__. 
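For instance, a library or a user could define a new locator that follows the same callable form. This is a hypothetical sketch; the name ``last_bin`` is not part of any library, and the operator form is optional, as noted below:

.. code:: python3

    class last_bin:
        "Locator for the last regular bin of whatever axis it is used on."

        def __call__(self, axis):
            # Callables receive the axis being acted on and return an
            # (extended) bin index; len(axis) would be the overflow bin.
            return len(axis) - 1

    v = h[last_bin()]  # same result as h[len(h.axes[0]) - 1]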
Note that the API comes in two forms; the ``__call__``/``__new__`` operator form is more powerful, slower, optional, and is currently not supported by boost-histogram. A fully conforming UHI implementation must allow the tag form without the operators. Basic implementation example (WIP): .. code:: python3 class loc: "When used in the start or stop of a Histogram's slice, x is taken to be the position in data coordinates." def __init__(self, value, offset): self.value = value self.offset = offset # supporting __add__ and __sub__ also recommended def __call__(self, axis): return axis.index(self.value) + self.offset # Other flags, such as callable functions, could be added and detected later. # UHI will perform a maximum performance sum when python's sum is encountered def underflow(axis): return -1 def overflow(axis): return len(axis) class rebin: """ When used in the step of a Histogram's slice, rebin(n) combines bins, scaling their widths by a factor of n. If the number of bins is not divisible by n, the remainder is added to the overflow bin. """ def __init__(self, factor): # Items with .factor are specially treated in boost-histogram, # performing a high performance rebinning self.factor = factor # Optional and not used by boost-histogram def __call__(self, binning, axis, counts): factor = self.factor if isinstance(binning, Regular): indexes = (numpy.arange(0, binning.num, factor),) num, remainder = divmod(binning.num, factor) high, hasover = binning.high, binning.hasover if binning.hasunder: indexes[0][:] += 1 indexes = ([0],) + indexes if remainder == 0: if binning.hasover: indexes = indexes + ([binning.num + int(binning.hasunder)],) else: high = binning.left(indexes[-1][-1]) hasover = True binning = Regular(num, binning.low, high, hasunder=binning.hasunder, hasover=hasover) counts = numpy.add.reduceat(counts, numpy.concatenate(indexes), axis=axis) return binning, counts else: raise NotImplementedError(type(binning)) uhi-1.0.0/docs/make.bat000066400000000000000000000014331505142447500146420ustar00rootroot00000000000000@ECHO OFF pushd %~dp0 REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=sphinx-build ) set SOURCEDIR=. set BUILDDIR=_build if "%1" == "" goto help %SPHINXBUILD% >NUL 2>NUL if errorlevel 9009 ( echo. echo.The 'sphinx-build' command was not found. Make sure you have Sphinx echo.installed, then set the SPHINXBUILD environment variable to point echo.to the full path of the 'sphinx-build' executable. Alternatively you echo.may add the Sphinx directory to PATH. echo. echo.If you don't have Sphinx installed, grab it from echo.http://sphinx-doc.org/ exit /b 1 ) %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% goto end :help %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% :end popd uhi-1.0.0/docs/plotting.rst000066400000000000000000000063351505142447500156350ustar00rootroot00000000000000.. _usage-plotting: Plotting ======== This is a description of the ``PlottableProtocol``. Any plotting library that accepts an object that follows the ``PlottableProtocol`` can plot object that follow this protocol, and libraries that follow this protocol are compatible with plotters. The Protocol is runtime checkable, though as usual, that will only check for the presence of the needed methods at runtime, not for the static types. Using the protocol: ^^^^^^^^^^^^^^^^^^^ Plotters should only depend on the methods and attributes listed below. 
In short, they are:

* ``h.kind``: The ``bh.Kind`` of the histogram (COUNT or MEAN)
* ``h.values()``: The value (as given by the kind)
* ``h.variances()``: The variance in the value (None if an unweighted histogram was filled with weights)
* ``h.counts()``: How many fills the bin received or the effective number of fills if the histogram is weighted
* ``h.axes``: A Sequence of axes

Axes have:

* ``ax[i]``: A tuple of (lower, upper) bin, or the discrete bin value (integer or string)
* ``len(ax)``: The number of bins
* Iteration is supported
* ``ax.traits.circular``: True if circular
* ``ax.traits.discrete``: True if the bin represents a single value (e.g. Integer or Category axes) instead of an interval (e.g. Regular or Variable axes)

Plotters should see if ``.counts()`` is None; no boost-histogram objects currently return None, but a future storage or different library could. Also check ``.variances``; if not None, this storage holds variance information and error bars should be included. Boost-histogram histograms will return something unless they know that this is an invalid assumption (a weighted fill was made on an unweighted histogram).

To statically restrict yourself to valid API usage, use ``PlottableHistogram`` as the parameter type to your function (Not needed at runtime).

Implementing the protocol:
^^^^^^^^^^^^^^^^^^^^^^^^^^

Add UHI to your MyPy environment; an example ``.pre-commit-config.yaml`` file:

.. code:: yaml

    - repo: https://github.com/pre-commit/mirrors-mypy
      rev: v0.812
      hooks:
        - id: mypy
          files: src
          additional_dependencies: [uhi, numpy~=1.20.1]

Then, check your library against the Protocol like this:

.. code:: python3

    from typing import TYPE_CHECKING, cast

    if TYPE_CHECKING:
        _: PlottableHistogram = cast(MyHistogram, None)

Help for plotters
^^^^^^^^^^^^^^^^^

The module ``uhi.numpy_plottable`` has a utility to simplify the common use case of accepting a PlottableProtocol or other common formats, primarily a NumPy ``histogram``/``histogram2d``/``histogramdd`` tuple. The ``ensure_plottable_histogram`` function will take a histogram or NumPy tuple, or an object that implements ``.to_numpy()`` or ``.numpy()`` and convert it to a ``NumPyPlottableHistogram``, which is a minimal implementation of the Protocol.

By calling this function on your input, you can then write your plotting function knowing that you always have a ``PlottableProtocol`` object, greatly simplifying your code.

The full protocol version 1.2 follows:
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

(Also available as ``uhi.typing.plottable.PlottableProtocol``, for use in tests, etc.)

.. literalinclude:: ../src/uhi/typing/plottable.py
   :language: python
uhi-1.0.0/docs/serialization.md000066400000000000000000000262561505142447500164460ustar00rootroot00000000000000# Serialization

## Introduction

Histogram serialization has to cover a wide range of formats. As such, we describe a form for serialization that covers the metadata structure as JSON-like, with a provided JSON-schema. The data (bins and/or variable edges) is stored out-of-band in a binary format based on what type of data file you are in. For very small (primarily 1D) histograms, data is allowed inline as well. The following formats are being targeted:

```
┌──────────────┐ ┌────────┐ ┌────────────┐
│ ROOT (todo)  │ │  HDF5  │ │  ZIP/JSON  │
└──────────────┘ └────────┘ └────────────┘
```

Other formats can be used as well, assuming they support out-of-band data and text attributes or files for the metadata. We are working on a Zarr backend in the near future.
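To make the layout concrete before diving into the details, here is a minimal sketch of one histogram in the inline JSON form (the field meanings are defined under Design below; the numbers are illustrative):

```json
{
  "uhi_schema": 1,
  "axes": [
    {
      "type": "regular",
      "lower": 0.0,
      "upper": 1.0,
      "bins": 2,
      "underflow": true,
      "overflow": true,
      "circular": false
    }
  ],
  "storage": {
    "type": "double",
    "values": [0.0, 5.0, 3.0, 1.0]
  }
}
```

The four `values` entries are the underflow bin, the two regular bins, and the overflow bin, in that order.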
## Caveats

This structure was based heavily on boost-histogram, but it is intended to be general, and can be expanded in the future as needed. As such, the following limitations are required:

* Serialization followed by deserialization may cause axis changes. Axis types may change to an equivalent but less performant axis, growth status will be lost, etc. Libraries can record custom `"writer_info"` attributes to improve round trips, but histograms must always be openable without the extra info.
* Metadata should also be reasonably sized; some formats like HDF5 may limit the size of attributes to 64K.
* Floating point errors could be incurred on conversion, as the storage format uses a stable but different representation.
* Axis `name` is only part of the metadata, and is not standardized. This is due to lack of support from boost-histogram.

## Design

The following axes types are supported:

* `"regular"`: A regularly spaced set of even bins. Boost-histogram's "integer" axis maps to this axis as well. Has `upper`, `lower`, `bins`, `underflow`, `overflow`, and `circular` properties.
* `"variable"`: A continuous axis defined by bins+1 edges. Has `edges`, which is either an in-line list of numbers or a string pointing to an out-of-band data source. Also has `underflow`, `overflow`, and `circular` properties.
* `"category_int"`: A list of integer bins, non-continuous. Has `categories`, which is an in-line list of integers. Also has `flow`.
* `"category_str"`: A list of string bins. Has `categories`, which is an in-line list of strings. Also has `flow`.
* `"boolean"`: A true/false axis.

Axes with gaps are currently not supported.

All axes support `metadata`, a string-keyed dictionary of arbitrary data. Currently, strings, numbers, and booleans are supported. Other values here are not currently supported. Libraries are encouraged to provide a way to indicate unserializable metadata; our recommendation is to avoid adding any metadata that starts with a `@` to the metadata dictionary. Libraries should not include keys with `None` values, as some formats might not support null values.

The following storages are supported:

* `"int"`: A collection of integers. Boost-histogram's `Int64` and `AtomicInt64` map to this, and sometimes `Unlimited`.
* `"double"`: A collection of 64-bit floating point values. Boost-histogram's `Double` storage maps to this, and sometimes `Unlimited`.
* `"weighted"`: A collection of two arrays of 64-bit floating point values, `"value"` and `"variance"`. Boost-histogram's `Weight` storage maps to this.
* `"mean"`: A collection of three arrays of 64-bit floating point values, `"count"`, `"value"`, and `"variance"`. Boost-histogram's `Mean` storage maps to this.
* `"weighted_mean"`: A collection of four arrays of 64-bit floating point values, `"sum_of_weights"`, `"sum_of_weights_squared"`, `"values"`, and `"variances"`. Boost-histogram's `WeightedMean` storage maps to this.

A library can fill the optional `"writer_info"` field with a key specific to the library containing library specific metadata anywhere a metadata field is allowed. There is one defined key at the Histogram level, `"version"`, which contains the version of the library that created the histogram. Libraries should include this key when creating a histogram. It is not required for reading histograms. Histogram libraries can put custom metadata here that they can use to record provenance information or help with same-library round trips.
For example, a histogram created with boost-histogram might contain:

```json
{
  "writer_info": {
    "boost-histogram": {
      "version": "1.0.0"
    }
  },
  "...": "..."
}
```

There is one more required top-level key: `"uhi_schema"`, which must be set to 1 currently. If there is a future revision with a backward incompatible change, this will be bumped to 2, and readers should always error on future schemas, and support all older schemas. This is hoped to be unlikely/rare, but this also serves as a check that this is in fact a uhi serialization object. Non-breaking changes like additions are allowed without bumping the schema.

## Sparse storage

For sparse histograms, storage contains an `index` key. This is a 2D array; the first dimension has the same number of entries as the number of axes, and the second dimension has the same number of entries as the number of filled bins. There should not be any duplicate entries. The values start at 0 for the underflow bin (if there is one), and are in the same order as the axes. The data in this case are 1D arrays, one for each bin. For example, take the following sparse histogram with three filled bins:

```json
{
  "storage": {
    "index": [[0, 1, 2], [3, 3, 4]],
    "values": [5, 6, 7]
  }
}
```

The `0, 3` bin is filled with 5, the `1, 3` bin is filled with 6, and the `2, 4` bin is filled with 7. If the first axis has `"underflow"` enabled, that first bin is an underflow bin.

If a histogram library doesn't support sparse histograms, you can convert a sparse histogram to a dense one. UHI provides helpers `uhi.io.to_sparse` and `uhi.io.from_sparse` that can be used to support a library that doesn't support sparse histograms. Scalar histograms (with no axes) are always dense.

## CLI/API

You can currently test a JSON file against the schema by running:

```console
$ python -m uhi.schema some/file.json
```

Or with code:

```python
import json

import uhi.schema

with filename.open(encoding="utf-8") as f:
    data = json.load(f)

uhi.schema.validate(data)
```

Eventually this should also be usable for JSON inside ZIP files, HDF5 attributes, and maybe more.

## Format specific details and helpers

The `uhi` library contains reference implementations of target formats. You can implement these yourself, but if you are using or have access to Python, feel free to use `uhi` if that helps.

If an object has a `_to_uhi_` method, that will be used to convert it to a dictionary following the schema. A `_from_uhi_` classmethod is also recommended; however, this is generally part of the histogram constructor; if a UHI object is passed, it should be converted automatically.

### JSON

The simplest format, useful for writing tests. The JSON version is nearly identical to the intermediate representation; the only difference is that data is stored as nested lists. It is not intended for large histograms; the ZIP format (below) is nearly identical and builds on this with out-of-band data.

Two utilities are provided: `uhi.io.json.default` and `uhi.io.json.object_hook`. These are used with Python's built-in json module to handle conversions.

```python
import json

import uhi.io.json

ob = json.dumps(h, default=uhi.io.json.default)
uhi_hist = json.loads(ob, object_hook=uhi.io.json.object_hook)
```

Above, `h` is a histogram that supports `_to_uhi_` or an intermediate representation, `ob` is a JSON string, and `uhi_hist` is an intermediate representation; you can pass it to `boost_histogram.Histogram` or `hist.Hist`.

### ZIP

The zip format is very similar to JSON, but stores data in the numpy zip format.
Arrays are replaced by strings, which represent the `.npy` file inside the zip file containing the array. The names are arbitrary; see the uhi code if you want to see how uhi creates names. The metadata is in a file with the name of the histogram and a `.json` extension.

We provide `uhi.io.zip.write` and `uhi.io.zip.read`, which work with open zip files from the standard library (or probably anything with a similar API).

```python
import zipfile

import uhi.io.zip

with zipfile.ZipFile("myfile.zip", "w") as zip_file:
    uhi.io.zip.write(zip_file, "histogram", h)

with zipfile.ZipFile("myfile.zip", "r") as zip_file:
    h2 = uhi.io.zip.read(zip_file, "histogram")
```

Above, `h` is a histogram that supports `_to_uhi_` or an intermediate representation, and `h2` is an intermediate representation; you can pass it to `boost_histogram.Histogram` or `hist.Hist`. The metadata name in the file is `"histogram.json"`. The contents of that file are identical to the JSON format, except arrays are replaced by string names to files inside the zipfile.

### HDF5

The HDF5 format is ideal for combining histograms with other data. You need the `h5py` library installed to use this format.

There is some extra structure here compared to the other formats. The groups are `"axes"`, `"ref_axes"`, `"metadata"`, and `"storage"`. Arrays for the axes are placed in `"ref_axes"`, since hdf5 doesn't have lists of arrays. Storage arrays are in-place. A Reference type is used to link the axes array with the data. `"edges"` and `"categories"` are datasets; the other axes values are attributes (or groups with attributes, like `"metadata"` and `"writer_info"`, which is a nested group). Likewise, the `"storage"` group sets `"type"` as an attribute, the others are datasets.

We provide `uhi.io.hdf5.read` and `uhi.io.hdf5.write` to write to an open group. The structure is relative; you can place it anywhere inside a hdf5 file.

```python
with h5py.File("myfile.hdf5", "w") as h5_file:
    uhi.io.hdf5.write(h5_file.create_group("histogram"), h)

with h5py.File("myfile.hdf5", "r") as h5_file:
    h2 = uhi.io.hdf5.read(h5_file["histogram"])
```

Above, `h` is a histogram that supports `_to_uhi_` or an intermediate representation, and `h2` is an intermediate representation; you can pass it to `boost_histogram.Histogram` or `hist.Hist`. You should create the group you want the histogram to be in.

By default, we do not compress arrays smaller than 1,000 elements. You can control this by setting `min_compress_elements`; set it to 0 to compress all arrays. You can also pass through `compression` and `compression_opts`.

:::{warning}
Note that h5py doesn't support free-threaded Python with wheels, and it currently (as of 3.14rc2) doesn't provide 3.14 wheels either.
:::

### ROOT

ROOT files are not yet implemented.

## Schema

A typing helper for the intermediate representation, `HistogramIR`, is provided in `uhi.typing.serialization` as a `TypedDict`. The schema, provided in `resources` as `histogram.schema.json`, also allows strings for data members, since some formats (like ZIP) put data into an optimized location and specify a reference to them.

### Rendered schema

```{jsonschema} ../src/uhi/resources/histogram.schema.json
```

### Full schema

The full schema is below:

```{literalinclude} ../src/uhi/resources/histogram.schema.json
:language: json
```
uhi-1.0.0/docs/testing.md000066400000000000000000000022331505142447500152330ustar00rootroot00000000000000# Testing

UHI has some testing helpers for use in test suites. This is primarily for library authors.
## Indexing

You can see if your library passes the indexing test suite. We provide three sets of tests: 1D, 2D, and 3D tests. These can be implemented on your own library (`my.Histogram` in this example) like this:

```python
import uhi.testing.indexing


class TestAccess1D(uhi.testing.indexing.Indexing1D[my.Histogram]):
    @classmethod
    def make_histogram(cls) -> my.Histogram:
        return my.Histogram(cls.get_uhi())


class TestAccess2D(uhi.testing.indexing.Indexing2D[my.Histogram]):
    @classmethod
    def make_histogram(cls) -> my.Histogram:
        return my.Histogram(cls.get_uhi())


class TestAccess3D(uhi.testing.indexing.Indexing3D[my.Histogram]):
    @classmethod
    def make_histogram(cls) -> my.Histogram:
        return my.Histogram(cls.get_uhi())
```

If you don't support serialization, then you can manually set the values with the UHI item, or check the docstrings to see what the correct parameters are.

Make sure you don't use `from uhi.testing.indexing import ...` style imports, as some runners (unittest) will pick the base classes up and try to run those too.
uhi-1.0.0/environment.yml000066400000000000000000000001551505142447500153740ustar00rootroot00000000000000name: uhi
channels:
  - conda-forge
dependencies:
  - pip
  - pytest
  - root
  - boost-histogram
uhi-1.0.0/noxfile.py000077500000000000000000000045351505142447500143300ustar00rootroot00000000000000#!/usr/bin/env -S uv run -q
# /// script
# dependencies = ["nox>=2025.2.9"]
# ///

from __future__ import annotations

import argparse
from pathlib import Path

import nox

nox.needs_version = ">=2025.2.9"
nox.options.default_venv_backend = "uv|virtualenv"

PYPROJECT = nox.project.load_toml()
ALL_PYTHONS = nox.project.python_versions(PYPROJECT)

DIR = Path(__file__).parent.resolve()


@nox.session
def lint(session):
    """
    Run the linter.
    """
    session.install("pre-commit")
    session.run("pre-commit", "run", "--all-files", *session.posargs)


@nox.session(python=ALL_PYTHONS)
def tests(session):
    """
    Run the unit and regular tests.
    """
    session.install("-e.[schema]", *nox.project.dependency_groups(PYPROJECT, "test"))
    session.run("pytest", *session.posargs)


@nox.session(reuse_venv=True, default=False)
def docs(session: nox.Session) -> None:
    """
    Build the docs. Use "--non-interactive" to avoid serving. Pass "-b linkcheck" to check links.
    """

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-b", dest="builder", default="html", help="Build target (default: html)"
    )
    args, posargs = parser.parse_known_args(session.posargs)
    serve = args.builder == "html" and session.interactive

    extra_installs = ["sphinx-autobuild"] if serve else []
    session.install(
        "-e.", *extra_installs, *nox.project.dependency_groups(PYPROJECT, "docs")
    )
    session.chdir("docs")

    shared_args = (
        "-n",  # nitpicky mode
        "-T",  # full tracebacks
        f"-b={args.builder}",
        ".",
        f"_build/{args.builder}",
        *posargs,
    )

    if serve:
        session.run("sphinx-autobuild", "--open-browser", *shared_args)
    else:
        session.run("sphinx-build", "--keep-going", *shared_args)


@nox.session(default=False)
def build(session):
    """
    Build an SDist and wheel.
    """
    if session.venv_backend == "uv":
        session.run("uv", "build")
    else:
        session.install("build")
        session.run("python", "-m", "build")


@nox.session(venv_backend="conda", default=False)
def root_tests(session):
    """
    Test against ROOT.
""" session.conda_install("--channel=conda-forge", "ROOT", "pytest", "boost-histogram") session.install("-e.") session.run("pytest", "tests/test_root.py") if __name__ == "__main__": nox.run() uhi-1.0.0/pyproject.toml000066400000000000000000000075151505142447500152300ustar00rootroot00000000000000[build-system] requires = ["hatchling", "hatch-vcs"] build-backend = "hatchling.build" [project] name = "uhi" description = "Unified Histogram Interface: tools to help library authors work with histograms" authors = [ {name="Henry Schreiner", email="henryschreineriii@gmail.com"}, {name="Hans Dembinski", email="hans.dembinski@gmail.com"}, {name="Jim Pivaski", email="jpivarski@gmail.com"}, {name="Silia Taider", email="siliataider@gmail.com"}, ] maintainers = [ {name="The Scikit-HEP admins", email="scikit-hep-admins@googlegroups.com"}, ] requires-python = ">=3.9" readme = "README.md" license = "bsd-3-clause" license-files =["LICENSE"] dependencies = [ "numpy>=1.19.3", "typing_extensions>=4; python_version<'3.11'", ] classifiers = [ "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "Intended Audience :: Science/Research", "Operating System :: OS Independent", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", "Programming Language :: Python :: 3.14", "Topic :: Scientific/Engineering", "Typing :: Typed", ] dynamic = ["version"] [project.urls] Homepage = "https://github.com/Scikit-HEP/uhi" Repository = "https://github.com/Scikit-HEP/uhi" Documentation = "https://uhi.readthedocs.io/en/latest/" Changelog = "https://github.com/scikit-hep/uhi/releases" [project.optional-dependencies] schema = [ "fastjsonschema", "importlib-resources; python_version<'3.9'", ] hdf5 = [ "h5py", ] [dependency-groups] docs = [ "sphinx>=4.0", "furo", "sphinx-copybutton>=0.3.1", "sphinx-jsonschema", "myst-parser", "sphinx_github_changelog", ] test-core = [ "pytest>=6", "boost-histogram>=1.4", "hist>=2.6", "fastjsonschema", "packaging", ] test = [ { include-group = "test-core" }, "h5py; platform_python_implementation == 'CPython' and python_version<'3.14'", # Doesn't support free-threaded Python currently ] dev = [{ include-group = "test" }] [tool.hatch] version.source = "vcs" build.hooks.vcs.version-file = "src/uhi/_version.py" [tool.pytest.ini_options] minversion = "6.0" addopts = ["-ra", "--showlocals", "--strict-markers", "--strict-config"] xfail_strict = true filterwarnings = ["error"] testpaths = ["tests"] log_cli_level = "INFO" [tool.mypy] files = ["src", "tests"] python_version = "3.9" warn_unused_configs = true strict = true warn_unreachable = true enable_error_code = ["ignore-without-code", "redundant-expr", "truthy-bool"] [[tool.mypy.overrides]] module = ["fastjsonschema", "h5py"] ignore_missing_imports = true [tool.ruff.lint] extend-select = [ "B", # flake8-bugbear "I", # isort "ARG", # flake8-unused-arguments "C4", # flake8-comprehensions "EM", # flake8-errmsg "ICN", # flake8-import-conventions "ISC", # flake8-implicit-str-concat "G", # flake8-logging-format "PGH", # pygrep-hooks "PIE", # flake8-pie "PL", # pylint "PT", # flake8-pytest-style "PTH", # flake8-use-pathlib "RET", # flake8-return "RUF", # Ruff-specific "SIM", # flake8-simplify "T20", # flake8-print "UP", # pyupgrade "YTT", # flake8-2020 "EXE", # flake8-executable "NPY", # NumPy specific rules "PD", # pandas-vet ] ignore = [ "PLR09", # Design related pylint codes "PLR2004", 
# Magic value in comparison "PT013", # Incorrect import of pytest ] flake8-unused-arguments.ignore-variadic-names = true isort.required-imports = ["from __future__ import annotations"] [tool.ruff.lint.per-file-ignores] "tests/**" = ["T20", "PLC0415"] "noxfile.py" = ["T20"] "tests/test_ensure.py" = ["NPY002"] "src/**" = ["PT"] uhi-1.0.0/src/000077500000000000000000000000001505142447500130735ustar00rootroot00000000000000uhi-1.0.0/src/uhi/000077500000000000000000000000001505142447500136605ustar00rootroot00000000000000uhi-1.0.0/src/uhi/__init__.py000066400000000000000000000001541505142447500157710ustar00rootroot00000000000000from __future__ import annotations from ._version import version as __version__ __all__ = ["__version__"] uhi-1.0.0/src/uhi/_version.pyi000066400000000000000000000001661505142447500162320ustar00rootroot00000000000000from __future__ import annotations version: str version_tuple: tuple[int, int, int] | tuple[int, int, int, str, str] uhi-1.0.0/src/uhi/io/000077500000000000000000000000001505142447500142675ustar00rootroot00000000000000uhi-1.0.0/src/uhi/io/__init__.py000066400000000000000000000107551505142447500164100ustar00rootroot00000000000000from __future__ import annotations import copy import sys from typing import Any, TypeVar import numpy as np from ..typing.serialization import AnyHistogramIR, AxisIR, HistogramIR if sys.version_info < (3, 11): from typing_extensions import assert_never else: from typing import assert_never __all__ = ["ARRAY_KEYS", "LIST_KEYS", "from_sparse", "remove_writer_info", "to_sparse"] ARRAY_KEYS = frozenset( [ "index", "values", "variances", "edges", "counts", "sum_of_weights", "sum_of_weights_squared", ] ) LIST_KEYS = frozenset( [ "categories", ] ) T = TypeVar("T", bound="dict[str, Any]") H = TypeVar("H", bound="dict[str, Any] | HistogramIR | AnyHistogramIR") def remove_writer_info(obj: T, /, *, library: str | None) -> T: """ Removes all ``writer_info`` for a library from a histogram dict, axes dict, or storage dict. Makes copies where required, and the outer dictionary is always copied. Specify a library name, or ``None`` to remove all. """ obj = copy.copy(obj) if library is None: obj.pop("writer_info") elif library in obj.get("writer_info", {}): obj["writer_info"] = copy.copy(obj["writer_info"]) del obj["writer_info"][library] if "axes" in obj: obj["axes"] = [remove_writer_info(ax, library=library) for ax in obj["axes"]] if "storage" in obj: obj["storage"] = remove_writer_info(obj["storage"], library=library) return obj def _compute_axis_length(axis: AxisIR) -> int: if axis["type"] == "regular": return axis["bins"] + axis["underflow"] + axis["overflow"] if axis["type"] == "variable": return len(axis["edges"]) - 1 + axis["underflow"] + axis["overflow"] if axis["type"] == "category_str" or axis["type"] == "category_int": return len(axis["categories"]) + axis["flow"] if axis["type"] == "boolean": return 2 assert_never(axis["type"]) def _zerokey(storage_type: str, key: str) -> bool: """ Returns False if the comparison should be NaN instead of zero. """ return storage_type != "weighted_mean" or key != "variances" def to_sparse(hist: H, /) -> H: """ Convert a dense histogram to a sparse one. Leaves a sparse histogram alone. 
""" storage = hist["storage"] storage_type = storage["type"] # Ignore histograms that have 0 dimensions or are already sparse if "index" in storage or not hist["axes"]: return hist # Get the arrays inside storage, ignoring "type" arrays = {k: np.asarray(v) for k, v in storage.items() if k != "type"} # Build mask of nonzero bins across *all* present keys mask = np.any( [ arr != 0 if _zerokey(storage_type, k) else ~np.isnan(arr) for k, arr in arrays.items() ], axis=0, ) # Get the flat indices (or unravel them) nonzero_indices = np.nonzero(mask) # Pack indices into a single (ndim, n_nonzero) array index = np.vstack(nonzero_indices) # Build sparse storage dict sparse_storage = {"type": storage_type, "index": index} for k, arr in arrays.items(): sparse_storage[k] = arr[mask] # Return new histogram dict with modified storage sparse_hist = copy.copy(hist) sparse_hist["storage"] = sparse_storage # type: ignore[arg-type] return sparse_hist def from_sparse(sparse: H, /) -> H: """ Convert sparse histogram data back to dense format. If the histogram is already dense, just return it. """ storage = sparse["storage"] storage_type = storage["type"] index = storage.get("index") if index is None: return sparse ndim, n_nonzero = index.shape shape = [_compute_axis_length(a) for a in sparse["axes"]] # type: ignore[arg-type] if len(shape) != ndim: msg = f"Shape {shape} does not match sparse index dimension {ndim}" raise ValueError(msg) dense_storage = {"type": storage["type"]} for k, arr1d in storage.items(): arr1dnp = np.asarray(arr1d) if k in {"index", "type"}: continue # Allocate a zeros (or nan) array of the original shape full = np.full( shape, 0 if _zerokey(storage_type, k) else np.nan, dtype=arr1dnp.dtype ) # Scatter sparse values back into dense array full[tuple(index)] = arr1dnp dense_storage[k] = full retval = copy.copy(sparse) retval["storage"] = dense_storage # type: ignore[arg-type] return retval uhi-1.0.0/src/uhi/io/_common.py000066400000000000000000000023471505142447500162760ustar00rootroot00000000000000""" Common helpers for the different formats. """ from __future__ import annotations import typing from ..typing.serialization import AnyAxisIR, AnyHistogramIR, ToUHIHistogram __all__ = ["_check_uhi_schema_version", "_convert_input"] def _check_uhi_schema_version(uhi_schema: int, /) -> None: if uhi_schema != 1: msg = "Only uhi_schema=1 supported in this uhi version. Please update uhi." raise TypeError(msg) def _remove_empty_metadata(hist: AnyHistogramIR, /) -> AnyHistogramIR: if "metadata" in hist and not hist["metadata"]: hist = typing.cast( AnyHistogramIR, {k: v for k, v in hist.items() if k != "metadata"} ) for i in range(len(hist["axes"])): axis = hist["axes"][i] if "metadata" in axis and not axis["metadata"]: hist["axes"][i] = typing.cast( AnyAxisIR, {k: v for k, v in axis.items() if k != "metadata"} ) return hist def _convert_input(hist: AnyHistogramIR | ToUHIHistogram, /) -> AnyHistogramIR: any_hist = typing.cast( AnyHistogramIR, hist._to_uhi_() if isinstance(hist, ToUHIHistogram) else hist ) _check_uhi_schema_version(any_hist["uhi_schema"]) return _remove_empty_metadata(any_hist) uhi-1.0.0/src/uhi/io/hdf5.py000066400000000000000000000147321505142447500154760ustar00rootroot00000000000000from __future__ import annotations import typing from typing import Any import h5py import numpy as np from ..typing.serialization import ( AnyAxisIR, AnyHistogramIR, AnyStorageIR, HistogramIR, SupportedMetadata, ToUHIHistogram, ) from . 
import ARRAY_KEYS from ._common import _check_uhi_schema_version, _convert_input __all__ = ["read", "write"] def __dir__() -> list[str]: return __all__ def _handle_metadata_writer_info( grp: h5py.Group, metadata: dict[str, SupportedMetadata] | None, writer_info: dict[str, dict[str, SupportedMetadata]] | None, ) -> None: # Metadata if metadata: metadata_grp = grp.create_group("metadata") for key, val1 in metadata.items(): metadata_grp.attrs[key] = val1 # Writer info if writer_info: writer_info_grp = grp.create_group("writer_info") for key, value in writer_info.items(): inner_wi_grp = writer_info_grp.create_group(key) for k, v in value.items(): inner_wi_grp.attrs[k] = v def write( grp: h5py.Group, /, histogram: AnyHistogramIR | ToUHIHistogram, *, compression: str = "gzip", compression_opts: int = 4, min_compress_elements: int = 1_000, ) -> None: """ Write a histogram to an HDF5 group. Arrays larger than `min_compress_elements` will be compressed; set to 0 to compress all arrays. The `compression` and `compression_opts` arguments are passed through. """ histogram = _convert_input(histogram) # All referenced objects will be stored inside of /{name}/ref_axes hist_folder_storage = grp.create_group("ref_axes") # UHI version number grp.attrs["uhi_schema"] = histogram["uhi_schema"] _handle_metadata_writer_info( grp, histogram.get("metadata"), histogram.get("writer_info") ) # Axes axes_dataset = grp.create_dataset( "axes", len(histogram["axes"]), dtype=h5py.special_dtype(ref=h5py.Reference) ) for i, axis in enumerate(histogram["axes"]): # Iterating through the axes, calling `create_axes_object` for each of them, # creating references to new groups and appending it to the `items` dataset defined above ax_group = hist_folder_storage.create_group(f"axis_{i}") ax_info = axis.copy() ax_edges_raw = ax_info.pop("edges", None) ax_edges = np.asarray(ax_edges_raw) if ax_edges_raw is not None else None ax_cats: list[int] | list[str] | None = ax_info.pop("categories", None) _handle_metadata_writer_info( ax_group, ax_info.pop("metadata", None), ax_info.pop("writer_info", None) ) for key, val2 in ax_info.items(): ax_group.attrs[key] = val2 if ax_edges is not None: if ax_edges.size < min_compress_elements: ax_group.create_dataset("edges", data=ax_edges) else: ax_group.create_dataset( "edges", data=ax_edges, compression=compression, compression_opts=compression_opts, ) if ax_cats is not None: if len(ax_cats) < min_compress_elements: ax_group.create_dataset("categories", data=ax_cats) else: ax_group.create_dataset( "categories", data=ax_cats, compression=compression, compression_opts=compression_opts, ) axes_dataset[i] = ax_group.ref # Storage storage_grp = grp.create_group("storage") storage_type = histogram["storage"]["type"] storage_grp.attrs["type"] = storage_type for key, val3 in histogram["storage"].items(): if key == "type": continue npvalue = np.asarray(val3) if npvalue.size < min_compress_elements: storage_grp.create_dataset(key, data=npvalue) else: storage_grp.create_dataset( key, data=npvalue, compression=compression, compression_opts=compression_opts, ) def _convert_item(name: str, item: Any, /) -> Any: """ Convert an HDF5 item to a native Python type. 
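
    For example, ``b"abc"`` becomes ``"abc"``, a NumPy scalar becomes the
    matching plain Python number, and arrays named in ``ARRAY_KEYS`` are
    passed through unchanged.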
""" if isinstance(item, bytes): return item.decode("utf-8") if name == "metadata": return {k: _convert_item("", v) for k, v in item.items()} if name in ARRAY_KEYS: return item if isinstance(item, np.generic): return item.item() return item def _read_metadata_writer_info( output: AnyHistogramIR | AnyAxisIR, group: h5py.Group | h5py.Dataset | h5py.Datatype ) -> None: if "metadata" in group: output["metadata"] = _convert_item("metadata", group["metadata"].attrs) if "writer_info" in group: output["writer_info"] = { k: _convert_item("metadata", v.attrs) for k, v in group["writer_info"].items() } def _convert_axes(group: h5py.Group | h5py.Dataset | h5py.Datatype) -> AnyAxisIR: """ Convert an HDF5 axis reference to a dictionary. """ assert isinstance(group, h5py.Group) axis = typing.cast( AnyAxisIR, {k: _convert_item(k, v) for k, v in group.attrs.items()} ) if "edges" in group: edges = group["edges"] assert isinstance(edges, h5py.Dataset) axis["edges"] = np.asarray(edges) if "categories" in group: categories = group["categories"] assert isinstance(categories, h5py.Dataset) axis["categories"] = [_convert_item("", c) for c in categories] _read_metadata_writer_info(axis, group) return axis def read(grp: h5py.Group, /) -> HistogramIR: """ Read a histogram from an HDF5 group. """ uhi_schema = _convert_item("", grp.attrs["uhi_schema"]) _check_uhi_schema_version(uhi_schema) axes_grp = grp["axes"] axes_ref = grp["ref_axes"] assert isinstance(axes_ref, h5py.Group) assert isinstance(axes_grp, h5py.Dataset) axes = [_convert_axes(axes_ref[unref_axis_ref]) for unref_axis_ref in axes_ref] storage_grp = grp["storage"] assert isinstance(storage_grp, h5py.Group) storage = AnyStorageIR(type=storage_grp.attrs["type"]) for key in storage_grp: storage[key] = np.asarray(storage_grp[key]) # type: ignore[literal-required] histogram_dict = AnyHistogramIR(uhi_schema=uhi_schema, axes=axes, storage=storage) _read_metadata_writer_info(histogram_dict, grp) return histogram_dict # type: ignore[return-value] uhi-1.0.0/src/uhi/io/json.py000066400000000000000000000014061505142447500156130ustar00rootroot00000000000000from __future__ import annotations from typing import Any import numpy as np from . import ARRAY_KEYS from ._common import _convert_input __all__ = ["default", "object_hook"] def __dir__() -> list[str]: return __all__ def default(obj: Any, /) -> Any: if hasattr(obj, "_to_uhi_"): return _convert_input(obj) if isinstance(obj, np.ndarray): return obj.tolist() # Convert ndarray to list msg = f"Object of type {type(obj)} is not JSON serializable" raise TypeError(msg) def object_hook(dct: dict[str, Any], /) -> dict[str, Any]: """ Decode a histogram from a dictionary. """ for item in ARRAY_KEYS & dct.keys(): if isinstance(dct[item], list): dct[item] = np.asarray(dct[item]) return dct uhi-1.0.0/src/uhi/io/zip.py000066400000000000000000000037171505142447500154530ustar00rootroot00000000000000from __future__ import annotations import functools import json import zipfile from typing import Any import numpy as np from ..typing.serialization import AnyHistogramIR, ToUHIHistogram from . import ARRAY_KEYS from ._common import _check_uhi_schema_version, _convert_input __all__ = ["read", "write"] def __dir__() -> list[str]: return __all__ def write( zip_file: zipfile.ZipFile, /, name: str, histogram: AnyHistogramIR | ToUHIHistogram, ) -> None: """ Write a histogram to a zip file. 
""" histogram = _convert_input(histogram) # Write out numpy arrays to files in the zipfile for storage_key in ARRAY_KEYS & histogram["storage"].keys(): path = f"{name}_storage_{storage_key}.npy" with zip_file.open(path, "w") as f: np.save(f, histogram["storage"][storage_key]) # type: ignore[literal-required] histogram["storage"][storage_key] = path # type: ignore[literal-required] for axis in histogram["axes"]: for key in ARRAY_KEYS & axis.keys(): path = f"{name}_axis_{key}.npy" with zip_file.open(path, "w") as f: np.save(f, axis[key]) # type: ignore[literal-required] axis[key] = path # type: ignore[literal-required] hist_json = json.dumps(histogram) zip_file.writestr(f"{name}.json", hist_json) def _object_hook( dct: dict[str, Any], /, *, zip_file: zipfile.ZipFile ) -> dict[str, Any]: for item in ARRAY_KEYS & dct.keys(): if isinstance(dct[item], str): dct[item] = np.load(zip_file.open(dct[item])) return dct def read(zip_file: zipfile.ZipFile, /, name: str) -> dict[str, Any]: """ Read histograms from a zip file. """ object_hook = functools.partial(_object_hook, zip_file=zip_file) with zip_file.open(f"{name}.json") as f: output: dict[str, Any] = json.load(f, object_hook=object_hook) _check_uhi_schema_version(output["uhi_schema"]) return output uhi-1.0.0/src/uhi/numpy_plottable.py000066400000000000000000000303121505142447500174470ustar00rootroot00000000000000""" This file holds the NumPyPlottableHistogram, meant to adapt any histogram that does not support the PlottableHistogram Protocol into a NumPy-backed standin for it, so plotting functions can remain simple and depend on having a PlottableHistogram regardless of the input. And this comes with an adaptor function, ensure_plottable_histogram, which will adapt common input types to a NumPyPlottibleHistogram, and pass a valid PlottibleHistogram through. Keep in mind, NumPyPlottableHistogram is a minimal PlottableHistogram instance, and does not provide any further functionality and is not intended to be used beyond plotting. Please see a full histogram library like boost-histogram or hist. """ from __future__ import annotations import abc import enum import typing from collections.abc import Iterator, Sequence from typing import TYPE_CHECKING, Any import numpy as np from uhi.typing.plottable import ( PlottableAxis, PlottableAxisGeneric, PlottableHistogram, PlottableTraits, ) if TYPE_CHECKING: from numpy.typing import ArrayLike else: ArrayLike = Any class Kind(str, enum.Enum): COUNT = "COUNT" MEAN = "MEAN" class Traits: __slots__ = ("circular", "discrete") def __init__(self, *, circular: bool = False, discrete: bool = False) -> None: self.circular = circular self.discrete = discrete if TYPE_CHECKING: _traits: PlottableTraits = typing.cast(Traits, None) class NumPyPlottableAxis: def __init__(self, edges: np.typing.NDArray[Any]) -> None: """ The vals should already be an Nx2 ndarray of edges. """ self.traits: PlottableTraits = Traits() assert edges.ndim == 2, "Must be 2D array of edges" assert edges.shape[1] == 2, "Second dimension must be 2 (lower, upper)" self.edges = edges def __repr__(self) -> str: """ Just to be nice for debugging. Not required for the Protocol. """ return f"{self.__class__.__name__}({self.edges!r})" def __getitem__(self, index: int) -> tuple[float, float]: """ Get the pair of edges (not discrete) or bin label (discrete). """ return tuple(self.edges[index]) def __len__(self) -> int: """ Return the number of bins (not counting flow bins, which are ignored for this Protocol currently). 
""" return self.edges.shape[0] # type: ignore[no-any-return] def __eq__(self, other: Any) -> bool: """ Needed for the protocol (should be present to be stored in a Sequence). """ return np.allclose(self.edges, other.edges) def __iter__(self) -> Iterator[tuple[float, float]]: """ A useful part of the Protocol for easy access by plotters. """ return iter(self[t] for t in range(len(self))) __hash__ = None # type: ignore[assignment] if TYPE_CHECKING: _axis: PlottableAxisGeneric[tuple[float, float]] = typing.cast( NumPyPlottableAxis, None ) def _bin_helper(shape: int, bins: np.typing.NDArray[Any] | None) -> NumPyPlottableAxis: """ Returns a axis built from the input bins array, which can be None (0 to N), 2D lower, upper edges), or 1D (N+1 in length). """ if bins is None: return NumPyPlottableAxis( np.array([np.arange(0, shape), np.arange(1, shape + 1)]).T ) if bins.ndim == 2: return NumPyPlottableAxis(bins) if bins.ndim == 1: return NumPyPlottableAxis(np.array([bins[:-1], bins[1:]]).T) msg = "Bins not understood, should be 2d array of min/max edges or 1D array of edges or None" raise ValueError(msg) class NumPyPlottableHistogram: def __init__( self, hist: np.typing.NDArray[Any], *bins: ( np.typing.NDArray[Any] | None | tuple[np.typing.NDArray[Any] | None, ...] ), variances: np.typing.NDArray[Any] | None = None, kind: Kind = Kind.COUNT, ) -> None: self._values = hist self._variances = variances if len(bins) == 1 and isinstance(bins[0], tuple): (bins,) = bins # type: ignore[assignment] if len(bins) == 0: bins = tuple([None] * len(hist.shape)) self.kind = kind self.axes: Sequence[PlottableAxis] = [ _bin_helper(shape, b) for shape, b in zip(hist.shape, bins) ] def __repr__(self) -> str: """ Just to be nice for debugging. Not required for the Protocol. """ axes = ", ".join(repr(s) for s in self.axes) return f"{self.__class__.__name__}({self._values!r}, <{axes}>)" def values(self) -> np.typing.NDArray[Any]: return self._values def counts(self) -> np.typing.NDArray[Any]: return self._values def variances(self) -> np.typing.NDArray[Any] | None: return self._variances if TYPE_CHECKING: # Verify that the above class is a valid PlottableHistogram _: PlottableHistogram = typing.cast(NumPyPlottableHistogram, None) def _roottarray_asnumpy( tarr: Any, shape: tuple[int, ...] 
| None = None ) -> np.typing.NDArray[Any]: llv = tarr.GetArray() arr: np.typing.NDArray[Any] = np.frombuffer( llv, dtype=llv.typecode, count=tarr.GetSize() ) if shape is not None: return np.reshape(arr, shape, order="F") return arr class ROOTAxis(abc.ABC): def __init__(self, tAxis: Any) -> None: self.tAx = tAxis def __len__(self) -> int: return self.tAx.GetNbins() # type: ignore[no-any-return] @abc.abstractmethod def __getitem__(self, index: int) -> Any: pass def __eq__(self, other: Any) -> bool: if not isinstance(other, ROOTAxis): return NotImplemented return len(self) == len(other) and all( aEdges == bEdges for aEdges, bEdges in zip(self, other) ) @abc.abstractmethod def __iter__(self) -> Iterator[tuple[float, float]] | Iterator[str]: pass @staticmethod def create(tAx: Any) -> DiscreteROOTAxis | ContinuousROOTAxis: if all(tAx.GetBinLabel(i + 1) for i in range(tAx.GetNbins())): return DiscreteROOTAxis(tAx) return ContinuousROOTAxis(tAx) __hash__ = None # type: ignore[assignment] class ContinuousROOTAxis(ROOTAxis): @property def traits(self) -> PlottableTraits: return Traits(circular=False, discrete=False) def __getitem__(self, index: int) -> tuple[float, float]: return (self.tAx.GetBinLowEdge(index + 1), self.tAx.GetBinUpEdge(index + 1)) def __iter__(self) -> Iterator[tuple[float, float]]: for i in range(len(self)): yield self[i] class DiscreteROOTAxis(ROOTAxis): @property def traits(self) -> PlottableTraits: return Traits(circular=False, discrete=True) def __getitem__(self, index: int) -> str: return self.tAx.GetBinLabel(index + 1) # type: ignore[no-any-return] def __iter__(self) -> Iterator[str]: for i in range(len(self)): yield self[i] class ROOTPlottableHistBase: """Common base for ROOT histograms and TProfile""" def __init__(self, thist: Any) -> None: self.thist: Any = thist nDim = thist.GetDimension() self._shape: tuple[int, ...] = tuple( getattr(thist, f"GetNbins{ax}")() + 2 for ax in "XYZ"[:nDim] ) self.axes: tuple[ContinuousROOTAxis | DiscreteROOTAxis, ...] 
= tuple( ROOTAxis.create(getattr(thist, f"Get{ax}axis")()) for ax in "XYZ"[:nDim] ) @property def name(self) -> str: return self.thist.GetName() # type: ignore[no-any-return] class ROOTPlottableHistogram(ROOTPlottableHistBase): def __init__(self, thist: Any) -> None: super().__init__(thist) @property def hasWeights(self) -> bool: return bool(self.thist.GetSumw2() and self.thist.GetSumw2N()) @property def kind(self) -> str: return Kind.COUNT def values(self) -> np.typing.NDArray[Any]: return _roottarray_asnumpy(self.thist, shape=self._shape)[ tuple([slice(1, -1)] * len(self._shape)) ] def variances(self) -> np.typing.NDArray[Any]: if self.hasWeights: return _roottarray_asnumpy(self.thist.GetSumw2(), shape=self._shape)[ tuple([slice(1, -1)] * len(self._shape)) ] return self.values() def counts(self) -> np.typing.NDArray[Any]: if not self.hasWeights: return self.values() sumw = self.values() return np.divide( sumw**2, self.variances(), out=np.zeros_like(sumw, dtype=np.float64), where=sumw != 0, ) class ROOTPlottableProfile(ROOTPlottableHistBase): def __init__(self, thist: Any) -> None: super().__init__(thist) @property def kind(self) -> str: return Kind.MEAN def values(self) -> np.typing.NDArray[Any]: return np.array( [self.thist.GetBinContent(i) for i in range(self.thist.GetNcells())] ).reshape(self._shape, order="F")[tuple([slice(1, -1)] * len(self._shape))] def variances(self) -> np.typing.NDArray[Any]: return ( np.array([self.thist.GetBinError(i) for i in range(self.thist.GetNcells())]) ** 2 ).reshape(self._shape, order="F")[tuple([slice(1, -1)] * len(self._shape))] def counts(self) -> np.typing.NDArray[Any]: sumw = _roottarray_asnumpy(self.thist, shape=self._shape)[ tuple([slice(1, -1)] * len(self._shape)) ] if not (self.thist.GetSumw2() and self.thist.GetSumw2N()): return sumw sumw2 = _roottarray_asnumpy(self.thist.GetSumw2(), shape=self._shape)[ tuple([slice(1, -1)] * len(self._shape)) ] return np.divide( sumw**2, sumw2, out=np.zeros_like(sumw, dtype=np.float64), where=sumw != 0, ) if TYPE_CHECKING: # Verify that the above class is a valid PlottableHistogram _axis = typing.cast(ContinuousROOTAxis, None) _axis2: PlottableAxisGeneric[str] = typing.cast(DiscreteROOTAxis, None) _ = typing.cast(ROOTPlottableHistogram, None) _ = typing.cast(ROOTPlottableProfile, None) def ensure_plottable_histogram(hist: Any) -> PlottableHistogram: """ Ensure a histogram follows the PlottableHistogram Protocol. Currently supports adapting the following inputs: * .to_numpy() objects * .numpy() objects (uproot3/ROOT) * A tuple of NumPy style input. If dd style tuple, must contain "np.typing.NDArray[Any]"s. It can contain None's instead of values, including just a single None for any number of axes. """ if isinstance(hist, PlottableHistogram): # Already satisfies the Protocol return hist if hasattr(hist, "to_numpy"): # Generic (possibly Uproot 4) _tup1: tuple[np.typing.NDArray[Any], ...] = hist.to_numpy(flow=False) return NumPyPlottableHistogram(*_tup1) if hasattr(hist, "numpy"): # uproot/TH1 - TODO: could support variances _tup2: tuple[np.typing.NDArray[Any], ...] 
= hist.numpy() return NumPyPlottableHistogram(*_tup2) if isinstance(hist, tuple): # NumPy histogram tuple if len(hist) < 2: msg = "Can't be applied to less than 2D tuple" raise TypeError(msg) if ( len(hist) == 2 and isinstance(hist[1], (list, tuple)) and all(isinstance(h, np.ndarray) for h in hist[1]) ): # histogramdd tuple return NumPyPlottableHistogram( np.asarray(hist[0]), *(np.asarray(h) for h in hist[1]) ) if hist[1] is None: return NumPyPlottableHistogram( np.asarray(hist[0]), *(None for _ in np.asarray(hist[0]).shape) ) # Standard tuple return NumPyPlottableHistogram(*(np.asarray(h) for h in hist)) if hasattr(hist, "InheritsFrom") and hist.InheritsFrom("TH1"): if any( hist.InheritsFrom(profCls) for profCls in ("TProfile", "TProfile2D", "TProfile3D") ): return ROOTPlottableProfile(hist) return ROOTPlottableHistogram(hist) msg = f"Can't be used on this type of object: {hist!r}" raise TypeError(msg) uhi-1.0.0/src/uhi/py.typed000066400000000000000000000000001505142447500153450ustar00rootroot00000000000000uhi-1.0.0/src/uhi/resources/000077500000000000000000000000001505142447500156725ustar00rootroot00000000000000uhi-1.0.0/src/uhi/resources/histogram.schema.json000066400000000000000000000227501505142447500220270ustar00rootroot00000000000000{ "$schema": "http://json-schema.org/draft-07/schema#", "$id": "https://raw.githubusercontent.com/scikit-hep/uhi/main/src/uhi/resources/histogram.schema.json", "title": "Histogram", "type": "object", "additionalProperties": false, "patternProperties": { ".+": { "type": "object", "required": ["axes", "storage"], "additionalProperties": false, "properties": { "uhi_schema": { "const": 1, "description": "The schema version number" }, "writer_info": { "$ref": "#/$defs/writer_info" }, "metadata": { "$ref": "#/$defs/metadata" }, "axes": { "type": "array", "description": "A list of the axes of the histogram.", "items": { "oneOf": [ { "$ref": "#/$defs/regular_axis" }, { "$ref": "#/$defs/variable_axis" }, { "$ref": "#/$defs/category_str_axis" }, { "$ref": "#/$defs/category_int_axis" }, { "$ref": "#/$defs/boolean_axis" } ] } }, "storage": { "description": "The storage of the bins of the histogram.", "oneOf": [ { "$ref": "#/$defs/int_storage" }, { "$ref": "#/$defs/double_storage" }, { "$ref": "#/$defs/weighted_storage" }, { "$ref": "#/$defs/mean_storage" }, { "$ref": "#/$defs/weighted_mean_storage" } ] } } } }, "$defs": { "supported_metadata": { "oneOf": [ { "type": "string" }, { "type": "number" }, { "type": "boolean" } ] }, "metadata": { "type": "object", "description": "Arbitrary metadata dictionary.", "additionalProperties": false, "patternProperties": { ".+": { "$ref": "#/$defs/supported_metadata" } } }, "writer_info": { "type": "object", "description": "Information from the library that created the histogram.", "additionalProperties": false, "patternProperties": { ".+": { "type": "object", "description": "Library specific information.", "additionalProperties": false, "patternProperties": { ".+": { "$ref": "#/$defs/supported_metadata" } } } } }, "ndarray": { "type": "array", "items": { "oneOf": [{ "type": "number" }, { "$ref": "#/$defs/ndarray" }] }, "description": "A ND (nested) array of numbers." 
}, "data_array": { "oneOf": [ { "type": "string", "description": "A path (similar to URI) to the floating point bin data" }, { "$ref": "#/$defs/ndarray" } ] }, "sparse_array": { "oneOf": [ { "type": "string", "description": "A path (similar to URI) to the index data" }, { "type": "array", "items": { "type": "array", "items": { "type": "integer", "description": "Bin indexes, 0 is the underflow if present" }, "description": "A 2D array of integers." }, "description": "The 2D index array, outer dimension is axis number, inner matches the 1D data." } ] }, "regular_axis": { "type": "object", "description": "An evenly spaced set of continuous bins.", "required": [ "type", "lower", "upper", "bins", "underflow", "overflow", "circular" ], "additionalProperties": false, "properties": { "type": { "type": "string", "const": "regular" }, "lower": { "type": "number", "description": "Lower edge of the axis." }, "upper": { "type": "number", "description": "Upper edge of the axis." }, "bins": { "type": "integer", "minimum": 0, "description": "Number of bins in the axis." }, "underflow": { "type": "boolean", "description": "True if there is a bin for underflow." }, "overflow": { "type": "boolean", "description": "True if there is a bin for overflow." }, "circular": { "type": "boolean", "description": "True if the axis wraps around." }, "metadata": { "$ref": "#/$defs/metadata" }, "writer_info": { "$ref": "#/$defs/writer_info" } } }, "variable_axis": { "type": "object", "description": "A variably spaced set of continuous bins.", "required": ["type", "edges", "underflow", "overflow", "circular"], "additionalProperties": false, "properties": { "type": { "type": "string", "const": "variable" }, "edges": { "oneOf": [ { "type": "array", "items": { "type": "number", "minItems": 2, "uniqueItems": true } }, { "type": "string", "description": "A path (URI?) to the edges data." } ] }, "underflow": { "type": "boolean" }, "overflow": { "type": "boolean" }, "circular": { "type": "boolean" }, "metadata": { "$ref": "#/$defs/metadata" }, "writer_info": { "$ref": "#/$defs/writer_info" } } }, "category_str_axis": { "type": "object", "description": "A set of string categorical bins.", "required": ["type", "categories", "flow"], "additionalProperties": false, "properties": { "type": { "type": "string", "const": "category_str" }, "categories": { "type": "array", "items": { "type": "string" }, "uniqueItems": true }, "flow": { "type": "boolean", "description": "True if flow bin (at the overflow position) present." }, "metadata": { "$ref": "#/$defs/metadata" }, "writer_info": { "$ref": "#/$defs/writer_info" } } }, "category_int_axis": { "type": "object", "description": "A set of integer categorical bins in any order.", "required": ["type", "categories", "flow"], "additionalProperties": false, "properties": { "type": { "type": "string", "const": "category_int" }, "categories": { "type": "array", "items": { "type": "integer" }, "uniqueItems": true }, "flow": { "type": "boolean", "description": "True if flow bin (at the overflow position) present." 
}, "metadata": { "$ref": "#/$defs/metadata" }, "writer_info": { "$ref": "#/$defs/writer_info" } } }, "boolean_axis": { "type": "object", "description": "A simple true/false axis with no flow.", "required": ["type"], "additionalProperties": false, "properties": { "type": { "type": "string", "const": "boolean" }, "metadata": { "$ref": "#/$defs/metadata" }, "writer_info": { "$ref": "#/$defs/writer_info" } } }, "int_storage": { "type": "object", "description": "A storage holding integer counts.", "required": ["type", "values"], "additionalProperties": false, "properties": { "type": { "type": "string", "const": "int" }, "index": { "$ref": "#/$defs/sparse_array" }, "values": { "$ref": "#/$defs/data_array" } } }, "double_storage": { "type": "object", "description": "A storage holding floating point counts.", "required": ["type", "values"], "additionalProperties": false, "properties": { "type": { "type": "string", "const": "double" }, "index": { "$ref": "#/$defs/sparse_array" }, "values": { "$ref": "#/$defs/data_array" } } }, "weighted_storage": { "type": "object", "description": "A storage holding floating point counts and variances.", "required": ["type", "values", "variances"], "additionalProperties": false, "properties": { "type": { "type": "string", "const": "weighted" }, "index": { "$ref": "#/$defs/sparse_array" }, "values": { "$ref": "#/$defs/data_array" }, "variances": { "$ref": "#/$defs/data_array" } } }, "mean_storage": { "type": "object", "description": "A storage holding 'profile'-style floating point counts, values, and variances.", "required": ["type", "counts", "values", "variances"], "additionalProperties": false, "properties": { "type": { "type": "string", "const": "mean" }, "index": { "$ref": "#/$defs/sparse_array" }, "counts": { "$ref": "#/$defs/data_array" }, "values": { "$ref": "#/$defs/data_array" }, "variances": { "$ref": "#/$defs/data_array" } } }, "weighted_mean_storage": { "type": "object", "description": "A storage holding 'profile'-style floating point ∑weights, ∑weights², values, and variances.", "required": [ "type", "sum_of_weights", "sum_of_weights_squared", "values", "variances" ], "additionalProperties": false, "properties": { "type": { "type": "string", "const": "weighted_mean" }, "index": { "$ref": "#/$defs/sparse_array" }, "sum_of_weights": { "$ref": "#/$defs/data_array" }, "sum_of_weights_squared": { "$ref": "#/$defs/data_array" }, "values": { "$ref": "#/$defs/data_array" }, "variances": { "$ref": "#/$defs/data_array" } } } } } uhi-1.0.0/src/uhi/schema.py000066400000000000000000000025241505142447500154750ustar00rootroot00000000000000from __future__ import annotations import functools import json import sys from collections.abc import Callable from importlib import resources from pathlib import Path from typing import Any histogram_file = resources.files("uhi") / "resources/histogram.schema.json" __all__ = ["histogram_file", "validate"] def __dir__() -> list[str]: return __all__ @functools.cache def _histogram_schema() -> Callable[[dict[str, Any]], None]: import fastjsonschema # noqa: PLC0415 with histogram_file.open(encoding="utf-8") as f: return fastjsonschema.compile(json.load(f)) # type: ignore[no-any-return] def validate(data: dict[str, Any]) -> None: """Validate a histogram object against the schema.""" validate = _histogram_schema() validate(data) def main(*files: str) -> None: """Validate histogram files.""" import fastjsonschema # noqa: PLC0415 retval = 0 for file in files: with Path(file).open(encoding="utf-8") as f: data = json.load(f) try: 
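            # The compiled validator raises fastjsonschema.JsonSchemaValueException on failure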
            validate(data)
        except fastjsonschema.JsonSchemaValueException as e:
            print(f"ERROR {file}: {e.message}")  # noqa: T201
            retval = 1
        else:
            print(f"OK {file}")  # noqa: T201
    if retval:
        raise SystemExit(retval)


if __name__ == "__main__":
    main(*sys.argv[1:])
uhi-1.0.0/src/uhi/tag.py000066400000000000000000000054701505142447500150130ustar00rootroot00000000000000from __future__ import annotations

import copy
import typing
from typing import Any

from .typing.plottable import PlottableAxis

__all__ = ["Locator", "Overflow", "Underflow", "at", "loc", "rebin"]


def __dir__() -> list[str]:
    return __all__


T = typing.TypeVar("T", bound="Locator")


class Locator:
    __slots__ = ("offset",)
    NAME = ""

    def __init__(self, offset: int = 0) -> None:
        if not isinstance(offset, int):
            msg = "The offset must be an integer"  # type: ignore[unreachable]
            raise ValueError(msg)
        self.offset = offset

    def __add__(self: T, offset: int) -> T:
        other = copy.copy(self)
        other.offset += offset
        return other

    def __sub__(self: T, offset: int) -> T:
        other = copy.copy(self)
        other.offset -= offset
        return other

    def _print_self_(self) -> str:
        return ""

    def __repr__(self) -> str:
        s = self.NAME or self.__class__.__name__
        s += self._print_self_()
        if self.offset != 0:
            s += " + " if self.offset > 0 else " - "
            s += str(abs(self.offset))
        return s

    if typing.TYPE_CHECKING:
        # Type checkers think that this is required
        def __index__(self) -> int:
            return 42


class loc(Locator):
    __slots__ = ("value",)

    def __init__(self, value: str | float, offset: int = 0) -> None:
        super().__init__(offset)
        self.value = value

    def _print_self_(self) -> str:
        return f"({self.value})"

    # TODO: clarify that .index() is required
    def __call__(self, axis: Any) -> int:
        return axis.index(self.value) + self.offset  # type: ignore[no-any-return]


class Underflow(Locator):
    __slots__ = ()
    NAME = "underflow"

    def __call__(self, axis: PlottableAxis) -> int:  # noqa: ARG002
        return -1 + self.offset


underflow = Underflow()


class Overflow(Locator):
    __slots__ = ()
    NAME = "overflow"

    def __call__(self, axis: PlottableAxis) -> int:
        return len(axis) + self.offset


overflow = Overflow()


class at:
    __slots__ = ("value",)

    def __init__(self, value: int) -> None:
        self.value = value

    def __call__(self, axis: PlottableAxis) -> int:  # noqa: ARG002
        return self.value


class rebin:
    """
    When used in the step of a Histogram's slice, rebin(n) combines bins,
    scaling their widths by a factor of n. If the number of bins is not
    divisible by n, the remainder is added to the overflow bin.
    """

    def __init__(self, factor: int) -> None:
        # Items with .factor are specially treated in boost-histogram,
        # performing a high performance rebinning
        self.factor = factor

    if typing.TYPE_CHECKING:
        # Type checkers think that this is required
        def __index__(self) -> int:
            return 42
uhi-1.0.0/src/uhi/testing/000077500000000000000000000000001505142447500153355ustar00rootroot00000000000000uhi-1.0.0/src/uhi/testing/__init__.py000066400000000000000000000000001505142447500174340ustar00rootroot00000000000000uhi-1.0.0/src/uhi/testing/indexing.py000066400000000000000000000671221505142447500175240ustar00rootroot00000000000000from __future__ import annotations

import abc
import typing
import unittest
from typing import Any

import numpy as np

import uhi.tag
from uhi.typing.serialization import HistogramIR

T = typing.TypeVar("T", bound=Any)

__all__ = ["Indexing1D", "Indexing2D", "Indexing3D"]

if typing.TYPE_CHECKING:
    # This is a workaround for the issue with type checkers
    # and generics with custom __getitem__ methods.
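    # Shadowing the builtin is only visible to type checkers; at runtime the
    # real ``sum`` builtin is what appears in slices like ``h[::sum]``.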
sum = 42 def __dir__() -> list[str]: return __all__ class Indexing(abc.ABC, unittest.TestCase): """ This super class provides the basic structure for indexing tests. """ def bin_to_value(self, bin: Any) -> Any: """ Allow downstream classes to handle more complex bin objects. """ return bin def value_to_bin(self, value: Any) -> Any: """ Inverse of bin_to_value, for downstream classes with complex bins. """ return value def values_to_bins(self, values: Any) -> Any: """ Loops over values and converts them to bins using value_to_bin. """ return [self.value_to_bin(v) for v in values] def sum_to_value(self, bin: Any) -> Any: """ Allow downstream classes to handle more summed bin objects. E.g. it can be a projection from a Histo2D to Histo1D. """ return bin def assertEqualBinValue(self, bin: Any, value: Any) -> None: self.assertEqual(self.bin_to_value(bin), value) def assertEqualSum(self, bin: Any, value: Any) -> None: self.assertEqual(self.sum_to_value(bin), value) class Indexing1D(typing.Generic[T], Indexing): """ This test requires a histogram to be created first. h is a 1D histogram with 10 bins from 0 to 1. Each bin has 2 more than the one before, starting with 0. The overflow bin has 1. The underflow bin has 3. You can access the UHI serialized version with `.uhi`. """ h: T tag = uhi.tag @staticmethod def get_uhi() -> HistogramIR: return { "uhi_schema": 1, "axes": [ { "type": "regular", "lower": 0.0, "upper": 1.0, "bins": 10, "underflow": True, "overflow": True, "circular": False, } ], "storage": { "type": "double", "values": np.array( [3.0, 0.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 1.0] ), }, } @classmethod @abc.abstractmethod def make_histogram(cls) -> T: pass @classmethod def setUpClass(cls) -> None: cls.h = cls.make_histogram() def test_access_integer(self) -> None: for i in range(10): with self.subTest(i=i): self.assertEqualBinValue(self.h[i], 2 * i) with self.assertRaises(IndexError): self.h[10] self.assertEqualBinValue(self.h[-1], 18) def test_access_integer_flow(self) -> None: self.assertEqualBinValue(self.h[self.tag.underflow], 3) self.assertEqualBinValue(self.h[self.tag.overflow], 1) def test_access_loc(self) -> None: self.assertEqualBinValue(self.h[self.tag.loc(0.05)], 0) self.assertEqualBinValue(self.h[self.tag.loc(0.15)], 2) self.assertEqualBinValue(self.h[self.tag.loc(0.95)], 18) self.assertEqualBinValue(self.h[self.tag.loc(-1)], 3) self.assertEqualBinValue(self.h[self.tag.loc(2)], 1) def test_access_loc_addition(self) -> None: self.assertEqualBinValue(self.h[self.tag.loc(0.05) + 1], 2) self.assertEqualBinValue(self.h[self.tag.loc(0.55) + 2], 14) self.assertEqualBinValue(self.h[self.tag.loc(0.55) - 2], 6) def test_slicing_all(self) -> None: self.assertEqual(self.h[:], self.h) self.assertEqual(self.h[...], self.h) def test_slicing_closed(self) -> None: h = self.h[2:4] self.assertEqualBinValue(h[self.tag.underflow], 5) self.assertEqualBinValue(h[0], 4) self.assertEqualBinValue(h[1], 6) self.assertEqualBinValue(h[self.tag.overflow], 79) with self.assertRaises(IndexError): h[2] def test_slicing_open_upper(self) -> None: h = self.h[5:] self.assertEqualBinValue(h[self.tag.underflow], 23) self.assertEqualBinValue(h[0], 10) self.assertEqualBinValue(h[4], 18) self.assertEqualBinValue(h[self.tag.overflow], 1) with self.assertRaises(IndexError): h[5] def test_slicing_open_lower(self) -> None: h = self.h[:5] self.assertEqualBinValue(h[self.tag.underflow], 3) self.assertEqualBinValue(h[0], 0) self.assertEqualBinValue(h[4], 8) self.assertEqualBinValue(h[self.tag.overflow], 71) 
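        # 71 = 10 + 12 + 14 + 16 + 18 + 1: bins 5-9 plus the original
        # overflow are folded into the new overflow bin.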
with self.assertRaises(IndexError): h[5] def test_slicing_loc_closed(self) -> None: h = self.h[self.tag.loc(0.2) : self.tag.loc(0.4)] self.assertEqualBinValue(h[self.tag.underflow], 5) self.assertEqualBinValue(h[0], 4) self.assertEqualBinValue(h[1], 6) self.assertEqualBinValue(h[self.tag.overflow], 79) with self.assertRaises(IndexError): h[2] def test_slicing_loc_open_upper(self) -> None: h = self.h[self.tag.loc(0.5) :] self.assertEqualBinValue(h[self.tag.underflow], 23) self.assertEqualBinValue(h[0], 10) self.assertEqualBinValue(h[4], 18) self.assertEqualBinValue(h[self.tag.overflow], 1) with self.assertRaises(IndexError): h[5] def test_slicing_loc_open_lower(self) -> None: h = self.h[: self.tag.loc(0.5)] self.assertEqualBinValue(h[self.tag.underflow], 3) self.assertEqualBinValue(h[0], 0) self.assertEqualBinValue(h[4], 8) self.assertEqualBinValue(h[self.tag.overflow], 71) with self.assertRaises(IndexError): h[5] def test_slicing_loc_mixed(self) -> None: h = self.h[2 : self.tag.loc(0.4) + 1] self.assertEqualBinValue(h[self.tag.underflow], 5) self.assertEqualBinValue(h[0], 4) self.assertEqualBinValue(h[1], 6) self.assertEqualBinValue(h[2], 8) self.assertEqualBinValue(h[self.tag.overflow], 71) with self.assertRaises(IndexError): h[3] def test_rebinning(self) -> None: # Boost-histogram allows the `::` to be skipped. h = self.h[:: self.tag.rebin(2)] self.assertEqualBinValue(h[0], 2) self.assertEqualBinValue(h[1], 10) self.assertEqualBinValue(h[2], 18) self.assertEqualBinValue(h[4], 34) with self.assertRaises(IndexError): h[5] def test_rebinning_with_endpoints(self) -> None: h = self.h[1 : 5 : self.tag.rebin(2)] self.assertEqualBinValue(h[0], 6) self.assertEqualBinValue(h[1], 14) with self.assertRaises(IndexError): h[2] def test_rebinning_with_endpoints_mixed(self) -> None: h = self.h[: self.tag.loc(0.55) : self.tag.rebin(2)] self.assertEqualBinValue(h[0], 2) self.assertEqualBinValue(h[1], 10) with self.assertRaises(IndexError): h[2] def test_full_integration(self) -> None: # boost-histogram allows the `::` to be skipped. v = self.h[::sum] self.assertEqualSum(v, 94) def test_non_flow_integration(self) -> None: v = self.h[0:len:sum] # type: ignore[misc] self.assertEqualSum(v, 90) def test_ranged_integration(self) -> None: v = self.h[2:5:sum] self.assertEqualSum(v, 18) def test_open_lower_integration(self) -> None: v = self.h[:4:sum] self.assertEqualSum(v, 15) def test_open_upper_integration(self) -> None: v = self.h[4::sum] self.assertEqualSum(v, 79) def test_setting_single_value(self) -> None: h = self.make_histogram() h[0] = self.value_to_bin(42) self.assertEqualBinValue(h[0], 42) self.assertEqualBinValue(h[1], 2) self.assertEqualBinValue(h[-1], 18) h[-1] = self.value_to_bin(99) self.assertEqualBinValue(h[-1], 99) self.assertEqualBinValue(h[0], 42) self.assertEqualBinValue(h[1], 2) def test_setting_single_value_loc(self) -> None: h = self.make_histogram() h[self.tag.loc(0.05)] = self.value_to_bin(42) self.assertEqualBinValue(h[0], 42) self.assertEqualBinValue(h[1], 2) def test_setting_underflow(self) -> None: h = self.make_histogram() h[self.tag.underflow] = self.value_to_bin(42) self.assertEqualBinValue(h[self.tag.underflow], 42) self.assertEqualBinValue(h[0], 0) def test_setting_overflow(self) -> None: h = self.make_histogram() h[self.tag.overflow] = self.value_to_bin(42) self.assertEqualBinValue(h[self.tag.overflow], 42) self.assertEqualBinValue(h[-1], 18) def test_setting_array(self) -> None: h = self.make_histogram() h[1:3] = self.value_to_bin(42) # TODO: this is broken, fix! 
# self.assertEqual(h[0], 0) self.assertEqualBinValue(h[1], 42) self.assertEqualBinValue(h[2], 42) # self.assertEqual(h[3], 6) def test_setting_array_slice(self) -> None: h = self.make_histogram() h[1:3] = self.values_to_bins([42, 42]) self.assertEqualBinValue(h[0], 0) self.assertEqualBinValue(h[1], 42) self.assertEqualBinValue(h[2], 42) self.assertEqualBinValue(h[3], 6) def test_setting_array_without_underflow(self) -> None: h = self.make_histogram() h[:3] = self.values_to_bins([42, 43, 44]) self.assertEqualBinValue(h[self.tag.underflow], 3) self.assertEqualBinValue(h[0], 42) self.assertEqualBinValue(h[1], 43) self.assertEqualBinValue(h[2], 44) self.assertEqualBinValue(h[3], 6) def test_setting_array_with_underflow(self) -> None: h = self.make_histogram() h[:3] = self.values_to_bins([41, 42, 43, 44]) self.assertEqualBinValue(h[self.tag.underflow], 41) self.assertEqualBinValue(h[0], 42) self.assertEqualBinValue(h[1], 43) self.assertEqualBinValue(h[2], 44) self.assertEqualBinValue(h[3], 6) def test_setting_array_without_overflow(self) -> None: h = self.make_histogram() h[7:] = self.values_to_bins([42, 43, 44]) self.assertEqualBinValue(h[6], 12) self.assertEqualBinValue(h[7], 42) self.assertEqualBinValue(h[8], 43) self.assertEqualBinValue(h[9], 44) self.assertEqualBinValue(h[self.tag.overflow], 1) def test_setting_array_with_overflow(self) -> None: h = self.make_histogram() h[7:] = self.values_to_bins([42, 43, 44, 45]) self.assertEqualBinValue(h[6], 12) self.assertEqualBinValue(h[7], 42) self.assertEqualBinValue(h[8], 43) self.assertEqualBinValue(h[9], 44) self.assertEqualBinValue(h[self.tag.overflow], 45) def test_setting_whole_array(self) -> None: h = self.make_histogram() h[:] = self.values_to_bins(range(10)) self.assertEqualBinValue(h[self.tag.underflow], 3) self.assertEqualBinValue(h[0], 0) self.assertEqualBinValue(h[1], 1) self.assertEqualBinValue(h[2], 2) self.assertEqualBinValue(h[3], 3) self.assertEqualBinValue(h[4], 4) self.assertEqualBinValue(h[5], 5) self.assertEqualBinValue(h[6], 6) self.assertEqualBinValue(h[7], 7) self.assertEqualBinValue(h[8], 8) self.assertEqualBinValue(h[9], 9) self.assertEqualBinValue(h[self.tag.overflow], 1) def test_setting_whole_array_with_flow(self) -> None: h = self.make_histogram() h[:] = self.values_to_bins(range(12)) self.assertEqualBinValue(h[self.tag.underflow], 0) self.assertEqualBinValue(h[0], 1) self.assertEqualBinValue(h[1], 2) self.assertEqualBinValue(h[2], 3) self.assertEqualBinValue(h[3], 4) self.assertEqualBinValue(h[4], 5) self.assertEqualBinValue(h[5], 6) self.assertEqualBinValue(h[6], 7) self.assertEqualBinValue(h[7], 8) self.assertEqualBinValue(h[8], 9) self.assertEqualBinValue(h[9], 10) self.assertEqualBinValue(h[self.tag.overflow], 11) def test_setting_len_mismatch(self) -> None: h = self.make_histogram() with self.assertRaises(ValueError): h[:] = self.values_to_bins(range(9)) with self.assertRaises(ValueError): h[:] = self.values_to_bins(range(11)) with self.assertRaises(ValueError): h[:] = self.values_to_bins(range(13)) with self.assertRaises(ValueError): h[1:4] = self.values_to_bins(range(2)) with self.assertRaises(ValueError): h[1:4] = self.values_to_bins(range(4)) with self.assertRaises(ValueError): h[1:4] = self.values_to_bins(range(5)) class Indexing2D(typing.Generic[T], Indexing): """ This test requires histograms to be created first. h is a 2D histogram with [2,5] bins. The contents are x+2y, where x and y are the bin indices. 
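
    A concrete test case supplies ``make_histogram``; a minimal sketch using
    boost-histogram (one possible backend, not a requirement):

        import boost_histogram as bh
        import numpy as np

        class TestBH2D(Indexing2D[bh.Histogram]):
            @staticmethod
            def make_histogram() -> bh.Histogram:
                h = bh.Histogram(bh.axis.Regular(2, 0, 2), bh.axis.Regular(5, 0, 5))
                x, y = np.mgrid[0:2, 0:5]
                h[:, :] = x + 2 * y
                return h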
""" h: T tag = uhi.tag @staticmethod def get_uhi() -> HistogramIR: x, y = np.mgrid[0:2, 0:5] data = np.pad(x + 2 * y, 1, mode="constant") return { "uhi_schema": 1, "axes": [ { "type": "regular", "lower": 0.0, "upper": 2.0, "bins": 2, "underflow": True, "overflow": True, "circular": False, }, { "type": "regular", "lower": 0.0, "upper": 5.0, "bins": 5, "underflow": True, "overflow": True, "circular": False, }, ], "storage": { "type": "double", "values": data, }, } @staticmethod @abc.abstractmethod def make_histogram() -> T: pass @classmethod def setUpClass(cls) -> None: cls.h = cls.make_histogram() def test_access_integer(self) -> None: for i in range(2): for j in range(5): with self.subTest(i=i, j=j): self.assertEqualBinValue(self.h[i, j], i + 2 * j) with self.assertRaises(IndexError): self.h[2, 0] with self.assertRaises(IndexError): self.h[0, 5] self.assertEqualBinValue(self.h[-1, -1], 9) def test_access_loc(self) -> None: self.assertEqualBinValue(self.h[self.tag.loc(0.5), self.tag.loc(0.5)], 0) self.assertEqualBinValue(self.h[self.tag.loc(0.5), self.tag.loc(1.5)], 2) self.assertEqualBinValue(self.h[self.tag.loc(0.5), self.tag.loc(4.5)], 8) def test_access_loc_underflow(self) -> None: self.assertEqualBinValue(self.h[self.tag.loc(-1), self.tag.loc(0.5)], 0) def test_access_loc_mixed(self) -> None: self.assertEqualBinValue(self.h[self.tag.loc(0.5), 0], 0) self.assertEqualBinValue(self.h[0, self.tag.loc(1.5)], 2) self.assertEqualBinValue(self.h[1, self.tag.loc(4.5)], 9) def test_slicing_all(self) -> None: self.assertEqual(self.h[:, :], self.h) self.assertEqual(self.h[...], self.h) self.assertEqual(self.h[:, ...], self.h) self.assertEqual(self.h[..., :], self.h) def test_slicing_loc(self) -> None: h = self.h[ 1, self.tag.loc(0.5) : self.tag.loc(2.5), ] self.assertEqualBinValue(h[0], 1) self.assertEqualBinValue(h[1], 3) with self.assertRaises(IndexError): h[2] def test_rebinning(self) -> None: h = self.h[:: self.tag.rebin(2), :: self.tag.rebin(2)] self.assertEqualBinValue(h[0, 0], 6) self.assertEqualBinValue(h[0, 1], 22) with self.assertRaises(IndexError): h[0, 2] with self.assertRaises(IndexError): h[1, 0] def test_full_integration(self) -> None: v = self.h[::sum, ::sum] self.assertEqualSum(v, 45) def test_mixed_integration(self) -> None: h = self.h[:2:sum, 1:3] self.assertEqualSum(h[0], 5) self.assertEqualSum(h[1], 9) with self.assertRaises(IndexError): h[2] def test_mixed_single_integration(self) -> None: v = self.h[1, ::sum] self.assertEqualSum(v, 25) def test_mixed_single_integration_dict(self) -> None: v = self.h[{0: 1, 1: np.s_[::sum]}] self.assertEqualSum(v, 25) def test_ellipsis_integration(self) -> None: h = self.h[..., ::sum] self.assertEqualSum(h[0], 20) self.assertEqualSum(h[1], 25) with self.assertRaises(IndexError): h[2] def test_ellipsis_integration_dict(self) -> None: h = self.h[{0: np.s_[::sum]}] self.assertEqualSum(h[0], 1) self.assertEqualSum(h[1], 5) self.assertEqualSum(h[4], 17) with self.assertRaises(IndexError): h[5] def test_setting_single_value(self) -> None: h = self.make_histogram() h[0, 0] = self.value_to_bin(42) self.assertEqualBinValue(h[0, 0], 42) self.assertEqualBinValue(h[1, 1], 3) def test_setting_underflow(self) -> None: h = self.make_histogram() h[self.tag.underflow, ...] 
= self.value_to_bin(42) self.assertEqualBinValue(h[self.tag.underflow, 0], 42) def test_setting_array(self) -> None: h = self.make_histogram() h[0:2, 0:2] = np.array( [self.values_to_bins([42, 43]), self.values_to_bins([44, 45])] ) self.assertEqualBinValue(h[0, 0], 42) self.assertEqualBinValue(h[0, 1], 43) self.assertEqualBinValue(h[1, 0], 44) self.assertEqualBinValue(h[1, 1], 45) self.assertEqualBinValue(h[1, 2], 5) def test_setting_array_broadcast(self) -> None: h = self.make_histogram() h[0:2, 0:2] = np.array([self.values_to_bins([42]), self.values_to_bins([3])]) self.assertEqualBinValue(h[0, 0], 42) self.assertEqualBinValue(h[0, 1], 42) self.assertEqualBinValue(h[1, 0], 3) self.assertEqualBinValue(h[1, 1], 3) self.assertEqualBinValue(h[1, 2], 5) def test_setting_dict(self) -> None: h = self.make_histogram() h[{0: 1, 1: 0}] = self.value_to_bin(42) self.assertEqualBinValue(h[1, 0], 42) self.assertEqualBinValue(h[0, 1], 2) def test_setting_dict_slice(self) -> None: h = self.make_histogram() h[{0: 1, 1: slice(2, 4)}] = self.values_to_bins(range(42, 44)) self.assertEqualBinValue(h[1, 1], 3) self.assertEqualBinValue(h[1, 2], 42) self.assertEqualBinValue(h[1, 3], 43) self.assertEqualBinValue(h[1, 4], 9) def test_setting_dict_slicer(self) -> None: h = self.make_histogram() h[{0: 1, 1: np.s_[3:5]}] = self.values_to_bins(range(42, 44)) self.assertEqualBinValue(h[1, 2], 5) self.assertEqualBinValue(h[1, 3], 42) self.assertEqualBinValue(h[1, 4], 43) class Indexing3D(typing.Generic[T], Indexing): """ This test requires histograms to be created first. h is a 3D histogram with [2,5,10] bins. The contents are x+2y+3z, where x, y, and z are the bin indices. """ h: T tag = uhi.tag @staticmethod def get_uhi() -> HistogramIR: x, y, z = np.mgrid[0:2, 0:5, 0:10] data = np.pad(x + 2 * y + 3 * z, 1, mode="constant") return { "uhi_schema": 1, "writer_info": {"boost-histogram": {"version": "1.6.1"}}, "axes": [ { "type": "regular", "lower": 0.0, "upper": 2.0, "bins": 2, "underflow": True, "overflow": True, "circular": False, "metadata": {}, }, { "type": "regular", "lower": 0.0, "upper": 5.0, "bins": 5, "underflow": True, "overflow": True, "circular": False, "metadata": {}, }, { "type": "regular", "lower": 0.0, "upper": 10.0, "bins": 10, "underflow": True, "overflow": True, "circular": False, "metadata": {}, }, ], "storage": { "type": "double", "values": data, }, "metadata": {"_variance_known": True}, } @classmethod @abc.abstractmethod def make_histogram(cls) -> T: pass @classmethod def setUpClass(cls) -> None: cls.h = cls.make_histogram() def test_access_integer(self) -> None: for i in range(2): for j in range(5): for k in range(10): with self.subTest(i=i, j=j, k=k): self.assertEqualBinValue(self.h[i, j, k], i + 2 * j + 3 * k) with self.assertRaises(IndexError): self.h[2, 0, 0] with self.assertRaises(IndexError): self.h[0, 5, 0] with self.assertRaises(IndexError): self.h[0, 0, 10] self.assertEqualBinValue(self.h[-1, -1, -1], 36) def test_access_loc(self) -> None: self.assertEqualBinValue( self.h[self.tag.loc(0.5), self.tag.loc(0.5), self.tag.loc(0.5)], 0 ) self.assertEqualBinValue( self.h[self.tag.loc(0.5), self.tag.loc(1.5), self.tag.loc(0.5)], 2 ) self.assertEqualBinValue( self.h[self.tag.loc(0.5), self.tag.loc(4.5), self.tag.loc(9.5)], 35 ) self.assertEqualBinValue( self.h[self.tag.loc(-1), self.tag.loc(0.5), self.tag.loc(0.5)], 0 ) def test_access_loc_mixed(self) -> None: self.assertEqualBinValue(self.h[self.tag.loc(0.5), 0, self.tag.loc(0.5)], 0) self.assertEqualBinValue(self.h[0, self.tag.loc(1.5), 
self.tag.loc(0.5)], 2) self.assertEqualBinValue(self.h[self.tag.loc(0.5), self.tag.loc(4.5), 9], 35) self.assertEqualBinValue(self.h[1, self.tag.loc(4.5), 9], 36) def test_slicing_all(self) -> None: self.assertEqual(self.h[:, :, :], self.h) self.assertEqual(self.h[...], self.h) self.assertEqual(self.h[:, ...], self.h) self.assertEqual(self.h[..., :], self.h) def test_slicing_loc(self) -> None: h = self.h[ 1, 3, self.tag.loc(0.5) : self.tag.loc(2.5), ] self.assertEqualBinValue(h[0], 7) self.assertEqualBinValue(h[1], 10) with self.assertRaises(IndexError): h[2] def test_rebinning(self) -> None: h = self.h[:: self.tag.rebin(2), :: self.tag.rebin(2), :: self.tag.rebin(2)] self.assertEqualBinValue(h[0, 0, 0], 24) self.assertEqualBinValue(h[0, 0, 1], 72) self.assertEqualBinValue(h[0, 1, 0], 56) self.assertEqualBinValue(h[0, 1, 4], 248) with self.assertRaises(IndexError): h[0, 1, 5] with self.assertRaises(IndexError): h[0, 2, 4] with self.assertRaises(IndexError): h[1, 1, 4] def test_full_integration(self) -> None: v = self.h[::sum, ::sum, ::sum] self.assertEqualSum(v, 1800) def test_mixed_integration(self) -> None: h = self.h[::sum, :2:sum, 1:3] self.assertEqualSum(h[0], 18) self.assertEqualSum(h[1], 30) with self.assertRaises(IndexError): h[2] def test_mixed_single_integration(self) -> None: h = self.h[1, ::sum, 1:3] self.assertEqualSum(h[0], 40) self.assertEqualSum(h[1], 55) with self.assertRaises(IndexError): h[2] def test_mixed_single_integration_dict(self) -> None: h = self.h[{0: 1, 1: np.s_[::sum], 2: np.s_[1:3]}] self.assertEqualSum(h[0], 40) self.assertEqualSum(h[1], 55) with self.assertRaises(IndexError): h[2] def test_ellipsis_integration(self) -> None: h = self.h[::sum, ..., ::sum] self.assertEqualSum(h[0], 280) self.assertEqualSum(h[1], 320) self.assertEqualSum(h[4], 440) with self.assertRaises(IndexError): h[5] def test_ellipsis_integration_dict(self) -> None: h = self.h[{0: np.s_[::sum], 2: np.s_[::sum]}] self.assertEqualSum(h[0], 280) self.assertEqualSum(h[1], 320) self.assertEqualSum(h[4], 440) with self.assertRaises(IndexError): h[5] def test_setting_single_value(self) -> None: h = self.make_histogram() h[0, 0, 0] = 42 self.assertEqualBinValue(h[0, 0, 0], 42) self.assertEqualBinValue(h[1, 1, 1], 6) def test_setting_underflow(self) -> None: h = self.make_histogram() h[self.tag.underflow, ...] 
= 42 self.assertEqualBinValue(h[self.tag.underflow, 0, 0], 42) def test_setting_array(self) -> None: h = self.make_histogram() h[0:2, 0:2, 0:2] = np.array( [ [self.values_to_bins([42, 43]), self.values_to_bins([44, 45])], [self.values_to_bins([46, 47]), self.values_to_bins([48, 49])], ] ) self.assertEqualBinValue(h[0, 0, 0], 42) self.assertEqualBinValue(h[0, 0, 1], 43) self.assertEqualBinValue(h[0, 1, 0], 44) self.assertEqualBinValue(h[0, 1, 1], 45) self.assertEqualBinValue(h[1, 0, 0], 46) self.assertEqualBinValue(h[1, 0, 1], 47) self.assertEqualBinValue(h[1, 1, 0], 48) self.assertEqualBinValue(h[1, 1, 1], 49) self.assertEqualBinValue(h[1, 1, 2], 9) def test_setting_array_broadcast(self) -> None: h = self.make_histogram() h[0:2, 0:2, 0:2] = np.array( [ [self.values_to_bins([42]), self.values_to_bins([3])], [self.values_to_bins([46]), self.values_to_bins([4])], ] ) self.assertEqualBinValue(h[0, 0, 0], 42) self.assertEqualBinValue(h[0, 0, 1], 42) self.assertEqualBinValue(h[0, 1, 0], 3) self.assertEqualBinValue(h[0, 1, 1], 3) self.assertEqualBinValue(h[1, 0, 0], 46) self.assertEqualBinValue(h[1, 0, 1], 46) self.assertEqualBinValue(h[1, 1, 0], 4) self.assertEqualBinValue(h[1, 1, 1], 4) self.assertEqualBinValue(h[1, 1, 2], 9) def test_setting_dict(self) -> None: h = self.make_histogram() h[{0: 1, 1: 0, 2: 3}] = self.value_to_bin(3) self.assertEqualBinValue(h[1, 0, 3], 3) self.assertEqualBinValue(h[1, 0, 4], 13) def test_setting_dict_slice(self) -> None: h = self.make_histogram() h[{0: 1, 1: 0, 2: slice(3, 5)}] = self.values_to_bins(range(42, 44)) self.assertEqualBinValue(h[1, 0, 2], 7) self.assertEqualBinValue(h[1, 0, 3], 42) self.assertEqualBinValue(h[1, 0, 4], 43) self.assertEqualBinValue(h[1, 0, 5], 16) def test_setting_dict_slicer(self) -> None: h = self.make_histogram() h[{0: 1, 1: 0, 2: np.s_[3:5]}] = self.values_to_bins(range(42, 44)) self.assertEqualBinValue(h[1, 0, 2], 7) self.assertEqualBinValue(h[1, 0, 3], 42) self.assertEqualBinValue(h[1, 0, 4], 43) self.assertEqualBinValue(h[1, 0, 5], 16) uhi-1.0.0/src/uhi/typing/000077500000000000000000000000001505142447500151725ustar00rootroot00000000000000uhi-1.0.0/src/uhi/typing/__init__.py000066400000000000000000000000001505142447500172710ustar00rootroot00000000000000uhi-1.0.0/src/uhi/typing/plottable.py000066400000000000000000000130071505142447500175330ustar00rootroot00000000000000""" Using the protocol: Producers: use isinstance(myhist, PlottableHistogram) in your tests; part of the protocol is checkable at runtime, though ideally you should use MyPy; if your histogram class supports PlottableHistogram, this will pass. Consumers: Make your functions accept the PlottableHistogram static type, and MyPy will force you to only use items in the Protocol. """ from __future__ import annotations from collections.abc import Iterator, Sequence from typing import Any, Protocol, TypeVar, Union, runtime_checkable # NumPy 1.20+ will work much, much better than previous versions when type checking import numpy as np protocol_version = (1, 2) # Known kinds of histograms. A Producer can add Kinds not defined here; a # Consumer should check for known types if it matters. A simple plotter could # just use .value and .variance if non-None and ignore .kind. # # Could have been Kind = Literal["COUNT", "MEAN"] - left as a generic string so # it can be extendable. Kind = str # Implementations are highly encouraged to use the following construct: # class Kind(str, enum.Enum): # COUNT = "COUNT" # MEAN = "MEAN" # Then return and use Kind.COUNT or Kind.MEAN. 
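#
# A consumer-side sketch (illustrative only): accept the static type and use
# only Protocol members, so a type checker can verify the access pattern:
#
#     def plot(h: PlottableHistogram) -> None:
#         values = h.values()
#         for axis in h.axes:
#             for edges_or_label in axis:
#                 ...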
uhi-1.0.0/src/uhi/typing/serialization.py
"""Serialization types for UHI.

Two types of dictionaries are defined here:

1. ``AnyAxisIR``, ``AnyStorageIR``, and ``AnyHistogramIR`` are used for
   inputs. They represent the merger of all possible types.
2. ``AxisIR``, ``StorageIR``, and ``HistogramIR`` are used for outputs. These
   have precise entries defined for each Literal type.

There's also a Protocol, ``ToUHIHistogram``, for anything that supports
conversion.
"""

from __future__ import annotations

import sys
import typing
from typing import Any, Literal, Protocol, TypedDict, Union

from numpy.typing import ArrayLike

if sys.version_info < (3, 11):
    from typing_extensions import NotRequired, Required
else:
    from typing import NotRequired, Required

__all__ = [
    "AnyAxisIR",
    "AnyHistogramIR",
    "AnyStorageIR",
    "AxisIR",
    "BooleanAxisIR",
    "CategoryIntAxisIR",
    "CategoryStrAxisIR",
    "DoubleStorageIR",
    "HistogramIR",
    "IntStorageIR",
    "MeanStorageIR",
    "RegularAxisIR",
    "StorageIR",
    "ToUHIHistogram",
    "VariableAxisIR",
    "WeightedMeanStorageIR",
    "WeightedStorageIR",
]

SupportedMetadata = Union[float, str, bool]


def __dir__() -> list[str]:
    return __all__


class RegularAxisIR(TypedDict):
    type: Literal["regular"]
    lower: float
    upper: float
    bins: int
    underflow: bool
    overflow: bool
    circular: bool
    metadata: NotRequired[dict[str, SupportedMetadata]]
    writer_info: NotRequired[dict[str, dict[str, SupportedMetadata]]]


class VariableAxisIR(TypedDict):
    type: Literal["variable"]
    edges: ArrayLike
    underflow: bool
    overflow: bool
    circular: bool
    metadata: NotRequired[dict[str, SupportedMetadata]]
    writer_info: NotRequired[dict[str, dict[str, SupportedMetadata]]]


class CategoryStrAxisIR(TypedDict):
    type: Literal["category_str"]
    categories: list[str]
    flow: bool
    metadata: NotRequired[dict[str, SupportedMetadata]]
    writer_info: NotRequired[dict[str, dict[str, SupportedMetadata]]]


class CategoryIntAxisIR(TypedDict):
    type: Literal["category_int"]
    categories: list[int]
    flow: bool
    metadata: NotRequired[dict[str, SupportedMetadata]]
    writer_info: NotRequired[dict[str, dict[str, SupportedMetadata]]]


class BooleanAxisIR(TypedDict):
    type: Literal["boolean"]
    metadata: NotRequired[dict[str, SupportedMetadata]]
    writer_info: NotRequired[dict[str, dict[str, SupportedMetadata]]]


class IntStorageIR(TypedDict):
    type: Literal["int"]
    values: ArrayLike
    index: NotRequired[ArrayLike]


class DoubleStorageIR(TypedDict):
    type: Literal["double"]
    values: ArrayLike
    index: NotRequired[ArrayLike]


class WeightedStorageIR(TypedDict):
    type: Literal["weighted"]
    values: ArrayLike
    variances: ArrayLike
    index: NotRequired[ArrayLike]


class MeanStorageIR(TypedDict):
    type: Literal["mean"]
    counts: ArrayLike
    values: ArrayLike
    variances: ArrayLike
    index: NotRequired[ArrayLike]


class WeightedMeanStorageIR(TypedDict):
    type: Literal["weighted_mean"]
    sum_of_weights: ArrayLike
    sum_of_weights_squared: ArrayLike
    values: ArrayLike
    variances: ArrayLike
    index: NotRequired[ArrayLike]


StorageIR = Union[
    IntStorageIR,
    DoubleStorageIR,
    WeightedStorageIR,
    MeanStorageIR,
    WeightedMeanStorageIR,
]

AxisIR = Union[
    RegularAxisIR, VariableAxisIR, CategoryStrAxisIR, CategoryIntAxisIR, BooleanAxisIR
]


class AnyStorageIR(TypedDict, total=False):
    type: Required[Literal["int", "double", "weighted", "mean", "weighted_mean"]]
    index: ArrayLike
    values: ArrayLike
    variances: ArrayLike
    sum_of_weights: ArrayLike
    sum_of_weights_squared: ArrayLike
    counts: ArrayLike


class AnyAxisIR(TypedDict, total=False):
    type: Required[
        Literal["regular", "variable", "category_str", "category_int", "boolean"]
    ]
    metadata: dict[str, SupportedMetadata]
    writer_info: dict[str, dict[str, SupportedMetadata]]
    lower: float
    upper: float
    bins: int
    edges: ArrayLike
    categories: list[str] | list[int]
    underflow: bool
    overflow: bool
    flow: bool
    circular: bool


class HistogramIR(TypedDict):
    uhi_schema: int
    axes: list[AxisIR]
    storage: StorageIR
    metadata: NotRequired[dict[str, SupportedMetadata]]
    writer_info: NotRequired[dict[str, dict[str, SupportedMetadata]]]


class AnyHistogramIR(TypedDict):
    uhi_schema: int
    axes: list[AnyAxisIR]
    storage: AnyStorageIR
    metadata: NotRequired[dict[str, SupportedMetadata]]
    writer_info: NotRequired[dict[str, dict[str, SupportedMetadata]]]


@typing.runtime_checkable
class ToUHIHistogram(Protocol):
    def _to_uhi_(self) -> dict[str, Any]: ...
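A hypothetical Producer sketch using these types: a class whose _to_uhi_ returns a HistogramIR-shaped dictionary. The class name and axis values are assumptions for illustration only:

import numpy as np

from uhi.typing.serialization import HistogramIR


class MyHist:
    def _to_uhi_(self) -> HistogramIR:
        # One regular axis with both flow bins -> bins + 2 storage entries.
        return {
            "uhi_schema": 1,
            "axes": [
                {
                    "type": "regular",
                    "lower": 0.0,
                    "upper": 1.0,
                    "bins": 3,
                    "underflow": True,
                    "overflow": True,
                    "circular": False,
                }
            ],
            "storage": {"type": "double", "values": np.zeros(5)},
        }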
uhi-1.0.0/tests/conftest.py
from __future__ import annotations

from pathlib import Path

import pytest

DIR = Path(__file__).parent.resolve()

VALID_FILES = DIR.glob("resources/valid/*.json")
INVALID_FILES = DIR.glob("resources/invalid/*.json")


@pytest.fixture(scope="session")
def resources() -> Path:
    return DIR / "resources"


@pytest.fixture(params=VALID_FILES, ids=lambda p: p.name)
def valid(request: pytest.FixtureRequest) -> Path:
    return request.param  # type: ignore[no-any-return]


@pytest.fixture(params=INVALID_FILES, ids=lambda p: p.name)
def invalid(request: pytest.FixtureRequest) -> Path:
    return request.param  # type: ignore[no-any-return]


@pytest.fixture(params=[False, True], ids=["dense", "sparse"])
def sparse(request: pytest.FixtureRequest) -> bool:
    return request.param  # type: ignore[no-any-return]

uhi-1.0.0/tests/resources/invalid/metadata_nested.error.txt
data.one.metadata.one must be valid exactly by one definition

uhi-1.0.0/tests/resources/invalid/metadata_nested.json
{
  "one": {
    "uhi_schema": 1,
    "metadata": { "one": { "two": 2 } },
    "axes": [],
    "storage": { "type": "int", "values": [1] }
  }
}

uhi-1.0.0/tests/resources/invalid/missing_axis.error.txt
data.one must contain ['axes'] properties

uhi-1.0.0/tests/resources/invalid/missing_axis.json
{
  "one": {
    "uhi_schema": 1,
    "storage": { "type": "int", "values": [1] }
  }
}

uhi-1.0.0/tests/resources/invalid/missing_storage.error.txt
data.one must contain ['storage'] properties

uhi-1.0.0/tests/resources/invalid/missing_storage.json
{
  "one": {
    "uhi_schema": 1,
    "axes": []
  }
}
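A short sketch of how the invalid fixtures above behave (assuming the files are on disk at the paths shown): uhi.schema.validate raises a fastjsonschema error whose message matches the paired .error.txt file:

import json

import fastjsonschema

import uhi.schema

with open("tests/resources/invalid/missing_axis.json", encoding="utf-8") as f:
    data = json.load(f)

try:
    uhi.schema.validate(data)
except fastjsonschema.exceptions.JsonSchemaException as err:
    print(err)  # data.one must contain ['axes'] properties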
uhi-1.0.0/tests/resources/valid/2d.json
{
  "main": {
    "uhi_schema": 1,
    "axes": [
      {
        "type": "variable",
        "edges": [1, 2, 3, 4],
        "underflow": true,
        "overflow": false,
        "circular": false
      },
      { "type": "category_str", "categories": ["a", "b", "c"], "flow": false }
    ],
    "storage": {
      "type": "double",
      "values": [
        [1.0, 2.0, 3.0],
        [4.0, 5.0, 6.0],
        [7.0, 8.0, 9.0],
        [10.0, 11.0, 12.0]
      ]
    }
  }
}

uhi-1.0.0/tests/resources/valid/mean.json
{
  "weighted_mean": {
    "uhi_schema": 1,
    "writer_info": { "boost-histogram": { "version": "1.6.1" } },
    "axes": [
      {
        "type": "regular",
        "lower": 0.0,
        "upper": 1.0,
        "bins": 4,
        "underflow": true,
        "overflow": true,
        "circular": false
      }
    ],
    "storage": {
      "type": "weighted_mean",
      "sum_of_weights": [0.0, 3.9000000000000004, 0.6, 0.0, 0.0, 0.0],
      "sum_of_weights_squared": [0.0, 8.73, 0.36, 0.0, 0.0, 0.0],
      "values": [0.0, 5.6923076923076925, 7.000000000000001, 0.0, 0.0, 0.0],
      "variances": [NaN, 0.4999999999999996, -Infinity, NaN, NaN, NaN]
    },
    "metadata": {
      "_variance_known": true,
      "fill": "h.fill([.1, .1, .4], weight=[1.2, 2.7, .6], sample=[5, 6, 7])"
    }
  },
  "mean": {
    "uhi_schema": 1,
    "writer_info": { "boost-histogram": { "version": "1.6.1" } },
    "axes": [
      {
        "type": "regular",
        "lower": 0.0,
        "upper": 1.0,
        "bins": 4,
        "underflow": true,
        "overflow": true,
        "circular": false
      }
    ],
    "storage": {
      "type": "mean",
      "counts": [0.0, 3.9000000000000004, 0.6, 0.0, 0.0, 0.0],
      "values": [0.0, 5.6923076923076925, 7.000000000000001, 0.0, 0.0, 0.0],
      "variances": [
        -0.0, 0.28647214854111386, 9.325873406851315e-15, -0.0, -0.0, -0.0
      ]
    },
    "metadata": { "_variance_known": true }
  }
}

uhi-1.0.0/tests/resources/valid/noaxes.json
{
  "one": {
    "uhi_schema": 1,
    "axes": [],
    "storage": { "type": "int", "values": [1] }
  }
}

uhi-1.0.0/tests/resources/valid/reg.json
{
  "one": {
    "uhi_schema": 1,
    "metadata": { "one": true, "two": 2, "three": "three" },
    "axes": [
      {
        "type": "regular",
        "lower": 0,
        "upper": 5,
        "bins": 3,
        "underflow": true,
        "overflow": true,
        "circular": false,
        "metadata": { "simple": 1 },
        "writer_info": { "uhi": { "bool": true } }
      }
    ],
    "storage": { "type": "int", "values": [1, 2, 3, 4, 5] }
  },
  "two": {
    "uhi_schema": 1,
    "writer_info": { "uhi": { "version": "1.0.0", "other_info": "Something" } },
    "axes": [
      {
        "type": "regular",
        "lower": 0,
        "upper": 5,
        "bins": 5,
        "underflow": true,
        "overflow": true,
        "circular": false
      }
    ],
    "storage": { "type": "double", "values": [1, 2, 3, 4, 5, 6, 7] }
  }
}

uhi-1.0.0/tests/resources/valid/sparse.json
{
  "first": {
    "uhi_schema": 1,
    "metadata": { "one": true, "two": 2, "three": "three" },
    "axes": [
      {
        "type": "regular",
        "lower": 0,
        "upper": 5,
        "bins": 3,
        "underflow": true,
        "overflow": true,
        "circular": false,
        "metadata": { "simple": 1 },
        "writer_info": { "uhi": { "bool": true } }
      }
    ],
    "storage": { "type": "int", "index": [[1, 2]], "values": [3, 5] }
  }
}
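The "regular" axis records above fully determine the bin edges; here is a small helper sketch (not part of the package, the name is an invention) that recovers them:

import numpy as np


def regular_edges(lower: float, upper: float, bins: int) -> np.ndarray:
    # Evenly spaced edges; flow bins are stored separately in "values".
    return np.linspace(lower, upper, bins + 1)


regular_edges(0, 5, 3)  # array([0.        , 1.66666667, 3.33333333, 5.        ])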
uhi-1.0.0/tests/test_ensure.py
from __future__ import annotations

import numpy as np
import pytest
from pytest import approx

from uhi.numpy_plottable import ensure_plottable_histogram


def test_from_numpy() -> None:
    hist1 = ((1, 2, 3, 4, 1, 2), (0, 1, 2, 3))
    h = ensure_plottable_histogram(hist1)

    assert h.values() == approx(np.array(hist1[0]))
    assert len(h.axes) == 1
    assert len(h.axes[0]) == 3
    assert h.axes[0][0] == (0, 1)
    assert h.axes[0][1] == (1, 2)
    assert h.axes[0][2] == (2, 3)


def test_from_numpy_2d() -> None:
    np.random.seed(42)
    x = np.random.normal(1, 2, 1000)
    y = np.random.normal(-1, 1, 1000)
    result = np.histogram2d(x, y)
    h = ensure_plottable_histogram(result)

    assert h.values() == approx(result[0])
    assert len(h.axes) == 2
    assert len(h.axes[0]) == 10
    assert h.axes[0][0] == approx(result[1][0:2])
    assert h.axes[0][1] == approx(result[1][1:3])
    assert h.axes[1][0] == approx(result[2][0:2])
    assert h.axes[1][1] == approx(result[2][1:3])


def test_from_numpy_dd() -> None:
    np.random.seed(42)
    x = np.random.normal(1, 2, 1000)
    y = np.random.normal(-1, 1, 1000)
    z = np.random.normal(3, 3, 1000)
    result = np.histogramdd((x, y, z))
    h = ensure_plottable_histogram(result)

    assert h.values() == approx(result[0])
    assert len(h.axes) == 3
    assert len(h.axes[0]) == 10
    assert h.axes[0][0] == approx(result[1][0][0:2])
    assert h.axes[0][1] == approx(result[1][0][1:3])
    assert h.axes[1][0] == approx(result[1][1][0:2])
    assert h.axes[1][1] == approx(result[1][1][1:3])
    assert h.axes[2][0] == approx(result[1][2][0:2])
    assert h.axes[2][1] == approx(result[1][2][1:3])


def test_from_bh_regular() -> None:
    bh = pytest.importorskip("boost_histogram")

    h1 = bh.Histogram(bh.axis.Regular(5, 0, 5))
    h1[...] = (3, 2, 1, 2, 3)
    h = ensure_plottable_histogram(h1)

    assert h is h1
    assert h.values() == approx(np.array((3, 2, 1, 2, 3)))
    assert len(h.axes) == 1
    assert len(h.axes[0]) == 5
    assert h.axes[0][0] == approx(np.array((0, 1)))
    assert h.axes[0][1] == approx(np.array((1, 2)))
    assert h.axes[0][2] == approx(np.array((2, 3)))


def test_from_bh_integer() -> None:
    bh = pytest.importorskip("boost_histogram")

    h1 = bh.Histogram(bh.axis.Integer(1, 6))
    h1[...] = (3, 2, 1, 2, 3)
    h = ensure_plottable_histogram(h1)

    assert h is h1
    assert h.values() == approx(np.array((3, 2, 1, 2, 3)))
    assert len(h.axes) == 1
    assert len(h.axes[0]) == 5
    assert h.axes[0][0] == 1
    assert h.axes[0][1] == 2
    assert h.axes[0][2] == 3


def test_from_bh_str_cat() -> None:
    bh = pytest.importorskip("boost_histogram")

    h1 = bh.Histogram(bh.axis.StrCategory(["hi", "ho"]))
    h1.fill(["hi", "hi", "hi", "ho"])
    h = ensure_plottable_histogram(h1)

    assert h is h1
    assert h.values() == approx(np.array((3, 1)))
    assert len(h.axes) == 1
    assert len(h.axes[0]) == 2
    assert h.axes[0][0] == "hi"
    assert h.axes[0][1] == "ho"
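The adapter pattern tested above, distilled to a usage sketch (the data here is an assumption): any NumPy histogram tuple can be normalized in one call before plotting:

import numpy as np

from uhi.numpy_plottable import ensure_plottable_histogram

h = ensure_plottable_histogram(np.histogram(np.random.normal(size=100)))
h.values()    # bin counts
h.axes[0][0]  # (lower, upper) edges of the first bin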
uhi-1.0.0/tests/test_hdf5.py
from __future__ import annotations

import importlib.metadata
import json
from pathlib import Path

import packaging.version
import pytest

import uhi.io.json
from uhi.io import to_sparse

h5py = pytest.importorskip("h5py", reason="h5py is not installed")
uhi_io_hdf5 = pytest.importorskip("uhi.io.hdf5")

BHVERSION = packaging.version.Version(importlib.metadata.version("boost_histogram"))
HISTVERSION = packaging.version.Version(importlib.metadata.version("hist"))


def test_valid_json(valid: Path, tmp_path: Path, sparse: bool) -> None:
    data = valid.read_text(encoding="utf-8")
    hists = json.loads(data, object_hook=uhi.io.json.object_hook)
    if sparse:
        hists = {name: to_sparse(hist) for name, hist in hists.items()}

    tmp_file = tmp_path / "test.h5"
    with h5py.File(tmp_file, "w") as h5_file:
        for name, hist in hists.items():
            uhi_io_hdf5.write(h5_file.create_group(name), hist)

    with h5py.File(tmp_file, "r") as h5_file:
        rehists = {name: uhi_io_hdf5.read(h5_file[name]) for name in hists}

    assert hists.keys() == rehists.keys()

    for name in hists:
        hist = hists[name]
        rehist = rehists[name]

        # Check that the JSON representation is the same
        data = json.dumps(hist, default=uhi.io.json.default, sort_keys=True)
        redata = json.dumps(rehist, default=uhi.io.json.default, sort_keys=True)
        redata = redata.replace(" ", "").replace("\n", "")
        data = data.replace(" ", "").replace("\n", "")
        assert redata == data


def test_reg_load(tmp_path: Path, resources: Path) -> None:
    data = resources / "valid/reg.json"
    hists = json.loads(
        data.read_text(encoding="utf-8"), object_hook=uhi.io.json.object_hook
    )

    tmp_file = tmp_path / "test.h5"
    with h5py.File(tmp_file, "w") as h5_file:
        uhi_io_hdf5.write(
            h5_file.create_group("one"), hists["one"], min_compress_elements=0
        )

    with h5py.File(tmp_file, "r") as h5_file:
        one = uhi_io_hdf5.read(h5_file["one"])

    assert one["metadata"] == {"one": True, "two": 2, "three": "three"}

    assert len(one["axes"]) == 1
    assert one["axes"][0]["type"] == "regular"
    assert one["axes"][0]["lower"] == pytest.approx(0)
    assert one["axes"][0]["upper"] == pytest.approx(5)
    assert one["axes"][0]["bins"] == 3
    assert one["axes"][0]["underflow"]
    assert one["axes"][0]["overflow"]
    assert not one["axes"][0]["circular"]

    assert one["storage"]["type"] == "int"
    assert one["storage"]["values"] == pytest.approx([1, 2, 3, 4, 5])


@pytest.mark.skipif(
    packaging.version.Version("1.6.1") > BHVERSION,
    reason="Requires boost-histogram 1.6+",
)
def test_convert_bh(tmp_path: Path) -> None:
    import boost_histogram as bh

    h = bh.Histogram(
        bh.axis.Regular(3, 13, 10, __dict__={"name": "x"}), storage=bh.storage.Weight()
    )

    tmp_file = tmp_path / "test.h5"
    with h5py.File(tmp_file, "w") as h5_file:
        uhi_io_hdf5.write(h5_file.create_group("histogram"), h)

    with h5py.File(tmp_file, "r") as h5_file:
        rehist = uhi_io_hdf5.read(h5_file["histogram"])

    h2 = bh.Histogram(rehist)
    assert h == h2


@pytest.mark.skipif(
    packaging.version.Version("1.6.1") > BHVERSION
    or packaging.version.Version("2.9.0") > HISTVERSION,
    reason="Requires boost-histogram 1.6+ / Hist 2.9+",
)
def test_convert_hist(tmp_path: Path) -> None:
    import hist

    h = hist.Hist(
        hist.axis.Regular(10, 0, 1, name="a", label="A"),
        hist.axis.Integer(7, 13, overflow=False, name="b", label="B"),
        storage=hist.storage.Weight(),
        name="h",
        label="H",
    )

    tmp_file = tmp_path / "test.h5"
    with h5py.File(tmp_file, "w") as h5_file:
        uhi_io_hdf5.write(h5_file.create_group("histogram"), h)

    with h5py.File(tmp_file, "r") as h5_file:
        rehist = uhi_io_hdf5.read(h5_file["histogram"])

    h2 = hist.Hist(rehist)
    assert h == h2
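The HDF5 round trip from the tests above, distilled to a sketch (assumes h5py and boost-histogram 1.6+, matching the guards in the test file; the file name and histogram are illustrative):

import boost_histogram as bh
import h5py

import uhi.io.hdf5

h = bh.Histogram(bh.axis.Regular(4, 0, 1))

with h5py.File("hists.h5", "w") as f:
    uhi.io.hdf5.write(f.create_group("h"), h)  # each histogram gets a group

with h5py.File("hists.h5", "r") as f:
    ir = uhi.io.hdf5.read(f["h"])  # back to the dict-based representation

h2 = bh.Histogram(ir)  # reconstruct a live histogram from the dict
assert h == h2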
uhi-1.0.0/tests/test_histogram_schema.py
from __future__ import annotations

import json
import re
from pathlib import Path

import fastjsonschema
import pytest

import uhi.schema


def test_valid_schemas(valid: Path) -> None:
    with valid.open(encoding="utf-8") as f:
        data = json.load(f)

    uhi.schema.validate(data)


def test_invalid_schemas(invalid: Path) -> None:
    with invalid.open(encoding="utf-8") as f:
        data = json.load(f)

    try:
        errmsg = invalid.with_suffix(".error.txt").read_text(encoding="utf-8").strip()
    except FileNotFoundError:
        errmsg = "NO ERROR MESSAGE FILE FOUND"

    with pytest.raises(
        fastjsonschema.exceptions.JsonSchemaException, match=re.escape(errmsg)
    ):
        uhi.schema.validate(data)

uhi-1.0.0/tests/test_json.py
from __future__ import annotations

import importlib.metadata
import json
from pathlib import Path

import packaging.version
import pytest

import uhi.io.json

BHVERSION = packaging.version.Version(importlib.metadata.version("boost_histogram"))
HISTVERSION = packaging.version.Version(importlib.metadata.version("hist"))


def test_valid_json(valid: Path) -> None:
    data = valid.read_text(encoding="utf-8")
    hist = json.loads(data, object_hook=uhi.io.json.object_hook)

    redata = json.dumps(hist, default=uhi.io.json.default)
    rehist = json.loads(redata, object_hook=uhi.io.json.object_hook)

    assert redata.replace(" ", "").replace("\n", "") == data.replace(" ", "").replace(
        "\n", ""
    )

    assert hist.keys() == rehist.keys()


def test_reg_load(resources: Path) -> None:
    data = resources / "valid/reg.json"
    hists = json.loads(
        data.read_text(encoding="utf-8"), object_hook=uhi.io.json.object_hook
    )
    one = hists["one"]
    two = hists["two"]

    assert one["metadata"] == {"one": True, "two": 2, "three": "three"}

    assert len(one["axes"]) == 1
    assert one["axes"][0]["type"] == "regular"
    assert one["axes"][0]["lower"] == pytest.approx(0)
    assert one["axes"][0]["upper"] == pytest.approx(5)
    assert one["axes"][0]["bins"] == 3
    assert one["axes"][0]["underflow"]
    assert one["axes"][0]["overflow"]
    assert not one["axes"][0]["circular"]

    assert one["storage"]["type"] == "int"
    assert one["storage"]["values"] == pytest.approx([1, 2, 3, 4, 5])

    assert len(two["axes"]) == 1
    assert two["axes"][0]["type"] == "regular"
    assert two["axes"][0]["lower"] == pytest.approx(0)
    assert two["axes"][0]["upper"] == pytest.approx(5)
    assert two["axes"][0]["bins"] == 5
    assert two["axes"][0]["underflow"]
    assert two["axes"][0]["overflow"]
    assert not two["axes"][0]["circular"]

    assert two["storage"]["type"] == "double"
    assert two["storage"]["values"] == pytest.approx([1, 2, 3, 4, 5, 6, 7])


@pytest.mark.skipif(
    packaging.version.Version("1.6.1") > BHVERSION,
    reason="Requires boost-histogram 1.6+",
)
def test_convert_bh() -> None:
    import boost_histogram as bh

    h = bh.Histogram(
        bh.axis.Regular(3, 13, 10, __dict__={"name": "x"}), storage=bh.storage.Weight()
    )

    redata = json.dumps(h, default=uhi.io.json.default)
    rehist = json.loads(redata, object_hook=uhi.io.json.object_hook)

    h2 = bh.Histogram(rehist)
    assert h == h2


@pytest.mark.skipif(
    packaging.version.Version("1.6.1") > BHVERSION
    or packaging.version.Version("2.9.0") > HISTVERSION,
    reason="Requires boost-histogram 1.6+ / Hist 2.9+",
)
def test_convert_hist() -> None:
    import hist

    h = hist.Hist(
        hist.axis.Regular(10, 0, 1, name="a", label="A"),
        hist.axis.Integer(7, 13, overflow=False, name="b", label="B"),
        storage=hist.storage.Weight(),
        name="h",
        label="H",
    )

    redata = json.dumps(h, default=uhi.io.json.default)
    rehist = json.loads(redata, object_hook=uhi.io.json.object_hook)

    h2 = hist.Hist(rehist)
    assert h == h2
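The JSON round trip above as a minimal sketch (assumes boost-histogram 1.6+, matching the skipif guards; the histogram itself is illustrative): the default/object_hook pair handles the array and NaN/Infinity details that plain json cannot:

import json

import boost_histogram as bh

import uhi.io.json

h = bh.Histogram(bh.axis.Regular(4, 0, 1))
text = json.dumps(h, default=uhi.io.json.default)
ir = json.loads(text, object_hook=uhi.io.json.object_hook)
h2 = bh.Histogram(ir)
assert h == h2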
uhi-1.0.0/tests/test_root.py
from __future__ import annotations

import numpy as np
import pytest
from pytest import approx

from uhi.numpy_plottable import ensure_plottable_histogram

ROOT = pytest.importorskip("ROOT")


def test_root_imported() -> None:
    assert ROOT.TString("hi") == "hi"


def test_root_th1f_convert() -> None:
    th = ROOT.TH1F("h1", "h1", 50, -2.5, 2.5)
    th.FillRandom("gaus", 10000)
    h = ensure_plottable_histogram(th)

    assert all(th.GetBinContent(i + 1) == approx(iv) for i, iv in enumerate(h.values()))
    assert all(
        th.GetBinError(i + 1) == approx(ie)
        for i, ie in enumerate(np.sqrt(h.variances()))
    )


def test_root_th2f_convert() -> None:
    th = ROOT.TH2F("h2", "h2", 50, -2.5, 2.5, 50, -2.5, 2.5)
    _ = ROOT.TF2("xyg", "xygaus", -2.5, 2.5, -2.5, 2.5)
    th.FillRandom("xyg", 10000)
    h = ensure_plottable_histogram(th)

    assert all(
        th.GetBinContent(i + 1, j + 1) == approx(iv)
        for i, row in enumerate(h.values())
        for j, iv in enumerate(row)
    )
    assert all(
        th.GetBinError(i + 1, j + 1) == approx(ie)
        for i, row in enumerate(np.sqrt(h.variances()))
        for j, ie in enumerate(row)
    )
uhi-1.0.0/tests/test_serialization.py
from __future__ import annotations

import dataclasses
from typing import Any

from uhi.io import remove_writer_info
from uhi.io._common import _convert_input


def test_remove_writer_info() -> None:
    d = {"uhi_schema": 1, "writer_info": {"a": {"foo": "bar"}, "b": {"FOO": "BAR"}}}
    assert remove_writer_info(d, library=None) == {"uhi_schema": 1}
    assert remove_writer_info(d, library="a") == {
        "uhi_schema": 1,
        "writer_info": {"b": {"FOO": "BAR"}},
    }
    assert remove_writer_info(d, library="b") == {
        "uhi_schema": 1,
        "writer_info": {"a": {"foo": "bar"}},
    }
    assert remove_writer_info(d, library="c") == d


@dataclasses.dataclass
class _Simple:
    value: dict[str, Any]

    def _to_uhi_(self) -> dict[str, Any]:
        return self.value


def test_remove_empty_metadata() -> None:
    d = {
        "uhi_schema": 1,
        "writer_info": {"boost-histogram": {"version": "1.6.1"}},
        "axes": [
            {
                "type": "regular",
                "lower": 0.0,
                "upper": 1.0,
                "bins": 4,
                "underflow": True,
                "overflow": True,
                "circular": False,
                "metadata": {},
            }
        ],
        "storage": {
            "type": "weighted_mean",
            "sum_of_weights": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
            "sum_of_weights_squared": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
            "values": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
            "variances": [float("nan")] * 6,
        },
        "metadata": {},
    }
    h = _Simple(d)
    ir = _convert_input(h)
    assert "metadata" not in ir
    assert "metadata" not in ir["axes"][0]
uhi-1.0.0/tests/test_sparse.py
from __future__ import annotations

import json
from pathlib import Path

import numpy as np
import pytest

import uhi.io.json
from uhi.io import from_sparse, to_sparse
from uhi.typing.serialization import HistogramIR, WeightedStorageIR


def test_to_from_sparse_roundtrip() -> None:
    # Original dense data
    hist: HistogramIR = {
        "uhi_schema": 1,
        "storage": {
            "type": "weighted",
            "values": np.array([[0, 1, 0], [0, 2, 0]], dtype=float),
            "variances": np.array([[0, 0.1, 0], [0, 0, 0]], dtype=float),
        },
        "axes": [
            {"type": "boolean"},
            {
                "type": "regular",
                "bins": 3,
                "overflow": False,
                "underflow": False,
                "lower": 0,
                "upper": 1,
                "circular": False,
            },
        ],
    }

    # Convert to sparse
    shist = to_sparse(hist)
    sparse: WeightedStorageIR = shist["storage"]  # type: ignore[assignment]

    # Basic checks on sparse structure
    assert "index" in sparse
    index = sparse["index"]
    assert index.shape[0] == 2
    assert index.shape[1] == 2

    # Verify sparse arrays align with mask
    assert np.all(sparse["values"] == np.array([1.0, 2.0]))
    assert np.all(sparse["variances"] == np.array([0.1, 0.0]))

    # Convert back to dense
    dense = from_sparse(shist)

    # Check round-trip reconstruction
    for key, value in hist["storage"].items():
        if key == "type":
            continue
        assert np.allclose(dense["storage"][key], value)  # type: ignore[literal-required]


def test_all_valid(valid: Path) -> None:
    data = valid.read_text(encoding="utf-8")
    hists = json.loads(data, object_hook=uhi.io.json.object_hook)
    hists = {
        k: from_sparse(v) if "index" in v.get("storage", {}) else v
        for k, v in hists.items()
    }
    shists = {k: to_sparse(v) for k, v in hists.items()}
    for h in shists.values():
        if h["axes"]:
            assert "index" in h["storage"]
    dhists = {k: from_sparse(v) for k, v in shists.items()}
    for h in dhists.values():
        assert "index" not in h["storage"]

    assert hists.keys() == dhists.keys()
    for v, dv in zip(hists.values(), dhists.values()):
        assert v.keys() == dv.keys()
        assert v["axes"] == dv["axes"]
        assert v["storage"].keys() == dv["storage"].keys()
        assert v["storage"]["values"] == pytest.approx(dv["storage"]["values"])
        if v["storage"]["type"] == "weighted_mean":
            v_var = v["storage"]["variances"]
            dv_var = dv["storage"]["variances"]
            assert np.all(np.isnan(v_var) == np.isnan(dv_var))
            assert v_var[~np.isnan(v_var)] == pytest.approx(dv_var[~np.isnan(dv_var)])
            assert v["storage"]["sum_of_weights"] == pytest.approx(
                dv["storage"]["sum_of_weights"]
            )
            assert v["storage"]["sum_of_weights_squared"] == pytest.approx(
                dv["storage"]["sum_of_weights_squared"]
            )
        else:
            assert v["storage"].get("variances") == pytest.approx(
                dv["storage"].get("variances")
            )
            assert v["storage"].get("counts") == pytest.approx(
                dv["storage"].get("counts")
            )


@pytest.mark.parametrize("name", ["mean", "weighted_mean"])
def test_mean(resources: Path, name: str) -> None:
    data = resources.joinpath("valid/mean.json").read_text()
    hist = json.loads(data, object_hook=uhi.io.json.object_hook)[name]
    sparse_hist = to_sparse(hist)
    assert len(sparse_hist["storage"]["values"]) == 2
    assert sparse_hist["storage"]["index"].shape == (1, 2)
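The sparse helpers above in brief: to_sparse keeps only the non-empty bins and records their coordinates in an "index" array, and from_sparse rebuilds the dense storage. A sketch, assuming `hist` is a histogram dict loaded as in the tests (e.g. from reg.json):

from uhi.io import from_sparse, to_sparse

shist = to_sparse(hist)     # storage gains "index"; arrays shrink to non-empty bins
dense = from_sparse(shist)  # dense storage restored; "index" removed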
uhi-1.0.0/tests/test_testing.py
from __future__ import annotations

import boost_histogram as bh

import uhi.testing.indexing


class TestAccess1D(uhi.testing.indexing.Indexing1D[bh.Histogram]):
    @classmethod
    def make_histogram(cls) -> bh.Histogram:
        return bh.Histogram(dict(cls.get_uhi()))


class TestAccess2D(uhi.testing.indexing.Indexing2D[bh.Histogram]):
    @classmethod
    def make_histogram(cls) -> bh.Histogram:
        return bh.Histogram(dict(cls.get_uhi()))


class TestAccess3D(uhi.testing.indexing.Indexing3D[bh.Histogram]):
    @classmethod
    def make_histogram(cls) -> bh.Histogram:
        return bh.Histogram(dict(cls.get_uhi()))


class TestAccessBHTag1D(TestAccess1D):
    tag = bh.tag


class TestAccessBHTag2D(TestAccess2D):
    tag = bh.tag


class TestAccessBHTag3D(TestAccess3D):
    tag = bh.tag

uhi-1.0.0/tests/test_uhi.py
from __future__ import annotations

import importlib.metadata

from uhi import __version__


def test_version() -> None:
    assert __version__ == importlib.metadata.version("uhi")

uhi-1.0.0/tests/test_zip.py
from __future__ import annotations

import copy
import importlib.metadata
import json
import zipfile
from pathlib import Path

import packaging.version
import pytest

import uhi.io.json
import uhi.io.zip
from uhi.io import to_sparse

BHVERSION = packaging.version.Version(importlib.metadata.version("boost_histogram"))
HISTVERSION = packaging.version.Version(importlib.metadata.version("hist"))


def test_valid_json(valid: Path, tmp_path: Path, sparse: bool) -> None:
    data = valid.read_text(encoding="utf-8")
    hists = json.loads(data, object_hook=uhi.io.json.object_hook)
    if sparse:
        hists = {name: to_sparse(hist) for name, hist in hists.items()}

    tmp_file = tmp_path / "test.zip"
    with zipfile.ZipFile(tmp_file, "w") as zip_file:
        for name, hist in hists.items():
            uhi.io.zip.write(zip_file, name, copy.deepcopy(hist))

    with zipfile.ZipFile(tmp_file, "r") as zip_file:
        rehists = {name: uhi.io.zip.read(zip_file, name) for name in hists}

    assert hists.keys() == rehists.keys()

    for name in hists:
        hist = hists[name]
        rehist = rehists[name]

        # Check that the JSON representation is the same
        redata = json.dumps(hist, default=uhi.io.json.default)
        data = json.dumps(rehist, default=uhi.io.json.default)
        assert redata.replace(" ", "").replace("\n", "") == data.replace(
            " ", ""
        ).replace("\n", "")


def test_reg_load(tmp_path: Path, resources: Path) -> None:
    data = resources / "valid/reg.json"
    hists = json.loads(
        data.read_text(encoding="utf-8"), object_hook=uhi.io.json.object_hook
    )

    tmp_file = tmp_path / "test.zip"
    with zipfile.ZipFile(tmp_file, "w") as zip_file:
        for name, hist in hists.items():
            uhi.io.zip.write(zip_file, name, hist)

    with zipfile.ZipFile(tmp_file, "r") as zip_file:
        names = zip_file.namelist()
        rehists = {
            name[:-5]: uhi.io.zip.read(zip_file, name[:-5])
            for name in names
            if name.endswith(".json")
        }
        with zip_file.open("one.json") as f:
            native_one = json.load(f)

    assert set(names) == {
        "one_storage_values.npy",
        "one.json",
        "two_storage_values.npy",
        "two.json",
    }

    assert native_one["storage"]["values"] == "one_storage_values.npy"

    one = rehists["one"]
    two = rehists["two"]

    assert one.get("metadata", {}) == {"one": True, "two": 2, "three": "three"}

    assert len(one["axes"]) == 1
    assert one["axes"][0]["type"] == "regular"
    assert one["axes"][0]["lower"] == pytest.approx(0)
    assert one["axes"][0]["upper"] == pytest.approx(5)
    assert one["axes"][0]["bins"] == 3
    assert one["axes"][0]["underflow"]
    assert one["axes"][0]["overflow"]
    assert not one["axes"][0]["circular"]

    assert one["storage"]["type"] == "int"
    assert one["storage"]["values"] == pytest.approx([1, 2, 3, 4, 5])

    assert len(two["axes"]) == 1
    assert two["axes"][0]["type"] == "regular"
    assert two["axes"][0]["lower"] == pytest.approx(0)
    assert two["axes"][0]["upper"] == pytest.approx(5)
    assert two["axes"][0]["bins"] == 5
    assert two["axes"][0]["underflow"]
    assert two["axes"][0]["overflow"]
    assert not two["axes"][0]["circular"]

    assert two["storage"]["type"] == "double"
    assert two["storage"]["values"] == pytest.approx([1, 2, 3, 4, 5, 6, 7])


@pytest.mark.skipif(
    packaging.version.Version("1.6.1") > BHVERSION,
    reason="Requires boost-histogram 1.6+",
)
def test_convert_bh(tmp_path: Path) -> None:
    import boost_histogram as bh

    h = bh.Histogram(
        bh.axis.Regular(3, 13, 10, __dict__={"name": "x"}), storage=bh.storage.Weight()
    )

    tmp_file = tmp_path / "test.zip"
    with zipfile.ZipFile(tmp_file, "w") as zip_file:
        uhi.io.zip.write(zip_file, "histogram", h)

    with zipfile.ZipFile(tmp_file, "r") as zip_file:
        rehist = uhi.io.zip.read(zip_file, "histogram")

    h2 = bh.Histogram(rehist)
    assert h == h2


@pytest.mark.skipif(
    packaging.version.Version("1.6.1") > BHVERSION
    or packaging.version.Version("2.9.0") > HISTVERSION,
    reason="Requires boost-histogram 1.6+ / Hist 2.9+",
)
def test_convert_hist(tmp_path: Path) -> None:
    import hist

    h = hist.Hist(
        hist.axis.Regular(10, 0, 1, name="a", label="A"),
        hist.axis.Integer(7, 13, overflow=False, name="b", label="B"),
        storage=hist.storage.Weight(),
        name="h",
        label="H",
    )

    tmp_file = tmp_path / "test.zip"
    with zipfile.ZipFile(tmp_file, "w") as zip_file:
        uhi.io.zip.write(zip_file, "histogram", h)

    with zipfile.ZipFile(tmp_file, "r") as zip_file:
        rehist = uhi.io.zip.read(zip_file, "histogram")

    h2 = hist.Hist(rehist)
    assert h == h2
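And the ZIP round trip, distilled the same way (a sketch; assumes boost-histogram 1.6+ as in the guards above). Note from test_reg_load that each histogram is stored as <name>.json plus sidecar .npy array files inside the archive:

import zipfile

import boost_histogram as bh

import uhi.io.zip

h = bh.Histogram(bh.axis.Regular(4, 0, 1))

with zipfile.ZipFile("hists.zip", "w") as zf:
    uhi.io.zip.write(zf, "histogram", h)

with zipfile.ZipFile("hists.zip", "r") as zf:
    ir = uhi.io.zip.read(zf, "histogram")

h2 = bh.Histogram(ir)
assert h == h2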