moxcms-0.7.7/.cargo_vcs_info.json0000644000000001360000000000100123550ustar { "git": { "sha1": "3f73cd502fa5b55cd344730432487d2a89c59608" }, "path_in_vcs": "" }moxcms-0.7.7/.github/FUNDING.yml000064400000000000000000000000761046102023000143250ustar 00000000000000# These are supported funding model platforms github: awxkee moxcms-0.7.7/.github/workflows/build_push.yml000064400000000000000000000102461046102023000174260ustar 00000000000000name: Build concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true on: push: branches: - 'master' - '!ci_test_*' tags-ignore: - '*' pull_request: branches: - 'master' jobs: build: name: Build runs-on: ubuntu-latest strategy: fail-fast: false matrix: rust: [ "1.85.0", stable ] steps: - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@stable with: toolchain: ${{ matrix.rust }} - run: rustup target add aarch64-unknown-linux-gnu x86_64-unknown-linux-gnu i686-unknown-linux-gnu powerpc-unknown-linux-gnu wasm32-unknown-unknown - run: RUSTFLAGS="-C target-feature=+neon" cargo build --target aarch64-unknown-linux-gnu - run: RUSTFLAGS="-C target-feature=+neon" cargo build --target aarch64-unknown-linux-gnu --no-default-features - run: RUSTFLAGS="-C target-feature=+avx2" cargo build --target i686-unknown-linux-gnu - run: cargo build --target powerpc-unknown-linux-gnu - run: RUSTFLAGS="-C target-feature=+avx2" cargo build --target x86_64-unknown-linux-gnu - run: RUSTFLAGS="-C target-feature=+avx2" cargo build --target x86_64-unknown-linux-gnu --no-default-features --features avx - run: RUSTFLAGS="-C target-feature=+avx2" cargo build --target x86_64-unknown-linux-gnu --no-default-features --features sse - run: RUSTFLAGS="-C target-feature=+avx2" cargo +nightly build --target x86_64-unknown-linux-gnu --no-default-features --features avx,avx512 - run: RUSTFLAGS="-C target-feature=+simd128" cargo build --target wasm32-unknown-unknown tests_arm: name: Tests strategy: matrix: features: [ "", neon ] runs-on: 
macos-latest steps: - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@stable - run: cargo test --no-default-features --features "${{ matrix.features }}" tests_x86: name: Tests strategy: matrix: features: [ "", sse, avx ] runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@stable - run: cargo test --no-default-features --features "${{ matrix.features }}" clippy_x86: name: Clippy x86 Stable runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@stable - run: cargo clippy --features avx,sse,neon,options -- -D warnings clippy_x86_nightly: name: Clippy x86 Nightly runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@nightly - run: rustup component add clippy - run: cargo clippy --all-features -- -D warnings clippy_arm: name: Clippy ARM runs-on: macos-latest steps: - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@stable - run: cargo clippy -- -D warnings fuzz_arm: name: Fuzzing ARM runs-on: macos-latest strategy: matrix: feature: [ neon ] steps: - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@nightly - run: cargo install cargo-fuzz - run: cargo fuzz run unsafe --no-default-features --features ${{ matrix.feature }} -- -max_total_time=10 - run: cargo fuzz run lut --no-default-features --features ${{ matrix.feature }} -- -max_total_time=12 fuzz_x86_64: name: Fuzzing x86_64 runs-on: ubuntu-latest strategy: matrix: feature: [ sse, avx ] steps: - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@nightly - run: cargo install cargo-fuzz - run: cargo fuzz run unsafe --no-default-features --features ${{ matrix.feature }} -- -max_total_time=12 - run: cargo fuzz run lut --no-default-features --features ${{ matrix.feature }} -- -max_total_time=12 fuzz_reader: name: Fuzzing Reader runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@nightly - run: cargo install cargo-fuzz - run: cargo fuzz run safe_read 
--no-default-features -- -max_total_time=20 -max_len=512000 - run: cargo fuzz run safe_read_create --no-default-features -- -max_total_time=20 -max_len=512000 moxcms-0.7.7/.github/workflows/nightly_fuzzing.yml000064400000000000000000000010031046102023000205110ustar 00000000000000name: Nightly Fuzzing on: schedule: - cron: '0 2 * * *' # every day at 2:00 UTC workflow_dispatch: jobs: fuzz_reader: name: Fuzzing Reader runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@nightly - run: cargo install cargo-fuzz - run: cargo fuzz run safe_read --no-default-features -- -max_total_time=300 -max_len=512000 - run: cargo fuzz run safe_read_create --no-default-features -- -max_total_time=300 -max_len=512000 moxcms-0.7.7/.github/workflows/no-response.yml000064400000000000000000000023621046102023000175400ustar 00000000000000name: no-response on: schedule: - cron: '0 0 * * *' # Runs daily at midnight workflow_dispatch: jobs: noResponse: permissions: issues: write pull-requests: write runs-on: ubuntu-latest steps: - uses: actions/stale@v9 with: repo-token: ${{ github.token }} days-before-stale: -1 days-before-close: 14 only-labels: 'waiting for author' stale-issue-label: 'waiting for author' stale-pr-label: 'waiting for author' remove-stale-when-updated: true ignore-updates: false close-issue-message: This issue has been automatically closed due to inactivity. We requested additional information but have not received a response from the original author. Without the requested details, we cannot proceed. If you have or find the information needed, please comment so we can reopen the issue. close-pr-message: This pull request has been automatically closed due to inactivity. We requested additional information but have not received a response from the original author. Without the requested details, we cannot proceed. 
If you have the needed information or updates, please reopen the PR or comment so we can continue the review.moxcms-0.7.7/.github/workflows/publish_release.yml000064400000000000000000000007601046102023000204360ustar 00000000000000name: Create Release concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true on: push: tags: - '*' jobs: build_and_publish: name: Build runs-on: ubuntu-latest environment: Cargo steps: - uses: actions/checkout@v4 - uses: actions-rust-lang/setup-rust-toolchain@v1 - name: Make a release env: CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_TOKEN }} run: cargo publish --manifest-path Cargo.tomlmoxcms-0.7.7/.gitignore000064400000000000000000000001471046102023000131370ustar 00000000000000/target Cargo.lock .idea app/target flamegraph.svg perf.data profile.json.gz .cargo rust-toolchain.tomlmoxcms-0.7.7/Cargo.lock0000644000000110530000000000100103300ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. version = 4 [[package]] name = "autocfg" version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "cfg-if" version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" [[package]] name = "getrandom" version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" dependencies = [ "cfg-if", "libc", "r-efi", "wasi", ] [[package]] name = "libc" version = "0.2.177" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" [[package]] name = "moxcms" version = "0.7.7" dependencies = [ "num-traits", "pxfm", "rand", ] [[package]] name = "num-traits" version = "0.2.19" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", ] [[package]] name = "ppv-lite86" version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" dependencies = [ "zerocopy", ] [[package]] name = "proc-macro2" version = "1.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" dependencies = [ "unicode-ident", ] [[package]] name = "pxfm" version = "0.1.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a3cbdf373972bf78df4d3b518d07003938e2c7d1fb5891e55f9cb6df57009d84" dependencies = [ "num-traits", ] [[package]] name = "quote" version = "1.0.41" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" dependencies = [ "proc-macro2", ] [[package]] name = "r-efi" version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" [[package]] name = "rand" version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" dependencies = [ "rand_chacha", "rand_core", ] [[package]] name = "rand_chacha" version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", "rand_core", ] [[package]] name = "rand_core" version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" dependencies = [ "getrandom", ] [[package]] name = "syn" version = "2.0.106" 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] [[package]] name = "unicode-ident" version = "1.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d" [[package]] name = "wasi" version = "0.14.7+wasi-0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "883478de20367e224c0090af9cf5f9fa85bed63a95c1abf3afc5c083ebc06e8c" dependencies = [ "wasip2", ] [[package]] name = "wasip2" version = "1.0.1+wasi-0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" dependencies = [ "wit-bindgen", ] [[package]] name = "wit-bindgen" version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" [[package]] name = "zerocopy" version = "0.8.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" version = "0.8.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" dependencies = [ "proc-macro2", "quote", "syn", ] moxcms-0.7.7/Cargo.toml0000644000000030350000000000100103540ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. 
# # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2024" rust-version = "1.85.0" name = "moxcms" version = "0.7.7" authors = ["Radzivon Bartoshyk"] build = false exclude = [ "*.jpg", "../../assets/*", "*.png", "*.icc", "./assets/*", ] autolib = false autobins = false autoexamples = false autotests = false autobenches = false description = "Simple Color Management in Rust" homepage = "https://github.com/awxkee/moxcms" documentation = "https://github.com/awxkee/moxcms" readme = "README.md" keywords = [ "icc", "cms", "color", "cmyk", ] categories = ["multimedia::images"] license = "BSD-3-Clause OR Apache-2.0" repository = "https://github.com/awxkee/moxcms.git" [package.metadata.docs.rs] all-features = true rustdoc-args = [ "--cfg", "docsrs", ] [features] avx = [] avx512 = [] default = [ "avx", "sse", "neon", ] neon = [] options = [] sse = [] [lib] name = "moxcms" path = "src/lib.rs" [dependencies.num-traits] version = "0.2" [dependencies.pxfm] version = "^0.1.1" [dev-dependencies.rand] version = "0.9" [profile.profiling] debug = 2 inherits = "release" moxcms-0.7.7/Cargo.toml.orig000064400000000000000000000026161046102023000140410ustar 00000000000000workspace = { members = ["app", "fuzz"] } [package] name = "moxcms" version = "0.7.7" edition = "2024" description = "Simple Color Management in Rust" readme = "./README.md" keywords = ["icc", "cms", "color", "cmyk"] license = "BSD-3-Clause OR Apache-2.0" authors = ["Radzivon Bartoshyk"] documentation = "https://github.com/awxkee/moxcms" categories = ["multimedia::images"] homepage = "https://github.com/awxkee/moxcms" repository = "https://github.com/awxkee/moxcms.git" exclude = ["*.jpg", "../../assets/*", "*.png", "*.icc", "./assets/*"] rust-version = "1.85.0" [dependencies] num-traits = "0.2" pxfm = "^0.1.1" [dev-dependencies] rand = "0.9" [features] # If no unsafe 
intrinsics active then `forbid(unsafe)` will be used. default = ["avx", "sse", "neon"] # Enables AVX2 acceleration where possible avx = [] # Enables SSE4.1 acceleration where possible sse = [] # Enables NEON intrinsics where possible neon = [] # Enables AVX-512 acceleration where possible. This will work only from 1.89 on stable. avx512 = [] # Allows configuring interpolation methods and LUT weights precision. # Disabled by default to prevent binary bloat. options = [] [package.metadata.docs.rs] # To build locally: # RUSTDOCFLAGS="--cfg docsrs" cargo +nightly doc --all-features --no-deps --open --manifest-path ./Cargo.toml all-features = true rustdoc-args = ["--cfg", "docsrs"] [profile.profiling] inherits = "release" debug = true moxcms-0.7.7/LICENSE-APACHE.md000064400000000000000000000261241046102023000134750ustar 00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. 
"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. 
Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright 2024 Radzivon Bartoshyk Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. moxcms-0.7.7/LICENSE.md000064400000000000000000000027421046102023000125560ustar 00000000000000Copyright (c) Radzivon Bartoshyk. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.moxcms-0.7.7/README.md000064400000000000000000000044651046102023000124350ustar 00000000000000# Rust ICC Management Fast and safe conversion between ICC profiles; in pure Rust. Supports CMYK⬌RGBX, RGBX⬌RGBX, RGBX⬌GRAY, LAB⬌RGBX and CMYK⬌LAB, GRAY⬌RGB, any 3/4 color profiles to RGB and vice versa. Also supports almost any to any Display Class ICC profiles up to 16 inks. ## Example ```rust let f_str = "./assets/dci_p3_profile.jpeg"; let file = File::open(f_str).expect("Failed to open file"); let img = image::ImageReader::open(f_str).unwrap().decode().unwrap(); let rgb = img.to_rgb8(); let mut decoder = JpegDecoder::new(BufReader::new(file)).unwrap(); let icc = decoder.icc_profile().unwrap().unwrap(); let color_profile = ColorProfile::new_from_slice(&icc).unwrap(); let dest_profile = ColorProfile::new_srgb(); let transform = color_profile .create_transform_8bit(&dest_profile, Layout::Rgb8, TransformOptions::default()) .unwrap(); let mut dst = vec![0u8; rgb.len()]; for (src, dst) in rgb .chunks_exact(img.width() as usize * 3) .zip(dst.chunks_exact_mut(img.dimensions().0 as usize * 3)) { transform .transform( &src[..img.dimensions().0 as usize * 3], &mut dst[..img.dimensions().0 as usize * 3], ) .unwrap(); } image::save_buffer( "v1.jpg", &dst, img.dimensions().0, img.dimensions().1, image::ExtendedColorType::Rgb8, ) .unwrap(); ``` ## Benchmarks ### ICC Transform 8-Bit Tests were run with a 1997×1331 resolution image. 
| Conversion | Time(NEON) | Time(AVX2) | |--------------------|:----------:|:----------:| | moxcms RGB⮕RGB | 2.68ms | 4.52ms | | moxcms LUT RGB⮕RGB | 7.18ms | 17.50ms | | moxcms RGBA⮕RGBA | 2.96ms | 4.83ms | | moxcms CMYK⮕RGBA | 11.86ms | 27.98ms | | lcms2 RGB⮕RGB | 13.1ms | 27.73ms | | lcms2 LUT RGB⮕RGB | 27.60ms | 58.26ms | | lcms2 RGBA⮕RGBA | 21.97ms | 35.70ms | | lcms2 CMYK⮕RGBA | 39.71ms | 79.40ms | | qcms RGB⮕RGB | 6.47ms | 4.59ms | | qcms LUT RGB⮕RGB | 26.72ms | 60.80ms | | qcms RGBA⮕RGBA | 6.83ms | 4.99ms | | qcms CMYK⮕RGBA | 25.97ms | 61.54ms | ## License This project is licensed under either of - BSD-3-Clause License (see [LICENSE](LICENSE.md)) - Apache License, Version 2.0 (see [LICENSE](LICENSE-APACHE.md)) at your option. moxcms-0.7.7/src/chad.rs000064400000000000000000000127571046102023000132150ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ use crate::matrix::{Matrix3f, Vector3f, Xyz}; use crate::{Chromaticity, Matrix3d, Vector3d, XyY}; pub(crate) const BRADFORD_D: Matrix3d = Matrix3d { v: [ [0.8951, 0.2664, -0.1614], [-0.7502, 1.7135, 0.0367], [0.0389, -0.0685, 1.0296], ], }; pub(crate) const BRADFORD_F: Matrix3f = BRADFORD_D.to_f32(); #[inline] pub(crate) const fn compute_chromatic_adaption( source_white_point: Xyz, dest_white_point: Xyz, chad: Matrix3f, ) -> Matrix3f { let cone_source_xyz = Vector3f { v: [ source_white_point.x, source_white_point.y, source_white_point.z, ], }; let cone_source_rgb = chad.mul_vector(cone_source_xyz); let cone_dest_xyz = Vector3f { v: [dest_white_point.x, dest_white_point.y, dest_white_point.z], }; let cone_dest_rgb = chad.mul_vector(cone_dest_xyz); let cone = Matrix3f { v: [ [cone_dest_rgb.v[0] / cone_source_rgb.v[0], 0., 0.], [0., cone_dest_rgb.v[1] / cone_source_rgb.v[1], 0.], [0., 0., cone_dest_rgb.v[2] / cone_source_rgb.v[2]], ], }; let chad_inv = chad.inverse(); let p0 = cone.mat_mul_const(chad); chad_inv.mat_mul_const(p0) } #[inline] pub(crate) const fn compute_chromatic_adaption_d( source_white_point: Xyz, dest_white_point: Xyz, chad: Matrix3d, ) -> Matrix3d { let cone_source_xyz = Vector3d { v: [ source_white_point.x as f64, source_white_point.y as f64, source_white_point.z as f64, ], }; let cone_source_rgb = chad.mul_vector(cone_source_xyz); let cone_dest_xyz = Vector3d { v: [ dest_white_point.x as f64, 
dest_white_point.y as f64, dest_white_point.z as f64, ], }; let cone_dest_rgb = chad.mul_vector(cone_dest_xyz); let cone = Matrix3d { v: [ [cone_dest_rgb.v[0] / cone_source_rgb.v[0], 0., 0.], [0., cone_dest_rgb.v[1] / cone_source_rgb.v[1], 0.], [0., 0., cone_dest_rgb.v[2] / cone_source_rgb.v[2]], ], }; let chad_inv = chad.inverse(); let p0 = cone.mat_mul_const(chad); chad_inv.mat_mul_const(p0) } pub const fn adaption_matrix(source_illumination: Xyz, target_illumination: Xyz) -> Matrix3f { compute_chromatic_adaption(source_illumination, target_illumination, BRADFORD_F) } pub const fn adaption_matrix_d(source_illumination: Xyz, target_illumination: Xyz) -> Matrix3d { compute_chromatic_adaption_d(source_illumination, target_illumination, BRADFORD_D) } pub const fn adapt_to_d50(r: Matrix3f, source_white_pt: XyY) -> Matrix3f { adapt_to_illuminant(r, source_white_pt, Chromaticity::D50.to_xyz()) } pub const fn adapt_to_d50_d(r: Matrix3d, source_white_pt: XyY) -> Matrix3d { adapt_to_illuminant_d(r, source_white_pt, Chromaticity::D50.to_xyz()) } pub const fn adapt_to_illuminant( r: Matrix3f, source_white_pt: XyY, illuminant_xyz: Xyz, ) -> Matrix3f { let bradford = adaption_matrix(source_white_pt.to_xyz(), illuminant_xyz); bradford.mat_mul_const(r) } pub const fn adapt_to_illuminant_d( r: Matrix3d, source_white_pt: XyY, illuminant_xyz: Xyz, ) -> Matrix3d { let bradford = adaption_matrix_d(source_white_pt.to_xyz(), illuminant_xyz); bradford.mat_mul_const(r) } pub const fn adapt_to_illuminant_xyz( r: Matrix3f, source_white_pt: Xyz, illuminant_xyz: Xyz, ) -> Matrix3f { if source_white_pt.y == 0.0 { return r; } let bradford = adaption_matrix(source_white_pt, illuminant_xyz); bradford.mat_mul_const(r) } pub const fn adapt_to_illuminant_xyz_d( r: Matrix3d, source_white_pt: Xyz, illuminant_xyz: Xyz, ) -> Matrix3d { if source_white_pt.y == 0.0 { return r; } let bradford = adaption_matrix_d(source_white_pt, illuminant_xyz); bradford.mat_mul_const(r) } 
moxcms-0.7.7/src/chromaticity.rs000064400000000000000000000121601046102023000150010ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ use crate::{CmsError, XyY, XyYRepresentable, Xyz, Xyzd}; #[derive(Clone, Debug, Copy)] #[repr(C)] pub struct Chromaticity { pub x: f32, pub y: f32, } impl Chromaticity { #[inline] pub const fn new(x: f32, y: f32) -> Self { Self { x, y } } /// Converts this chromaticity (`x`, `y`) to a tristimulus [`Xyz`] value, /// normalized such that `y = 1.0`. #[inline] pub const fn to_xyz(&self) -> Xyz { let reciprocal = if self.y != 0. { 1. / self.y } else { 0. }; Xyz { x: self.x * reciprocal, y: 1f32, z: (1f32 - self.x - self.y) * reciprocal, } } /// Get the color representation with component sum `1`. /// /// In contrast to the XYZ representation defined through setting `Y` to a known /// value (such as `1` in [`Self::to_xyz`]) this representation can be uniquely /// derived from the `xy` coordinates with no ambiguities. It is scaled from the /// original XYZ color by diving by `X + Y + Z`. Note that, in particular, this /// method is well-defined even if the original color had pure chromamatic /// information with no luminance (Y = `0`) and will preserve that information, /// whereas [`Self::to_xyz`] is ill-defined and returns an incorrect value. #[inline] pub const fn to_scaled_xyzd(&self) -> Xyzd { let z = 1.0 - self.x as f64 - self.y as f64; Xyzd::new(self.x as f64, self.y as f64, z) } /// Get the color representation with component sum `1`. /// /// In contrast to the XYZ representation defined through setting `Y` to a known /// value (such as `1` in [`Self::to_xyz`]) this representation can be uniquely /// derived from the `xy` coordinates with no ambiguities. It is scaled from the /// original XYZ color by diving by `X + Y + Z`. Note that, in particular, this /// method is well-defined even if the original color had pure chromamatic /// information with no luminance (Y = `0`) and will preserve that information, /// whereas [`Self::to_xyz`] is ill-defined and returns an incorrect value. 
#[inline] pub const fn to_scaled_xyz(&self) -> Xyz { let z = 1.0 - self.x - self.y; Xyz::new(self.x, self.y, z) } #[inline] pub const fn to_xyzd(&self) -> Xyzd { let reciprocal = if self.y != 0. { 1. / self.y } else { 0. }; Xyzd { x: self.x as f64 * reciprocal as f64, y: 1f64, z: (1f64 - self.x as f64 - self.y as f64) * reciprocal as f64, } } #[inline] pub const fn to_xyyb(&self) -> XyY { XyY { x: self.x as f64, y: self.y as f64, yb: 1., } } pub const D65: Chromaticity = Chromaticity { x: 0.31272, y: 0.32903, }; pub const D50: Chromaticity = Chromaticity { x: 0.34567, y: 0.35850, }; } impl XyYRepresentable for Chromaticity { fn to_xyy(self) -> XyY { self.to_xyyb() } } impl TryFrom for Chromaticity { type Error = CmsError; #[inline] fn try_from(xyz: Xyz) -> Result { let sum = xyz.x + xyz.y + xyz.z; // Avoid division by zero or invalid XYZ values if sum == 0.0 { return Err(CmsError::DivisionByZero); } let rec = 1f32 / sum; let chromaticity_x = xyz.x * rec; let chromaticity_y = xyz.y * rec; Ok(Chromaticity { x: chromaticity_x, y: chromaticity_y, }) } } moxcms-0.7.7/src/cicp.rs000064400000000000000000000633701046102023000132310ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. 
* // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ use crate::gamma::{ bt1361_to_linear, hlg_to_linear, iec61966_to_linear, log100_sqrt10_to_linear, log100_to_linear, pq_to_linear, smpte240_to_linear, smpte428_to_linear, }; use crate::{ Chromaticity, ColorProfile, Matrix3d, Matrix3f, XyYRepresentable, err::CmsError, trc::{ToneReprCurve, build_trc_table, curve_from_gamma}, }; use std::convert::TryFrom; /// See [Rec. ITU-T H.273 (12/2016)](https://www.itu.int/rec/T-REC-H.273-201612-I/en) Table 2 /// Values 0, 3, 13–21, 23–255 are all reserved so all map to the same variant #[derive(Clone, Copy, Debug, PartialEq)] pub enum CicpColorPrimaries { /// For future use by ITU-T | ISO/IEC Reserved, /// Rec. ITU-R BT.709-6
/// Rec. ITU-R BT.1361-0 conventional colour gamut system and extended colour gamut system (historical)
/// IEC 61966-2-1 sRGB or sYCC IEC 61966-2-4
/// Society of Motion Picture and Television Engineers (MPTE) RP 177 (1993) Annex B
Bt709 = 1, /// Unspecified
/// Image characteristics are unknown or are determined by the application. Unspecified = 2, /// Rec. ITU-R BT.470-6 System M (historical)
/// United States National Television System Committee 1953 Recommendation for transmission standards for color television
/// United States Federal Communications Commission (2003) Title 47 Code of Federal Regulations 73.682 (a) (20)
Bt470M = 4, /// Rec. ITU-R BT.470-6 System B, G (historical) Rec. ITU-R BT.601-7 625
/// Rec. ITU-R BT.1358-0 625 (historical)
/// Rec. ITU-R BT.1700-0 625 PAL and 625 SECAM
Bt470Bg = 5, /// Rec. ITU-R BT.601-7 525
/// Rec. ITU-R BT.1358-1 525 or 625 (historical) Rec. ITU-R BT.1700-0 NTSC
/// SMPTE 170M (2004)
/// (functionally the same as the value 7)
Bt601 = 6, /// SMPTE 240M (1999) (historical) (functionally the same as the value 6)
Smpte240 = 7, /// Generic film (colour filters using Illuminant C)
GenericFilm = 8, /// Rec. ITU-R BT.2020-2
/// Rec. ITU-R BT.2100-0
Bt2020 = 9, /// SMPTE ST 428-1
/// (CIE 1931 XYZ as in ISO 11664-1)
Xyz = 10, /// SMPTE RP 431-2 (2011)
Smpte431 = 11, /// SMPTE EG 432-1 (2010)
Smpte432 = 12, /// EBU Tech. 3213-E (1975)
Ebu3213 = 22, } impl TryFrom for CicpColorPrimaries { type Error = CmsError; #[allow(unreachable_patterns)] fn try_from(value: u8) -> Result { match value { // Values 0, 3, 13–21, 23–255 are all reserved so all map to the // same variant. 0 | 3 | 13..=21 | 23..=255 => Ok(Self::Reserved), 1 => Ok(Self::Bt709), 2 => Ok(Self::Unspecified), 4 => Ok(Self::Bt470M), 5 => Ok(Self::Bt470Bg), 6 => Ok(Self::Bt601), 7 => Ok(Self::Smpte240), 8 => Ok(Self::GenericFilm), 9 => Ok(Self::Bt2020), 10 => Ok(Self::Xyz), 11 => Ok(Self::Smpte431), 12 => Ok(Self::Smpte432), 22 => Ok(Self::Ebu3213), _ => Err(CmsError::InvalidCicp), } } } #[derive(Clone, Copy, Debug)] #[repr(C)] pub struct ColorPrimaries { pub red: Chromaticity, pub green: Chromaticity, pub blue: Chromaticity, } /// See [Rec. ITU-T H.273 (12/2016)](https://www.itu.int/rec/T-REC-H.273-201612-I/en) Table 2. impl ColorPrimaries { /// [ACEScg](https://en.wikipedia.org/wiki/Academy_Color_Encoding_System#ACEScg). pub const ACES_CG: ColorPrimaries = ColorPrimaries { red: Chromaticity { x: 0.713, y: 0.293 }, green: Chromaticity { x: 0.165, y: 0.830 }, blue: Chromaticity { x: 0.128, y: 0.044 }, }; /// [ACES2065-1](https://en.wikipedia.org/wiki/Academy_Color_Encoding_System#ACES2065-1). pub const ACES_2065_1: ColorPrimaries = ColorPrimaries { red: Chromaticity { x: 0.7347, y: 0.2653, }, green: Chromaticity { x: 0.0000, y: 1.0000, }, blue: Chromaticity { x: 0.0001, y: -0.0770, }, }; /// [Adobe RGB](https://en.wikipedia.org/wiki/Adobe_RGB_color_space) (1998). pub const ADOBE_RGB: ColorPrimaries = ColorPrimaries { red: Chromaticity { x: 0.64, y: 0.33 }, green: Chromaticity { x: 0.21, y: 0.71 }, blue: Chromaticity { x: 0.15, y: 0.06 }, }; /// [DCI P3](https://en.wikipedia.org/wiki/DCI-P3#DCI_P3). /// /// This is the same as [`DISPLAY_P3`](Self::DISPLAY_P3), /// [`SMPTE_431`](Self::SMPTE_431) and [`SMPTE_432`](Self::SMPTE_432). 
pub const DCI_P3: ColorPrimaries = ColorPrimaries { red: Chromaticity { x: 0.680, y: 0.320 }, green: Chromaticity { x: 0.265, y: 0.690 }, blue: Chromaticity { x: 0.150, y: 0.060 }, }; /// [Diplay P3](https://en.wikipedia.org/wiki/DCI-P3#Display_P3). /// /// This is the same as [`DCI_P3`](Self::DCI_P3), /// [`SMPTE_431`](Self::SMPTE_431) and [`SMPTE_432`](Self::SMPTE_432). pub const DISPLAY_P3: ColorPrimaries = Self::DCI_P3; /// SMPTE RP 431-2 (2011). /// /// This is the same as [`DCI_P3`](Self::DCI_P3), /// [`DISPLAY_P3`](Self::DISPLAY_P3) and [`SMPTE_432`](Self::SMPTE_432). pub const SMPTE_431: ColorPrimaries = Self::DCI_P3; /// SMPTE EG 432-1 (2010). /// /// This is the same as [`DCI_P3`](Self::DCI_P3), /// [`DISPLAY_P3`](Self::DISPLAY_P3) and [`SMPTE_431`](Self::SMPTE_431). pub const SMPTE_432: ColorPrimaries = Self::DCI_P3; /// [ProPhoto RGB](https://en.wikipedia.org/wiki/ProPhoto_RGB_color_space). pub const PRO_PHOTO_RGB: ColorPrimaries = ColorPrimaries { red: Chromaticity { x: 0.734699, y: 0.265301, }, green: Chromaticity { x: 0.159597, y: 0.840403, }, blue: Chromaticity { x: 0.036598, y: 0.000105, }, }; /// Rec. ITU-R BT.709-6 /// /// Rec. ITU-R BT.1361-0 conventional colour gamut system and extended /// colour gamut system (historical). /// /// IEC 61966-2-1 sRGB or sYCC IEC 61966-2-4). /// /// Society of Motion Picture and Television Engineers (MPTE) RP 177 (1993) Annex B. pub const BT_709: ColorPrimaries = ColorPrimaries { red: Chromaticity { x: 0.64, y: 0.33 }, green: Chromaticity { x: 0.30, y: 0.60 }, blue: Chromaticity { x: 0.15, y: 0.06 }, }; /// Rec. ITU-R BT.470-6 System M (historical). /// /// United States National Television System Committee 1953 Recommendation /// for transmission standards for color television. /// /// United States Federal Communications Commission (2003) Title 47 Code of /// Federal Regulations 73.682 (a) (20). 
pub const BT_470M: ColorPrimaries = ColorPrimaries { red: Chromaticity { x: 0.67, y: 0.33 }, green: Chromaticity { x: 0.21, y: 0.71 }, blue: Chromaticity { x: 0.14, y: 0.08 }, }; /// Rec. ITU-R BT.470-6 System B, G (historical) Rec. ITU-R BT.601-7 625. /// /// Rec. ITU-R BT.1358-0 625 (historical). /// Rec. ITU-R BT.1700-0 625 PAL and 625 SECAM. pub const BT_470BG: ColorPrimaries = ColorPrimaries { red: Chromaticity { x: 0.64, y: 0.33 }, green: Chromaticity { x: 0.29, y: 0.60 }, blue: Chromaticity { x: 0.15, y: 0.06 }, }; /// Rec. ITU-R BT.601-7 525. /// /// Rec. ITU-R BT.1358-1 525 or 625 (historical) Rec. ITU-R BT.1700-0 NTSC. /// /// SMPTE 170M (2004) (functionally the same as the [`SMPTE_240`](Self::SMPTE_240)). pub const BT_601: ColorPrimaries = ColorPrimaries { red: Chromaticity { x: 0.630, y: 0.340 }, green: Chromaticity { x: 0.310, y: 0.595 }, blue: Chromaticity { x: 0.155, y: 0.070 }, }; /// SMPTE 240M (1999) (historical) (functionally the same as [`BT_601`](Self::BT_601)). pub const SMPTE_240: ColorPrimaries = Self::BT_601; /// Generic film (colour filters using Illuminant C). pub const GENERIC_FILM: ColorPrimaries = ColorPrimaries { red: Chromaticity { x: 0.681, y: 0.319 }, green: Chromaticity { x: 0.243, y: 0.692 }, blue: Chromaticity { x: 0.145, y: 0.049 }, }; /// Rec. ITU-R BT.2020-2. /// /// Rec. ITU-R BT.2100-0. pub const BT_2020: ColorPrimaries = ColorPrimaries { red: Chromaticity { x: 0.708, y: 0.292 }, green: Chromaticity { x: 0.170, y: 0.797 }, blue: Chromaticity { x: 0.131, y: 0.046 }, }; /// SMPTE ST 428-1 (CIE 1931 XYZ as in ISO 11664-1). pub const XYZ: ColorPrimaries = ColorPrimaries { red: Chromaticity { x: 1.0, y: 0.0 }, green: Chromaticity { x: 0.0, y: 1.0 }, blue: Chromaticity { x: 0.0, y: 0.0 }, }; /// EBU Tech. 3213-E (1975). 
pub const EBU_3213: ColorPrimaries = ColorPrimaries { red: Chromaticity { x: 0.630, y: 0.340 }, green: Chromaticity { x: 0.295, y: 0.605 }, blue: Chromaticity { x: 0.155, y: 0.077 }, }; } impl ColorPrimaries { /// Returns RGB -> XYZ conversion matrix /// /// # Arguments /// /// * `white_point`: [Chromaticity] or [crate::XyY] or any item conforming [XyYRepresentable] /// /// returns: [Matrix3d] pub fn transform_to_xyz_d(self, white_point: impl XyYRepresentable) -> Matrix3d { let red_xyz = self.red.to_scaled_xyzd(); let green_xyz = self.green.to_scaled_xyzd(); let blue_xyz = self.blue.to_scaled_xyzd(); let xyz_matrix = Matrix3d { v: [ [red_xyz.x, green_xyz.x, blue_xyz.x], [red_xyz.y, green_xyz.y, blue_xyz.y], [red_xyz.z, green_xyz.z, blue_xyz.z], ], }; ColorProfile::rgb_to_xyz_d(xyz_matrix, white_point.to_xyy().to_xyzd()) } /// Returns RGB -> XYZ conversion matrix /// /// # Arguments /// /// * `white_point`: [Chromaticity] or [crate::XyY] or any item conforming [XyYRepresentable] /// /// returns: [Matrix3f] pub fn transform_to_xyz(self, white_point: impl XyYRepresentable) -> Matrix3f { let red_xyz = self.red.to_scaled_xyz(); let green_xyz = self.green.to_scaled_xyz(); let blue_xyz = self.blue.to_scaled_xyz(); let xyz_matrix = Matrix3f { v: [ [red_xyz.x, green_xyz.x, blue_xyz.x], [red_xyz.y, green_xyz.y, blue_xyz.y], [red_xyz.z, green_xyz.z, blue_xyz.z], ], }; ColorProfile::rgb_to_xyz_static(xyz_matrix, white_point.to_xyy().to_xyz()) } } /// See [Rec. ITU-T H.273 (12/2016)](https://www.itu.int/rec/T-REC-H.273-201612-I/en) Table 3 /// Values 0, 3, 19–255 are all reserved so all map to the same variant #[derive(Clone, Copy, Debug, PartialEq)] pub enum TransferCharacteristics { /// For future use by ITU-T | ISO/IEC Reserved, /// Rec. ITU-R BT.709-6
/// Rec. ITU-R BT.1361-0 conventional colour gamut system (historical)
/// (functionally the same as the values 6, 14 and 15)
Bt709 = 1, /// Image characteristics are unknown or are determined by the application.
Unspecified = 2, /// Rec. ITU-R BT.470-6 System M (historical)
/// United States National Television System Committee 1953 Recommendation for transmission standards for color television
/// United States Federal Communications Commission (2003) Title 47 Code of Federal Regulations 73.682 (a) (20)
/// Rec. ITU-R BT.1700-0 625 PAL and 625 SECAM
Bt470M = 4, /// Rec. ITU-R BT.470-6 System B, G (historical)
Bt470Bg = 5, /// Rec. ITU-R BT.601-7 525 or 625
/// Rec. ITU-R BT.1358-1 525 or 625 (historical)
/// Rec. ITU-R BT.1700-0 NTSC SMPTE 170M (2004)
/// (functionally the same as the values 1, 14 and 15)
Bt601 = 6, /// SMPTE 240M (1999) (historical)
Smpte240 = 7, /// Linear transfer characteristics
Linear = 8, /// Logarithmic transfer characteristic (100:1 range)
Log100 = 9, /// Logarithmic transfer characteristic (100 * Sqrt( 10 ) : 1 range)
Log100sqrt10 = 10, /// IEC 61966-2-4
Iec61966 = 11, /// Rec. ITU-R BT.1361-0 extended colour gamut system (historical)
Bt1361 = 12, /// IEC 61966-2-1 sRGB or sYCC
Srgb = 13, /// Rec. ITU-R BT.2020-2 (10-bit system)
/// (functionally the same as the values 1, 6 and 15)
Bt202010bit = 14, /// Rec. ITU-R BT.2020-2 (12-bit system)
/// (functionally the same as the values 1, 6 and 14)
Bt202012bit = 15, /// SMPTE ST 2084 for 10-, 12-, 14- and 16-bitsystems
/// Rec. ITU-R BT.2100-0 perceptual quantization (PQ) system
Smpte2084 = 16, /// SMPTE ST 428-1
Smpte428 = 17, /// ARIB STD-B67
/// Rec. ITU-R BT.2100-0 hybrid log- gamma (HLG) system
Hlg = 18, } impl TryFrom for TransferCharacteristics { type Error = CmsError; #[allow(unreachable_patterns)] fn try_from(value: u8) -> Result { match value { 0 | 3 | 19..=255 => Ok(Self::Reserved), 1 => Ok(Self::Bt709), 2 => Ok(Self::Unspecified), 4 => Ok(Self::Bt470M), 5 => Ok(Self::Bt470Bg), 6 => Ok(Self::Bt601), 7 => Ok(Self::Smpte240), // unimplemented 8 => Ok(Self::Linear), 9 => Ok(Self::Log100), 10 => Ok(Self::Log100sqrt10), 11 => Ok(Self::Iec61966), // unimplemented 12 => Ok(Self::Bt1361), // unimplemented 13 => Ok(Self::Srgb), 14 => Ok(Self::Bt202010bit), 15 => Ok(Self::Bt202012bit), 16 => Ok(Self::Smpte2084), 17 => Ok(Self::Smpte428), // unimplemented 18 => Ok(Self::Hlg), _ => Err(CmsError::InvalidCicp), } } } impl CicpColorPrimaries { pub(crate) const fn has_chromaticity(self) -> bool { self as u8 != Self::Reserved as u8 && self as u8 != Self::Unspecified as u8 } pub(crate) const fn white_point(self) -> Result { Ok(match self { Self::Reserved => return Err(CmsError::UnsupportedColorPrimaries(self as u8)), Self::Bt709 | Self::Bt470Bg | Self::Bt601 | Self::Smpte240 | Self::Bt2020 | Self::Smpte432 | Self::Ebu3213 => Chromaticity::D65, Self::Unspecified => return Err(CmsError::UnsupportedColorPrimaries(self as u8)), Self::Bt470M => Chromaticity { x: 0.310, y: 0.316 }, Self::GenericFilm => Chromaticity { x: 0.310, y: 0.316 }, Self::Xyz => Chromaticity { x: 1. / 3., y: 1. 
/ 3., }, Self::Smpte431 => Chromaticity { x: 0.314, y: 0.351 }, }) } } impl TryFrom for ColorPrimaries { type Error = CmsError; fn try_from(value: CicpColorPrimaries) -> Result { match value { CicpColorPrimaries::Reserved => Err(CmsError::UnsupportedColorPrimaries(value as u8)), CicpColorPrimaries::Bt709 => Ok(ColorPrimaries::BT_709), CicpColorPrimaries::Unspecified => { Err(CmsError::UnsupportedColorPrimaries(value as u8)) } CicpColorPrimaries::Bt470M => Ok(ColorPrimaries::BT_470M), CicpColorPrimaries::Bt470Bg => Ok(ColorPrimaries::BT_470BG), CicpColorPrimaries::Bt601 | CicpColorPrimaries::Smpte240 => Ok(ColorPrimaries::BT_601), CicpColorPrimaries::GenericFilm => Ok(ColorPrimaries::GENERIC_FILM), CicpColorPrimaries::Bt2020 => Ok(ColorPrimaries::BT_2020), CicpColorPrimaries::Xyz => Ok(ColorPrimaries::XYZ), // These two share primaries, but have distinct white points CicpColorPrimaries::Smpte431 | CicpColorPrimaries::Smpte432 => { Ok(ColorPrimaries::SMPTE_431) } CicpColorPrimaries::Ebu3213 => Ok(ColorPrimaries::EBU_3213), } } } impl TransferCharacteristics { pub(crate) fn has_transfer_curve(self) -> bool { self != Self::Reserved && self != Self::Unspecified } } pub(crate) fn create_rec709_parametric() -> [f32; 5] { const POW_EXP: f32 = 0.45; const G: f32 = 1. / POW_EXP; const B: f32 = (0.09929682680944f64 / 1.09929682680944f64) as f32; const C: f32 = 1f32 / 4.5f32; const D: f32 = (4.5f64 * 0.018053968510807f64) as f32; const A: f32 = (1. / 1.09929682680944f64) as f32; [G, A, B, C, D] } impl TryFrom for ToneReprCurve { type Error = CmsError; /// See [ICC.1:2010](https://www.color.org/specification/ICC1v43_2010-12.pdf) /// See [Rec. 
ITU-R BT.2100-2](https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.2100-2-201807-I!!PDF-E.pdf) fn try_from(value: TransferCharacteristics) -> Result { const NUM_TRC_TABLE_ENTRIES: i32 = 1024; Ok(match value { TransferCharacteristics::Reserved => { return Err(CmsError::UnsupportedTrc(value as u8)); } TransferCharacteristics::Bt709 | TransferCharacteristics::Bt601 | TransferCharacteristics::Bt202010bit | TransferCharacteristics::Bt202012bit => { // The opto-electronic transfer characteristic function (OETF) // as defined in ITU-T H.273 table 3, row 1: // // V = (α * Lc^0.45) − (α − 1) for 1 >= Lc >= β // V = 4.500 * Lc for β > Lc >= 0 // // Inverting gives the electro-optical transfer characteristic // function (EOTF) which can be represented as ICC // parametricCurveType with 4 parameters (ICC.1:2010 Table 5). // Converting between the two (Lc ↔︎ Y, V ↔︎ X): // // Y = (a * X + b)^g for (X >= d) // Y = c * X for (X < d) // // g, a, b, c, d can then be defined in terms of α and β: // // g = 1 / 0.45 // a = 1 / α // b = 1 - α // c = 1 / 4.500 // d = 4.500 * β // // α and β are determined by solving the piecewise equations to // ensure continuity of both value and slope at the value β. // We use the values specified for 10-bit systems in // https://www.itu.int/rec/R-REC-BT.2020-2-201510-I Table 4 // since this results in the similar values as available ICC // profiles after converting to s15Fixed16Number, providing us // good test coverage. 
ToneReprCurve::Parametric(create_rec709_parametric().to_vec()) } TransferCharacteristics::Unspecified => { return Err(CmsError::UnsupportedTrc(value as u8)); } TransferCharacteristics::Bt470M => curve_from_gamma(2.2), TransferCharacteristics::Bt470Bg => curve_from_gamma(2.8), TransferCharacteristics::Smpte240 => { let table = build_trc_table(NUM_TRC_TABLE_ENTRIES, smpte240_to_linear); ToneReprCurve::Lut(table) } TransferCharacteristics::Linear => curve_from_gamma(1.), TransferCharacteristics::Log100 => { let table = build_trc_table(NUM_TRC_TABLE_ENTRIES, log100_to_linear); ToneReprCurve::Lut(table) } TransferCharacteristics::Log100sqrt10 => { let table = build_trc_table(NUM_TRC_TABLE_ENTRIES, log100_sqrt10_to_linear); ToneReprCurve::Lut(table) } TransferCharacteristics::Iec61966 => { let table = build_trc_table(NUM_TRC_TABLE_ENTRIES, iec61966_to_linear); ToneReprCurve::Lut(table) } TransferCharacteristics::Bt1361 => { let table = build_trc_table(NUM_TRC_TABLE_ENTRIES, bt1361_to_linear); ToneReprCurve::Lut(table) } TransferCharacteristics::Srgb => { ToneReprCurve::Parametric(vec![2.4, 1. / 1.055, 0.055 / 1.055, 1. / 12.92, 0.04045]) } TransferCharacteristics::Smpte2084 => { let table = build_trc_table(NUM_TRC_TABLE_ENTRIES, pq_to_linear); ToneReprCurve::Lut(table) } TransferCharacteristics::Smpte428 => { let table = build_trc_table(NUM_TRC_TABLE_ENTRIES, smpte428_to_linear); ToneReprCurve::Lut(table) } TransferCharacteristics::Hlg => { let table = build_trc_table(NUM_TRC_TABLE_ENTRIES, hlg_to_linear); ToneReprCurve::Lut(table) } }) } } /// Matrix Coefficients Enum (from ISO/IEC 23091-4 / MPEG CICP) #[derive(Debug, Copy, Clone, PartialEq, Eq)] #[repr(C)] pub enum MatrixCoefficients { Identity = 0, // RGB (Identity matrix) Bt709 = 1, // Rec. 
709 Unspecified = 2, // Unspecified Reserved = 3, // Reserved Fcc = 4, // FCC Bt470Bg = 5, // BT.470BG / BT.601-625 Smpte170m = 6, // SMPTE 170M / BT.601-525 Smpte240m = 7, // SMPTE 240M YCgCo = 8, // YCgCo Bt2020Ncl = 9, // BT.2020 (non-constant luminance) Bt2020Cl = 10, // BT.2020 (constant luminance) Smpte2085 = 11, // SMPTE ST 2085 ChromaticityDerivedNCL = 12, // Chromaticity-derived non-constant luminance ChromaticityDerivedCL = 13, // Chromaticity-derived constant luminance ICtCp = 14, // ICtCp } impl TryFrom for MatrixCoefficients { type Error = CmsError; fn try_from(value: u8) -> Result { match value { 0 => Ok(MatrixCoefficients::Identity), 1 => Ok(MatrixCoefficients::Bt709), 2 => Ok(MatrixCoefficients::Unspecified), 3 => Ok(MatrixCoefficients::Reserved), 4 => Ok(MatrixCoefficients::Fcc), 5 => Ok(MatrixCoefficients::Bt470Bg), 6 => Ok(MatrixCoefficients::Smpte170m), 7 => Ok(MatrixCoefficients::Smpte240m), 8 => Ok(MatrixCoefficients::YCgCo), 9 => Ok(MatrixCoefficients::Bt2020Ncl), 10 => Ok(MatrixCoefficients::Bt2020Cl), 11 => Ok(MatrixCoefficients::Smpte2085), 12 => Ok(MatrixCoefficients::ChromaticityDerivedNCL), 13 => Ok(MatrixCoefficients::ChromaticityDerivedCL), 14 => Ok(MatrixCoefficients::ICtCp), _ => Err(CmsError::InvalidCicp), } } } #[cfg(test)] mod tests { use super::*; use crate::WHITE_POINT_D65; #[test] fn test_to_xyz_using_absolute_coordinates() { let conversion_matrix = ColorPrimaries::BT_709.transform_to_xyz_d(WHITE_POINT_D65); assert!((conversion_matrix.v[0][0] - 0.4121524015214193).abs() < 1e-14); assert!((conversion_matrix.v[1][1] - 0.7153537403945436).abs() < 1e-14); assert!((conversion_matrix.v[2][2] - 0.9497138466283235).abs() < 1e-14); } #[test] fn test_to_xyz_using_absolute_coordinates_xyz() { let conversion_matrix = ColorPrimaries::XYZ.transform_to_xyz_d(WHITE_POINT_D65); assert!((conversion_matrix.v[0][0] - 0.95015469385536477).abs() < 1e-14); assert!((conversion_matrix.v[1][1] - 1.0).abs() < 1e-14); assert!((conversion_matrix.v[2][2] - 
1.0882590676722474).abs() < 1e-14); } #[test] fn test_to_xyz_using_absolute_coordinates_f() { let conversion_matrix = ColorPrimaries::BT_709.transform_to_xyz(WHITE_POINT_D65); assert!((conversion_matrix.v[0][0] - 0.4121524015214193).abs() < 1e-5); assert!((conversion_matrix.v[1][1] - 0.7153537403945436).abs() < 1e-5); assert!((conversion_matrix.v[2][2] - 0.9497138466283235).abs() < 1e-5); } } moxcms-0.7.7/src/conversions/avx/interpolator.rs000064400000000000000000000765501046102023000202070ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ use crate::conversions::interpolator::BarycentricWeight; use crate::math::{FusedMultiplyAdd, FusedMultiplyNegAdd}; use std::arch::x86_64::*; use std::ops::{Add, Mul, Sub}; #[repr(align(16), C)] pub(crate) struct SseAlignedF32(pub(crate) [f32; 4]); #[cfg(feature = "options")] pub(crate) struct TetrahedralAvxFma {} #[cfg(feature = "options")] pub(crate) struct PyramidalAvxFma {} #[cfg(feature = "options")] pub(crate) struct PrismaticAvxFma {} pub(crate) struct TrilinearAvxFma {} #[cfg(feature = "options")] pub(crate) struct PrismaticAvxFmaDouble {} pub(crate) struct TrilinearAvxFmaDouble {} #[cfg(feature = "options")] pub(crate) struct PyramidAvxFmaDouble {} #[cfg(feature = "options")] pub(crate) struct TetrahedralAvxFmaDouble {} pub(crate) trait AvxMdInterpolationDouble { fn inter3_sse( &self, table0: &[SseAlignedF32], table1: &[SseAlignedF32], in_r: usize, in_g: usize, in_b: usize, lut: &[BarycentricWeight], ) -> (AvxVectorSse, AvxVectorSse); } pub(crate) trait AvxMdInterpolation { fn inter3_sse( &self, table: &[SseAlignedF32], in_r: usize, in_g: usize, in_b: usize, lut: &[BarycentricWeight], ) -> AvxVectorSse; } trait Fetcher { fn fetch(&self, x: i32, y: i32, z: i32) -> T; } #[derive(Copy, Clone)] #[repr(transparent)] pub(crate) struct AvxVectorSse { pub(crate) v: __m128, } #[derive(Copy, Clone)] #[repr(transparent)] pub(crate) struct AvxVector { pub(crate) v: __m256, } impl AvxVector { #[inline(always)] pub(crate) fn 
from_sse(lo: AvxVectorSse, hi: AvxVectorSse) -> AvxVector { unsafe { AvxVector { v: _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(lo.v), hi.v), } } } #[inline(always)] pub(crate) fn split(self) -> (AvxVectorSse, AvxVectorSse) { unsafe { ( AvxVectorSse { v: _mm256_castps256_ps128(self.v), }, AvxVectorSse { v: _mm256_extractf128_ps::<1>(self.v), }, ) } } } impl From for AvxVectorSse { #[inline(always)] fn from(v: f32) -> Self { AvxVectorSse { v: unsafe { _mm_set1_ps(v) }, } } } impl From for AvxVector { #[inline(always)] fn from(v: f32) -> Self { AvxVector { v: unsafe { _mm256_set1_ps(v) }, } } } impl Sub for AvxVectorSse { type Output = Self; #[inline(always)] fn sub(self, rhs: AvxVectorSse) -> Self::Output { AvxVectorSse { v: unsafe { _mm_sub_ps(self.v, rhs.v) }, } } } impl Sub for AvxVector { type Output = Self; #[inline(always)] fn sub(self, rhs: AvxVector) -> Self::Output { AvxVector { v: unsafe { _mm256_sub_ps(self.v, rhs.v) }, } } } impl Add for AvxVectorSse { type Output = Self; #[inline(always)] fn add(self, rhs: AvxVectorSse) -> Self::Output { AvxVectorSse { v: unsafe { _mm_add_ps(self.v, rhs.v) }, } } } impl Mul for AvxVectorSse { type Output = Self; #[inline(always)] fn mul(self, rhs: AvxVectorSse) -> Self::Output { AvxVectorSse { v: unsafe { _mm_mul_ps(self.v, rhs.v) }, } } } impl AvxVector { #[inline(always)] pub(crate) fn neg_mla(self, b: AvxVector, c: AvxVector) -> Self { Self { v: unsafe { _mm256_fnmadd_ps(b.v, c.v, self.v) }, } } } impl FusedMultiplyNegAdd for AvxVectorSse { #[inline(always)] fn neg_mla(&self, b: AvxVectorSse, c: AvxVectorSse) -> Self { Self { v: unsafe { _mm_fnmadd_ps(b.v, c.v, self.v) }, } } } impl Add for AvxVector { type Output = Self; #[inline(always)] fn add(self, rhs: AvxVector) -> Self::Output { AvxVector { v: unsafe { _mm256_add_ps(self.v, rhs.v) }, } } } impl Mul for AvxVector { type Output = Self; #[inline(always)] fn mul(self, rhs: AvxVector) -> Self::Output { AvxVector { v: unsafe { _mm256_mul_ps(self.v, rhs.v) }, } 
} } impl FusedMultiplyAdd for AvxVectorSse { #[inline(always)] fn mla(&self, b: AvxVectorSse, c: AvxVectorSse) -> AvxVectorSse { AvxVectorSse { v: unsafe { _mm_fmadd_ps(b.v, c.v, self.v) }, } } } impl FusedMultiplyAdd for AvxVector { #[inline(always)] fn mla(&self, b: AvxVector, c: AvxVector) -> AvxVector { AvxVector { v: unsafe { _mm256_fmadd_ps(b.v, c.v, self.v) }, } } } struct TetrahedralAvxSseFetchVector<'a, const GRID_SIZE: usize> { cube: &'a [SseAlignedF32], } struct TetrahedralAvxFetchVector<'a, const GRID_SIZE: usize> { cube0: &'a [SseAlignedF32], cube1: &'a [SseAlignedF32], } impl Fetcher for TetrahedralAvxFetchVector<'_, GRID_SIZE> { #[inline(always)] fn fetch(&self, x: i32, y: i32, z: i32) -> AvxVector { let offset = (x as u32 * (GRID_SIZE as u32 * GRID_SIZE as u32) + y as u32 * GRID_SIZE as u32 + z as u32) as usize; let jx0 = unsafe { self.cube0.get_unchecked(offset..) }; let jx1 = unsafe { self.cube1.get_unchecked(offset..) }; AvxVector { v: unsafe { _mm256_insertf128_ps::<1>( _mm256_castps128_ps256(_mm_load_ps(jx0.as_ptr() as *const f32)), _mm_load_ps(jx1.as_ptr() as *const f32), ) }, } } } impl Fetcher for TetrahedralAvxSseFetchVector<'_, GRID_SIZE> { #[inline(always)] fn fetch(&self, x: i32, y: i32, z: i32) -> AvxVectorSse { let offset = (x as u32 * (GRID_SIZE as u32 * GRID_SIZE as u32) + y as u32 * GRID_SIZE as u32 + z as u32) as usize; let jx = unsafe { self.cube.get_unchecked(offset..) 
};
        AvxVectorSse {
            v: unsafe { _mm_load_ps(jx.as_ptr() as *const f32) },
        }
    }
}

#[cfg(feature = "options")]
impl TetrahedralAvxFma {
    /// Tetrahedral interpolation of a single LUT cell.
    ///
    /// `in_r`, `in_g`, `in_b` select one entry of `lut` per axis; each entry
    /// supplies the lower grid index (`x`), the upper grid index (`x_n`) and
    /// the weight (`w`) for that axis. The ordering of the three weights picks
    /// one of six tetrahedra, and the result is
    /// `c0 + c1 * rx + c2 * ry + c3 * rz`, evaluated as a chain of `mla` calls.
    ///
    /// # Safety
    ///
    /// The function is compiled with `avx2` and `fma` enabled, so the caller
    /// must only invoke it on a CPU providing both. `in_r`, `in_g` and `in_b`
    /// are used with `get_unchecked` and therefore must be valid indices into
    /// `lut`.
    #[target_feature(enable = "avx2", enable = "fma")]
    unsafe fn interpolate(
        &self,
        in_r: usize,
        in_g: usize,
        in_b: usize,
        lut: &[BarycentricWeight],
        r: impl Fetcher,
    ) -> AvxVectorSse {
        let (axis_r, axis_g, axis_b) = unsafe {
            (
                lut.get_unchecked(in_r),
                lut.get_unchecked(in_g),
                lut.get_unchecked(in_b),
            )
        };

        let (x, y, z): (i32, i32, i32) = (axis_r.x, axis_g.x, axis_b.x);
        let (x_n, y_n, z_n): (i32, i32, i32) = (axis_r.x_n, axis_g.x_n, axis_b.x_n);
        let (rx, ry, rz) = (axis_r.w, axis_g.w, axis_b.w);

        let c0 = r.fetch(x, y, z);

        // Corner names below read as p<x><y><z>, where 1 means the `_n` index.
        let (c1, c2, c3) = if rx >= ry {
            if ry >= rz {
                // rx >= ry >= rz
                let p100 = r.fetch(x_n, y, z);
                let p110 = r.fetch(x_n, y_n, z);
                let p111 = r.fetch(x_n, y_n, z_n);
                (p100 - c0, p110 - p100, p111 - p110)
            } else if rx >= rz {
                // rx >= rz > ry
                let p100 = r.fetch(x_n, y, z);
                let p101 = r.fetch(x_n, y, z_n);
                let p111 = r.fetch(x_n, y_n, z_n);
                (p100 - c0, p111 - p101, p101 - p100)
            } else {
                // rz > rx >= ry
                let p001 = r.fetch(x, y, z_n);
                let p101 = r.fetch(x_n, y, z_n);
                let p111 = r.fetch(x_n, y_n, z_n);
                (p101 - p001, p111 - p101, p001 - c0)
            }
        } else if rx >= rz {
            // ry > rx >= rz
            let p010 = r.fetch(x, y_n, z);
            let p110 = r.fetch(x_n, y_n, z);
            let p111 = r.fetch(x_n, y_n, z_n);
            (p110 - p010, p010 - c0, p111 - p110)
        } else if ry >= rz {
            // ry >= rz > rx
            let p010 = r.fetch(x, y_n, z);
            let p011 = r.fetch(x, y_n, z_n);
            let p111 = r.fetch(x_n, y_n, z_n);
            (p111 - p011, p010 - c0, p011 - p010)
        } else {
            // rz > ry > rx
            let p001 = r.fetch(x, y, z_n);
            let p011 = r.fetch(x, y_n, z_n);
            let p111 = r.fetch(x_n, y_n, z_n);
            (p111 - p011, p011 - p001, p001 - c0)
        };

        c0.mla(c1, AvxVectorSse::from(rx))
            .mla(c2, AvxVectorSse::from(ry))
            .mla(c3, AvxVectorSse::from(rz))
    }
}

macro_rules!
define_interp_avx { ($interpolator: ident) => { impl AvxMdInterpolation for $interpolator { fn inter3_sse( &self, table: &[SseAlignedF32], in_r: usize, in_g: usize, in_b: usize, lut: &[BarycentricWeight], ) -> AvxVectorSse { unsafe { self.interpolate( in_r, in_g, in_b, lut, TetrahedralAvxSseFetchVector:: { cube: table }, ) } } } }; } #[cfg(feature = "options")] macro_rules! define_interp_avx_d { ($interpolator: ident) => { impl AvxMdInterpolationDouble for $interpolator { fn inter3_sse( &self, table0: &[SseAlignedF32], table1: &[SseAlignedF32], in_r: usize, in_g: usize, in_b: usize, lut: &[BarycentricWeight], ) -> (AvxVectorSse, AvxVectorSse) { unsafe { self.interpolate( in_r, in_g, in_b, lut, TetrahedralAvxSseFetchVector:: { cube: table0 }, TetrahedralAvxSseFetchVector:: { cube: table1 }, ) } } } }; } #[cfg(feature = "options")] define_interp_avx!(TetrahedralAvxFma); #[cfg(feature = "options")] define_interp_avx!(PyramidalAvxFma); #[cfg(feature = "options")] define_interp_avx!(PrismaticAvxFma); define_interp_avx!(TrilinearAvxFma); #[cfg(feature = "options")] define_interp_avx_d!(PrismaticAvxFmaDouble); #[cfg(feature = "options")] define_interp_avx_d!(PyramidAvxFmaDouble); #[cfg(feature = "options")] impl AvxMdInterpolationDouble for TetrahedralAvxFmaDouble { fn inter3_sse( &self, table0: &[SseAlignedF32], table1: &[SseAlignedF32], in_r: usize, in_g: usize, in_b: usize, lut: &[BarycentricWeight], ) -> (AvxVectorSse, AvxVectorSse) { unsafe { self.interpolate( in_r, in_g, in_b, lut, TetrahedralAvxFetchVector:: { cube0: table0, cube1: table1, }, ) } } } impl AvxMdInterpolationDouble for TrilinearAvxFmaDouble { fn inter3_sse( &self, table0: &[SseAlignedF32], table1: &[SseAlignedF32], in_r: usize, in_g: usize, in_b: usize, lut: &[BarycentricWeight], ) -> (AvxVectorSse, AvxVectorSse) { unsafe { self.interpolate( in_r, in_g, in_b, lut, TetrahedralAvxFetchVector:: { cube0: table0, cube1: table1, }, ) } } } #[cfg(feature = "options")] impl PyramidalAvxFma { 
#[target_feature(enable = "avx2", enable = "fma")] unsafe fn interpolate( &self, in_r: usize, in_g: usize, in_b: usize, lut: &[BarycentricWeight], r: impl Fetcher, ) -> AvxVectorSse { let lut_r = unsafe { lut.get_unchecked(in_r) }; let lut_g = unsafe { lut.get_unchecked(in_g) }; let lut_b = unsafe { lut.get_unchecked(in_b) }; let x: i32 = lut_r.x; let y: i32 = lut_g.x; let z: i32 = lut_b.x; let x_n: i32 = lut_r.x_n; let y_n: i32 = lut_g.x_n; let z_n: i32 = lut_b.x_n; let dr = lut_r.w; let dg = lut_g.w; let db = lut_b.w; let c0 = r.fetch(x, y, z); let w0 = AvxVectorSse::from(db); let w1 = AvxVectorSse::from(dr); let w2 = AvxVectorSse::from(dg); if dr > db && dg > db { let w3 = AvxVectorSse::from(dr * dg); let x0 = r.fetch(x_n, y_n, z_n); let x1 = r.fetch(x_n, y_n, z); let x2 = r.fetch(x_n, y, z); let x3 = r.fetch(x, y_n, z); let c1 = x0 - x1; let c2 = x2 - c0; let c3 = x3 - c0; let c4 = c0 - x3 - x2 + x1; let s0 = c0.mla(c1, w0); let s1 = s0.mla(c2, w1); let s2 = s1.mla(c3, w2); s2.mla(c4, w3) } else if db > dr && dg > dr { let w3 = AvxVectorSse::from(dg * db); let x0 = r.fetch(x, y, z_n); let x1 = r.fetch(x_n, y_n, z_n); let x2 = r.fetch(x, y_n, z_n); let x3 = r.fetch(x, y_n, z); let c1 = x0 - c0; let c2 = x1 - x2; let c3 = x3 - c0; let c4 = c0 - x3 - x0 + x2; let s0 = c0.mla(c1, w0); let s1 = s0.mla(c2, w1); let s2 = s1.mla(c3, w2); s2.mla(c4, w3) } else { let w3 = AvxVectorSse::from(db * dr); let x0 = r.fetch(x, y, z_n); let x1 = r.fetch(x_n, y, z); let x2 = r.fetch(x_n, y, z_n); let x3 = r.fetch(x_n, y_n, z_n); let c1 = x0 - c0; let c2 = x1 - c0; let c3 = x3 - x2; let c4 = c0 - x1 - x0 + x2; let s0 = c0.mla(c1, w0); let s1 = s0.mla(c2, w1); let s2 = s1.mla(c3, w2); s2.mla(c4, w3) } } } #[cfg(feature = "options")] impl PrismaticAvxFma { #[target_feature(enable = "avx2", enable = "fma")] unsafe fn interpolate( &self, in_r: usize, in_g: usize, in_b: usize, lut: &[BarycentricWeight], r: impl Fetcher, ) -> AvxVectorSse { let lut_r = unsafe { lut.get_unchecked(in_r) 
};
        let lut_g = unsafe { lut.get_unchecked(in_g) };
        let lut_b = unsafe { lut.get_unchecked(in_b) };
        let x: i32 = lut_r.x;
        let y: i32 = lut_g.x;
        let z: i32 = lut_b.x;
        let x_n: i32 = lut_r.x_n;
        let y_n: i32 = lut_g.x_n;
        let z_n: i32 = lut_b.x_n;
        let dr = lut_r.w;
        let dg = lut_g.w;
        let db = lut_b.w;
        let c0 = r.fetch(x, y, z);
        let w0 = AvxVectorSse::from(db);
        let w1 = AvxVectorSse::from(dr);
        let w2 = AvxVectorSse::from(dg);
        let w3 = AvxVectorSse::from(dg * db);
        let w4 = AvxVectorSse::from(dr * dg);
        if db > dr {
            let x0 = r.fetch(x, y, z_n);
            let x1 = r.fetch(x_n, y, z_n);
            let x2 = r.fetch(x, y_n, z);
            let x3 = r.fetch(x, y_n, z_n);
            let x4 = r.fetch(x_n, y_n, z_n);
            let c1 = x0 - c0;
            let c2 = x1 - x0;
            let c3 = x2 - c0;
            let c4 = c0 - x2 - x0 + x3;
            let c5 = x0 - x3 - x1 + x4;
            let s0 = c0.mla(c1, w0);
            let s1 = s0.mla(c2, w1);
            let s2 = s1.mla(c3, w2);
            let s3 = s2.mla(c4, w3);
            s3.mla(c5, w4)
        } else {
            let x0 = r.fetch(x_n, y, z);
            let x1 = r.fetch(x_n, y, z_n);
            let x2 = r.fetch(x, y_n, z);
            let x3 = r.fetch(x_n, y_n, z);
            let x4 = r.fetch(x_n, y_n, z_n);
            let c1 = x1 - x0;
            let c2 = x0 - c0;
            let c3 = x2 - c0;
            let c4 = x0 - x3 - x1 + x4;
            let c5 = c0 - x2 - x0 + x3;
            let s0 = c0.mla(c1, w0);
            let s1 = s0.mla(c2, w1);
            let s2 = s1.mla(c3, w2);
            let s3 = s2.mla(c4, w3);
            s3.mla(c5, w4)
        }
    }
}

#[cfg(feature = "options")]
impl PrismaticAvxFmaDouble {
    /// Prismatic interpolation of two LUT tables in one pass.
    ///
    /// `r0` reads grid points of the first table and `r1` of the second. Every
    /// pair of corresponding points is packed with `AvxVector::from_sse`
    /// (first table in the low 128-bit half, second table in the high half),
    /// so one 256-bit FMA chain evaluates both tables. The result is split
    /// back into `(first table, second table)`.
    ///
    /// `in_r`, `in_g`, `in_b` select one entry of `lut` per axis; each entry
    /// supplies the lower grid index (`x`), the upper grid index (`x_n`) and
    /// the weight (`w`). The prism half is chosen by `db > dr`, and the value is
    /// `c0 + c1*db + c2*dr + c3*dg + c4*(dg*db) + c5*(dr*dg)`.
    ///
    /// # Safety
    ///
    /// The function is compiled with `avx2` and `fma` enabled, so the caller
    /// must only invoke it on a CPU providing both. `in_r`, `in_g` and `in_b`
    /// are used with `get_unchecked` and therefore must be valid indices into
    /// `lut`.
    #[target_feature(enable = "avx2", enable = "fma")]
    unsafe fn interpolate(
        &self,
        in_r: usize,
        in_g: usize,
        in_b: usize,
        lut: &[BarycentricWeight],
        r0: impl Fetcher,
        r1: impl Fetcher,
    ) -> (AvxVectorSse, AvxVectorSse) {
        let lut_r = unsafe { lut.get_unchecked(in_r) };
        let lut_g = unsafe { lut.get_unchecked(in_g) };
        let lut_b = unsafe { lut.get_unchecked(in_b) };

        let x: i32 = lut_r.x;
        let y: i32 = lut_g.x;
        let z: i32 = lut_b.x;

        let x_n: i32 = lut_r.x_n;
        let y_n: i32 = lut_g.x_n;
        let z_n: i32 = lut_b.x_n;

        let dr = lut_r.w;
        let dg = lut_g.w;
        let db = lut_b.w;

        // Base corner of the cell, one read per table. The second read has to
        // go through `r1`: reading it from `r0` would put the first table's
        // value into the high half of `c0`, while all other corners of that
        // half come from the second table.
        let c0_0 = r0.fetch(x, y, z);
        let c0_1 = r1.fetch(x, y, z);

        let w0 = AvxVector::from(db);
        let w1 = AvxVector::from(dr);
        let w2 = AvxVector::from(dg);
        let w3 = AvxVector::from(dg * db);
        let w4 = AvxVector::from(dr * dg);

        let c0 = AvxVector::from_sse(c0_0, c0_1);

        if db > dr {
            let x0_0 = r0.fetch(x, y, z_n);
            let x1_0 = r0.fetch(x_n, y, z_n);
            let x2_0 = r0.fetch(x, y_n, z);
            let x3_0 = r0.fetch(x, y_n, z_n);
            let x4_0 = r0.fetch(x_n, y_n, z_n);

            let x0_1 = r1.fetch(x, y, z_n);
            let x1_1 = r1.fetch(x_n, y, z_n);
            let x2_1 = r1.fetch(x, y_n, z);
            let x3_1 = r1.fetch(x, y_n, z_n);
            let x4_1 = r1.fetch(x_n, y_n, z_n);

            let x0 = AvxVector::from_sse(x0_0, x0_1);
            let x1 = AvxVector::from_sse(x1_0, x1_1);
            let x2 = AvxVector::from_sse(x2_0, x2_1);
            let x3 = AvxVector::from_sse(x3_0, x3_1);
            let x4 = AvxVector::from_sse(x4_0, x4_1);

            let c1 = x0 - c0;
            let c2 = x1 - x0;
            let c3 = x2 - c0;
            let c4 = c0 - x2 - x0 + x3;
            let c5 = x0 - x3 - x1 + x4;

            let s0 = c0.mla(c1, w0);
            let s1 = s0.mla(c2, w1);
            let s2 = s1.mla(c3, w2);
            let s3 = s2.mla(c4, w3);
            s3.mla(c5, w4).split()
        } else {
            let x0_0 = r0.fetch(x_n, y, z);
            let x1_0 = r0.fetch(x_n, y, z_n);
            let x2_0 = r0.fetch(x, y_n, z);
            let x3_0 = r0.fetch(x_n, y_n, z);
            let x4_0 = r0.fetch(x_n, y_n, z_n);

            let x0_1 = r1.fetch(x_n, y, z);
            let x1_1 = r1.fetch(x_n, y, z_n);
            let x2_1 = r1.fetch(x, y_n, z);
            let x3_1 = r1.fetch(x_n, y_n, z);
            let x4_1 = r1.fetch(x_n, y_n, z_n);

            let x0 = AvxVector::from_sse(x0_0, x0_1);
            let x1 = AvxVector::from_sse(x1_0, x1_1);
            let x2 = AvxVector::from_sse(x2_0, x2_1);
            let x3 = AvxVector::from_sse(x3_0, x3_1);
            let x4 = AvxVector::from_sse(x4_0, x4_1);

            let c1 = x1 - x0;
            let c2 = x0 - c0;
            let c3 = x2 - c0;
            let c4 = x0 - x3 - x1 + x4;
            let c5 = c0 - x2 - x0 + x3;

            let s0 = c0.mla(c1, w0);
            let s1 = s0.mla(c2, w1);
            let s2 = s1.mla(c3, w2);
            let s3 = s2.mla(c4, w3);
            s3.mla(c5, w4).split()
        }
    }
}

#[cfg(feature = "options")]
impl PyramidAvxFmaDouble {
    #[target_feature(enable = "avx2", enable = "fma")]
    unsafe fn interpolate(
        &self,
        in_r: usize,
        in_g: usize,
        in_b: usize,
        lut: &[BarycentricWeight],
        r0: impl Fetcher,
        r1: impl Fetcher,
    ) ->
(AvxVectorSse, AvxVectorSse) { let lut_r = unsafe { lut.get_unchecked(in_r) }; let lut_g = unsafe { lut.get_unchecked(in_g) }; let lut_b = unsafe { lut.get_unchecked(in_b) }; let x: i32 = lut_r.x; let y: i32 = lut_g.x; let z: i32 = lut_b.x; let x_n: i32 = lut_r.x_n; let y_n: i32 = lut_g.x_n; let z_n: i32 = lut_b.x_n; let dr = lut_r.w; let dg = lut_g.w; let db = lut_b.w; let c0_0 = r0.fetch(x, y, z); let c0_1 = r1.fetch(x, y, z); let w0 = AvxVector::from(db); let w1 = AvxVector::from(dr); let w2 = AvxVector::from(dg); let c0 = AvxVector::from_sse(c0_0, c0_1); if dr > db && dg > db { let w3 = AvxVector::from(dr * dg); let x0_0 = r0.fetch(x_n, y_n, z_n); let x1_0 = r0.fetch(x_n, y_n, z); let x2_0 = r0.fetch(x_n, y, z); let x3_0 = r0.fetch(x, y_n, z); let x0_1 = r1.fetch(x_n, y_n, z_n); let x1_1 = r1.fetch(x_n, y_n, z); let x2_1 = r1.fetch(x_n, y, z); let x3_1 = r1.fetch(x, y_n, z); let x0 = AvxVector::from_sse(x0_0, x0_1); let x1 = AvxVector::from_sse(x1_0, x1_1); let x2 = AvxVector::from_sse(x2_0, x2_1); let x3 = AvxVector::from_sse(x3_0, x3_1); let c1 = x0 - x1; let c2 = x2 - c0; let c3 = x3 - c0; let c4 = c0 - x3 - x2 + x1; let s0 = c0.mla(c1, w0); let s1 = s0.mla(c2, w1); let s2 = s1.mla(c3, w2); s2.mla(c4, w3).split() } else if db > dr && dg > dr { let w3 = AvxVector::from(dg * db); let x0_0 = r0.fetch(x, y, z_n); let x1_0 = r0.fetch(x_n, y_n, z_n); let x2_0 = r0.fetch(x, y_n, z_n); let x3_0 = r0.fetch(x, y_n, z); let x0_1 = r1.fetch(x, y, z_n); let x1_1 = r1.fetch(x_n, y_n, z_n); let x2_1 = r1.fetch(x, y_n, z_n); let x3_1 = r1.fetch(x, y_n, z); let x0 = AvxVector::from_sse(x0_0, x0_1); let x1 = AvxVector::from_sse(x1_0, x1_1); let x2 = AvxVector::from_sse(x2_0, x2_1); let x3 = AvxVector::from_sse(x3_0, x3_1); let c1 = x0 - c0; let c2 = x1 - x2; let c3 = x3 - c0; let c4 = c0 - x3 - x0 + x2; let s0 = c0.mla(c1, w0); let s1 = s0.mla(c2, w1); let s2 = s1.mla(c3, w2); s2.mla(c4, w3).split() } else { let w3 = AvxVector::from(db * dr); let x0_0 = r0.fetch(x, y, z_n); 
let x1_0 = r0.fetch(x_n, y, z); let x2_0 = r0.fetch(x_n, y, z_n); let x3_0 = r0.fetch(x_n, y_n, z_n); let x0_1 = r1.fetch(x, y, z_n); let x1_1 = r1.fetch(x_n, y, z); let x2_1 = r1.fetch(x_n, y, z_n); let x3_1 = r1.fetch(x_n, y_n, z_n); let x0 = AvxVector::from_sse(x0_0, x0_1); let x1 = AvxVector::from_sse(x1_0, x1_1); let x2 = AvxVector::from_sse(x2_0, x2_1); let x3 = AvxVector::from_sse(x3_0, x3_1); let c1 = x0 - c0; let c2 = x1 - c0; let c3 = x3 - x2; let c4 = c0 - x1 - x0 + x2; let s0 = c0.mla(c1, w0); let s1 = s0.mla(c2, w1); let s2 = s1.mla(c3, w2); s2.mla(c4, w3).split() } } } #[cfg(feature = "options")] impl TetrahedralAvxFmaDouble { #[target_feature(enable = "avx2", enable = "fma")] unsafe fn interpolate( &self, in_r: usize, in_g: usize, in_b: usize, lut: &[BarycentricWeight], rv: impl Fetcher, ) -> (AvxVectorSse, AvxVectorSse) { let lut_r = unsafe { lut.get_unchecked(in_r) }; let lut_g = unsafe { lut.get_unchecked(in_g) }; let lut_b = unsafe { lut.get_unchecked(in_b) }; let x: i32 = lut_r.x; let y: i32 = lut_g.x; let z: i32 = lut_b.x; let x_n: i32 = lut_r.x_n; let y_n: i32 = lut_g.x_n; let z_n: i32 = lut_b.x_n; let rx = lut_r.w; let ry = lut_g.w; let rz = lut_b.w; let c0 = rv.fetch(x, y, z); let w0 = AvxVector::from(rx); let w1 = AvxVector::from(ry); let w2 = AvxVector::from(rz); let c2; let c1; let c3; if rx >= ry { if ry >= rz { //rx >= ry && ry >= rz c1 = rv.fetch(x_n, y, z) - c0; c2 = rv.fetch(x_n, y_n, z) - rv.fetch(x_n, y, z); c3 = rv.fetch(x_n, y_n, z_n) - rv.fetch(x_n, y_n, z); } else if rx >= rz { //rx >= rz && rz >= ry c1 = rv.fetch(x_n, y, z) - c0; c2 = rv.fetch(x_n, y_n, z_n) - rv.fetch(x_n, y, z_n); c3 = rv.fetch(x_n, y, z_n) - rv.fetch(x_n, y, z); } else { //rz > rx && rx >= ry c1 = rv.fetch(x_n, y, z_n) - rv.fetch(x, y, z_n); c2 = rv.fetch(x_n, y_n, z_n) - rv.fetch(x_n, y, z_n); c3 = rv.fetch(x, y, z_n) - c0; } } else if rx >= rz { //ry > rx && rx >= rz c1 = rv.fetch(x_n, y_n, z) - rv.fetch(x, y_n, z); c2 = rv.fetch(x, y_n, z) - c0; c3 = 
rv.fetch(x_n, y_n, z_n) - rv.fetch(x_n, y_n, z);
        } else if ry >= rz {
            //ry >= rz && rz > rx
            c1 = rv.fetch(x_n, y_n, z_n) - rv.fetch(x, y_n, z_n);
            c2 = rv.fetch(x, y_n, z) - c0;
            c3 = rv.fetch(x, y_n, z_n) - rv.fetch(x, y_n, z);
        } else {
            //rz > ry && ry > rx
            c1 = rv.fetch(x_n, y_n, z_n) - rv.fetch(x, y_n, z_n);
            c2 = rv.fetch(x, y_n, z_n) - rv.fetch(x, y, z_n);
            c3 = rv.fetch(x, y, z_n) - c0;
        }
        let s0 = c0.mla(c1, w0);
        let s1 = s0.mla(c2, w1);
        s1.mla(c3, w2).split()
    }
}

impl TrilinearAvxFmaDouble {
    /// Trilinear interpolation of one LUT cell on 256-bit vectors.
    ///
    /// `in_r`, `in_g`, `in_b` select one entry of `lut` per axis; each entry
    /// supplies the lower grid index (`x`), the upper grid index (`x_n`) and
    /// the weight (`w`). The eight cell corners are read through `rv` and
    /// reduced axis by axis (x, then y, then z). Each reduction step is
    /// `a - a * w + b * w`, written as `a.neg_mla(a, w).mla(b, w)`. The final
    /// vector is returned as its two 128-bit halves via `split()`.
    ///
    /// # Safety
    ///
    /// The function is compiled with `avx2` and `fma` enabled, so the caller
    /// must only invoke it on a CPU providing both. `in_r`, `in_g` and `in_b`
    /// are used with `get_unchecked` and therefore must be valid indices into
    /// `lut`.
    #[target_feature(enable = "avx2", enable = "fma")]
    unsafe fn interpolate(
        &self,
        in_r: usize,
        in_g: usize,
        in_b: usize,
        lut: &[BarycentricWeight],
        rv: impl Fetcher,
    ) -> (AvxVectorSse, AvxVectorSse) {
        let (axis_r, axis_g, axis_b) = unsafe {
            (
                lut.get_unchecked(in_r),
                lut.get_unchecked(in_g),
                lut.get_unchecked(in_b),
            )
        };

        let (x, y, z): (i32, i32, i32) = (axis_r.x, axis_g.x, axis_b.x);
        let (x_n, y_n, z_n): (i32, i32, i32) = (axis_r.x_n, axis_g.x_n, axis_b.x_n);

        // One broadcast weight per axis, shared by both halves of every step.
        let wx = AvxVector::from(axis_r.w);
        let wy = AvxVector::from(axis_g.w);
        let wz = AvxVector::from(axis_b.w);

        // Corner names read as p<x><y><z>, where 1 means the `_n` index.
        let p000 = rv.fetch(x, y, z);
        let p100 = rv.fetch(x_n, y, z);
        let p010 = rv.fetch(x, y_n, z);
        let p110 = rv.fetch(x_n, y_n, z);
        let p001 = rv.fetch(x, y, z_n);
        let p101 = rv.fetch(x_n, y, z_n);
        let p011 = rv.fetch(x, y_n, z_n);
        let p111 = rv.fetch(x_n, y_n, z_n);

        // Collapse the x axis: four edges remain.
        let e00 = p000.neg_mla(p000, wx).mla(p100, wx);
        let e10 = p010.neg_mla(p010, wx).mla(p110, wx);
        let e01 = p001.neg_mla(p001, wx).mla(p101, wx);
        let e11 = p011.neg_mla(p011, wx).mla(p111, wx);

        // Collapse the y axis: two points remain.
        let near = e00.neg_mla(e00, wy).mla(e10, wy);
        let far = e01.neg_mla(e01, wy).mla(e11, wy);

        // Collapse the z axis and hand back both 128-bit halves.
        near.neg_mla(near, wz).mla(far, wz).split()
    }
}

impl TrilinearAvxFma {
    #[target_feature(enable = "avx2", enable = "fma")]
    unsafe fn interpolate(
        &self,
        in_r: usize,
        in_g: usize,
        in_b: usize,
        lut: &[BarycentricWeight],
        r: impl Fetcher,
    )
-> AvxVectorSse { let lut_r = unsafe { lut.get_unchecked(in_r) }; let lut_g = unsafe { lut.get_unchecked(in_g) }; let lut_b = unsafe { lut.get_unchecked(in_b) }; let x: i32 = lut_r.x; let y: i32 = lut_g.x; let z: i32 = lut_b.x; let x_n: i32 = lut_r.x_n; let y_n: i32 = lut_g.x_n; let z_n: i32 = lut_b.x_n; let dr = lut_r.w; let dg = lut_g.w; let db = lut_b.w; let w0 = AvxVector::from(dr); let w1 = AvxVector::from(dg); let w2 = AvxVectorSse::from(db); let c000 = r.fetch(x, y, z); let c100 = r.fetch(x_n, y, z); let c010 = r.fetch(x, y_n, z); let c110 = r.fetch(x_n, y_n, z); let c001 = r.fetch(x, y, z_n); let c101 = r.fetch(x_n, y, z_n); let c011 = r.fetch(x, y_n, z_n); let c111 = r.fetch(x_n, y_n, z_n); let x000 = AvxVector::from_sse(c000, c001); let x010 = AvxVector::from_sse(c010, c011); let x011 = AvxVector::from_sse(c100, c101); let x111 = AvxVector::from_sse(c110, c111); let c00 = x000.neg_mla(x000, w0).mla(x011, w0); let c10 = x010.neg_mla(x010, w0).mla(x111, w0); let z0 = c00.neg_mla(c00, w1).mla(c10, w1); let (c0, c1) = z0.split(); c0.neg_mla(c0, w2).mla(c1, w2) } } moxcms-0.7.7/src/conversions/avx/interpolator_q0_15.rs000064400000000000000000001002331046102023000210760ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. 
Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ use crate::conversions::interpolator::BarycentricWeight; use crate::math::FusedMultiplyAdd; use std::arch::x86_64::*; use std::ops::{Add, Mul, Sub}; #[repr(align(8), C)] pub(crate) struct AvxAlignedI16(pub(crate) [i16; 4]); #[cfg(feature = "options")] pub(crate) struct TetrahedralAvxQ0_15 {} #[cfg(feature = "options")] pub(crate) struct PyramidalAvxQ0_15 {} #[cfg(feature = "options")] pub(crate) struct PrismaticAvxQ0_15 {} pub(crate) struct TrilinearAvxQ0_15 {} #[cfg(feature = "options")] pub(crate) struct PrismaticAvxQ0_15Double {} pub(crate) struct TrilinearAvxQ0_15Double {} #[cfg(feature = "options")] pub(crate) struct PyramidAvxFmaQ0_15Double {} #[cfg(feature = "options")] pub(crate) struct TetrahedralAvxQ0_15Double {} pub(crate) trait AvxMdInterpolationQ0_15Double { fn inter3_sse( &self, table0: &[AvxAlignedI16], table1: &[AvxAlignedI16], in_r: usize, in_g: usize, in_b: usize, lut: &[BarycentricWeight], ) -> (AvxVectorQ0_15Sse, AvxVectorQ0_15Sse); } pub(crate) trait AvxMdInterpolationQ0_15 { fn inter3_sse( &self, table: &[AvxAlignedI16], in_r: usize, in_g: usize, in_b: usize, lut: &[BarycentricWeight], ) -> AvxVectorQ0_15Sse; } trait Fetcher { fn fetch(&self, x: i32, y: i32, z: i32) -> T; } #[derive(Copy, Clone)] #[repr(transparent)] pub(crate) struct AvxVectorQ0_15Sse { pub(crate) v: __m128i, } #[derive(Copy, Clone)] #[repr(transparent)] pub(crate) struct AvxVectorQ0_15 { pub(crate) v: __m256i, } impl AvxVectorQ0_15 { #[inline(always)] pub(crate) fn from_sse(lo: AvxVectorQ0_15Sse, hi: AvxVectorQ0_15Sse) -> AvxVectorQ0_15 { unsafe { AvxVectorQ0_15 { v: _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(lo.v), hi.v), } } } #[inline(always)] pub(crate) fn split(self) -> (AvxVectorQ0_15Sse, AvxVectorQ0_15Sse) { unsafe { ( AvxVectorQ0_15Sse { v: _mm256_castsi256_si128(self.v), }, AvxVectorQ0_15Sse { v: _mm256_extracti128_si256::<1>(self.v), }, ) } } } impl From for AvxVectorQ0_15Sse { #[inline(always)] fn from(v: i16) -> Self { AvxVectorQ0_15Sse { v: unsafe { 
_mm_set1_epi16(v) }, } } } impl From for AvxVectorQ0_15 { #[inline(always)] fn from(v: i16) -> Self { AvxVectorQ0_15 { v: unsafe { _mm256_set1_epi16(v) }, } } } impl Sub for AvxVectorQ0_15Sse { type Output = Self; #[inline(always)] fn sub(self, rhs: AvxVectorQ0_15Sse) -> Self::Output { AvxVectorQ0_15Sse { v: unsafe { _mm_sub_epi16(self.v, rhs.v) }, } } } impl Sub for AvxVectorQ0_15 { type Output = Self; #[inline(always)] fn sub(self, rhs: AvxVectorQ0_15) -> Self::Output { AvxVectorQ0_15 { v: unsafe { _mm256_sub_epi16(self.v, rhs.v) }, } } } impl Add for AvxVectorQ0_15Sse { type Output = Self; #[inline(always)] fn add(self, rhs: AvxVectorQ0_15Sse) -> Self::Output { AvxVectorQ0_15Sse { v: unsafe { _mm_add_epi16(self.v, rhs.v) }, } } } impl Mul for AvxVectorQ0_15Sse { type Output = Self; #[inline(always)] fn mul(self, rhs: AvxVectorQ0_15Sse) -> Self::Output { AvxVectorQ0_15Sse { v: unsafe { _mm_mulhrs_epi16(self.v, rhs.v) }, } } } impl Add for AvxVectorQ0_15 { type Output = Self; #[inline(always)] fn add(self, rhs: AvxVectorQ0_15) -> Self::Output { AvxVectorQ0_15 { v: unsafe { _mm256_add_epi16(self.v, rhs.v) }, } } } impl Mul for AvxVectorQ0_15 { type Output = Self; #[inline(always)] fn mul(self, rhs: AvxVectorQ0_15) -> Self::Output { AvxVectorQ0_15 { v: unsafe { _mm256_mulhrs_epi16(self.v, rhs.v) }, } } } impl FusedMultiplyAdd for AvxVectorQ0_15Sse { #[inline(always)] fn mla(&self, b: AvxVectorQ0_15Sse, c: AvxVectorQ0_15Sse) -> AvxVectorQ0_15Sse { AvxVectorQ0_15Sse { v: unsafe { _mm_add_epi16(_mm_mulhrs_epi16(b.v, c.v), self.v) }, } } } impl FusedMultiplyAdd for AvxVectorQ0_15 { #[inline(always)] fn mla(&self, b: AvxVectorQ0_15, c: AvxVectorQ0_15) -> AvxVectorQ0_15 { AvxVectorQ0_15 { v: unsafe { _mm256_add_epi16(_mm256_mulhrs_epi16(b.v, c.v), self.v) }, } } } struct TetrahedralAvxSseFetchVector<'a, const GRID_SIZE: usize> { cube: &'a [AvxAlignedI16], } struct TetrahedralAvxFetchVector<'a, const GRID_SIZE: usize> { cube0: &'a [AvxAlignedI16], cube1: &'a 
[AvxAlignedI16], } impl Fetcher for TetrahedralAvxFetchVector<'_, GRID_SIZE> { #[inline(always)] fn fetch(&self, x: i32, y: i32, z: i32) -> AvxVectorQ0_15 { let offset = (x as u32 * (GRID_SIZE as u32 * GRID_SIZE as u32) + y as u32 * GRID_SIZE as u32 + z as u32) as usize; let jx0 = unsafe { self.cube0.get_unchecked(offset..) }; let jx1 = unsafe { self.cube1.get_unchecked(offset..) }; AvxVectorQ0_15 { v: unsafe { _mm256_inserti128_si256::<1>( _mm256_castsi128_si256(_mm_loadu_si64(jx0.as_ptr() as *const _)), _mm_loadu_si64(jx1.as_ptr() as *const _), ) }, } } } impl Fetcher for TetrahedralAvxSseFetchVector<'_, GRID_SIZE> { #[inline(always)] fn fetch(&self, x: i32, y: i32, z: i32) -> AvxVectorQ0_15Sse { let offset = (x as u32 * (GRID_SIZE as u32 * GRID_SIZE as u32) + y as u32 * GRID_SIZE as u32 + z as u32) as usize; let jx = unsafe { self.cube.get_unchecked(offset..) }; AvxVectorQ0_15Sse { v: unsafe { _mm_loadu_si64(jx.as_ptr() as *const _) }, } } } #[cfg(feature = "options")] impl TetrahedralAvxQ0_15 { #[target_feature(enable = "avx2")] unsafe fn interpolate( &self, in_r: usize, in_g: usize, in_b: usize, lut: &[BarycentricWeight], r: impl Fetcher, ) -> AvxVectorQ0_15Sse { let lut_r = unsafe { *lut.get_unchecked(in_r) }; let lut_g = unsafe { *lut.get_unchecked(in_g) }; let lut_b = unsafe { *lut.get_unchecked(in_b) }; let x: i32 = lut_r.x; let y: i32 = lut_g.x; let z: i32 = lut_b.x; let x_n: i32 = lut_r.x_n; let y_n: i32 = lut_g.x_n; let z_n: i32 = lut_b.x_n; let rx = lut_r.w; let ry = lut_g.w; let rz = lut_b.w; let c0 = r.fetch(x, y, z); let c2; let c1; let c3; if rx >= ry { if ry >= rz { //rx >= ry && ry >= rz c1 = r.fetch(x_n, y, z) - c0; c2 = r.fetch(x_n, y_n, z) - r.fetch(x_n, y, z); c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z); } else if rx >= rz { //rx >= rz && rz >= ry c1 = r.fetch(x_n, y, z) - c0; c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n); c3 = r.fetch(x_n, y, z_n) - r.fetch(x_n, y, z); } else { //rz > rx && rx >= ry c1 = r.fetch(x_n, y, z_n) - 
r.fetch(x, y, z_n); c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n); c3 = r.fetch(x, y, z_n) - c0; } } else if rx >= rz { //ry > rx && rx >= rz c1 = r.fetch(x_n, y_n, z) - r.fetch(x, y_n, z); c2 = r.fetch(x, y_n, z) - c0; c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z); } else if ry >= rz { //ry >= rz && rz > rx c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n); c2 = r.fetch(x, y_n, z) - c0; c3 = r.fetch(x, y_n, z_n) - r.fetch(x, y_n, z); } else { //rz > ry && ry > rx c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n); c2 = r.fetch(x, y_n, z_n) - r.fetch(x, y, z_n); c3 = r.fetch(x, y, z_n) - c0; } let s0 = c0.mla(c1, AvxVectorQ0_15Sse::from(rx)); let s1 = s0.mla(c2, AvxVectorQ0_15Sse::from(ry)); s1.mla(c3, AvxVectorQ0_15Sse::from(rz)) } } macro_rules! define_interp_avx { ($interpolator: ident) => { impl AvxMdInterpolationQ0_15 for $interpolator { fn inter3_sse( &self, table: &[AvxAlignedI16], in_r: usize, in_g: usize, in_b: usize, lut: &[BarycentricWeight], ) -> AvxVectorQ0_15Sse { unsafe { self.interpolate( in_r, in_g, in_b, lut, TetrahedralAvxSseFetchVector:: { cube: table }, ) } } } }; } #[cfg(feature = "options")] macro_rules! 
define_interp_avx_d { ($interpolator: ident) => { impl AvxMdInterpolationQ0_15Double for $interpolator { fn inter3_sse( &self, table0: &[AvxAlignedI16], table1: &[AvxAlignedI16], in_r: usize, in_g: usize, in_b: usize, lut: &[BarycentricWeight], ) -> (AvxVectorQ0_15Sse, AvxVectorQ0_15Sse) { unsafe { self.interpolate( in_r, in_g, in_b, lut, TetrahedralAvxSseFetchVector:: { cube: table0 }, TetrahedralAvxSseFetchVector:: { cube: table1 }, ) } } } }; } #[cfg(feature = "options")] define_interp_avx!(TetrahedralAvxQ0_15); #[cfg(feature = "options")] define_interp_avx!(PyramidalAvxQ0_15); #[cfg(feature = "options")] define_interp_avx!(PrismaticAvxQ0_15); define_interp_avx!(TrilinearAvxQ0_15); #[cfg(feature = "options")] define_interp_avx_d!(PrismaticAvxQ0_15Double); #[cfg(feature = "options")] define_interp_avx_d!(PyramidAvxFmaQ0_15Double); #[cfg(feature = "options")] impl AvxMdInterpolationQ0_15Double for TetrahedralAvxQ0_15Double { fn inter3_sse( &self, table0: &[AvxAlignedI16], table1: &[AvxAlignedI16], in_r: usize, in_g: usize, in_b: usize, lut: &[BarycentricWeight], ) -> (AvxVectorQ0_15Sse, AvxVectorQ0_15Sse) { unsafe { self.interpolate( in_r, in_g, in_b, lut, TetrahedralAvxFetchVector:: { cube0: table0, cube1: table1, }, ) } } } impl AvxMdInterpolationQ0_15Double for TrilinearAvxQ0_15Double { fn inter3_sse( &self, table0: &[AvxAlignedI16], table1: &[AvxAlignedI16], in_r: usize, in_g: usize, in_b: usize, lut: &[BarycentricWeight], ) -> (AvxVectorQ0_15Sse, AvxVectorQ0_15Sse) { unsafe { self.interpolate( in_r, in_g, in_b, lut, TetrahedralAvxFetchVector:: { cube0: table0, cube1: table1, }, ) } } } #[cfg(feature = "options")] impl PyramidalAvxQ0_15 { #[target_feature(enable = "avx2")] unsafe fn interpolate( &self, in_r: usize, in_g: usize, in_b: usize, lut: &[BarycentricWeight], r: impl Fetcher, ) -> AvxVectorQ0_15Sse { let lut_r = unsafe { *lut.get_unchecked(in_r) }; let lut_g = unsafe { *lut.get_unchecked(in_g) }; let lut_b = unsafe { *lut.get_unchecked(in_b) }; let x: 
i32 = lut_r.x; let y: i32 = lut_g.x; let z: i32 = lut_b.x; let x_n: i32 = lut_r.x_n; let y_n: i32 = lut_g.x_n; let z_n: i32 = lut_b.x_n; let dr = lut_r.w; let dg = lut_g.w; let db = lut_b.w; let c0 = r.fetch(x, y, z); let w0 = AvxVectorQ0_15Sse::from(db); let w1 = AvxVectorQ0_15Sse::from(dr); let w2 = AvxVectorQ0_15Sse::from(dg); if dr > db && dg > db { let w3 = AvxVectorQ0_15Sse::from(dr) * AvxVectorQ0_15Sse::from(dg); let x0 = r.fetch(x_n, y_n, z_n); let x1 = r.fetch(x_n, y_n, z); let x2 = r.fetch(x_n, y, z); let x3 = r.fetch(x, y_n, z); let c1 = x0 - x1; let c2 = x2 - c0; let c3 = x3 - c0; let c4 = c0 - x3 - x2 + x1; let s0 = c0.mla(c1, w0); let s1 = s0.mla(c2, w1); let s2 = s1.mla(c3, w2); s2.mla(c4, w3) } else if db > dr && dg > dr { let w3 = AvxVectorQ0_15Sse::from(dg) * AvxVectorQ0_15Sse::from(db); let x0 = r.fetch(x, y, z_n); let x1 = r.fetch(x_n, y_n, z_n); let x2 = r.fetch(x, y_n, z_n); let x3 = r.fetch(x, y_n, z); let c1 = x0 - c0; let c2 = x1 - x2; let c3 = x3 - c0; let c4 = c0 - x3 - x0 + x2; let s0 = c0.mla(c1, w0); let s1 = s0.mla(c2, w1); let s2 = s1.mla(c3, w2); s2.mla(c4, w3) } else { let w3 = AvxVectorQ0_15Sse::from(db) * AvxVectorQ0_15Sse::from(dr); let x0 = r.fetch(x, y, z_n); let x1 = r.fetch(x_n, y, z); let x2 = r.fetch(x_n, y, z_n); let x3 = r.fetch(x_n, y_n, z_n); let c1 = x0 - c0; let c2 = x1 - c0; let c3 = x3 - x2; let c4 = c0 - x1 - x0 + x2; let s0 = c0.mla(c1, w0); let s1 = s0.mla(c2, w1); let s2 = s1.mla(c3, w2); s2.mla(c4, w3) } } } #[cfg(feature = "options")] impl PrismaticAvxQ0_15 { #[target_feature(enable = "avx2")] unsafe fn interpolate( &self, in_r: usize, in_g: usize, in_b: usize, lut: &[BarycentricWeight], r: impl Fetcher, ) -> AvxVectorQ0_15Sse { let lut_r = unsafe { *lut.get_unchecked(in_r) }; let lut_g = unsafe { *lut.get_unchecked(in_g) }; let lut_b = unsafe { *lut.get_unchecked(in_b) }; let x: i32 = lut_r.x; let y: i32 = lut_g.x; let z: i32 = lut_b.x; let x_n: i32 = lut_r.x_n; let y_n: i32 = lut_g.x_n; let z_n: i32 = 
lut_b.x_n;
        let dr = lut_r.w;
        let dg = lut_g.w;
        let db = lut_b.w;
        let c0 = r.fetch(x, y, z);
        let w0 = AvxVectorQ0_15Sse::from(db);
        let w1 = AvxVectorQ0_15Sse::from(dr);
        let w2 = AvxVectorQ0_15Sse::from(dg);
        let w3 = AvxVectorQ0_15Sse::from(dg) * AvxVectorQ0_15Sse::from(db);
        let w4 = AvxVectorQ0_15Sse::from(dr) * AvxVectorQ0_15Sse::from(dg);
        if db > dr {
            let x0 = r.fetch(x, y, z_n);
            let x1 = r.fetch(x_n, y, z_n);
            let x2 = r.fetch(x, y_n, z);
            let x3 = r.fetch(x, y_n, z_n);
            let x4 = r.fetch(x_n, y_n, z_n);
            let c1 = x0 - c0;
            let c2 = x1 - x0;
            let c3 = x2 - c0;
            let c4 = c0 - x2 - x0 + x3;
            let c5 = x0 - x3 - x1 + x4;
            let s0 = c0.mla(c1, w0);
            let s1 = s0.mla(c2, w1);
            let s2 = s1.mla(c3, w2);
            let s3 = s2.mla(c4, w3);
            s3.mla(c5, w4)
        } else {
            let x0 = r.fetch(x_n, y, z);
            let x1 = r.fetch(x_n, y, z_n);
            let x2 = r.fetch(x, y_n, z);
            let x3 = r.fetch(x_n, y_n, z);
            let x4 = r.fetch(x_n, y_n, z_n);
            let c1 = x1 - x0;
            let c2 = x0 - c0;
            let c3 = x2 - c0;
            let c4 = x0 - x3 - x1 + x4;
            let c5 = c0 - x2 - x0 + x3;
            let s0 = c0.mla(c1, w0);
            let s1 = s0.mla(c2, w1);
            let s2 = s1.mla(c3, w2);
            let s3 = s2.mla(c4, w3);
            s3.mla(c5, w4)
        }
    }
}

#[cfg(feature = "options")]
impl PrismaticAvxQ0_15Double {
    /// Prismatic interpolation of two fixed-point LUT tables in one pass.
    ///
    /// `r0` reads grid points of the first table and `r1` of the second. Every
    /// pair of corresponding points is packed with `AvxVectorQ0_15::from_sse`
    /// (first table in the low 128-bit half, second table in the high half),
    /// so one 256-bit chain evaluates both tables. The result is split back
    /// into `(first table, second table)`.
    ///
    /// `in_r`, `in_g`, `in_b` select one entry of `lut` per axis; each entry
    /// supplies the lower grid index (`x`), the upper grid index (`x_n`) and
    /// the weight (`w`). The prism half is chosen by `db > dr`, and the value is
    /// `c0 + c1*db + c2*dr + c3*dg + c4*(dg*db) + c5*(dr*dg)`. Here `*` on the
    /// vector types is `_mm256_mulhrs_epi16`, and `mla` adds that product to
    /// the accumulator with `_mm256_add_epi16`.
    ///
    /// # Safety
    ///
    /// The function is compiled with `avx2` enabled, so the caller must only
    /// invoke it on a CPU providing it. `in_r`, `in_g` and `in_b` are used with
    /// `get_unchecked` and therefore must be valid indices into `lut`.
    #[target_feature(enable = "avx2")]
    unsafe fn interpolate(
        &self,
        in_r: usize,
        in_g: usize,
        in_b: usize,
        lut: &[BarycentricWeight],
        r0: impl Fetcher,
        r1: impl Fetcher,
    ) -> (AvxVectorQ0_15Sse, AvxVectorQ0_15Sse) {
        let lut_r = unsafe { *lut.get_unchecked(in_r) };
        let lut_g = unsafe { *lut.get_unchecked(in_g) };
        let lut_b = unsafe { *lut.get_unchecked(in_b) };

        let x: i32 = lut_r.x;
        let y: i32 = lut_g.x;
        let z: i32 = lut_b.x;

        let x_n: i32 = lut_r.x_n;
        let y_n: i32 = lut_g.x_n;
        let z_n: i32 = lut_b.x_n;

        let dr = lut_r.w;
        let dg = lut_g.w;
        let db = lut_b.w;

        // Base corner of the cell, one read per table. The second read has to
        // go through `r1`: reading it from `r0` would put the first table's
        // value into the high half of `c0`, while all other corners of that
        // half come from the second table.
        let c0_0 = r0.fetch(x, y, z);
        let c0_1 = r1.fetch(x, y, z);

        let w0 = AvxVectorQ0_15::from(db);
        let w1 = AvxVectorQ0_15::from(dr);
        let w2 = AvxVectorQ0_15::from(dg);
        // Product weights are formed in the vector domain (rounded Q0.15 multiply).
        let w3 = AvxVectorQ0_15::from(dg) * AvxVectorQ0_15::from(db);
        let w4 = AvxVectorQ0_15::from(dr) * AvxVectorQ0_15::from(dg);

        let c0 = AvxVectorQ0_15::from_sse(c0_0, c0_1);

        if db > dr {
            let x0_0 = r0.fetch(x, y, z_n);
            let x1_0 = r0.fetch(x_n, y, z_n);
            let x2_0 = r0.fetch(x, y_n, z);
            let x3_0 = r0.fetch(x, y_n, z_n);
            let x4_0 = r0.fetch(x_n, y_n, z_n);

            let x0_1 = r1.fetch(x, y, z_n);
            let x1_1 = r1.fetch(x_n, y, z_n);
            let x2_1 = r1.fetch(x, y_n, z);
            let x3_1 = r1.fetch(x, y_n, z_n);
            let x4_1 = r1.fetch(x_n, y_n, z_n);

            let x0 = AvxVectorQ0_15::from_sse(x0_0, x0_1);
            let x1 = AvxVectorQ0_15::from_sse(x1_0, x1_1);
            let x2 = AvxVectorQ0_15::from_sse(x2_0, x2_1);
            let x3 = AvxVectorQ0_15::from_sse(x3_0, x3_1);
            let x4 = AvxVectorQ0_15::from_sse(x4_0, x4_1);

            let c1 = x0 - c0;
            let c2 = x1 - x0;
            let c3 = x2 - c0;
            let c4 = c0 - x2 - x0 + x3;
            let c5 = x0 - x3 - x1 + x4;

            let s0 = c0.mla(c1, w0);
            let s1 = s0.mla(c2, w1);
            let s2 = s1.mla(c3, w2);
            let s3 = s2.mla(c4, w3);
            s3.mla(c5, w4).split()
        } else {
            let x0_0 = r0.fetch(x_n, y, z);
            let x1_0 = r0.fetch(x_n, y, z_n);
            let x2_0 = r0.fetch(x, y_n, z);
            let x3_0 = r0.fetch(x_n, y_n, z);
            let x4_0 = r0.fetch(x_n, y_n, z_n);

            let x0_1 = r1.fetch(x_n, y, z);
            let x1_1 = r1.fetch(x_n, y, z_n);
            let x2_1 = r1.fetch(x, y_n, z);
            let x3_1 = r1.fetch(x_n, y_n, z);
            let x4_1 = r1.fetch(x_n, y_n, z_n);

            let x0 = AvxVectorQ0_15::from_sse(x0_0, x0_1);
            let x1 = AvxVectorQ0_15::from_sse(x1_0, x1_1);
            let x2 = AvxVectorQ0_15::from_sse(x2_0, x2_1);
            let x3 = AvxVectorQ0_15::from_sse(x3_0, x3_1);
            let x4 = AvxVectorQ0_15::from_sse(x4_0, x4_1);

            let c1 = x1 - x0;
            let c2 = x0 - c0;
            let c3 = x2 - c0;
            let c4 = x0 - x3 - x1 + x4;
            let c5 = c0 - x2 - x0 + x3;

            let s0 = c0.mla(c1, w0);
            let s1 = s0.mla(c2, w1);
            let s2 = s1.mla(c3, w2);
            let s3 = s2.mla(c4, w3);
            s3.mla(c5, w4).split()
        }
    }
}

#[cfg(feature = "options")]
impl PyramidAvxFmaQ0_15Double {
    #[target_feature(enable = "avx2")]
    unsafe fn interpolate(
        &self,
        in_r: usize,
        in_g: usize,
        in_b: usize,
        lut: &[BarycentricWeight],
        r0: impl Fetcher,
        r1: impl Fetcher,
    ) -> (AvxVectorQ0_15Sse,
AvxVectorQ0_15Sse) { let lut_r = unsafe { *lut.get_unchecked(in_r) }; let lut_g = unsafe { *lut.get_unchecked(in_g) }; let lut_b = unsafe { *lut.get_unchecked(in_b) }; let x: i32 = lut_r.x; let y: i32 = lut_g.x; let z: i32 = lut_b.x; let x_n: i32 = lut_r.x_n; let y_n: i32 = lut_g.x_n; let z_n: i32 = lut_b.x_n; let dr = lut_r.w; let dg = lut_g.w; let db = lut_b.w; let c0_0 = r0.fetch(x, y, z); let c0_1 = r1.fetch(x, y, z); let w0 = AvxVectorQ0_15::from(db); let w1 = AvxVectorQ0_15::from(dr); let w2 = AvxVectorQ0_15::from(dg); let c0 = AvxVectorQ0_15::from_sse(c0_0, c0_1); if dr > db && dg > db { let w3 = AvxVectorQ0_15::from(dr) * AvxVectorQ0_15::from(dg); let x0_0 = r0.fetch(x_n, y_n, z_n); let x1_0 = r0.fetch(x_n, y_n, z); let x2_0 = r0.fetch(x_n, y, z); let x3_0 = r0.fetch(x, y_n, z); let x0_1 = r1.fetch(x_n, y_n, z_n); let x1_1 = r1.fetch(x_n, y_n, z); let x2_1 = r1.fetch(x_n, y, z); let x3_1 = r1.fetch(x, y_n, z); let x0 = AvxVectorQ0_15::from_sse(x0_0, x0_1); let x1 = AvxVectorQ0_15::from_sse(x1_0, x1_1); let x2 = AvxVectorQ0_15::from_sse(x2_0, x2_1); let x3 = AvxVectorQ0_15::from_sse(x3_0, x3_1); let c1 = x0 - x1; let c2 = x2 - c0; let c3 = x3 - c0; let c4 = c0 - x3 - x2 + x1; let s0 = c0.mla(c1, w0); let s1 = s0.mla(c2, w1); let s2 = s1.mla(c3, w2); s2.mla(c4, w3).split() } else if db > dr && dg > dr { let w3 = AvxVectorQ0_15::from(dg) * AvxVectorQ0_15::from(db); let x0_0 = r0.fetch(x, y, z_n); let x1_0 = r0.fetch(x_n, y_n, z_n); let x2_0 = r0.fetch(x, y_n, z_n); let x3_0 = r0.fetch(x, y_n, z); let x0_1 = r1.fetch(x, y, z_n); let x1_1 = r1.fetch(x_n, y_n, z_n); let x2_1 = r1.fetch(x, y_n, z_n); let x3_1 = r1.fetch(x, y_n, z); let x0 = AvxVectorQ0_15::from_sse(x0_0, x0_1); let x1 = AvxVectorQ0_15::from_sse(x1_0, x1_1); let x2 = AvxVectorQ0_15::from_sse(x2_0, x2_1); let x3 = AvxVectorQ0_15::from_sse(x3_0, x3_1); let c1 = x0 - c0; let c2 = x1 - x2; let c3 = x3 - c0; let c4 = c0 - x3 - x0 + x2; let s0 = c0.mla(c1, w0); let s1 = s0.mla(c2, w1); let s2 = s1.mla(c3, w2); s2.mla(c4, w3).split() } else { let w3 = AvxVectorQ0_15::from(db) * AvxVectorQ0_15::from(dr); let x0_0 = r0.fetch(x, y, z_n); let x1_0 = r0.fetch(x_n, y, z); let x2_0 = r0.fetch(x_n, y, z_n); let x3_0 = r0.fetch(x_n, y_n, z_n); let x0_1 = r1.fetch(x, y, z_n); let x1_1 = r1.fetch(x_n, y, z); let x2_1 = r1.fetch(x_n, y, z_n); let x3_1 = r1.fetch(x_n, y_n, z_n); let x0 = AvxVectorQ0_15::from_sse(x0_0, x0_1); let x1 = AvxVectorQ0_15::from_sse(x1_0, x1_1); let x2 = AvxVectorQ0_15::from_sse(x2_0, x2_1); let x3 = AvxVectorQ0_15::from_sse(x3_0, x3_1); let c1 = x0 - c0; let c2 = x1 - c0; let c3 = x3 - x2; let c4 = c0 - x1 - x0 + x2; let s0 = c0.mla(c1, w0); let s1 = s0.mla(c2, w1); let s2 = s1.mla(c3, w2); s2.mla(c4, w3).split() } } }
#[cfg(feature = "options")]
impl TetrahedralAvxQ0_15Double {
    /// Tetrahedral interpolation in Q0.15 fixed point; the result is
    /// returned as the two halves produced by `split()`.
    ///
    /// The ordering of the three weights decides which of six node paths from
    /// `(x, y, z)` to `(x_n, y_n, z_n)` is walked. Each step contributes the
    /// difference of two fetched nodes, scaled by the weight of the axis it
    /// moves along.
    ///
    /// # Safety
    ///
    /// * The CPU must support AVX2 (required by `#[target_feature]`).
    /// * `in_r`, `in_g` and `in_b` must be valid indices into `lut`; they are
    ///   read with `get_unchecked`.
    #[target_feature(enable = "avx2")]
    unsafe fn interpolate(
        &self,
        in_r: usize,
        in_g: usize,
        in_b: usize,
        lut: &[BarycentricWeight],
        rv: impl Fetcher,
    ) -> (AvxVectorQ0_15Sse, AvxVectorQ0_15Sse) {
        // SAFETY: the caller guarantees all three indices are in bounds for `lut`.
        let (bin_r, bin_g, bin_b) = unsafe {
            (
                *lut.get_unchecked(in_r),
                *lut.get_unchecked(in_g),
                *lut.get_unchecked(in_b),
            )
        };

        let (x, y, z): (i32, i32, i32) = (bin_r.x, bin_g.x, bin_b.x);
        let (x_n, y_n, z_n): (i32, i32, i32) = (bin_r.x_n, bin_g.x_n, bin_b.x_n);
        let (rx, ry, rz) = (bin_r.w, bin_g.w, bin_b.w);

        let base = rv.fetch(x, y, z);

        let weight_x = AvxVectorQ0_15::from(rx);
        let weight_y = AvxVectorQ0_15::from(ry);
        let weight_z = AvxVectorQ0_15::from(rz);

        let (step_x, step_y, step_z) = match (rx >= ry, ry >= rz, rx >= rz) {
            // rx >= ry && ry >= rz
            (true, true, _) => (
                rv.fetch(x_n, y, z) - base,
                rv.fetch(x_n, y_n, z) - rv.fetch(x_n, y, z),
                rv.fetch(x_n, y_n, z_n) - rv.fetch(x_n, y_n, z),
            ),
            // rx >= rz && rz >= ry
            (true, false, true) => (
                rv.fetch(x_n, y, z) - base,
                rv.fetch(x_n, y_n, z_n) - rv.fetch(x_n, y, z_n),
                rv.fetch(x_n, y, z_n) - rv.fetch(x_n, y, z),
            ),
            // rz > rx && rx >= ry
            (true, false, false) => (
                rv.fetch(x_n, y, z_n) - rv.fetch(x, y, z_n),
                rv.fetch(x_n, y_n, z_n) - rv.fetch(x_n, y, z_n),
                rv.fetch(x, y, z_n) - base,
            ),
            // ry > rx && rx >= rz
            (false, _, true) => (
                rv.fetch(x_n, y_n, z) - rv.fetch(x, y_n, z),
                rv.fetch(x, y_n, z) - base,
                rv.fetch(x_n, y_n, z_n) - rv.fetch(x_n, y_n, z),
            ),
            // ry >= rz && rz > rx
            (false, true, false) => (
                rv.fetch(x_n, y_n, z_n) - rv.fetch(x, y_n, z_n),
                rv.fetch(x, y_n, z) - base,
                rv.fetch(x, y_n, z_n) - rv.fetch(x, y_n, z),
            ),
            // rz > ry && ry > rx
            (false, false, false) => (
                rv.fetch(x_n, y_n, z_n) - rv.fetch(x, y_n, z_n),
                rv.fetch(x, y_n, z_n) - rv.fetch(x, y, z_n),
                rv.fetch(x, y, z_n) - base,
            ),
        };

        base.mla(step_x, weight_x)
            .mla(step_y, weight_y)
            .mla(step_z, weight_z)
            .split()
    }
}

impl TrilinearAvxQ0_15Double {
    /// Trilinear interpolation in Q0.15 fixed point; the result is returned
    /// as the two halves produced by `split()`.
    ///
    /// All eight corners of the cell are fetched, then blended along x, then
    /// y, then z. Each blend is `a * (Q_MAX - w) + b * w`.
    ///
    /// # Safety
    ///
    /// * The CPU must support AVX2 (required by `#[target_feature]`).
    /// * `in_r`, `in_g` and `in_b` must be valid indices into `lut`; they are
    ///   read with `get_unchecked`.
    #[target_feature(enable = "avx2")]
    unsafe fn interpolate(
        &self,
        in_r: usize,
        in_g: usize,
        in_b: usize,
        lut: &[BarycentricWeight],
        rv: impl Fetcher,
    ) -> (AvxVectorQ0_15Sse, AvxVectorQ0_15Sse) {
        // Largest Q0.15 value, (1 << 15) - 1.
        const Q_MAX: i16 = i16::MAX;

        // SAFETY: the caller guarantees all three indices are in bounds for `lut`.
        let (bin_r, bin_g, bin_b) = unsafe {
            (
                *lut.get_unchecked(in_r),
                *lut.get_unchecked(in_g),
                *lut.get_unchecked(in_b),
            )
        };

        let (x, y, z): (i32, i32, i32) = (bin_r.x, bin_g.x, bin_b.x);
        let (x_n, y_n, z_n): (i32, i32, i32) = (bin_r.x_n, bin_g.x_n, bin_b.x_n);

        let one = AvxVectorQ0_15::from(Q_MAX);

        let far_x = AvxVectorQ0_15::from(bin_r.w);
        let far_y = AvxVectorQ0_15::from(bin_g.w);
        let far_z = AvxVectorQ0_15::from(bin_b.w);

        let near_x = one - far_x;
        let near_y = one - far_y;
        let near_z = one - far_z;

        let n000 = rv.fetch(x, y, z);
        let n100 = rv.fetch(x_n, y, z);
        let n010 = rv.fetch(x, y_n, z);
        let n110 = rv.fetch(x_n, y_n, z);
        let n001 = rv.fetch(x, y, z_n);
        let n101 = rv.fetch(x_n, y, z_n);
        let n011 = rv.fetch(x, y_n, z_n);
        let n111 = rv.fetch(x_n, y_n, z_n);

        // Collapse the x axis: four edges -> four values.
        let e00 = (n000 * near_x).mla(n100, far_x);
        let e10 = (n010 * near_x).mla(n110, far_x);
        let e01 = (n001 * near_x).mla(n101, far_x);
        let e11 = (n011 * near_x).mla(n111, far_x);

        // Collapse the y axis: one value per z plane.
        let plane_z = (e00 * near_y).mla(e10, far_y);
        let plane_zn = (e01 * near_y).mla(e11, far_y);

        // Collapse the z axis.
        (plane_z * near_z).mla(plane_zn, far_z).split()
    }
}
impl TrilinearAvxQ0_15 {
    /// Trilinear interpolation of a single table in Q0.15 fixed point.
    ///
    /// Each node at `z` is packed together with its counterpart at `z_n` via
    /// `AvxVectorQ0_15::from_sse`, so the x and y blends are computed once
    /// for both z planes. The packed value is then `split()` and the two
    /// planes are blended along z on `AvxVectorQ0_15Sse` values.
    ///
    /// # Safety
    ///
    /// * The CPU must support AVX2 (required by `#[target_feature]`).
    /// * `in_r`, `in_g` and `in_b` must be valid indices into `lut`; they are
    ///   read with `get_unchecked`.
    #[target_feature(enable = "avx2")]
    unsafe fn interpolate(
        &self,
        in_r: usize,
        in_g: usize,
        in_b: usize,
        lut: &[BarycentricWeight],
        r: impl Fetcher,
    ) -> AvxVectorQ0_15Sse {
        // Largest Q0.15 value, (1 << 15) - 1.
        const Q_MAX: i16 = i16::MAX;

        // SAFETY: the caller guarantees all three indices are in bounds for `lut`.
        let (bin_r, bin_g, bin_b) = unsafe {
            (
                *lut.get_unchecked(in_r),
                *lut.get_unchecked(in_g),
                *lut.get_unchecked(in_b),
            )
        };

        let (x, y, z): (i32, i32, i32) = (bin_r.x, bin_g.x, bin_b.x);
        let (x_n, y_n, z_n): (i32, i32, i32) = (bin_r.x_n, bin_g.x_n, bin_b.x_n);

        let one_half = AvxVectorQ0_15Sse::from(Q_MAX);
        let one_wide = AvxVectorQ0_15::from(Q_MAX);

        // x and y weights act on the packed pair, the z weight on a single half.
        let far_x = AvxVectorQ0_15::from(bin_r.w);
        let far_y = AvxVectorQ0_15::from(bin_g.w);
        let far_z = AvxVectorQ0_15Sse::from(bin_b.w);

        let near_x = one_wide - far_x;
        let near_y = one_wide - far_y;
        let near_z = one_half - far_z;

        let n000 = r.fetch(x, y, z);
        let n100 = r.fetch(x_n, y, z);
        let n010 = r.fetch(x, y_n, z);
        let n110 = r.fetch(x_n, y_n, z);
        let n001 = r.fetch(x, y, z_n);
        let n101 = r.fetch(x_n, y, z_n);
        let n011 = r.fetch(x, y_n, z_n);
        let n111 = r.fetch(x_n, y_n, z_n);

        // Pair each node at `z` with the same node at `z_n`.
        let pair_00 = AvxVectorQ0_15::from_sse(n000, n001);
        let pair_01 = AvxVectorQ0_15::from_sse(n010, n011);
        let pair_10 = AvxVectorQ0_15::from_sse(n100, n101);
        let pair_11 = AvxVectorQ0_15::from_sse(n110, n111);

        // Collapse x, then y, for both z planes at once.
        let edge_y = (pair_00 * near_x).mla(pair_10, far_x);
        let edge_yn = (pair_01 * near_x).mla(pair_11, far_x);
        let planes = (edge_y * near_y).mla(edge_yn, far_y);

        // Unpack the two planes and collapse z.
        let (plane_z, plane_zn) = planes.split();
        (plane_z * near_z).mla(plane_zn, far_z)
    }
}
moxcms-0.7.7/src/conversions/avx/lut4_to_3.rs000064400000000000000000000320151046102023000172650ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2.
Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ use crate::conversions::LutBarycentricReduction; use crate::conversions::avx::interpolator::*; use crate::conversions::avx::interpolator_q0_15::AvxAlignedI16; use crate::conversions::avx::lut4_to_3_q0_15::TransformLut4To3AvxQ0_15; use crate::conversions::interpolator::BarycentricWeight; use crate::conversions::lut_transforms::Lut4x3Factory; use crate::transform::PointeeSizeExpressible; use crate::{ BarycentricWeightScale, CmsError, DataColorSpace, InterpolationMethod, Layout, TransformExecutor, TransformOptions, }; use num_traits::AsPrimitive; use std::arch::x86_64::*; use std::marker::PhantomData; struct TransformLut4To3Avx< T, U, const LAYOUT: u8, const GRID_SIZE: usize, const BIT_DEPTH: usize, const BINS: usize, const BARYCENTRIC_BINS: usize, > { lut: Vec, _phantom: PhantomData, _phantom1: PhantomData, interpolation_method: InterpolationMethod, weights: Box<[BarycentricWeight; BINS]>, color_space: DataColorSpace, is_linear: bool, } impl< T: Copy + AsPrimitive + Default + PointeeSizeExpressible, U: AsPrimitive, const LAYOUT: u8, const GRID_SIZE: usize, const BIT_DEPTH: usize, const BINS: usize, const BARYCENTRIC_BINS: usize, > TransformLut4To3Avx where f32: AsPrimitive, u32: AsPrimitive, (): LutBarycentricReduction, { #[allow(unused_unsafe)] #[target_feature(enable = "avx2", enable = "fma")] unsafe fn transform_chunk( &self, src: &[T], dst: &mut [T], interpolator: Box, ) { let cn = Layout::from(LAYOUT); let channels = cn.channels(); let grid_size = GRID_SIZE as i32; let grid_size3 = grid_size * grid_size * grid_size; let value_scale = unsafe { _mm_set1_ps(((1 << BIT_DEPTH) - 1) as f32) }; let max_value = ((1 << BIT_DEPTH) - 1u32).as_(); for (src, dst) in src.chunks_exact(4).zip(dst.chunks_exact_mut(channels)) { let c = <() as LutBarycentricReduction>::reduce::( src[0], ); let m = <() as LutBarycentricReduction>::reduce::( src[1], ); let y = <() as LutBarycentricReduction>::reduce::( src[2], ); let k = <() as LutBarycentricReduction>::reduce::( src[3], ); let 
k_weights = self.weights[k.as_()]; let w: i32 = k_weights.x; let w_n: i32 = k_weights.x_n; let t: f32 = k_weights.w; let table1 = &self.lut[(w * grid_size3) as usize..]; let table2 = &self.lut[(w_n * grid_size3) as usize..]; let v = interpolator.inter3_sse( table1, table2, c.as_(), m.as_(), y.as_(), self.weights.as_slice(), ); let (a0, b0) = (v.0.v, v.1.v); if T::FINITE { unsafe { let t0 = _mm_set1_ps(t); let hp = _mm_fnmadd_ps(a0, t0, a0); let mut v = _mm_fmadd_ps(b0, t0, hp); v = _mm_max_ps(v, _mm_setzero_ps()); v = _mm_mul_ps(v, value_scale); v = _mm_min_ps(v, value_scale); let jvz = _mm_cvtps_epi32(v); let x = _mm_extract_epi32::<0>(jvz); let y = _mm_extract_epi32::<1>(jvz); let z = _mm_extract_epi32::<2>(jvz); dst[cn.r_i()] = (x as u32).as_(); dst[cn.g_i()] = (y as u32).as_(); dst[cn.b_i()] = (z as u32).as_(); } } else { unsafe { let t0 = _mm_set1_ps(t); let hp = _mm_fnmadd_ps(a0, t0, a0); let v = _mm_fmadd_ps(b0, t0, hp); dst[cn.r_i()] = f32::from_bits(_mm_extract_ps::<0>(v) as u32).as_(); dst[cn.g_i()] = f32::from_bits(_mm_extract_ps::<1>(v) as u32).as_(); dst[cn.b_i()] = f32::from_bits(_mm_extract_ps::<2>(v) as u32).as_(); } } if channels == 4 { dst[cn.a_i()] = max_value; } } } } impl< T: Copy + AsPrimitive + Default + PointeeSizeExpressible, U: AsPrimitive, const LAYOUT: u8, const GRID_SIZE: usize, const BIT_DEPTH: usize, const BINS: usize, const BARYCENTRIC_BINS: usize, > TransformExecutor for TransformLut4To3Avx where f32: AsPrimitive, u32: AsPrimitive, (): LutBarycentricReduction, { fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> { let cn = Layout::from(LAYOUT); let channels = cn.channels(); if src.len() % 4 != 0 { return Err(CmsError::LaneMultipleOfChannels); } if dst.len() % channels != 0 { return Err(CmsError::LaneMultipleOfChannels); } let src_chunks = src.len() / 4; let dst_chunks = dst.len() / channels; if src_chunks != dst_chunks { return Err(CmsError::LaneSizeMismatch); } unsafe { if self.color_space == DataColorSpace::Lab 
|| (self.is_linear && self.color_space == DataColorSpace::Rgb) || self.color_space == DataColorSpace::Xyz { self.transform_chunk(src, dst, Box::new(TrilinearAvxFmaDouble:: {})); } else { match self.interpolation_method { #[cfg(feature = "options")] InterpolationMethod::Tetrahedral => { self.transform_chunk( src, dst, Box::new(TetrahedralAvxFmaDouble:: {}), ); } #[cfg(feature = "options")] InterpolationMethod::Pyramid => { self.transform_chunk( src, dst, Box::new(PyramidAvxFmaDouble:: {}), ); } #[cfg(feature = "options")] InterpolationMethod::Prism => { self.transform_chunk( src, dst, Box::new(PrismaticAvxFmaDouble:: {}), ); } InterpolationMethod::Linear => { self.transform_chunk( src, dst, Box::new(TrilinearAvxFmaDouble:: {}), ); } } } } Ok(()) } } pub(crate) struct AvxLut4x3Factory {} impl Lut4x3Factory for AvxLut4x3Factory { fn make_transform_4x3< T: Copy + AsPrimitive + Default + PointeeSizeExpressible + 'static + Send + Sync, const LAYOUT: u8, const GRID_SIZE: usize, const BIT_DEPTH: usize, >( lut: Vec, options: TransformOptions, color_space: DataColorSpace, is_linear: bool, ) -> Box + Send + Sync> where f32: AsPrimitive, u32: AsPrimitive, (): LutBarycentricReduction, (): LutBarycentricReduction, { if options.prefer_fixed_point && BIT_DEPTH < 16 { let q: f32 = if T::FINITE { ((1i32 << BIT_DEPTH as i32) - 1) as f32 } else { ((1i32 << 14i32) - 1) as f32 }; let lut = lut .chunks_exact(3) .map(|x| { AvxAlignedI16([ (x[0] * q).round() as i16, (x[1] * q).round() as i16, (x[2] * q).round() as i16, 0, ]) }) .collect::>(); return match options.barycentric_weight_scale { BarycentricWeightScale::Low => Box::new(TransformLut4To3AvxQ0_15::< T, u8, LAYOUT, GRID_SIZE, BIT_DEPTH, 256, 256, > { lut, interpolation_method: options.interpolation_method, weights: BarycentricWeight::::create_ranged_256::(), _phantom: PhantomData, _phantom1: PhantomData, color_space, is_linear, }), #[cfg(feature = "options")] BarycentricWeightScale::High => Box::new(TransformLut4To3AvxQ0_15::< T, 
u16, LAYOUT, GRID_SIZE, BIT_DEPTH, 65536, 65536, > { lut, interpolation_method: options.interpolation_method, weights: BarycentricWeight::::create_binned::(), _phantom: PhantomData, _phantom1: PhantomData, color_space, is_linear, }), }; } assert!( std::arch::is_x86_feature_detected!("fma"), "Internal configuration error, this feature might not be called without `fma` feature" ); let lut = lut .chunks_exact(3) .map(|x| SseAlignedF32([x[0], x[1], x[2], 0f32])) .collect::>(); match options.barycentric_weight_scale { BarycentricWeightScale::Low => { Box::new( TransformLut4To3Avx:: { lut, interpolation_method: options.interpolation_method, weights: BarycentricWeight::::create_ranged_256::(), _phantom: PhantomData, _phantom1: PhantomData, color_space, is_linear, }, ) } #[cfg(feature = "options")] BarycentricWeightScale::High => { Box::new( TransformLut4To3Avx:: { lut, interpolation_method: options.interpolation_method, weights: BarycentricWeight::::create_binned::(), _phantom: PhantomData, _phantom1: PhantomData, color_space, is_linear, }, ) } } } } moxcms-0.7.7/src/conversions/avx/lut4_to_3_q0_15.rs000064400000000000000000000217171046102023000202010ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. 
* // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ use crate::conversions::LutBarycentricReduction; use crate::conversions::avx::interpolator_q0_15::*; use crate::conversions::interpolator::BarycentricWeight; use crate::transform::PointeeSizeExpressible; use crate::{CmsError, DataColorSpace, InterpolationMethod, Layout, TransformExecutor}; use num_traits::AsPrimitive; use std::arch::x86_64::*; use std::marker::PhantomData; pub(crate) struct TransformLut4To3AvxQ0_15< T, U, const LAYOUT: u8, const GRID_SIZE: usize, const BIT_DEPTH: usize, const BINS: usize, const BARYCENTRIC_BINS: usize, > { pub(crate) lut: Vec, pub(crate) _phantom: PhantomData, pub(crate) _phantom1: PhantomData, pub(crate) interpolation_method: InterpolationMethod, pub(crate) weights: Box<[BarycentricWeight; BINS]>, pub(crate) color_space: DataColorSpace, pub(crate) is_linear: bool, } impl< T: Copy + AsPrimitive + Default + PointeeSizeExpressible, U: AsPrimitive, const LAYOUT: u8, const GRID_SIZE: usize, const BIT_DEPTH: usize, const BINS: usize, const BARYCENTRIC_BINS: usize, > TransformLut4To3AvxQ0_15 where f32: AsPrimitive, u32: AsPrimitive, (): LutBarycentricReduction, { #[allow(unused_unsafe)] #[target_feature(enable = "avx2")] unsafe fn 
transform_chunk( &self, src: &[T], dst: &mut [T], interpolator: Box, ) { unsafe { let cn = Layout::from(LAYOUT); let channels = cn.channels(); let grid_size = GRID_SIZE as i32; let grid_size3 = grid_size * grid_size * grid_size; let f_value_scale = _mm_set1_ps(1. / ((1 << 14i32) - 1) as f32); let max_value = ((1u32 << BIT_DEPTH) - 1).as_(); let v_max_scale = if T::FINITE { _mm_set1_epi16(((1i32 << BIT_DEPTH) - 1) as i16) } else { _mm_set1_epi16(((1i32 << 14i32) - 1) as i16) }; for (src, dst) in src.chunks_exact(4).zip(dst.chunks_exact_mut(channels)) { let c = <() as LutBarycentricReduction>::reduce::( src[0], ); let m = <() as LutBarycentricReduction>::reduce::( src[1], ); let y = <() as LutBarycentricReduction>::reduce::( src[2], ); let k = <() as LutBarycentricReduction>::reduce::( src[3], ); let k_weights = self.weights[k.as_()]; let w: i32 = k_weights.x; let w_n: i32 = k_weights.x_n; const Q: i16 = ((1i32 << 15) - 1) as i16; let t: i16 = k_weights.w; let t_n: i16 = Q - t; let table1 = &self.lut[(w * grid_size3) as usize..]; let table2 = &self.lut[(w_n * grid_size3) as usize..]; let v = interpolator.inter3_sse( table1, table2, c.as_(), m.as_(), y.as_(), self.weights.as_slice(), ); let (a0, b0) = (v.0.v, v.1.v); let hp = _mm_mulhrs_epi16(_mm_set1_epi16(t_n), a0); let v = _mm_add_epi16(hp, _mm_mulhrs_epi16(b0, _mm_set1_epi16(t))); if T::FINITE { let mut o = _mm_max_epi16(v, _mm_setzero_si128()); o = _mm_min_epi16(o, v_max_scale); let x = _mm_extract_epi16::<0>(o); let y = _mm_extract_epi16::<1>(o); let z = _mm_extract_epi16::<2>(o); dst[cn.r_i()] = (x as u32).as_(); dst[cn.g_i()] = (y as u32).as_(); dst[cn.b_i()] = (z as u32).as_(); } else { let mut r = _mm_cvtepi32_ps(_mm_cvtepi16_epi32(v)); r = _mm_mul_ps(r, f_value_scale); dst[cn.r_i()] = f32::from_bits(_mm_extract_ps::<0>(r) as u32).as_(); dst[cn.g_i()] = f32::from_bits(_mm_extract_ps::<1>(r) as u32).as_(); dst[cn.b_i()] = f32::from_bits(_mm_extract_ps::<2>(r) as u32).as_(); } if channels == 4 { dst[cn.a_i()] 
= max_value; } } } } } impl< T: Copy + AsPrimitive + Default + PointeeSizeExpressible, U: AsPrimitive, const LAYOUT: u8, const GRID_SIZE: usize, const BIT_DEPTH: usize, const BINS: usize, const BARYCENTRIC_BINS: usize, > TransformExecutor for TransformLut4To3AvxQ0_15 where f32: AsPrimitive, u32: AsPrimitive, (): LutBarycentricReduction, { fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> { let cn = Layout::from(LAYOUT); let channels = cn.channels(); if src.len() % 4 != 0 { return Err(CmsError::LaneMultipleOfChannels); } if dst.len() % channels != 0 { return Err(CmsError::LaneMultipleOfChannels); } let src_chunks = src.len() / 4; let dst_chunks = dst.len() / channels; if src_chunks != dst_chunks { return Err(CmsError::LaneSizeMismatch); } unsafe { if self.color_space == DataColorSpace::Lab || (self.is_linear && self.color_space == DataColorSpace::Rgb) || self.color_space == DataColorSpace::Xyz { self.transform_chunk(src, dst, Box::new(TrilinearAvxQ0_15Double:: {})); } else { match self.interpolation_method { #[cfg(feature = "options")] InterpolationMethod::Tetrahedral => { self.transform_chunk( src, dst, Box::new(TetrahedralAvxQ0_15Double:: {}), ); } #[cfg(feature = "options")] InterpolationMethod::Pyramid => { self.transform_chunk( src, dst, Box::new(PyramidAvxFmaQ0_15Double:: {}), ); } #[cfg(feature = "options")] InterpolationMethod::Prism => { self.transform_chunk( src, dst, Box::new(PrismaticAvxQ0_15Double:: {}), ); } InterpolationMethod::Linear => { self.transform_chunk( src, dst, Box::new(TrilinearAvxQ0_15Double:: {}), ); } } } } Ok(()) } } moxcms-0.7.7/src/conversions/avx/mod.rs000064400000000000000000000041551046102023000162340ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. 
Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ mod interpolator; mod interpolator_q0_15; mod lut4_to_3; mod lut4_to_3_q0_15; mod rgb_xyz; mod rgb_xyz_opt; mod rgb_xyz_q2_13; mod rgb_xyz_q2_13_opt; mod t_lut3_to_3; mod t_lut3_to_3_q0_15; pub(crate) use lut4_to_3::AvxLut4x3Factory; pub(crate) use rgb_xyz::TransformShaperRgbAvx; pub(crate) use rgb_xyz_opt::TransformShaperRgbOptAvx; pub(crate) use rgb_xyz_q2_13::TransformShaperRgbQ2_13Avx; pub(crate) use rgb_xyz_q2_13_opt::TransformShaperRgbQ2_13OptAvx; pub(crate) use t_lut3_to_3::AvxLut3x3Factory; moxcms-0.7.7/src/conversions/avx/rgb_xyz.rs000064400000000000000000000313551046102023000171430ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ use crate::conversions::TransformMatrixShaper; use crate::transform::PointeeSizeExpressible; use crate::{CmsError, Layout, TransformExecutor}; use num_traits::AsPrimitive; use std::arch::x86_64::*; #[repr(align(32), C)] #[derive(Debug)] pub(crate) struct AvxAlignedU16(pub(crate) [u16; 16]); pub(crate) struct TransformShaperRgbAvx< T: Clone + Copy + 'static + PointeeSizeExpressible + Default, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, // deleting linear cap is in effective here const LINEAR_CAP: usize, > { pub(crate) profile: TransformMatrixShaper, pub(crate) bit_depth: usize, pub(crate) gamma_lut: usize, } impl< T: Clone + Copy + 'static + PointeeSizeExpressible + Default, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, const LINEAR_CAP: usize, > TransformShaperRgbAvx where u32: AsPrimitive, { #[target_feature(enable = "avx2", enable = "fma")] unsafe fn transform_impl( &self, src: &[T], dst: &mut [T], ) -> Result<(), CmsError> { let src_cn = Layout::from(SRC_LAYOUT); let dst_cn = Layout::from(DST_LAYOUT); let src_channels = src_cn.channels(); let dst_channels = dst_cn.channels(); let mut temporary0 = AvxAlignedU16([0; 16]); if src.len() / src_channels != dst.len() / dst_channels { return Err(CmsError::LaneSizeMismatch); } if src.len() % src_channels != 0 { return Err(CmsError::LaneMultipleOfChannels); } if dst.len() % dst_channels != 0 { return Err(CmsError::LaneMultipleOfChannels); } let t = 
self.profile.adaptation_matrix.transpose(); let scale = (self.gamma_lut - 1) as f32; let max_colors: T = ((1 << self.bit_depth) - 1).as_(); unsafe { let m0 = _mm256_setr_ps( t.v[0][0], t.v[0][1], t.v[0][2], 0., t.v[0][0], t.v[0][1], t.v[0][2], 0., ); let m1 = _mm256_setr_ps( t.v[1][0], t.v[1][1], t.v[1][2], 0., t.v[1][0], t.v[1][1], t.v[1][2], 0., ); let m2 = _mm256_setr_ps( t.v[2][0], t.v[2][1], t.v[2][2], 0., t.v[2][0], t.v[2][1], t.v[2][2], 0., ); let zeros = _mm_setzero_ps(); let v_scale = _mm256_set1_ps(scale); let mut src = src; let mut dst = dst; let mut src_iter = src.chunks_exact(src_channels * 2); let dst_iter = dst.chunks_exact_mut(dst_channels * 2); let (mut r0, mut g0, mut b0, mut a0); let (mut r1, mut g1, mut b1, mut a1); if let Some(src) = src_iter.next() { r0 = _mm_broadcast_ss(&self.profile.r_linear[src[src_cn.r_i()]._as_usize()]); g0 = _mm_broadcast_ss(&self.profile.g_linear[src[src_cn.g_i()]._as_usize()]); b0 = _mm_broadcast_ss(&self.profile.b_linear[src[src_cn.b_i()]._as_usize()]); r1 = _mm_broadcast_ss( &self.profile.r_linear[src[src_cn.r_i() + src_channels]._as_usize()], ); g1 = _mm_broadcast_ss( &self.profile.g_linear[src[src_cn.g_i() + src_channels]._as_usize()], ); b1 = _mm_broadcast_ss( &self.profile.b_linear[src[src_cn.b_i() + src_channels]._as_usize()], ); a0 = if src_channels == 4 { src[src_cn.a_i()] } else { max_colors }; a1 = if src_channels == 4 { src[src_cn.a_i() + src_channels] } else { max_colors }; } else { r0 = _mm_setzero_ps(); g0 = _mm_setzero_ps(); b0 = _mm_setzero_ps(); a0 = max_colors; r1 = _mm_setzero_ps(); g1 = _mm_setzero_ps(); b1 = _mm_setzero_ps(); a1 = max_colors; } for (src, dst) in src_iter.zip(dst_iter) { let r = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(r0), r1); let g = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(g0), g1); let b = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(b0), b1); let mut v = if FMA { let v0 = _mm256_mul_ps(r, m0); let v1 = _mm256_fmadd_ps(g, m1, v0); _mm256_fmadd_ps(b, m2, 
v1) } else { let v0 = _mm256_mul_ps(r, m0); let v1 = _mm256_mul_ps(g, m1); let v2 = _mm256_mul_ps(b, m2); _mm256_add_ps(_mm256_add_ps(v0, v1), v2) }; v = _mm256_max_ps(v, _mm256_setzero_ps()); v = _mm256_mul_ps(v, v_scale); v = _mm256_min_ps(v, v_scale); let zx = _mm256_cvtps_epi32(v); _mm256_store_si256(temporary0.0.as_mut_ptr() as *mut _, zx); r0 = _mm_broadcast_ss(&self.profile.r_linear[src[src_cn.r_i()]._as_usize()]); g0 = _mm_broadcast_ss(&self.profile.g_linear[src[src_cn.g_i()]._as_usize()]); b0 = _mm_broadcast_ss(&self.profile.b_linear[src[src_cn.b_i()]._as_usize()]); r1 = _mm_broadcast_ss( &self.profile.r_linear[src[src_cn.r_i() + src_channels]._as_usize()], ); g1 = _mm_broadcast_ss( &self.profile.g_linear[src[src_cn.g_i() + src_channels]._as_usize()], ); b1 = _mm_broadcast_ss( &self.profile.b_linear[src[src_cn.b_i() + src_channels]._as_usize()], ); dst[dst_cn.r_i()] = self.profile.r_gamma[temporary0.0[0] as usize]; dst[dst_cn.g_i()] = self.profile.g_gamma[temporary0.0[2] as usize]; dst[dst_cn.b_i()] = self.profile.b_gamma[temporary0.0[4] as usize]; if dst_channels == 4 { dst[dst_cn.a_i()] = a0; } dst[dst_cn.r_i() + dst_channels] = self.profile.r_gamma[temporary0.0[8] as usize]; dst[dst_cn.g_i() + dst_channels] = self.profile.g_gamma[temporary0.0[10] as usize]; dst[dst_cn.b_i() + dst_channels] = self.profile.b_gamma[temporary0.0[12] as usize]; if dst_channels == 4 { dst[dst_cn.a_i() + dst_channels] = a1; } a0 = if src_channels == 4 { src[src_cn.a_i()] } else { max_colors }; a1 = if src_channels == 4 { src[src_cn.a_i() + src_channels] } else { max_colors }; } if let Some(dst) = dst.chunks_exact_mut(dst_channels * 2).last() { let r = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(r0), r1); let g = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(g0), g1); let b = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(b0), b1); let mut v = if FMA { let v0 = _mm256_mul_ps(r, m0); let v1 = _mm256_fmadd_ps(g, m1, v0); _mm256_fmadd_ps(b, m2, v1) } else { let v0 = 
_mm256_mul_ps(r, m0); let v1 = _mm256_mul_ps(g, m1); let v2 = _mm256_mul_ps(b, m2); _mm256_add_ps(_mm256_add_ps(v0, v1), v2) }; v = _mm256_max_ps(v, _mm256_setzero_ps()); v = _mm256_mul_ps(v, v_scale); v = _mm256_min_ps(v, v_scale); let zx = _mm256_cvtps_epi32(v); _mm256_store_si256(temporary0.0.as_mut_ptr() as *mut _, zx); dst[dst_cn.r_i()] = self.profile.r_gamma[temporary0.0[0] as usize]; dst[dst_cn.g_i()] = self.profile.g_gamma[temporary0.0[2] as usize]; dst[dst_cn.b_i()] = self.profile.b_gamma[temporary0.0[4] as usize]; if dst_channels == 4 { dst[dst_cn.a_i()] = a0; } dst[dst_cn.r_i() + dst_channels] = self.profile.r_gamma[temporary0.0[8] as usize]; dst[dst_cn.g_i() + dst_channels] = self.profile.g_gamma[temporary0.0[10] as usize]; dst[dst_cn.b_i() + dst_channels] = self.profile.b_gamma[temporary0.0[12] as usize]; if dst_channels == 4 { dst[dst_cn.a_i() + dst_channels] = a1; } } src = src.chunks_exact(src_channels * 2).remainder(); dst = dst.chunks_exact_mut(dst_channels * 2).into_remainder(); for (src, dst) in src .chunks_exact(src_channels) .zip(dst.chunks_exact_mut(dst_channels)) { let r = _mm_broadcast_ss(&self.profile.r_linear[src[src_cn.r_i()]._as_usize()]); let g = _mm_broadcast_ss(&self.profile.g_linear[src[src_cn.g_i()]._as_usize()]); let b = _mm_broadcast_ss(&self.profile.b_linear[src[src_cn.b_i()]._as_usize()]); let a = if src_channels == 4 { src[src_cn.a_i()] } else { max_colors }; let mut v = if FMA { let v0 = _mm_mul_ps(r, _mm256_castps256_ps128(m0)); let v1 = _mm_fmadd_ps(g, _mm256_castps256_ps128(m1), v0); _mm_fmadd_ps(b, _mm256_castps256_ps128(m2), v1) } else { let v0 = _mm_mul_ps(r, _mm256_castps256_ps128(m0)); let v1 = _mm_mul_ps(g, _mm256_castps256_ps128(m1)); let v2 = _mm_mul_ps(b, _mm256_castps256_ps128(m2)); _mm_add_ps(_mm_add_ps(v0, v1), v2) }; v = _mm_max_ps(v, zeros); v = _mm_mul_ps(v, _mm256_castps256_ps128(v_scale)); v = _mm_min_ps(v, _mm256_castps256_ps128(v_scale)); let zx = _mm_cvtps_epi32(v); 
_mm_store_si128(temporary0.0.as_mut_ptr() as *mut _, zx); dst[dst_cn.r_i()] = self.profile.r_gamma[temporary0.0[0] as usize]; dst[dst_cn.g_i()] = self.profile.g_gamma[temporary0.0[2] as usize]; dst[dst_cn.b_i()] = self.profile.b_gamma[temporary0.0[4] as usize]; if dst_channels == 4 { dst[dst_cn.a_i()] = a; } } } Ok(()) } } impl< T: Clone + Copy + 'static + PointeeSizeExpressible + Default, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, const LINEAR_CAP: usize, > TransformExecutor for TransformShaperRgbAvx where u32: AsPrimitive, { fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> { unsafe { assert!(std::arch::is_x86_feature_detected!("fma")); self.transform_impl::(src, dst) } } } moxcms-0.7.7/src/conversions/avx/rgb_xyz_opt.rs000064400000000000000000000315421046102023000200230ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ use crate::conversions::avx::rgb_xyz::AvxAlignedU16; use crate::conversions::rgbxyz::TransformMatrixShaperOptimizedV; use crate::transform::PointeeSizeExpressible; use crate::{CmsError, Layout, TransformExecutor}; use num_traits::AsPrimitive; use std::arch::x86_64::*; pub(crate) struct TransformShaperRgbOptAvx< T: Clone + Copy + 'static + PointeeSizeExpressible + Default, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, > { pub(crate) profile: TransformMatrixShaperOptimizedV, pub(crate) bit_depth: usize, pub(crate) gamma_lut: usize, } impl< T: Clone + Copy + 'static + PointeeSizeExpressible + Default, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, > TransformShaperRgbOptAvx where u32: AsPrimitive, { #[target_feature(enable = "avx2", enable = "fma")] unsafe fn transform_impl( &self, src: &[T], dst: &mut [T], ) -> Result<(), CmsError> { let src_cn = Layout::from(SRC_LAYOUT); let dst_cn = Layout::from(DST_LAYOUT); let src_channels = src_cn.channels(); let dst_channels = dst_cn.channels(); let mut temporary0 = AvxAlignedU16([0; 16]); if src.len() / src_channels != dst.len() / dst_channels { return Err(CmsError::LaneSizeMismatch); } if src.len() % src_channels != 0 { return Err(CmsError::LaneMultipleOfChannels); } if dst.len() % dst_channels != 0 { return Err(CmsError::LaneMultipleOfChannels); } let t = self.profile.adaptation_matrix.transpose(); let scale = (self.gamma_lut - 1) as f32; let max_colors: T = ((1 << self.bit_depth) - 1).as_(); 
// safety precondition for linearization table if T::FINITE { let cap = (1 << self.bit_depth) - 1; assert!(self.profile.linear.len() >= cap); } else { assert!(self.profile.linear.len() >= T::NOT_FINITE_LINEAR_TABLE_SIZE); } let lut_lin = &self.profile.linear; unsafe { let m0 = _mm256_setr_ps( t.v[0][0], t.v[0][1], t.v[0][2], 0., t.v[0][0], t.v[0][1], t.v[0][2], 0., ); let m1 = _mm256_setr_ps( t.v[1][0], t.v[1][1], t.v[1][2], 0., t.v[1][0], t.v[1][1], t.v[1][2], 0., ); let m2 = _mm256_setr_ps( t.v[2][0], t.v[2][1], t.v[2][2], 0., t.v[2][0], t.v[2][1], t.v[2][2], 0., ); let zeros = _mm_setzero_ps(); let v_scale = _mm256_set1_ps(scale); let mut src = src; let mut dst = dst; let mut src_iter = src.chunks_exact(src_channels * 2); let dst_iter = dst.chunks_exact_mut(dst_channels * 2); let (mut r0, mut g0, mut b0, mut a0); let (mut r1, mut g1, mut b1, mut a1); if let Some(src) = src_iter.next() { r0 = _mm_broadcast_ss(lut_lin.get_unchecked(src[src_cn.r_i()]._as_usize())); g0 = _mm_broadcast_ss(lut_lin.get_unchecked(src[src_cn.g_i()]._as_usize())); b0 = _mm_broadcast_ss(lut_lin.get_unchecked(src[src_cn.b_i()]._as_usize())); r1 = _mm_broadcast_ss( lut_lin.get_unchecked(src[src_cn.r_i() + src_channels]._as_usize()), ); g1 = _mm_broadcast_ss( lut_lin.get_unchecked(src[src_cn.g_i() + src_channels]._as_usize()), ); b1 = _mm_broadcast_ss( lut_lin.get_unchecked(src[src_cn.b_i() + src_channels]._as_usize()), ); a0 = if src_channels == 4 { src[src_cn.a_i()] } else { max_colors }; a1 = if src_channels == 4 { src[src_cn.a_i() + src_channels] } else { max_colors }; } else { r0 = _mm_setzero_ps(); g0 = _mm_setzero_ps(); b0 = _mm_setzero_ps(); a0 = max_colors; r1 = _mm_setzero_ps(); g1 = _mm_setzero_ps(); b1 = _mm_setzero_ps(); a1 = max_colors; } for (src, dst) in src_iter.zip(dst_iter) { let r = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(r0), r1); let g = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(g0), g1); let b = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(b0), b1); 
let mut v = if FMA { let v0 = _mm256_mul_ps(r, m0); let v1 = _mm256_fmadd_ps(g, m1, v0); _mm256_fmadd_ps(b, m2, v1) } else { let v0 = _mm256_mul_ps(r, m0); let v1 = _mm256_mul_ps(g, m1); let v2 = _mm256_mul_ps(b, m2); _mm256_add_ps(_mm256_add_ps(v0, v1), v2) }; v = _mm256_max_ps(v, _mm256_setzero_ps()); v = _mm256_mul_ps(v, v_scale); v = _mm256_min_ps(v, v_scale); let zx = _mm256_cvtps_epi32(v); _mm256_store_si256(temporary0.0.as_mut_ptr() as *mut _, zx); r0 = _mm_broadcast_ss(lut_lin.get_unchecked(src[src_cn.r_i()]._as_usize())); g0 = _mm_broadcast_ss(lut_lin.get_unchecked(src[src_cn.g_i()]._as_usize())); b0 = _mm_broadcast_ss(lut_lin.get_unchecked(src[src_cn.b_i()]._as_usize())); r1 = _mm_broadcast_ss( lut_lin.get_unchecked(src[src_cn.r_i() + src_channels]._as_usize()), ); g1 = _mm_broadcast_ss( lut_lin.get_unchecked(src[src_cn.g_i() + src_channels]._as_usize()), ); b1 = _mm_broadcast_ss( lut_lin.get_unchecked(src[src_cn.b_i() + src_channels]._as_usize()), ); dst[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize]; dst[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize]; dst[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize]; if dst_channels == 4 { dst[dst_cn.a_i()] = a0; } dst[dst_cn.r_i() + dst_channels] = self.profile.gamma[temporary0.0[8] as usize]; dst[dst_cn.g_i() + dst_channels] = self.profile.gamma[temporary0.0[10] as usize]; dst[dst_cn.b_i() + dst_channels] = self.profile.gamma[temporary0.0[12] as usize]; if dst_channels == 4 { dst[dst_cn.a_i() + dst_channels] = a1; } a0 = if src_channels == 4 { src[src_cn.a_i()] } else { max_colors }; a1 = if src_channels == 4 { src[src_cn.a_i() + src_channels] } else { max_colors }; } if let Some(dst) = dst.chunks_exact_mut(dst_channels * 2).last() { let r = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(r0), r1); let g = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(g0), g1); let b = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(b0), b1); let mut v = if FMA { let v0 = 
_mm256_mul_ps(r, m0); let v1 = _mm256_fmadd_ps(g, m1, v0); _mm256_fmadd_ps(b, m2, v1) } else { let v0 = _mm256_mul_ps(r, m0); let v1 = _mm256_mul_ps(g, m1); let v2 = _mm256_mul_ps(b, m2); _mm256_add_ps(_mm256_add_ps(v0, v1), v2) }; v = _mm256_max_ps(v, _mm256_setzero_ps()); v = _mm256_mul_ps(v, v_scale); v = _mm256_min_ps(v, v_scale); let zx = _mm256_cvtps_epi32(v); _mm256_store_si256(temporary0.0.as_mut_ptr() as *mut _, zx); dst[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize]; dst[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize]; dst[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize]; if dst_channels == 4 { dst[dst_cn.a_i()] = a0; } dst[dst_cn.r_i() + dst_channels] = self.profile.gamma[temporary0.0[8] as usize]; dst[dst_cn.g_i() + dst_channels] = self.profile.gamma[temporary0.0[10] as usize]; dst[dst_cn.b_i() + dst_channels] = self.profile.gamma[temporary0.0[12] as usize]; if dst_channels == 4 { dst[dst_cn.a_i() + dst_channels] = a1; } } src = src.chunks_exact(src_channels * 2).remainder(); dst = dst.chunks_exact_mut(dst_channels * 2).into_remainder(); for (src, dst) in src .chunks_exact(src_channels) .zip(dst.chunks_exact_mut(dst_channels)) { let r = _mm_broadcast_ss(lut_lin.get_unchecked(src[src_cn.r_i()]._as_usize())); let g = _mm_broadcast_ss(lut_lin.get_unchecked(src[src_cn.g_i()]._as_usize())); let b = _mm_broadcast_ss(lut_lin.get_unchecked(src[src_cn.b_i()]._as_usize())); let a = if src_channels == 4 { src[src_cn.a_i()] } else { max_colors }; let mut v = if FMA { let v0 = _mm_mul_ps(r, _mm256_castps256_ps128(m0)); let v1 = _mm_fmadd_ps(g, _mm256_castps256_ps128(m1), v0); _mm_fmadd_ps(b, _mm256_castps256_ps128(m2), v1) } else { let v0 = _mm_mul_ps(r, _mm256_castps256_ps128(m0)); let v1 = _mm_mul_ps(g, _mm256_castps256_ps128(m1)); let v2 = _mm_mul_ps(b, _mm256_castps256_ps128(m2)); _mm_add_ps(_mm_add_ps(v0, v1), v2) }; v = _mm_max_ps(v, zeros); v = _mm_mul_ps(v, _mm256_castps256_ps128(v_scale)); v = _mm_min_ps(v, 
_mm256_castps256_ps128(v_scale)); let zx = _mm_cvtps_epi32(v); _mm_store_si128(temporary0.0.as_mut_ptr() as *mut _, zx); dst[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize]; dst[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize]; dst[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize]; if dst_channels == 4 { dst[dst_cn.a_i()] = a; } } } Ok(()) } } impl< T: Clone + Copy + 'static + PointeeSizeExpressible + Default, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, > TransformExecutor for TransformShaperRgbOptAvx where u32: AsPrimitive, { fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> { unsafe { assert!(std::arch::is_x86_feature_detected!("fma")); self.transform_impl::(src, dst) } } } moxcms-0.7.7/src/conversions/avx/rgb_xyz_q2_13.rs000064400000000000000000000320251046102023000200430ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ use crate::conversions::avx::rgb_xyz::AvxAlignedU16; use crate::conversions::rgbxyz_fixed::TransformMatrixShaperFp; use crate::transform::PointeeSizeExpressible; use crate::{CmsError, Layout, TransformExecutor}; use num_traits::AsPrimitive; use std::arch::x86_64::*; pub(crate) struct TransformShaperRgbQ2_13Avx< T: Copy, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, const PRECISION: i32, > { pub(crate) profile: TransformMatrixShaperFp, pub(crate) bit_depth: usize, pub(crate) gamma_lut: usize, } #[inline(always)] pub(crate) unsafe fn _xmm_broadcast_epi32(f: &i32) -> __m128i { let float_ref: &f32 = unsafe { &*(f as *const i32 as *const f32) }; unsafe { _mm_castps_si128(_mm_broadcast_ss(float_ref)) } } impl< T: Copy + PointeeSizeExpressible + 'static, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, const PRECISION: i32, > TransformShaperRgbQ2_13Avx where u32: AsPrimitive, { #[target_feature(enable = "avx2")] unsafe fn transform_avx2(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> { let src_cn = Layout::from(SRC_LAYOUT); let dst_cn = Layout::from(DST_LAYOUT); let src_channels = src_cn.channels(); let dst_channels = dst_cn.channels(); let mut temporary0 = AvxAlignedU16([0; 16]); if src.len() / src_channels != dst.len() / dst_channels { return Err(CmsError::LaneSizeMismatch); } if src.len() % src_channels != 0 { return Err(CmsError::LaneMultipleOfChannels); } if dst.len() % dst_channels != 0 { return 
Err(CmsError::LaneMultipleOfChannels); } let t = self.profile.adaptation_matrix.transpose(); let max_colors = ((1 << self.bit_depth) - 1).as_(); // safety precondition for linearization table if T::FINITE { let cap = (1 << self.bit_depth) - 1; assert!(self.profile.r_linear.len() >= cap); assert!(self.profile.g_linear.len() >= cap); assert!(self.profile.b_linear.len() >= cap); } else { assert!(self.profile.r_linear.len() >= T::NOT_FINITE_LINEAR_TABLE_SIZE); assert!(self.profile.g_linear.len() >= T::NOT_FINITE_LINEAR_TABLE_SIZE); assert!(self.profile.b_linear.len() >= T::NOT_FINITE_LINEAR_TABLE_SIZE); } let r_lin = &self.profile.r_linear; let g_lin = &self.profile.g_linear; let b_lin = &self.profile.b_linear; unsafe { let m0 = _mm256_setr_epi16( t.v[0][0], t.v[1][0], t.v[0][1], t.v[1][1], t.v[0][2], t.v[1][2], 0, 0, t.v[0][0], t.v[1][0], t.v[0][1], t.v[1][1], t.v[0][2], t.v[1][2], 0, 0, ); let m2 = _mm256_setr_epi16( t.v[2][0], 1, t.v[2][1], 1, t.v[2][2], 1, 0, 0, t.v[2][0], 1, t.v[2][1], 1, t.v[2][2], 1, 0, 0, ); let rnd_val = ((1i32 << (PRECISION - 1)) as i16).to_ne_bytes(); let rnd = _mm256_set1_epi32(i32::from_ne_bytes([0, 0, rnd_val[0], rnd_val[1]])); let zeros = _mm256_setzero_si256(); let v_max_value = _mm256_set1_epi32(self.gamma_lut as i32 - 1); let mut src = src; let mut dst = dst; let mut src_iter = src.chunks_exact(src_channels * 2); let dst_iter = dst.chunks_exact_mut(dst_channels * 2); let (mut r0, mut g0, mut b0, mut a0); let (mut r1, mut g1, mut b1, mut a1); if let Some(src) = src_iter.next() { r0 = _xmm_broadcast_epi32(r_lin.get_unchecked(src[src_cn.r_i()]._as_usize())); g0 = _xmm_broadcast_epi32(g_lin.get_unchecked(src[src_cn.g_i()]._as_usize())); b0 = _xmm_broadcast_epi32(b_lin.get_unchecked(src[src_cn.b_i()]._as_usize())); r1 = _xmm_broadcast_epi32( r_lin.get_unchecked(src[src_cn.r_i() + src_channels]._as_usize()), ); g1 = _xmm_broadcast_epi32( g_lin.get_unchecked(src[src_cn.g_i() + src_channels]._as_usize()), ); b1 = _xmm_broadcast_epi32( 
b_lin.get_unchecked(src[src_cn.b_i() + src_channels]._as_usize()), ); a0 = if src_channels == 4 { src[src_cn.a_i()] } else { max_colors }; a1 = if src_channels == 4 { src[src_cn.a_i() + src_channels] } else { max_colors }; } else { r0 = _mm_setzero_si128(); g0 = _mm_setzero_si128(); b0 = _mm_setzero_si128(); a0 = max_colors; r1 = _mm_setzero_si128(); g1 = _mm_setzero_si128(); b1 = _mm_setzero_si128(); a1 = max_colors; } for (src, dst) in src_iter.zip(dst_iter) { let zr0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(r0), r1); let mut zg0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(g0), g1); let zb0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(b0), b1); zg0 = _mm256_slli_epi32::<16>(zg0); let zrg0 = _mm256_or_si256(zr0, zg0); let zbz0 = _mm256_or_si256(zb0, rnd); let va0 = _mm256_madd_epi16(zrg0, m0); let va1 = _mm256_madd_epi16(zbz0, m2); let mut v0 = _mm256_add_epi32(va0, va1); v0 = _mm256_srai_epi32::(v0); v0 = _mm256_max_epi32(v0, zeros); v0 = _mm256_min_epi32(v0, v_max_value); _mm256_store_si256(temporary0.0.as_mut_ptr() as *mut _, v0); r0 = _xmm_broadcast_epi32(r_lin.get_unchecked(src[src_cn.r_i()]._as_usize())); g0 = _xmm_broadcast_epi32(g_lin.get_unchecked(src[src_cn.g_i()]._as_usize())); b0 = _xmm_broadcast_epi32(b_lin.get_unchecked(src[src_cn.b_i()]._as_usize())); r1 = _xmm_broadcast_epi32( r_lin.get_unchecked(src[src_cn.r_i() + src_channels]._as_usize()), ); g1 = _xmm_broadcast_epi32( g_lin.get_unchecked(src[src_cn.g_i() + src_channels]._as_usize()), ); b1 = _xmm_broadcast_epi32( b_lin.get_unchecked(src[src_cn.b_i() + src_channels]._as_usize()), ); dst[dst_cn.r_i()] = self.profile.r_gamma[temporary0.0[0] as usize]; dst[dst_cn.g_i()] = self.profile.g_gamma[temporary0.0[2] as usize]; dst[dst_cn.b_i()] = self.profile.b_gamma[temporary0.0[4] as usize]; if dst_channels == 4 { dst[dst_cn.a_i()] = a0; } dst[dst_cn.r_i() + dst_channels] = self.profile.r_gamma[temporary0.0[8] as usize]; dst[dst_cn.g_i() + dst_channels] = 
self.profile.g_gamma[temporary0.0[10] as usize]; dst[dst_cn.b_i() + dst_channels] = self.profile.b_gamma[temporary0.0[12] as usize]; if dst_channels == 4 { dst[dst_cn.a_i() + dst_channels] = a1; } a0 = if src_channels == 4 { src[src_cn.a_i()] } else { max_colors }; a1 = if src_channels == 4 { src[src_cn.a_i() + src_channels] } else { max_colors }; } if let Some(dst) = dst.chunks_exact_mut(dst_channels * 2).last() { let zr0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(r0), r1); let mut zg0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(g0), g1); let zb0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(b0), b1); zg0 = _mm256_slli_epi32::<16>(zg0); let zrg0 = _mm256_or_si256(zr0, zg0); let zbz0 = _mm256_or_si256(zb0, rnd); let va0 = _mm256_madd_epi16(zrg0, m0); let va1 = _mm256_madd_epi16(zbz0, m2); let mut v0 = _mm256_add_epi32(va0, va1); v0 = _mm256_srai_epi32::(v0); v0 = _mm256_max_epi32(v0, zeros); v0 = _mm256_min_epi32(v0, v_max_value); _mm256_store_si256(temporary0.0.as_mut_ptr() as *mut _, v0); dst[dst_cn.r_i()] = self.profile.r_gamma[temporary0.0[0] as usize]; dst[dst_cn.g_i()] = self.profile.g_gamma[temporary0.0[2] as usize]; dst[dst_cn.b_i()] = self.profile.b_gamma[temporary0.0[4] as usize]; if dst_channels == 4 { dst[dst_cn.a_i()] = a0; } dst[dst_cn.r_i() + dst_channels] = self.profile.r_gamma[temporary0.0[8] as usize]; dst[dst_cn.g_i() + dst_channels] = self.profile.g_gamma[temporary0.0[10] as usize]; dst[dst_cn.b_i() + dst_channels] = self.profile.b_gamma[temporary0.0[12] as usize]; if dst_channels == 4 { dst[dst_cn.a_i() + dst_channels] = a1; } } src = src.chunks_exact(src_channels * 2).remainder(); dst = dst.chunks_exact_mut(dst_channels * 2).into_remainder(); for (src, dst) in src .chunks_exact(src_channels) .zip(dst.chunks_exact_mut(dst_channels)) { let r = _xmm_broadcast_epi32(r_lin.get_unchecked(src[src_cn.r_i()]._as_usize())); let mut g = _xmm_broadcast_epi32(g_lin.get_unchecked(src[src_cn.g_i()]._as_usize())); let b = 
_xmm_broadcast_epi32(b_lin.get_unchecked(src[src_cn.b_i()]._as_usize())); g = _mm_slli_epi32::<16>(g); let a = if src_channels == 4 { src[src_cn.a_i()] } else { max_colors }; let zrg0 = _mm_or_si128(r, g); let zbz0 = _mm_or_si128(b, _mm256_castsi256_si128(rnd)); let v0 = _mm_madd_epi16(zrg0, _mm256_castsi256_si128(m0)); let v1 = _mm_madd_epi16(zbz0, _mm256_castsi256_si128(m2)); let mut v = _mm_add_epi32(v0, v1); v = _mm_srai_epi32::(v); v = _mm_max_epi32(v, _mm_setzero_si128()); v = _mm_min_epi32(v, _mm256_castsi256_si128(v_max_value)); _mm_store_si128(temporary0.0.as_mut_ptr() as *mut _, v); dst[dst_cn.r_i()] = self.profile.r_gamma[temporary0.0[0] as usize]; dst[dst_cn.g_i()] = self.profile.g_gamma[temporary0.0[2] as usize]; dst[dst_cn.b_i()] = self.profile.b_gamma[temporary0.0[4] as usize]; if dst_channels == 4 { dst[dst_cn.a_i()] = a; } } } Ok(()) } } impl< T: Copy + PointeeSizeExpressible + 'static + Default, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, const PRECISION: i32, > TransformExecutor for TransformShaperRgbQ2_13Avx where u32: AsPrimitive, { fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> { unsafe { self.transform_avx2(src, dst) } } } moxcms-0.7.7/src/conversions/avx/rgb_xyz_q2_13_opt.rs000064400000000000000000000307171046102023000207330ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. 
Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ use crate::conversions::avx::rgb_xyz::AvxAlignedU16; use crate::conversions::avx::rgb_xyz_q2_13::_xmm_broadcast_epi32; use crate::conversions::rgbxyz_fixed::TransformMatrixShaperFpOptVec; use crate::transform::PointeeSizeExpressible; use crate::{CmsError, Layout, TransformExecutor}; use num_traits::AsPrimitive; use std::arch::x86_64::*; pub(crate) struct TransformShaperRgbQ2_13OptAvx< T: Copy, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, const PRECISION: i32, > { pub(crate) profile: TransformMatrixShaperFpOptVec, pub(crate) bit_depth: usize, pub(crate) gamma_lut: usize, } impl< T: Copy + PointeeSizeExpressible + 'static, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, const PRECISION: i32, > TransformShaperRgbQ2_13OptAvx where u32: AsPrimitive, { #[target_feature(enable = "avx2")] unsafe fn transform_avx2(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> { let src_cn = Layout::from(SRC_LAYOUT); let dst_cn = Layout::from(DST_LAYOUT); let src_channels = src_cn.channels(); let dst_channels = dst_cn.channels(); let mut temporary0 = AvxAlignedU16([0; 16]); if src.len() / src_channels != dst.len() / dst_channels { return Err(CmsError::LaneSizeMismatch); } if src.len() % src_channels != 0 { return Err(CmsError::LaneMultipleOfChannels); } if dst.len() % dst_channels != 0 { return Err(CmsError::LaneMultipleOfChannels); } let t = self.profile.adaptation_matrix.transpose(); let max_colors = ((1 << self.bit_depth) - 1).as_(); // safety precondition for linearization table if T::FINITE { let cap = (1 << self.bit_depth) - 1; assert!(self.profile.linear.len() >= cap); } else { assert!(self.profile.linear.len() >= T::NOT_FINITE_LINEAR_TABLE_SIZE); } let lut_lin = &self.profile.linear; unsafe { let m0 = _mm256_setr_epi16( t.v[0][0], t.v[1][0], t.v[0][1], t.v[1][1], t.v[0][2], t.v[1][2], 0, 0, t.v[0][0], t.v[1][0], t.v[0][1], t.v[1][1], t.v[0][2], t.v[1][2], 0, 0, ); let m2 = _mm256_setr_epi16( t.v[2][0], 1, t.v[2][1], 1, t.v[2][2], 1, 0, 0, t.v[2][0], 1, t.v[2][1], 1, 
t.v[2][2], 1, 0, 0, ); let rnd_val = ((1i32 << (PRECISION - 1)) as i16).to_ne_bytes(); let rnd = _mm256_set1_epi32(i32::from_ne_bytes([0, 0, rnd_val[0], rnd_val[1]])); let zeros = _mm256_setzero_si256(); let v_max_value = _mm256_set1_epi32(self.gamma_lut as i32 - 1); let (mut r0, mut g0, mut b0, mut a0); let (mut r1, mut g1, mut b1, mut a1); let mut src_iter = src.chunks_exact(src_channels * 2); if let Some(src0) = src_iter.next() { r0 = _xmm_broadcast_epi32(lut_lin.get_unchecked(src0[src_cn.r_i()]._as_usize())); g0 = _xmm_broadcast_epi32(lut_lin.get_unchecked(src0[src_cn.g_i()]._as_usize())); b0 = _xmm_broadcast_epi32(lut_lin.get_unchecked(src0[src_cn.b_i()]._as_usize())); r1 = _xmm_broadcast_epi32( lut_lin.get_unchecked(src0[src_cn.r_i() + src_channels]._as_usize()), ); g1 = _xmm_broadcast_epi32( lut_lin.get_unchecked(src0[src_cn.g_i() + src_channels]._as_usize()), ); b1 = _xmm_broadcast_epi32( lut_lin.get_unchecked(src0[src_cn.b_i() + src_channels]._as_usize()), ); a0 = if src_channels == 4 { src0[src_cn.a_i()] } else { max_colors }; a1 = if src_channels == 4 { src0[src_cn.a_i() + src_channels] } else { max_colors }; } else { r0 = _mm_setzero_si128(); g0 = _mm_setzero_si128(); b0 = _mm_setzero_si128(); a0 = max_colors; r1 = _mm_setzero_si128(); g1 = _mm_setzero_si128(); b1 = _mm_setzero_si128(); a1 = max_colors; } for (src, dst) in src_iter.zip(dst.chunks_exact_mut(dst_channels * 2)) { let zr0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(r0), r1); let mut zg0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(g0), g1); let zb0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(b0), b1); zg0 = _mm256_slli_epi32::<16>(zg0); let zrg0 = _mm256_or_si256(zr0, zg0); let zbz0 = _mm256_or_si256(zb0, rnd); let va0 = _mm256_madd_epi16(zrg0, m0); let va1 = _mm256_madd_epi16(zbz0, m2); let mut v0 = _mm256_add_epi32(va0, va1); v0 = _mm256_srai_epi32::(v0); v0 = _mm256_max_epi32(v0, zeros); v0 = _mm256_min_epi32(v0, v_max_value); 
_mm256_store_si256(temporary0.0.as_mut_ptr() as *mut _, v0); r0 = _xmm_broadcast_epi32(lut_lin.get_unchecked(src[src_cn.r_i()]._as_usize())); g0 = _xmm_broadcast_epi32(lut_lin.get_unchecked(src[src_cn.g_i()]._as_usize())); b0 = _xmm_broadcast_epi32(lut_lin.get_unchecked(src[src_cn.b_i()]._as_usize())); r1 = _xmm_broadcast_epi32( lut_lin.get_unchecked(src[src_cn.r_i() + src_channels]._as_usize()), ); g1 = _xmm_broadcast_epi32( lut_lin.get_unchecked(src[src_cn.g_i() + src_channels]._as_usize()), ); b1 = _xmm_broadcast_epi32( lut_lin.get_unchecked(src[src_cn.b_i() + src_channels]._as_usize()), ); dst[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize]; dst[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize]; dst[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize]; if dst_channels == 4 { dst[dst_cn.a_i()] = a0; } dst[dst_cn.r_i() + dst_channels] = self.profile.gamma[temporary0.0[8] as usize]; dst[dst_cn.g_i() + dst_channels] = self.profile.gamma[temporary0.0[10] as usize]; dst[dst_cn.b_i() + dst_channels] = self.profile.gamma[temporary0.0[12] as usize]; if dst_channels == 4 { dst[dst_cn.a_i() + dst_channels] = a1; } a0 = if src_channels == 4 { src[src_cn.a_i()] } else { max_colors }; a1 = if src_channels == 4 { src[src_cn.a_i() + src_channels] } else { max_colors }; } if let Some(dst) = dst.chunks_exact_mut(dst_channels * 2).last() { let zr0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(r0), r1); let mut zg0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(g0), g1); let zb0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(b0), b1); zg0 = _mm256_slli_epi32::<16>(zg0); let zrg0 = _mm256_or_si256(zr0, zg0); let zbz0 = _mm256_or_si256(zb0, rnd); let va0 = _mm256_madd_epi16(zrg0, m0); let va1 = _mm256_madd_epi16(zbz0, m2); let mut v0 = _mm256_add_epi32(va0, va1); v0 = _mm256_srai_epi32::(v0); v0 = _mm256_max_epi32(v0, zeros); v0 = _mm256_min_epi32(v0, v_max_value); _mm256_store_si256(temporary0.0.as_mut_ptr() as *mut _, v0); 
dst[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize]; dst[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize]; dst[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize]; if dst_channels == 4 { dst[dst_cn.a_i()] = a0; } dst[dst_cn.r_i() + dst_channels] = self.profile.gamma[temporary0.0[8] as usize]; dst[dst_cn.g_i() + dst_channels] = self.profile.gamma[temporary0.0[10] as usize]; dst[dst_cn.b_i() + dst_channels] = self.profile.gamma[temporary0.0[12] as usize]; if dst_channels == 4 { dst[dst_cn.a_i() + dst_channels] = a1; } } let src = src.chunks_exact(src_channels * 2).remainder(); let dst = dst.chunks_exact_mut(dst_channels * 2).into_remainder(); for (src, dst) in src .chunks_exact(src_channels) .zip(dst.chunks_exact_mut(dst_channels)) { let r = _xmm_broadcast_epi32(lut_lin.get_unchecked(src[src_cn.r_i()]._as_usize())); let mut g = _xmm_broadcast_epi32(lut_lin.get_unchecked(src[src_cn.g_i()]._as_usize())); let b = _xmm_broadcast_epi32(lut_lin.get_unchecked(src[src_cn.b_i()]._as_usize())); g = _mm_slli_epi32::<16>(g); let a = if src_channels == 4 { src[src_cn.a_i()] } else { max_colors }; let zrg0 = _mm_or_si128(r, g); let zbz0 = _mm_or_si128(b, _mm256_castsi256_si128(rnd)); let v0 = _mm_madd_epi16(zrg0, _mm256_castsi256_si128(m0)); let v1 = _mm_madd_epi16(zbz0, _mm256_castsi256_si128(m2)); let mut v = _mm_add_epi32(v0, v1); v = _mm_srai_epi32::(v); v = _mm_max_epi32(v, _mm_setzero_si128()); v = _mm_min_epi32(v, _mm256_castsi256_si128(v_max_value)); _mm_store_si128(temporary0.0.as_mut_ptr() as *mut _, v); dst[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize]; dst[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize]; dst[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize]; if dst_channels == 4 { dst[dst_cn.a_i()] = a; } } } Ok(()) } } impl< T: Copy + PointeeSizeExpressible + 'static + Default, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, const PRECISION: i32, > TransformExecutor for TransformShaperRgbQ2_13OptAvx 
where u32: AsPrimitive, { fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> { unsafe { self.transform_avx2(src, dst) } } } moxcms-0.7.7/src/conversions/avx/t_lut3_to_3.rs000064400000000000000000000307621046102023000176160ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/
use crate::conversions::LutBarycentricReduction;
use crate::conversions::avx::interpolator::*;
use crate::conversions::avx::interpolator_q0_15::AvxAlignedI16;
use crate::conversions::avx::t_lut3_to_3_q0_15::TransformLut3x3AvxQ0_15;
use crate::conversions::interpolator::BarycentricWeight;
use crate::conversions::lut_transforms::Lut3x3Factory;
use crate::transform::PointeeSizeExpressible;
use crate::{
    BarycentricWeightScale, CmsError, DataColorSpace, InterpolationMethod, Layout,
    TransformExecutor, TransformOptions,
};
use num_traits::AsPrimitive;
use std::arch::x86_64::*;
use std::marker::PhantomData;

/// Three-channel to three-channel LUT transform evaluated in `f32` on the AVX2 + FMA path.
///
/// The factory below fills `lut` with one `SseAlignedF32([c0, c1, c2, 0])` entry per
/// three input values and chooses the `weights` table according to the requested
/// barycentric weight scale.
struct TransformLut3x3AvxFma<
    T,
    U,
    const SRC_LAYOUT: u8,
    const DST_LAYOUT: u8,
    const GRID_SIZE: usize,
    const BIT_DEPTH: usize,
    const BINS: usize,
    const BARYCENTRIC_BINS: usize,
> {
    // Interpolation table, passed by reference to the interpolator for every pixel.
    lut: Vec,
    _phantom: PhantomData,
    _phantom2: PhantomData,
    // Interpolation scheme used when the colour space does not force trilinear.
    interpolation_method: InterpolationMethod,
    // Per-bin weights handed to the interpolator as a slice.
    weights: Box<[BarycentricWeight; BINS]>,
    // Consulted together with `is_linear` in `transform` to force trilinear interpolation.
    color_space: DataColorSpace,
    is_linear: bool,
}

impl<
    T: Copy + AsPrimitive + Default + PointeeSizeExpressible,
    U: AsPrimitive,
    const SRC_LAYOUT: u8,
    const DST_LAYOUT: u8,
    const GRID_SIZE: usize,
    const BIT_DEPTH: usize,
    const BINS: usize,
    const BARYCENTRIC_BINS: usize,
> TransformLut3x3AvxFma
where
    f32: AsPrimitive,
    u32: AsPrimitive,
    (): LutBarycentricReduction,
{
    /// Converts every pixel of `src` into `dst` using the supplied interpolator.
    ///
    /// For each pixel the three colour channels are reduced to bin indices, the
    /// interpolator is evaluated over `self.lut`, and the result is written to the
    /// destination colour channels. Alpha is taken from the source when it has four
    /// channels, otherwise `(1 << BIT_DEPTH) - 1` is used; it is written only when
    /// the destination has four channels.
    ///
    /// Length validation is done by the caller (`transform`); pixels are walked with
    /// `chunks_exact`, so trailing partial pixels are ignored here.
    ///
    /// # Safety
    ///
    /// Compiled with `avx2` and `fma` enabled; the caller must ensure the running
    /// CPU supports both.
    #[allow(unused_unsafe)]
    #[target_feature(enable = "avx2", enable = "fma")]
    unsafe fn transform_chunk(
        &self,
        src: &[T],
        dst: &mut [T],
        interpolator: Box,
    ) {
        let src_cn = Layout::from(SRC_LAYOUT);
        let src_channels = src_cn.channels();

        let dst_cn = Layout::from(DST_LAYOUT);
        let dst_channels = dst_cn.channels();

        // Largest representable value for BIT_DEPTH: output scale and upper clamp.
        let value_scale = unsafe { _mm_set1_ps(((1 << BIT_DEPTH) - 1) as f32) };
        // Alpha written when the source layout carries no alpha channel.
        let max_value = ((1u32 << BIT_DEPTH) - 1).as_();

        for (src, dst) in src
            .chunks_exact(src_channels)
            .zip(dst.chunks_exact_mut(dst_channels))
        {
            let x = <() as LutBarycentricReduction>::reduce::(
                src[src_cn.r_i()],
            );
            let y = <() as LutBarycentricReduction>::reduce::(
                src[src_cn.g_i()],
            );
            let z = <() as LutBarycentricReduction>::reduce::(
                src[src_cn.b_i()],
            );

            let a = if src_channels == 4 {
                src[src_cn.a_i()]
            } else {
                max_value
            };

            let v = interpolator.inter3_sse(
                &self.lut,
                x.as_(),
                y.as_(),
                z.as_(),
                self.weights.as_slice(),
            );
            if T::FINITE {
                unsafe {
                    // Scale, clamp to [0, value_scale], then convert to integers.
                    let mut r = _mm_mul_ps(v.v, value_scale);
                    r = _mm_max_ps(r, _mm_setzero_ps());
                    r = _mm_min_ps(r, value_scale);
                    let jvz = _mm_cvtps_epi32(r);

                    let x = _mm_extract_epi32::<0>(jvz);
                    let y = _mm_extract_epi32::<1>(jvz);
                    let z = _mm_extract_epi32::<2>(jvz);

                    dst[dst_cn.r_i()] = (x as u32).as_();
                    dst[dst_cn.g_i()] = (y as u32).as_();
                    dst[dst_cn.b_i()] = (z as u32).as_();
                }
            } else {
                unsafe {
                    // No scaling or clamping on this branch: the raw lane bits are
                    // reinterpreted as f32 and stored.
                    dst[dst_cn.r_i()] = f32::from_bits(_mm_extract_ps::<0>(v.v) as u32).as_();
                    dst[dst_cn.g_i()] = f32::from_bits(_mm_extract_ps::<1>(v.v) as u32).as_();
                    dst[dst_cn.b_i()] = f32::from_bits(_mm_extract_ps::<2>(v.v) as u32).as_();
                }
            }
            if dst_channels == 4 {
                dst[dst_cn.a_i()] = a;
            }
        }
    }
}

impl<
    T: Copy + AsPrimitive + Default + PointeeSizeExpressible,
    U: AsPrimitive,
    const SRC_LAYOUT: u8,
    const DST_LAYOUT: u8,
    const GRID_SIZE: usize,
    const BIT_DEPTH: usize,
    const BINS: usize,
    const BARYCENTRIC_BINS: usize,
> TransformExecutor
    for TransformLut3x3AvxFma<
        T,
        U,
        SRC_LAYOUT,
        DST_LAYOUT,
        GRID_SIZE,
        BIT_DEPTH,
        BINS,
        BARYCENTRIC_BINS,
    >
where
    f32: AsPrimitive,
    u32: AsPrimitive,
    (): LutBarycentricReduction,
{
    /// Validates the buffers and runs the LUT transform over them.
    ///
    /// # Errors
    ///
    /// * `CmsError::LaneMultipleOfChannels` when either slice length is not a
    ///   multiple of its layout's channel count.
    /// * `CmsError::LaneSizeMismatch` when the two slices hold different pixel counts.
    fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
        let src_cn = Layout::from(SRC_LAYOUT);
        let src_channels = src_cn.channels();

        let dst_cn = Layout::from(DST_LAYOUT);
        let dst_channels = dst_cn.channels();
        if src.len() % src_channels != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }
        if dst.len() % dst_channels != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }
        let src_chunks = src.len() / src_channels;
        let dst_chunks = dst.len() / dst_channels;
        if src_chunks != dst_chunks {
            return Err(CmsError::LaneSizeMismatch);
        }

        unsafe {
            // Lab, XYZ and linear RGB always use trilinear interpolation, whatever
            // `interpolation_method` says.
            if self.color_space == DataColorSpace::Lab
                || (self.is_linear && self.color_space == DataColorSpace::Rgb)
                || self.color_space == DataColorSpace::Xyz
            {
                self.transform_chunk(src, dst, Box::new(TrilinearAvxFma:: {}));
            } else {
                match self.interpolation_method {
                    #[cfg(feature = "options")]
                    InterpolationMethod::Tetrahedral => {
                        self.transform_chunk(src, dst, Box::new(TetrahedralAvxFma:: {}));
                    }
                    #[cfg(feature = "options")]
                    InterpolationMethod::Pyramid => {
                        self.transform_chunk(src, dst, Box::new(PyramidalAvxFma:: {}));
                    }
                    #[cfg(feature = "options")]
                    InterpolationMethod::Prism => {
                        self.transform_chunk(src, dst, Box::new(PrismaticAvxFma:: {}));
                    }
                    InterpolationMethod::Linear => {
                        self.transform_chunk(src, dst, Box::new(TrilinearAvxFma:: {}));
                    }
                }
            }
        }
        Ok(())
    }
}

/// Factory producing the AVX implementations of the 3x3 LUT transform.
pub(crate) struct AvxLut3x3Factory {}

impl Lut3x3Factory for AvxLut3x3Factory {
    /// Builds a boxed executor for the given LUT.
    ///
    /// When `options.prefer_fixed_point` is set and `BIT_DEPTH < 16`, the LUT is
    /// quantised to `i16` and a `TransformLut3x3AvxQ0_15` is returned; otherwise the
    /// `f32` `TransformLut3x3AvxFma` is returned. In both cases
    /// `options.barycentric_weight_scale` selects the weight table.
    ///
    /// # Panics
    ///
    /// Panics on the floating-point path if the CPU does not report `fma` support.
    fn make_transform_3x3<
        T: Copy + AsPrimitive + Default + PointeeSizeExpressible + 'static + Send + Sync,
        const SRC_LAYOUT: u8,
        const DST_LAYOUT: u8,
        const GRID_SIZE: usize,
        const BIT_DEPTH: usize,
    >(
        lut: Vec,
        options: TransformOptions,
        color_space: DataColorSpace,
        is_linear: bool,
    ) -> Box + Send + Sync>
    where
        f32: AsPrimitive,
        u32: AsPrimitive,
        (): LutBarycentricReduction,
        (): LutBarycentricReduction,
    {
        if options.prefer_fixed_point && BIT_DEPTH < 16 {
            // Quantisation factor: full BIT_DEPTH range when T::FINITE, else 14 bits.
            let q: f32 = if T::FINITE {
                ((1i32 << BIT_DEPTH as i32) - 1) as f32
            } else {
                ((1i32 << 14i32) - 1) as f32
            };
            // Repack each triple as four i16 values; the fourth is zero.
            let lut = lut
                .chunks_exact(3)
                .map(|x| {
                    AvxAlignedI16([
                        (x[0] * q).round() as i16,
                        (x[1] * q).round() as i16,
                        (x[2] * q).round() as i16,
                        0,
                    ])
                })
                .collect::>();
            return match options.barycentric_weight_scale {
                BarycentricWeightScale::Low => Box::new(TransformLut3x3AvxQ0_15::<
                    T,
                    u8,
                    SRC_LAYOUT,
                    DST_LAYOUT,
                    GRID_SIZE,
                    BIT_DEPTH,
                    256,
                    256,
                > {
                    lut,
                    _phantom: PhantomData,
                    _phantom2: PhantomData,
                    interpolation_method: options.interpolation_method,
                    weights: BarycentricWeight::::create_ranged_256::(),
                    color_space,
                    is_linear,
                }),
                #[cfg(feature = "options")]
                BarycentricWeightScale::High => Box::new(TransformLut3x3AvxQ0_15::<
                    T,
                    u16,
                    SRC_LAYOUT,
                    DST_LAYOUT,
                    GRID_SIZE,
                    BIT_DEPTH,
                    65536,
                    65536,
                > {
                    lut,
                    _phantom: PhantomData,
                    _phantom2: PhantomData,
                    interpolation_method: options.interpolation_method,
                    weights: BarycentricWeight::::create_binned::(),
                    color_space,
                    is_linear,
                }),
            };
        }
        // The f32 executor is compiled with `fma` enabled, so refuse to build it on a
        // CPU that lacks the feature.
        assert!(
            std::arch::is_x86_feature_detected!("fma"),
            "Internal configuration error, this might not be called without `fma` feature"
        );
        // Repack each triple as four f32 values; the fourth is zero.
        let lut = lut
            .chunks_exact(3)
            .map(|x| SseAlignedF32([x[0], x[1], x[2], 0f32]))
            .collect::>();
        match options.barycentric_weight_scale {
            BarycentricWeightScale::Low => Box::new(TransformLut3x3AvxFma::<
                T,
                u8,
                SRC_LAYOUT,
                DST_LAYOUT,
                GRID_SIZE,
                BIT_DEPTH,
                256,
                256,
            > {
                lut,
                _phantom: PhantomData,
                _phantom2: PhantomData,
                interpolation_method: options.interpolation_method,
                weights: BarycentricWeight::::create_ranged_256::(),
                color_space,
                is_linear,
            }),
            #[cfg(feature = "options")]
            BarycentricWeightScale::High => Box::new(TransformLut3x3AvxFma::<
                T,
                u16,
                SRC_LAYOUT,
                DST_LAYOUT,
                GRID_SIZE,
                BIT_DEPTH,
                65536,
                65536,
            > {
                lut,
                _phantom: PhantomData,
                _phantom2: PhantomData,
                interpolation_method: options.interpolation_method,
                weights: BarycentricWeight::::create_binned::(),
                color_space,
                is_linear,
            }),
        }
    }
}
moxcms-0.7.7/src/conversions/avx/t_lut3_to_3_q0_15.rs000064400000000000000000000207641046102023000205240ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3.
Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/
use crate::conversions::LutBarycentricReduction;
use crate::conversions::avx::interpolator_q0_15::*;
use crate::conversions::interpolator::BarycentricWeight;
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, DataColorSpace, InterpolationMethod, Layout, TransformExecutor};
use num_traits::AsPrimitive;
use std::arch::x86_64::*;
use std::marker::PhantomData;

/// Three-channel to three-channel LUT transform evaluated in 16-bit fixed point
/// on the AVX2 path.
///
/// Fields are `pub(crate)` so the struct can be built with a struct literal from
/// elsewhere in the crate.
pub(crate) struct TransformLut3x3AvxQ0_15<
    T,
    U,
    const SRC_LAYOUT: u8,
    const DST_LAYOUT: u8,
    const GRID_SIZE: usize,
    const BIT_DEPTH: usize,
    const BINS: usize,
    const BARYCENTRIC_BINS: usize,
> {
    // Interpolation table, passed by reference to the interpolator for every pixel.
    pub(crate) lut: Vec,
    pub(crate) _phantom: PhantomData,
    pub(crate) _phantom2: PhantomData,
    // Interpolation scheme used when the colour space does not force trilinear.
    pub(crate) interpolation_method: InterpolationMethod,
    // Per-bin weights handed to the interpolator as a slice.
    pub(crate) weights: Box<[BarycentricWeight; BINS]>,
    // Consulted together with `is_linear` in `transform` to force trilinear interpolation.
    pub(crate) color_space: DataColorSpace,
    pub(crate) is_linear: bool,
}

impl<
    T: Copy + AsPrimitive + Default + PointeeSizeExpressible,
    U: AsPrimitive,
    const SRC_LAYOUT: u8,
    const DST_LAYOUT: u8,
    const GRID_SIZE: usize,
    const BIT_DEPTH: usize,
    const BINS: usize,
    const BARYCENTRIC_BINS: usize,
>
    TransformLut3x3AvxQ0_15<
        T,
        U,
        SRC_LAYOUT,
        DST_LAYOUT,
        GRID_SIZE,
        BIT_DEPTH,
        BINS,
        BARYCENTRIC_BINS,
    >
where
    f32: AsPrimitive,
    u32: AsPrimitive,
    (): LutBarycentricReduction,
{
    /// Converts every pixel of `src` into `dst` using the supplied interpolator.
    ///
    /// For each pixel the three colour channels are reduced to bin indices and the
    /// interpolator is evaluated over `self.lut`. When `T::FINITE` the 16-bit
    /// result is clamped to `[0, (1 << BIT_DEPTH) - 1]` and stored as an integer;
    /// otherwise it is widened to `f32` and multiplied by `1 / ((1 << 14) - 1)`.
    /// Alpha is taken from the source when it has four channels, otherwise
    /// `(1 << BIT_DEPTH) - 1` is used; it is written only when the destination has
    /// four channels.
    ///
    /// Length validation is done by the caller (`transform`); pixels are walked with
    /// `chunks_exact`, so trailing partial pixels are ignored here.
    ///
    /// # Safety
    ///
    /// Compiled with `avx2` enabled; the caller must ensure the running CPU
    /// supports it.
    #[allow(unused_unsafe)]
    #[target_feature(enable = "avx2")]
    unsafe fn transform_chunk(
        &self,
        src: &[T],
        dst: &mut [T],
        interpolator: Box,
    ) {
        unsafe {
            let src_cn = Layout::from(SRC_LAYOUT);
            let src_channels = src_cn.channels();

            let dst_cn = Layout::from(DST_LAYOUT);
            let dst_channels = dst_cn.channels();

            // Factor applied to the widened result on the non-FINITE branch.
            let f_value_scale = _mm_set1_ps(1. / ((1 << 14i32) - 1) as f32);
            // Alpha written when the source layout carries no alpha channel.
            let max_value = ((1u32 << BIT_DEPTH) - 1).as_();
            // Upper clamp; only the FINITE branch below uses it.
            let v_max_scale = if T::FINITE {
                _mm_set1_epi16(((1i32 << BIT_DEPTH) - 1) as i16)
            } else {
                _mm_set1_epi16(((1i32 << 14i32) - 1) as i16)
            };

            for (src, dst) in src
                .chunks_exact(src_channels)
                .zip(dst.chunks_exact_mut(dst_channels))
            {
                let x = <() as LutBarycentricReduction>::reduce::(
                    src[src_cn.r_i()],
                );
                let y = <() as LutBarycentricReduction>::reduce::(
                    src[src_cn.g_i()],
                );
                let z = <() as LutBarycentricReduction>::reduce::(
                    src[src_cn.b_i()],
                );

                let a = if src_channels == 4 {
                    src[src_cn.a_i()]
                } else {
                    max_value
                };

                let v = interpolator.inter3_sse(
                    &self.lut,
                    x.as_(),
                    y.as_(),
                    z.as_(),
                    self.weights.as_slice(),
                );
                if T::FINITE {
                    // Clamp to [0, v_max_scale] and read the first three 16-bit lanes.
                    let mut o = _mm_max_epi16(v.v, _mm_setzero_si128());
                    o = _mm_min_epi16(o, v_max_scale);
                    let x = _mm_extract_epi16::<0>(o);
                    let y = _mm_extract_epi16::<1>(o);
                    let z = _mm_extract_epi16::<2>(o);

                    dst[dst_cn.r_i()] = (x as u32).as_();
                    dst[dst_cn.g_i()] = (y as u32).as_();
                    dst[dst_cn.b_i()] = (z as u32).as_();
                } else {
                    // Sign-extend to 32 bits, convert to f32 and rescale; this branch
                    // does not clamp.
                    let mut r = _mm_cvtepi32_ps(_mm_cvtepi16_epi32(v.v));
                    r = _mm_mul_ps(r, f_value_scale);
                    dst[dst_cn.r_i()] = f32::from_bits(_mm_extract_ps::<0>(r) as u32).as_();
                    dst[dst_cn.g_i()] = f32::from_bits(_mm_extract_ps::<1>(r) as u32).as_();
                    dst[dst_cn.b_i()] = f32::from_bits(_mm_extract_ps::<2>(r) as u32).as_();
                }
                if dst_channels == 4 {
                    dst[dst_cn.a_i()] = a;
                }
            }
        }
    }
}

impl<
    T: Copy + AsPrimitive + Default + PointeeSizeExpressible,
    U: AsPrimitive,
    const SRC_LAYOUT: u8,
    const DST_LAYOUT: u8,
    const GRID_SIZE: usize,
    const BIT_DEPTH: usize,
    const BINS: usize,
    const BARYCENTRIC_BINS: usize,
> TransformExecutor
    for TransformLut3x3AvxQ0_15<
        T,
        U,
        SRC_LAYOUT,
        DST_LAYOUT,
        GRID_SIZE,
        BIT_DEPTH,
        BINS,
        BARYCENTRIC_BINS,
    >
where
    f32: AsPrimitive,
    u32: AsPrimitive,
    (): LutBarycentricReduction,
{
    /// Validates the buffers and runs the fixed-point LUT transform over them.
    ///
    /// # Errors
    ///
    /// * `CmsError::LaneMultipleOfChannels` when either slice length is not a
    ///   multiple of its layout's channel count.
    /// * `CmsError::LaneSizeMismatch` when the two slices hold different pixel counts.
    fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
        let src_cn = Layout::from(SRC_LAYOUT);
        let src_channels = src_cn.channels();

        let dst_cn = Layout::from(DST_LAYOUT);
        let dst_channels = dst_cn.channels();
        if src.len() % src_channels != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }
        if dst.len() % dst_channels != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }
        let src_chunks = src.len() / src_channels;
        let dst_chunks = dst.len() / dst_channels;
        if src_chunks != dst_chunks {
            return Err(CmsError::LaneSizeMismatch);
        }

        unsafe {
            // Lab, XYZ and linear RGB always use trilinear interpolation, whatever
            // `interpolation_method` says.
            if self.color_space == DataColorSpace::Lab
                || (self.is_linear && self.color_space == DataColorSpace::Rgb)
                || self.color_space == DataColorSpace::Xyz
            {
                self.transform_chunk(src, dst, Box::new(TrilinearAvxQ0_15:: {}));
            } else {
                match self.interpolation_method {
                    #[cfg(feature = "options")]
                    InterpolationMethod::Tetrahedral => {
                        self.transform_chunk(
                            src,
                            dst,
                            Box::new(TetrahedralAvxQ0_15:: {}),
                        );
                    }
                    #[cfg(feature = "options")]
                    InterpolationMethod::Pyramid => {
                        self.transform_chunk(src, dst, Box::new(PyramidalAvxQ0_15:: {}));
                    }
                    #[cfg(feature = "options")]
                    InterpolationMethod::Prism => {
                        self.transform_chunk(src, dst, Box::new(PrismaticAvxQ0_15:: {}));
                    }
                    InterpolationMethod::Linear => {
                        self.transform_chunk(src, dst, Box::new(TrilinearAvxQ0_15:: {}));
                    }
                }
            }
        }
        Ok(())
    }
}
moxcms-0.7.7/src/conversions/avx512/mod.rs000064400000000000000000000034321046102023000164610ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 5/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3.
Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ mod rgb_xyz_opt; mod rgb_xyz_q2_13_opt; pub(crate) use rgb_xyz_opt::TransformShaperRgbOptAvx512; pub(crate) use rgb_xyz_q2_13_opt::TransformShaperRgbQ2_13OptAvx512; moxcms-0.7.7/src/conversions/avx512/rgb_xyz_opt.rs000064400000000000000000000454161046102023000202600ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 5/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. 
Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/
use crate::conversions::TransformMatrixShaperOptimized;
use crate::conversions::avx512::rgb_xyz_q2_13_opt::{
    AvxAlignedU16, split_by_twos, split_by_twos_mut,
};
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, Layout, TransformExecutor};
use num_traits::AsPrimitive;
use std::arch::x86_64::*;

/// Matrix-shaper RGB transform executor for the AVX-512 code path.
///
/// Each pixel's colour channels are looked up in `profile.linear`, multiplied by
/// the transposed `profile.adaptation_matrix`, scaled by `gamma_lut - 1` and used
/// as indices into `profile.gamma`.
pub(crate) struct TransformShaperRgbOptAvx512<
    T: Clone + Copy + 'static + PointeeSizeExpressible + Default,
    const SRC_LAYOUT: u8,
    const DST_LAYOUT: u8,
    const LINEAR_CAP: usize,
> {
    // Supplies the `linear` and `gamma` tables and the `adaptation_matrix`.
    pub(crate) profile: TransformMatrixShaperOptimized,
    // `(1 << bit_depth) - 1` is the alpha written when the source has no alpha channel.
    pub(crate) bit_depth: usize,
    // `gamma_lut - 1` is the scale and upper clamp applied before indexing `profile.gamma`.
    pub(crate) gamma_lut: usize,
}

impl<
    T: Clone + Copy + 'static + PointeeSizeExpressible + Default,
    const SRC_LAYOUT: u8,
    const DST_LAYOUT: u8,
    const LINEAR_CAP: usize,
> TransformShaperRgbOptAvx512
where
    u32: AsPrimitive,
{
    /// Transforms all pixels of `src` into `dst`.
    ///
    /// The bulk of the buffer (a multiple of four pixels, see `split_by_twos`) is
    /// split into two halves that are processed in lock-step, two pixels from each
    /// half per iteration. The loop is software-pipelined: the table lookups and
    /// alpha for the *next* pixel group are fetched while the results for the
    /// current group are being stored, and the final group is flushed after the
    /// loop. Leftover pixels are processed one at a time.
    ///
    /// Alpha is copied from the source when it has four channels, otherwise
    /// `(1 << bit_depth) - 1` is used; it is written only when the destination has
    /// four channels.
    ///
    /// # Errors
    ///
    /// * `CmsError::LaneSizeMismatch` when the slices hold different pixel counts.
    /// * `CmsError::LaneMultipleOfChannels` when either slice length is not a
    ///   multiple of its layout's channel count.
    ///
    /// # Safety
    ///
    /// Compiled with `avx512bw`, `avx512vl` and `fma` enabled; the caller must
    /// ensure the running CPU supports them.
    #[target_feature(enable = "avx512bw", enable = "avx512vl", enable = "fma")]
    unsafe fn transform_impl(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
        let src_cn = Layout::from(SRC_LAYOUT);
        let dst_cn = Layout::from(DST_LAYOUT);
        let src_channels = src_cn.channels();
        let dst_channels = dst_cn.channels();

        if src.len() / src_channels != dst.len() / dst_channels {
            return Err(CmsError::LaneSizeMismatch);
        }
        if src.len() % src_channels != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }
        if dst.len() % dst_channels != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }

        let t = self.profile.adaptation_matrix.transpose();

        let scale = (self.gamma_lut - 1) as f32;
        let max_colors: T = ((1 << self.bit_depth) - 1).as_();

        let (src_chunks, src_remainder) = split_by_twos(src, src_channels);
        let (dst_chunks, dst_remainder) = split_by_twos_mut(dst, dst_channels);

        // Aligned scratch buffers the converted 32-bit indices are stored into. Read
        // back as u16, the even slots are the low halves of those 32-bit lanes:
        // 0/2/4 for the first pixel, 8/10/12 for the second.
        let mut temporary0 = AvxAlignedU16([0; 16]);
        let mut temporary1 = AvxAlignedU16([0; 16]);

        unsafe {
            // Rows of the transposed matrix, duplicated into both 128-bit halves so two
            // pixels are multiplied at once.
            let m0 = _mm256_setr_ps(
                t.v[0][0], t.v[0][1], t.v[0][2], 0f32, t.v[0][0], t.v[0][1], t.v[0][2], 0f32,
            );
            let m1 = _mm256_setr_ps(
                t.v[1][0], t.v[1][1], t.v[1][2], 0f32, t.v[1][0], t.v[1][1], t.v[1][2], 0f32,
            );
            let m2 = _mm256_setr_ps(
                t.v[2][0], t.v[2][1], t.v[2][2], 0f32, t.v[2][0], t.v[2][1], t.v[2][2], 0f32,
            );

            let zeros = _mm_setzero_ps();

            let v_scale = _mm256_set1_ps(scale);

            if !src_chunks.is_empty() {
                let (src0, src1) = src_chunks.split_at(src_chunks.len() / 2);
                let (dst0, dst1) = dst_chunks.split_at_mut(dst_chunks.len() / 2);
                let mut src_iter0 = src0.chunks_exact(src_channels * 2);
                let mut src_iter1 = src1.chunks_exact(src_channels * 2);

                let (mut r0, mut g0, mut b0, mut a0);
                let (mut r1, mut g1, mut b1, mut a1);
                let (mut r2, mut g2, mut b2, mut a2);
                let (mut r3, mut g3, mut b3, mut a3);

                // Pipeline prologue: fetch the first pixel pair of each half.
                if let (Some(src0), Some(src1)) = (src_iter0.next(), src_iter1.next()) {
                    r0 = _mm_broadcast_ss(&self.profile.linear[src0[src_cn.r_i()]._as_usize()]);
                    g0 = _mm_broadcast_ss(&self.profile.linear[src0[src_cn.g_i()]._as_usize()]);
                    b0 = _mm_broadcast_ss(&self.profile.linear[src0[src_cn.b_i()]._as_usize()]);
                    r1 = _mm_broadcast_ss(
                        &self.profile.linear[src0[src_cn.r_i() + src_channels]._as_usize()],
                    );
                    g1 = _mm_broadcast_ss(
                        &self.profile.linear[src0[src_cn.g_i() + src_channels]._as_usize()],
                    );
                    b1 = _mm_broadcast_ss(
                        &self.profile.linear[src0[src_cn.b_i() + src_channels]._as_usize()],
                    );

                    r2 = _mm_broadcast_ss(&self.profile.linear[src1[src_cn.r_i()]._as_usize()]);
                    g2 = _mm_broadcast_ss(&self.profile.linear[src1[src_cn.g_i()]._as_usize()]);
                    b2 = _mm_broadcast_ss(&self.profile.linear[src1[src_cn.b_i()]._as_usize()]);
                    r3 = _mm_broadcast_ss(
                        &self.profile.linear[src1[src_cn.r_i() + src_channels]._as_usize()],
                    );
                    g3 = _mm_broadcast_ss(
                        &self.profile.linear[src1[src_cn.g_i() + src_channels]._as_usize()],
                    );
                    b3 = _mm_broadcast_ss(
                        &self.profile.linear[src1[src_cn.b_i() + src_channels]._as_usize()],
                    );

                    a0 = if src_channels == 4 {
                        src0[src_cn.a_i()]
                    } else {
                        max_colors
                    };
                    a1 = if src_channels == 4 {
                        src0[src_cn.a_i() + src_channels]
                    } else {
                        max_colors
                    };
                    a2 = if src_channels == 4 {
                        src1[src_cn.a_i()]
                    } else {
                        max_colors
                    };
                    a3 = if src_channels == 4 {
                        src1[src_cn.a_i() + src_channels]
                    } else {
                        max_colors
                    };
                } else {
                    r0 = _mm_setzero_ps();
                    g0 = _mm_setzero_ps();
                    b0 = _mm_setzero_ps();
                    a0 = max_colors;
                    r1 = _mm_setzero_ps();
                    g1 = _mm_setzero_ps();
                    b1 = _mm_setzero_ps();
                    a1 = max_colors;
                    r2 = _mm_setzero_ps();
                    g2 = _mm_setzero_ps();
                    b2 = _mm_setzero_ps();
                    a2 = max_colors;
                    r3 = _mm_setzero_ps();
                    g3 = _mm_setzero_ps();
                    b3 = _mm_setzero_ps();
                    a3 = max_colors;
                }

                // The source iterators are one group ahead of the destination iterators:
                // each iteration writes the previously fetched group into `dst0`/`dst1`
                // and fetches the group held by `src0`/`src1` for the next iteration.
                for (((src0, src1), dst0), dst1) in src_iter0
                    .zip(src_iter1)
                    .zip(dst0.chunks_exact_mut(dst_channels * 2))
                    .zip(dst1.chunks_exact_mut(dst_channels * 2))
                {
                    let rz0 = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(r0), r1);
                    let gz0 = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(g0), g1);
                    let bz0 = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(b0), b1);

                    let rz1 = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(r2), r3);
                    let gz1 = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(g2), g3);
                    let bz1 = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(b2), b3);

                    let v0 = _mm256_mul_ps(rz0, m0);
                    let v1 = _mm256_fmadd_ps(gz0, m1, v0);
                    let mut vz0 = _mm256_fmadd_ps(bz0, m2, v1);

                    let v2 = _mm256_mul_ps(rz1, m0);
                    let v3 = _mm256_fmadd_ps(gz1, m1, v2);
                    let mut vz1 = _mm256_fmadd_ps(bz1, m2, v3);

                    // Clamp at zero, scale to the gamma table range, clamp at the top.
                    vz0 = _mm256_max_ps(vz0, _mm256_setzero_ps());
                    vz0 = _mm256_mul_ps(vz0, v_scale);
                    vz0 = _mm256_min_ps(vz0, v_scale);

                    vz1 = _mm256_max_ps(vz1, _mm256_setzero_ps());
                    vz1 = _mm256_mul_ps(vz1, v_scale);
                    vz1 = _mm256_min_ps(vz1, v_scale);

                    let zx0 = _mm256_cvtps_epi32(vz0);
                    let zx1 = _mm256_cvtps_epi32(vz1);
                    _mm256_store_si256(temporary0.0.as_mut_ptr() as *mut _, zx0);
                    _mm256_store_si256(temporary1.0.as_mut_ptr() as *mut _, zx1);

                    // Fetch the next group's linearised channels.
                    r0 = _mm_broadcast_ss(&self.profile.linear[src0[src_cn.r_i()]._as_usize()]);
                    g0 = _mm_broadcast_ss(&self.profile.linear[src0[src_cn.g_i()]._as_usize()]);
                    b0 = _mm_broadcast_ss(&self.profile.linear[src0[src_cn.b_i()]._as_usize()]);
                    r1 = _mm_broadcast_ss(
                        &self.profile.linear[src0[src_cn.r_i() + src_channels]._as_usize()],
                    );
                    g1 = _mm_broadcast_ss(
                        &self.profile.linear[src0[src_cn.g_i() + src_channels]._as_usize()],
                    );
                    b1 = _mm_broadcast_ss(
                        &self.profile.linear[src0[src_cn.b_i() + src_channels]._as_usize()],
                    );

                    r2 = _mm_broadcast_ss(&self.profile.linear[src1[src_cn.r_i()]._as_usize()]);
                    g2 = _mm_broadcast_ss(&self.profile.linear[src1[src_cn.g_i()]._as_usize()]);
                    b2 = _mm_broadcast_ss(&self.profile.linear[src1[src_cn.b_i()]._as_usize()]);
                    r3 = _mm_broadcast_ss(
                        &self.profile.linear[src1[src_cn.r_i() + src_channels]._as_usize()],
                    );
                    g3 = _mm_broadcast_ss(
                        &self.profile.linear[src1[src_cn.g_i() + src_channels]._as_usize()],
                    );
                    b3 = _mm_broadcast_ss(
                        &self.profile.linear[src1[src_cn.b_i() + src_channels]._as_usize()],
                    );

                    // Store the current group; a0..a3 still hold its alpha values.
                    dst0[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize];
                    dst0[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize];
                    dst0[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize];
                    if dst_channels == 4 {
                        dst0[dst_cn.a_i()] = a0;
                    }

                    dst0[dst_cn.r_i() + dst_channels] =
                        self.profile.gamma[temporary0.0[8] as usize];
                    dst0[dst_cn.g_i() + dst_channels] =
                        self.profile.gamma[temporary0.0[10] as usize];
                    dst0[dst_cn.b_i() + dst_channels] =
                        self.profile.gamma[temporary0.0[12] as usize];
                    if dst_channels == 4 {
                        dst0[dst_cn.a_i() + dst_channels] = a1;
                    }

                    dst1[dst_cn.r_i()] = self.profile.gamma[temporary1.0[0] as usize];
                    dst1[dst_cn.g_i()] = self.profile.gamma[temporary1.0[2] as usize];
                    dst1[dst_cn.b_i()] = self.profile.gamma[temporary1.0[4] as usize];
                    if dst_channels == 4 {
                        dst1[dst_cn.a_i()] = a2;
                    }

                    dst1[dst_cn.r_i() + dst_channels] =
                        self.profile.gamma[temporary1.0[8] as usize];
                    dst1[dst_cn.g_i() + dst_channels] =
                        self.profile.gamma[temporary1.0[10] as usize];
                    dst1[dst_cn.b_i() + dst_channels] =
                        self.profile.gamma[temporary1.0[12] as usize];
                    if dst_channels == 4 {
                        dst1[dst_cn.a_i() + dst_channels] = a3;
                    }

                    // Fetch the next group's alpha. The first half must read from the
                    // current `src0` chunk: indexing the whole `src` slice here would
                    // hand every later pixel pair the alpha of pixels 0 and 1.
                    a0 = if src_channels == 4 {
                        src0[src_cn.a_i()]
                    } else {
                        max_colors
                    };
                    a1 = if src_channels == 4 {
                        src0[src_cn.a_i() + src_channels]
                    } else {
                        max_colors
                    };
                    a2 = if src_channels == 4 {
                        src1[src_cn.a_i()]
                    } else {
                        max_colors
                    };
                    a3 = if src_channels == 4 {
                        src1[src_cn.a_i() + src_channels]
                    } else {
                        max_colors
                    };
                }

                // Pipeline epilogue: the last fetched group has not been written yet.
                if let (Some(dst0), Some(dst1)) = (
                    dst0.chunks_exact_mut(dst_channels * 2).last(),
                    dst1.chunks_exact_mut(dst_channels * 2).last(),
                ) {
                    let rz0 = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(r0), r1);
                    let gz0 = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(g0), g1);
                    let bz0 = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(b0), b1);

                    let rz1 = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(r2), r3);
                    let gz1 = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(g2), g3);
                    let bz1 = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(b2), b3);

                    let v0 = _mm256_mul_ps(rz0, m0);
                    let v1 = _mm256_fmadd_ps(gz0, m1, v0);
                    let mut vz0 = _mm256_fmadd_ps(bz0, m2, v1);

                    let v2 = _mm256_mul_ps(rz1, m0);
                    let v3 = _mm256_fmadd_ps(gz1, m1, v2);
                    let mut vz1 = _mm256_fmadd_ps(bz1, m2, v3);

                    vz0 = _mm256_max_ps(vz0, _mm256_setzero_ps());
                    vz0 = _mm256_mul_ps(vz0, v_scale);
                    vz0 = _mm256_min_ps(vz0, v_scale);

                    vz1 = _mm256_max_ps(vz1, _mm256_setzero_ps());
                    vz1 = _mm256_mul_ps(vz1, v_scale);
                    vz1 = _mm256_min_ps(vz1, v_scale);

                    let zx0 = _mm256_cvtps_epi32(vz0);
                    let zx1 = _mm256_cvtps_epi32(vz1);
                    _mm256_store_si256(temporary0.0.as_mut_ptr() as *mut _, zx0);
                    _mm256_store_si256(temporary1.0.as_mut_ptr() as *mut _, zx1);

                    dst0[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize];
                    dst0[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize];
                    dst0[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize];
                    if dst_channels == 4 {
                        dst0[dst_cn.a_i()] = a0;
                    }

                    dst0[dst_cn.r_i() + dst_channels] =
                        self.profile.gamma[temporary0.0[8] as usize];
                    dst0[dst_cn.g_i() + dst_channels] =
                        self.profile.gamma[temporary0.0[10] as usize];
                    dst0[dst_cn.b_i() + dst_channels] =
                        self.profile.gamma[temporary0.0[12] as usize];
                    if dst_channels == 4 {
                        dst0[dst_cn.a_i() + dst_channels] = a1;
                    }

                    dst1[dst_cn.r_i()] = self.profile.gamma[temporary1.0[0] as usize];
                    dst1[dst_cn.g_i()] = self.profile.gamma[temporary1.0[2] as usize];
                    dst1[dst_cn.b_i()] = self.profile.gamma[temporary1.0[4] as usize];
                    if dst_channels == 4 {
                        dst1[dst_cn.a_i()] = a2;
                    }

                    dst1[dst_cn.r_i() + dst_channels] =
                        self.profile.gamma[temporary1.0[8] as usize];
                    dst1[dst_cn.g_i() + dst_channels] =
                        self.profile.gamma[temporary1.0[10] as usize];
                    dst1[dst_cn.b_i() + dst_channels] =
                        self.profile.gamma[temporary1.0[12] as usize];
                    if dst_channels == 4 {
                        dst1[dst_cn.a_i() + dst_channels] = a3;
                    }
                }
            }

            // Leftover pixels, one at a time, using the low 128-bit half of the matrix
            // rows and of the scale vector.
            for (src, dst) in src_remainder
                .chunks_exact(src_channels)
                .zip(dst_remainder.chunks_exact_mut(dst_channels))
            {
                let r = _mm_broadcast_ss(&self.profile.linear[src[src_cn.r_i()]._as_usize()]);
                let g = _mm_broadcast_ss(&self.profile.linear[src[src_cn.g_i()]._as_usize()]);
                let b = _mm_broadcast_ss(&self.profile.linear[src[src_cn.b_i()]._as_usize()]);
                let a = if src_channels == 4 {
                    src[src_cn.a_i()]
                } else {
                    max_colors
                };

                let v0 = _mm_mul_ps(r, _mm256_castps256_ps128(m0));
                let v1 = _mm_fmadd_ps(g, _mm256_castps256_ps128(m1), v0);
                let mut v = _mm_fmadd_ps(b, _mm256_castps256_ps128(m2), v1);

                v = _mm_max_ps(v, zeros);
                v = _mm_mul_ps(v, _mm256_castps256_ps128(v_scale));
                v = _mm_min_ps(v, _mm256_castps256_ps128(v_scale));

                let zx = _mm_cvtps_epi32(v);
                _mm_store_si128(temporary0.0.as_mut_ptr() as *mut _, zx);

                dst[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize];
                dst[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize];
                dst[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize];
                if dst_channels == 4 {
                    dst[dst_cn.a_i()] = a;
                }
            }
        }

        Ok(())
    }
}

impl<
    T: Clone + Copy + 'static + PointeeSizeExpressible + Default,
    const SRC_LAYOUT: u8,
    const DST_LAYOUT: u8,
    const LINEAR_CAP: usize,
> TransformExecutor for TransformShaperRgbOptAvx512
where
    u32: AsPrimitive,
{
    /// Runs the transform by forwarding to `transform_impl`.
    fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
        unsafe { self.transform_impl(src, dst) }
    }
}
moxcms-0.7.7/src/conversions/avx512/rgb_xyz_q2_13_opt.rs000064400000000000000000000507361046102023000211660ustar 00000000000000/* * // Copyright (c) Radzivon
Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ use crate::conversions::rgbxyz_fixed::TransformMatrixShaperFixedPointOpt; use crate::transform::PointeeSizeExpressible; use crate::{CmsError, Layout, TransformExecutor}; use num_traits::AsPrimitive; use std::arch::x86_64::*; pub(crate) struct TransformShaperRgbQ2_13OptAvx512< T: Copy, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, const LINEAR_CAP: usize, const PRECISION: i32, > { pub(crate) profile: TransformMatrixShaperFixedPointOpt, pub(crate) bit_depth: usize, pub(crate) gamma_lut: usize, } #[inline(always)] pub(crate) unsafe fn _xmm_broadcast_epi32(f: &i32) -> __m128i { let float_ref: &f32 = unsafe { &*(f as *const i32 as *const f32) }; unsafe { _mm_castps_si128(_mm_broadcast_ss(float_ref)) } } #[repr(align(32), C)] #[derive(Debug)] pub(crate) struct AvxAlignedU16(pub(crate) [u16; 16]); #[inline] pub(crate) fn split_by_twos(data: &[T], channels: usize) -> (&[T], &[T]) { let len = data.len() / (channels * 4); let split_point = len * 4; data.split_at(split_point * channels) } #[inline] pub(crate) fn split_by_twos_mut(data: &mut [T], channels: usize) -> (&mut [T], &mut [T]) { let len = data.len() / (channels * 4); let split_point = len * 4; data.split_at_mut(split_point * channels) } impl< T: Copy + PointeeSizeExpressible + 'static, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, const LINEAR_CAP: usize, const PRECISION: i32, > TransformShaperRgbQ2_13OptAvx512 where u32: AsPrimitive, { #[target_feature(enable = "avx512bw", enable = "avx512vl")] unsafe fn transform_avx512(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> { let src_cn = Layout::from(SRC_LAYOUT); let dst_cn = Layout::from(DST_LAYOUT); let src_channels = src_cn.channels(); let dst_channels = dst_cn.channels(); if src.len() / src_channels != dst.len() / dst_channels { return Err(CmsError::LaneSizeMismatch); } if src.len() % src_channels != 0 { return Err(CmsError::LaneMultipleOfChannels); } if dst.len() % dst_channels != 0 { return Err(CmsError::LaneMultipleOfChannels); } let t = 
self.profile.adaptation_matrix.transpose(); let max_colors = ((1 << self.bit_depth) - 1).as_(); // If precision changed in another place it should be also changed here assert_eq!(PRECISION, 13); let (src_chunks, src_remainder) = split_by_twos(src, src_channels); let (dst_chunks, dst_remainder) = split_by_twos_mut(dst, dst_channels); let mut temporary0 = AvxAlignedU16([0; 16]); let mut temporary1 = AvxAlignedU16([0; 16]); unsafe { let m0 = _mm256_set_epi16( 0, 0, t.v[1][2], t.v[0][2], t.v[1][1], t.v[0][1], t.v[1][0], t.v[0][0], 0, 0, t.v[1][2], t.v[0][2], t.v[1][1], t.v[0][1], t.v[1][0], t.v[0][0], ); let m2 = _mm256_set_epi16( 0, 0, 1, t.v[2][2], 1, t.v[2][1], 1, t.v[2][0], 0, 0, 1, t.v[2][2], 1, t.v[2][1], 1, t.v[2][0], ); let rnd_val = ((1i32 << (PRECISION - 1)) as i16).to_ne_bytes(); let rnd = _mm256_set1_epi32(i32::from_ne_bytes([0, 0, rnd_val[0], rnd_val[1]])); let zeros = _mm256_setzero_si256(); let v_max_value = _mm256_set1_epi32(self.gamma_lut as i32 - 1); let (mut r0, mut g0, mut b0, mut a0); let (mut r1, mut g1, mut b1, mut a1); let (mut r2, mut g2, mut b2, mut a2); let (mut r3, mut g3, mut b3, mut a3); if !src_chunks.is_empty() { let (src0, src1) = src_chunks.split_at(src_chunks.len() / 2); let (dst0, dst1) = dst_chunks.split_at_mut(dst_chunks.len() / 2); let mut src_iter0 = src0.chunks_exact(src_channels * 2); let mut src_iter1 = src1.chunks_exact(src_channels * 2); if let (Some(src0), Some(src1)) = (src_iter0.next(), src_iter1.next()) { r0 = _xmm_broadcast_epi32(&self.profile.linear[src0[src_cn.r_i()]._as_usize()]); g0 = _xmm_broadcast_epi32(&self.profile.linear[src0[src_cn.g_i()]._as_usize()]); b0 = _xmm_broadcast_epi32(&self.profile.linear[src0[src_cn.b_i()]._as_usize()]); r1 = _xmm_broadcast_epi32( &self.profile.linear[src0[src_cn.r_i() + src_channels]._as_usize()], ); g1 = _xmm_broadcast_epi32( &self.profile.linear[src0[src_cn.g_i() + src_channels]._as_usize()], ); b1 = _xmm_broadcast_epi32( &self.profile.linear[src0[src_cn.b_i() + 
src_channels]._as_usize()], ); r2 = _xmm_broadcast_epi32(&self.profile.linear[src1[src_cn.r_i()]._as_usize()]); g2 = _xmm_broadcast_epi32(&self.profile.linear[src1[src_cn.g_i()]._as_usize()]); b2 = _xmm_broadcast_epi32(&self.profile.linear[src1[src_cn.b_i()]._as_usize()]); r3 = _xmm_broadcast_epi32( &self.profile.linear[src1[src_cn.r_i() + src_channels]._as_usize()], ); g3 = _xmm_broadcast_epi32( &self.profile.linear[src1[src_cn.g_i() + src_channels]._as_usize()], ); b3 = _xmm_broadcast_epi32( &self.profile.linear[src1[src_cn.b_i() + src_channels]._as_usize()], ); a0 = if src_channels == 4 { src0[src_cn.a_i()] } else { max_colors }; a1 = if src_channels == 4 { src0[src_cn.a_i() + src_channels] } else { max_colors }; a2 = if src_channels == 4 { src1[src_cn.a_i()] } else { max_colors }; a3 = if src_channels == 4 { src1[src_cn.a_i() + src_channels] } else { max_colors }; } else { r0 = _mm_setzero_si128(); g0 = _mm_setzero_si128(); b0 = _mm_setzero_si128(); a0 = max_colors; r1 = _mm_setzero_si128(); g1 = _mm_setzero_si128(); b1 = _mm_setzero_si128(); a1 = max_colors; r2 = _mm_setzero_si128(); g2 = _mm_setzero_si128(); b2 = _mm_setzero_si128(); a2 = max_colors; r3 = _mm_setzero_si128(); g3 = _mm_setzero_si128(); b3 = _mm_setzero_si128(); a3 = max_colors; } for (((src0, src1), dst0), dst1) in src_iter0 .zip(src_iter1) .zip(dst0.chunks_exact_mut(dst_channels * 2)) .zip(dst1.chunks_exact_mut(dst_channels * 2)) { let zr0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(r0), r1); let mut zg0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(g0), g1); let zb0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(b0), b1); zg0 = _mm256_slli_epi32::<16>(zg0); let zr1 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(r2), r3); let mut zg1 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(g2), g3); let zb1 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(b2), b3); zg1 = _mm256_slli_epi32::<16>(zg1); let zrg0 = _mm256_or_si256(zr0, zg0); let zbz0 = 
_mm256_or_si256(zb0, rnd); let zrg1 = _mm256_or_si256(zr1, zg1); let zbz1 = _mm256_or_si256(zb1, rnd); let va0 = _mm256_madd_epi16(zrg0, m0); let va1 = _mm256_madd_epi16(zbz0, m2); let va2 = _mm256_madd_epi16(zrg1, m0); let va3 = _mm256_madd_epi16(zbz1, m2); let mut v0 = _mm256_add_epi32(va0, va1); let mut v1 = _mm256_add_epi32(va2, va3); v0 = _mm256_srai_epi32::(v0); v0 = _mm256_max_epi32(v0, zeros); v0 = _mm256_min_epi32(v0, v_max_value); v1 = _mm256_srai_epi32::(v1); v1 = _mm256_max_epi32(v1, zeros); v1 = _mm256_min_epi32(v1, v_max_value); _mm256_store_si256(temporary0.0.as_mut_ptr() as *mut _, v0); _mm256_store_si256(temporary1.0.as_mut_ptr() as *mut _, v1); r0 = _xmm_broadcast_epi32(&self.profile.linear[src0[src_cn.r_i()]._as_usize()]); g0 = _xmm_broadcast_epi32(&self.profile.linear[src0[src_cn.g_i()]._as_usize()]); b0 = _xmm_broadcast_epi32(&self.profile.linear[src0[src_cn.b_i()]._as_usize()]); r1 = _xmm_broadcast_epi32( &self.profile.linear[src0[src_cn.r_i() + src_channels]._as_usize()], ); g1 = _xmm_broadcast_epi32( &self.profile.linear[src0[src_cn.g_i() + src_channels]._as_usize()], ); b1 = _xmm_broadcast_epi32( &self.profile.linear[src0[src_cn.b_i() + src_channels]._as_usize()], ); r2 = _xmm_broadcast_epi32(&self.profile.linear[src1[src_cn.r_i()]._as_usize()]); g2 = _xmm_broadcast_epi32(&self.profile.linear[src1[src_cn.g_i()]._as_usize()]); b2 = _xmm_broadcast_epi32(&self.profile.linear[src1[src_cn.b_i()]._as_usize()]); r3 = _xmm_broadcast_epi32( &self.profile.linear[src1[src_cn.r_i() + src_channels]._as_usize()], ); g3 = _xmm_broadcast_epi32( &self.profile.linear[src1[src_cn.g_i() + src_channels]._as_usize()], ); b3 = _xmm_broadcast_epi32( &self.profile.linear[src1[src_cn.b_i() + src_channels]._as_usize()], ); dst0[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize]; dst0[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize]; dst0[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize]; if dst_channels == 4 { dst0[dst_cn.a_i()] = a0; 
} dst0[dst_cn.r_i() + dst_channels] = self.profile.gamma[temporary0.0[8] as usize]; dst0[dst_cn.g_i() + dst_channels] = self.profile.gamma[temporary0.0[10] as usize]; dst0[dst_cn.b_i() + dst_channels] = self.profile.gamma[temporary0.0[12] as usize]; if dst_channels == 4 { dst0[dst_cn.a_i() + dst_channels] = a1; } dst1[dst_cn.r_i()] = self.profile.gamma[temporary1.0[0] as usize]; dst1[dst_cn.g_i()] = self.profile.gamma[temporary1.0[2] as usize]; dst1[dst_cn.b_i()] = self.profile.gamma[temporary1.0[4] as usize]; if dst_channels == 4 { dst1[dst_cn.a_i()] = a2; } dst1[dst_cn.r_i() + dst_channels] = self.profile.gamma[temporary1.0[8] as usize]; dst1[dst_cn.g_i() + dst_channels] = self.profile.gamma[temporary1.0[10] as usize]; dst1[dst_cn.b_i() + dst_channels] = self.profile.gamma[temporary1.0[12] as usize]; if dst_channels == 4 { dst1[dst_cn.a_i() + dst_channels] = a3; } a0 = if src_channels == 4 { src0[src_cn.a_i()] } else { max_colors }; a1 = if src_channels == 4 { src0[src_cn.a_i() + src_channels] } else { max_colors }; a2 = if src_channels == 4 { src1[src_cn.a_i()] } else { max_colors }; a3 = if src_channels == 4 { src1[src_cn.a_i() + src_channels] } else { max_colors }; } if let (Some(dst0), Some(dst1)) = ( dst0.chunks_exact_mut(dst_channels * 2).last(), dst1.chunks_exact_mut(dst_channels * 2).last(), ) { let zr0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(r0), r1); let mut zg0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(g0), g1); let zb0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(b0), b1); zg0 = _mm256_slli_epi32::<16>(zg0); let zr1 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(r2), r3); let mut zg1 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(g2), g3); let zb1 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(b2), b3); zg1 = _mm256_slli_epi32::<16>(zg1); let zrg0 = _mm256_or_si256(zr0, zg0); let zbz0 = _mm256_or_si256(zb0, rnd); let zrg1 = _mm256_or_si256(zr1, zg1); let zbz1 = _mm256_or_si256(zb1, rnd); let va0 
= _mm256_madd_epi16(zrg0, m0); let va1 = _mm256_madd_epi16(zbz0, m2); let va2 = _mm256_madd_epi16(zrg1, m0); let va3 = _mm256_madd_epi16(zbz1, m2); let mut v0 = _mm256_add_epi32(va0, va1); let mut v1 = _mm256_add_epi32(va2, va3); v0 = _mm256_srai_epi32::(v0); v0 = _mm256_max_epi32(v0, zeros); v0 = _mm256_min_epi32(v0, v_max_value); v1 = _mm256_srai_epi32::(v1); v1 = _mm256_max_epi32(v1, zeros); v1 = _mm256_min_epi32(v1, v_max_value); _mm256_store_si256(temporary0.0.as_mut_ptr() as *mut _, v0); _mm256_store_si256(temporary1.0.as_mut_ptr() as *mut _, v1); dst0[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize]; dst0[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize]; dst0[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize]; if dst_channels == 4 { dst0[dst_cn.a_i()] = a0; } dst0[dst_cn.r_i() + dst_channels] = self.profile.gamma[temporary0.0[8] as usize]; dst0[dst_cn.g_i() + dst_channels] = self.profile.gamma[temporary0.0[10] as usize]; dst0[dst_cn.b_i() + dst_channels] = self.profile.gamma[temporary0.0[12] as usize]; if dst_channels == 4 { dst0[dst_cn.a_i() + dst_channels] = a1; } dst1[dst_cn.r_i()] = self.profile.gamma[temporary1.0[0] as usize]; dst1[dst_cn.g_i()] = self.profile.gamma[temporary1.0[2] as usize]; dst1[dst_cn.b_i()] = self.profile.gamma[temporary1.0[4] as usize]; if dst_channels == 4 { dst1[dst_cn.a_i()] = a2; } dst1[dst_cn.r_i() + dst_channels] = self.profile.gamma[temporary1.0[8] as usize]; dst1[dst_cn.g_i() + dst_channels] = self.profile.gamma[temporary1.0[10] as usize]; dst1[dst_cn.b_i() + dst_channels] = self.profile.gamma[temporary1.0[12] as usize]; if dst_channels == 4 { dst1[dst_cn.a_i() + dst_channels] = a3; } } } for (src, dst) in src_remainder .chunks_exact(src_channels) .zip(dst_remainder.chunks_exact_mut(dst_channels)) { let r = _xmm_broadcast_epi32(&self.profile.linear[src[src_cn.r_i()]._as_usize()]); let mut g = _xmm_broadcast_epi32(&self.profile.linear[src[src_cn.g_i()]._as_usize()]); let b = 
_xmm_broadcast_epi32(&self.profile.linear[src[src_cn.b_i()]._as_usize()]); g = _mm_slli_epi32::<16>(g); let a = if src_channels == 4 { src[src_cn.a_i()] } else { max_colors }; let zrg0 = _mm_or_si128(r, g); let zbz0 = _mm_or_si128(b, _mm256_castsi256_si128(rnd)); let v0 = _mm_madd_epi16(zrg0, _mm256_castsi256_si128(m0)); let v1 = _mm_madd_epi16(zbz0, _mm256_castsi256_si128(m2)); let mut v = _mm_add_epi32(v0, v1); v = _mm_srai_epi32::(v); v = _mm_max_epi32(v, _mm_setzero_si128()); v = _mm_min_epi32(v, _mm256_castsi256_si128(v_max_value)); _mm_store_si128(temporary0.0.as_mut_ptr() as *mut _, v); dst[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize]; dst[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize]; dst[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize]; if dst_channels == 4 { dst[dst_cn.a_i()] = a; } } } Ok(()) } } impl< T: Copy + PointeeSizeExpressible + 'static + Default, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, const LINEAR_CAP: usize, const PRECISION: i32, > TransformExecutor for TransformShaperRgbQ2_13OptAvx512 where u32: AsPrimitive, { fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> { unsafe { self.transform_avx512(src, dst) } } } moxcms-0.7.7/src/conversions/bpc.rs000064400000000000000000000123371046102023000154240ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. 
Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // use crate::conversions::interpolator::{MultidimensionalInterpolation, Tetrahedral}; // use crate::conversions::transform_lut4_to_4::{NonFiniteVector3fLerp, Vector3fCmykLerp}; // use crate::mlaf::mlaf; // use crate::{Chromaticity, ColorProfile, DataColorSpace, Lab, Xyz}; // // impl ColorProfile { // #[inline] // pub(crate) fn detect_black_point(&self, lut: &[f32]) -> Option { // if self.color_space == DataColorSpace::Cmyk { // // if let Some(mut bp) = self.black_point { // // if let Some(wp) = self.media_white_point.map(|x| x.normalize()) { // // if wp != Chromaticity::D50.to_xyz() { // // let ad = adaption_matrix(wp, Chromaticity::D50.to_xyz()); // // let v = ad.mul_vector(bp.to_vector()); // // bp = Xyz { // // x: v.v[0], // // y: v.v[1], // // z: v.v[2], // // }; // // } // // } // // let mut lab = Lab::from_xyz(bp); // // lab.a = 0.; // // lab.b = 0.; // // if lab.l > 50. 
{ // // lab.l = 50.; // // } // // bp = lab.to_xyz(); // // return Some(bp); // // } // let c = 65535; // let m = 65535; // let y = 65535; // let k = 65535; // // let linear_k: f32 = k as f32 * (1. / 65535.); // let w: i32 = k * (GRID_SIZE as i32 - 1) / 65535; // let w_n: i32 = (w + 1).min(GRID_SIZE as i32 - 1); // let t: f32 = linear_k * (GRID_SIZE as i32 - 1) as f32 - w as f32; // // let grid_size = GRID_SIZE as i32; // let grid_size3 = grid_size * grid_size * grid_size; // // let table1 = &lut[(w * grid_size3 * 3) as usize..]; // let table2 = &lut[(w_n * grid_size3 * 3) as usize..]; // // let tetrahedral1 = Tetrahedral::::new(table1); // let tetrahedral2 = Tetrahedral::::new(table2); // let r1 = tetrahedral1.inter3(c, m, y); // let r2 = tetrahedral2.inter3(c, m, y); // let r = NonFiniteVector3fLerp::interpolate(r1, r2, t, 1.0); // // let mut lab = Lab::from_xyz(Xyz { // x: r.v[0], // y: r.v[1], // z: r.v[2], // }); // lab.a = 0.; // lab.b = 0.; // if lab.l > 50. { // lab.l = 50.; // } // let bp = lab.to_xyz(); // // return Some(bp); // } // if self.color_space == DataColorSpace::Rgb { // return Some(Xyz::new(0.0, 0.0, 0.0)); // } // None // } // } // // pub(crate) fn compensate_bpc_in_lut(lut_xyz: &mut [f32], src_bp: Xyz, dst_bp: Xyz) { // const WP_50: Xyz = Chromaticity::D50.to_xyz(); // let tx = src_bp.x - WP_50.x; // let ty = src_bp.y - WP_50.y; // let tz = src_bp.z - WP_50.z; // let ax = (dst_bp.x - WP_50.x) / tx; // let ay = (dst_bp.y - WP_50.y) / ty; // let az = (dst_bp.z - WP_50.z) / tz; // // let bx = -WP_50.x * (dst_bp.x - src_bp.x) / tx; // let by = -WP_50.y * (dst_bp.y - src_bp.y) / ty; // let bz = -WP_50.z * (dst_bp.z - src_bp.z) / tz; // // for dst in lut_xyz.chunks_exact_mut(3) { // dst[0] = mlaf(bx, dst[0], ax); // dst[1] = mlaf(by, dst[1], ay); // dst[2] = mlaf(bz, dst[2], az); // } // } moxcms-0.7.7/src/conversions/gray2rgb.rs000064400000000000000000000316241046102023000163770ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 2/2025. 
All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, Layout, TransformExecutor};
use num_traits::AsPrimitive;

/// Gray source executor whose linearization and gamma encoding were merged into a
/// single lookup table indexed by the raw source sample.
#[derive(Clone)]
struct TransformGray2RgbFusedExecutor<T, const SRC_LAYOUT: u8, const DST_LAYOUT: u8> {
    fused_gamma: Box<[T; 65536]>,
    bit_depth: usize,
}

/// Builds an executor converting Gray / GrayAlpha input to Gray, GrayAlpha, Rgb or
/// Rgba output through one fused lookup table.
///
/// For every entry of `gray_linear` the table stores
/// `gray_gamma[round(linear * (gamma_lut - 1))]`, with the index clamped to
/// `gamma_lut - 1`.
///
/// # Errors
///
/// Returns [`CmsError::UnsupportedProfileConnection`] when `src_layout` is not
/// Gray / GrayAlpha or `dst_layout` is not one of Gray, GrayAlpha, Rgb, Rgba.
pub(crate) fn make_gray_to_x<
    T: Copy + Default + PointeeSizeExpressible + 'static + Send + Sync,
    const BUCKET: usize,
>(
    src_layout: Layout,
    dst_layout: Layout,
    gray_linear: &[f32; BUCKET],
    gray_gamma: &[T; 65536],
    bit_depth: usize,
    gamma_lut: usize,
) -> Result<Box<dyn TransformExecutor<T> + Sync + Send>, CmsError>
where
    u32: AsPrimitive<T>,
{
    if src_layout != Layout::Gray && src_layout != Layout::GrayAlpha {
        return Err(CmsError::UnsupportedProfileConnection);
    }
    // Reject unsupported destinations before paying for the table.
    if !matches!(
        dst_layout,
        Layout::Rgb | Layout::Rgba | Layout::Gray | Layout::GrayAlpha
    ) {
        return Err(CmsError::UnsupportedProfileConnection);
    }

    let mut fused_gamma = Box::new([T::default(); 65536]);
    let max_lut_size = (gamma_lut - 1) as f32;
    for (&src, dst) in gray_linear.iter().zip(fused_gamma.iter_mut()) {
        let possible_value = ((src * max_lut_size).round() as u32).min(max_lut_size as u32) as u16;
        *dst = gray_gamma[possible_value as usize];
    }

    // The const layouts must mirror the runtime layouts: the executor derives the
    // per-pixel channel counts of `src` and `dst` from them.
    match (src_layout, dst_layout) {
        (Layout::Gray, Layout::Rgb) => Ok(Box::new(TransformGray2RgbFusedExecutor::<
            T,
            { Layout::Gray as u8 },
            { Layout::Rgb as u8 },
        > {
            fused_gamma,
            bit_depth,
        })),
        (Layout::Gray, Layout::Rgba) => Ok(Box::new(TransformGray2RgbFusedExecutor::<
            T,
            { Layout::Gray as u8 },
            { Layout::Rgba as u8 },
        > {
            fused_gamma,
            bit_depth,
        })),
        (Layout::Gray, Layout::Gray) => Ok(Box::new(TransformGray2RgbFusedExecutor::<
            T,
            { Layout::Gray as u8 },
            { Layout::Gray as u8 },
        > {
            fused_gamma,
            bit_depth,
        })),
        (Layout::Gray, Layout::GrayAlpha) => Ok(Box::new(TransformGray2RgbFusedExecutor::<
            T,
            { Layout::Gray as u8 },
            { Layout::GrayAlpha as u8 },
        > {
            fused_gamma,
            bit_depth,
        })),
        (Layout::GrayAlpha, Layout::Rgb) => Ok(Box::new(TransformGray2RgbFusedExecutor::<
            T,
            { Layout::GrayAlpha as u8 },
            { Layout::Rgb as u8 },
        > {
            fused_gamma,
            bit_depth,
        })),
        (Layout::GrayAlpha, Layout::Rgba) => Ok(Box::new(TransformGray2RgbFusedExecutor::<
            T,
            { Layout::GrayAlpha as u8 },
            { Layout::Rgba as u8 },
        > {
            fused_gamma,
            bit_depth,
        })),
        (Layout::GrayAlpha, Layout::Gray) => Ok(Box::new(TransformGray2RgbFusedExecutor::<
            T,
            { Layout::GrayAlpha as u8 },
            { Layout::Gray as u8 },
        > {
            fused_gamma,
            bit_depth,
        })),
        (Layout::GrayAlpha, Layout::GrayAlpha) => Ok(Box::new(TransformGray2RgbFusedExecutor::<
            T,
            { Layout::GrayAlpha as u8 },
            { Layout::GrayAlpha as u8 },
        > {
            fused_gamma,
            bit_depth,
        })),
        _ => Err(CmsError::UnsupportedProfileConnection),
    }
}

impl<
    T: Copy + Default + PointeeSizeExpressible + 'static,
    const SRC_LAYOUT: u8,
    const DST_LAYOUT: u8,
> TransformExecutor<T> for TransformGray2RgbFusedExecutor<T, SRC_LAYOUT, DST_LAYOUT>
where
    u32: AsPrimitive<T>,
{
    /// Maps every source pixel through the fused table.
    ///
    /// The gray result is written to the first destination channel and replicated
    /// into the second and third for Rgb / Rgba. Alpha is copied from the source
    /// when it is GrayAlpha, otherwise set to `(1 << bit_depth) - 1`.
    ///
    /// # Errors
    ///
    /// * [`CmsError::LaneSizeMismatch`] when the pixel counts differ.
    /// * [`CmsError::LaneMultipleOfChannels`] when a length is not a multiple of
    ///   its channel count.
    fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
        let src_cn = Layout::from(SRC_LAYOUT);
        let dst_cn = Layout::from(DST_LAYOUT);
        let src_channels = src_cn.channels();
        let dst_channels = dst_cn.channels();

        if src.len() / src_channels != dst.len() / dst_channels {
            return Err(CmsError::LaneSizeMismatch);
        }
        if src.len() % src_channels != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }
        if dst.len() % dst_channels != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }

        let is_gray_alpha = src_cn == Layout::GrayAlpha;

        let max_value: T = ((1u32 << self.bit_depth as u32) - 1u32).as_();

        for (src, dst) in src
            .chunks_exact(src_channels)
            .zip(dst.chunks_exact_mut(dst_channels))
        {
            let g = self.fused_gamma[src[0]._as_usize()];
            let a = if is_gray_alpha { src[1] } else { max_value };

            dst[0] = g;
            if dst_cn == Layout::GrayAlpha {
                dst[1] = a;
            } else if dst_cn == Layout::Rgb {
                dst[1] = g;
                dst[2] = g;
            } else if dst_cn == Layout::Rgba {
                dst[1] = g;
                dst[2] = g;
                dst[3] = a;
            }
        }

        Ok(())
    }
}

/// Gray source executor that keeps the linearization table and one gamma table per
/// RGB channel separate.
#[derive(Clone)]
struct TransformGrayToRgbExecutor<T, const SRC_LAYOUT: u8, const DST_LAYOUT: u8> {
    gray_linear: Box<[f32; 65536]>,
    red_gamma: Box<[T; 65536]>,
    green_gamma: Box<[T; 65536]>,
    blue_gamma: Box<[T; 65536]>,
    bit_depth: usize,
    gamma_lut: usize,
}

/// Builds an executor converting Gray / GrayAlpha input to Rgb / Rgba output with
/// independent per-channel gamma tables.
///
/// `BUCKET` is not used by the body; it is kept so existing call sites that name it
/// keep compiling.
///
/// # Errors
///
/// Returns [`CmsError::UnsupportedProfileConnection`] when `src_layout` is not
/// Gray / GrayAlpha or `dst_layout` is not Rgb / Rgba.
#[allow(clippy::too_many_arguments)]
pub(crate) fn make_gray_to_unfused<
    T: Copy + Default + PointeeSizeExpressible + 'static + Send + Sync,
    const BUCKET: usize,
>(
    src_layout: Layout,
    dst_layout: Layout,
    gray_linear: Box<[f32; 65536]>,
    red_gamma: Box<[T; 65536]>,
    green_gamma: Box<[T; 65536]>,
    blue_gamma: Box<[T; 65536]>,
    bit_depth: usize,
    gamma_lut: usize,
) -> Result<Box<dyn TransformExecutor<T> + Sync + Send>, CmsError>
where
    u32: AsPrimitive<T>,
{
    if src_layout != Layout::Gray && src_layout != Layout::GrayAlpha {
        return Err(CmsError::UnsupportedProfileConnection);
    }
    if dst_layout != Layout::Rgb && dst_layout != Layout::Rgba {
        return Err(CmsError::UnsupportedProfileConnection);
    }

    // The const layouts must mirror the runtime layouts: the executor derives the
    // per-pixel channel counts of `src` and `dst` from them.
    match (src_layout, dst_layout) {
        (Layout::Gray, Layout::Rgb) => Ok(Box::new(TransformGrayToRgbExecutor::<
            T,
            { Layout::Gray as u8 },
            { Layout::Rgb as u8 },
        > {
            gray_linear,
            red_gamma,
            green_gamma,
            blue_gamma,
            bit_depth,
            gamma_lut,
        })),
        (Layout::Gray, Layout::Rgba) => Ok(Box::new(TransformGrayToRgbExecutor::<
            T,
            { Layout::Gray as u8 },
            { Layout::Rgba as u8 },
        > {
            gray_linear,
            red_gamma,
            green_gamma,
            blue_gamma,
            bit_depth,
            gamma_lut,
        })),
        (Layout::GrayAlpha, Layout::Rgb) => Ok(Box::new(TransformGrayToRgbExecutor::<
            T,
            { Layout::GrayAlpha as u8 },
            { Layout::Rgb as u8 },
        > {
            gray_linear,
            red_gamma,
            green_gamma,
            blue_gamma,
            bit_depth,
            gamma_lut,
        })),
        (Layout::GrayAlpha, Layout::Rgba) => Ok(Box::new(TransformGrayToRgbExecutor::<
            T,
            { Layout::GrayAlpha as u8 },
            { Layout::Rgba as u8 },
        > {
            gray_linear,
            red_gamma,
            green_gamma,
            blue_gamma,
            bit_depth,
            gamma_lut,
        })),
        _ => Err(CmsError::UnsupportedProfileConnection),
    }
}

impl<
    T: Copy + Default + PointeeSizeExpressible + 'static,
    const SRC_LAYOUT: u8,
    const DST_LAYOUT: u8,
> TransformExecutor<T> for TransformGrayToRgbExecutor<T, SRC_LAYOUT, DST_LAYOUT>
where
    u32: AsPrimitive<T>,
{
    /// Linearizes every gray sample, scales it to a gamma-table index and looks the
    /// index up in the red, green and blue gamma tables.
    ///
    /// Alpha is copied from the source when it is GrayAlpha, otherwise set to
    /// `(1 << bit_depth) - 1`.
    ///
    /// # Errors
    ///
    /// * [`CmsError::LaneSizeMismatch`] when the pixel counts differ.
    /// * [`CmsError::LaneMultipleOfChannels`] when a length is not a multiple of
    ///   its channel count.
    /// * [`CmsError::UnsupportedProfileConnection`] when a pixel is processed and
    ///   the destination layout is neither Rgb nor Rgba.
    fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
        let src_cn = Layout::from(SRC_LAYOUT);
        let dst_cn = Layout::from(DST_LAYOUT);
        let src_channels = src_cn.channels();
        let dst_channels = dst_cn.channels();

        if src.len() / src_channels != dst.len() / dst_channels {
            return Err(CmsError::LaneSizeMismatch);
        }
        if src.len() % src_channels != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }
        if dst.len() % dst_channels != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }

        let is_gray_alpha = src_cn == Layout::GrayAlpha;

        let max_value: T = ((1u32 << self.bit_depth as u32) - 1u32).as_();
        let max_lut_size = (self.gamma_lut - 1) as f32;

        for (src, dst) in src
            .chunks_exact(src_channels)
            .zip(dst.chunks_exact_mut(dst_channels))
        {
            let g = self.gray_linear[src[0]._as_usize()];
            let a = if is_gray_alpha { src[1] } else { max_value };

            // Clamp to the last usable gamma entry, as the fused builder does.
            let possible_value =
                ((g * max_lut_size).round() as u32).min(max_lut_size as u32) as usize;
            let red_value = self.red_gamma[possible_value];
            let green_value = self.green_gamma[possible_value];
            let blue_value = self.blue_gamma[possible_value];

            if dst_cn == Layout::Rgb {
                dst[0] = red_value;
                dst[1] = green_value;
                dst[2] = blue_value;
            } else if dst_cn == Layout::Rgba {
                dst[0] = red_value;
                dst[1] = green_value;
                dst[2] = blue_value;
                dst[3] = a;
            } else {
                return Err(CmsError::UnsupportedProfileConnection);
            }
        }

        Ok(())
    }
}
moxcms-0.7.7/src/conversions/gray2rgb_extended.rs000064400000000000000000000322701046102023000202550ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/
use crate::transform::PointeeSizeExpressible;
use crate::trc::ToneCurveEvaluator;
use crate::{CmsError, Layout, Rgb, TransformExecutor};
use num_traits::AsPrimitive;
use std::marker::PhantomData;

/// Gray source executor that evaluates the linearization and gamma curves per
/// sample instead of using lookup tables; one gamma curve serves every output
/// channel.
// NOTE(review): the evaluator trait-object bounds (`+ Send + Sync`) were
// reconstructed; they are needed for the executor to be boxed as
// `dyn TransformExecutor<T> + Sync + Send` — confirm against `crate::trc`.
struct TransformGrayOneToOneExecutor<T, const SRC_LAYOUT: u8, const DST_LAYOUT: u8> {
    linear_eval: Box<dyn ToneCurveEvaluator + Send + Sync>,
    gamma_eval: Box<dyn ToneCurveEvaluator + Send + Sync>,
    _phantom: PhantomData<T>,
    bit_depth: usize,
}

/// Builds an executor converting Gray / GrayAlpha input to Gray, GrayAlpha, Rgb or
/// Rgba output by evaluating `linear_eval` and then `gamma_eval` on every sample.
///
/// # Errors
///
/// Returns [`CmsError::UnsupportedProfileConnection`] when `src_layout` is not
/// Gray / GrayAlpha or `dst_layout` is not one of Gray, GrayAlpha, Rgb, Rgba.
pub(crate) fn make_gray_to_one_trc_extended<
    T: Copy + Default + PointeeSizeExpressible + 'static + Send + Sync + AsPrimitive<f32>,
>(
    src_layout: Layout,
    dst_layout: Layout,
    linear_eval: Box<dyn ToneCurveEvaluator + Send + Sync>,
    gamma_eval: Box<dyn ToneCurveEvaluator + Send + Sync>,
    bit_depth: usize,
) -> Result<Box<dyn TransformExecutor<T> + Sync + Send>, CmsError>
where
    u32: AsPrimitive<T>,
    f32: AsPrimitive<T>,
{
    if src_layout != Layout::Gray && src_layout != Layout::GrayAlpha {
        return Err(CmsError::UnsupportedProfileConnection);
    }

    // The const layouts must mirror the runtime layouts: the executor derives the
    // per-pixel channel counts of `src` and `dst` from them.
    match (src_layout, dst_layout) {
        (Layout::Gray, Layout::Rgb) => Ok(Box::new(TransformGrayOneToOneExecutor::<
            T,
            { Layout::Gray as u8 },
            { Layout::Rgb as u8 },
        > {
            linear_eval,
            gamma_eval,
            _phantom: PhantomData,
            bit_depth,
        })),
        (Layout::Gray, Layout::Rgba) => Ok(Box::new(TransformGrayOneToOneExecutor::<
            T,
            { Layout::Gray as u8 },
            { Layout::Rgba as u8 },
        > {
            linear_eval,
            gamma_eval,
            _phantom: PhantomData,
            bit_depth,
        })),
        (Layout::Gray, Layout::Gray) => Ok(Box::new(TransformGrayOneToOneExecutor::<
            T,
            { Layout::Gray as u8 },
            { Layout::Gray as u8 },
        > {
            linear_eval,
            gamma_eval,
            _phantom: PhantomData,
            bit_depth,
        })),
        (Layout::Gray, Layout::GrayAlpha) => Ok(Box::new(TransformGrayOneToOneExecutor::<
            T,
            { Layout::Gray as u8 },
            { Layout::GrayAlpha as u8 },
        > {
            linear_eval,
            gamma_eval,
            _phantom: PhantomData,
            bit_depth,
        })),
        (Layout::GrayAlpha, Layout::Rgb) => Ok(Box::new(TransformGrayOneToOneExecutor::<
            T,
            { Layout::GrayAlpha as u8 },
            { Layout::Rgb as u8 },
        > {
            linear_eval,
            gamma_eval,
            _phantom: PhantomData,
            bit_depth,
        })),
        (Layout::GrayAlpha, Layout::Rgba) => Ok(Box::new(TransformGrayOneToOneExecutor::<
            T,
            { Layout::GrayAlpha as u8 },
            { Layout::Rgba as u8 },
        > {
            linear_eval,
            gamma_eval,
            _phantom: PhantomData,
            bit_depth,
        })),
        (Layout::GrayAlpha, Layout::Gray) => Ok(Box::new(TransformGrayOneToOneExecutor::<
            T,
            { Layout::GrayAlpha as u8 },
            { Layout::Gray as u8 },
        > {
            linear_eval,
            gamma_eval,
            _phantom: PhantomData,
            bit_depth,
        })),
        (Layout::GrayAlpha, Layout::GrayAlpha) => Ok(Box::new(TransformGrayOneToOneExecutor::<
            T,
            { Layout::GrayAlpha as u8 },
            { Layout::GrayAlpha as u8 },
        > {
            linear_eval,
            gamma_eval,
            _phantom: PhantomData,
            bit_depth,
        })),
        _ => Err(CmsError::UnsupportedProfileConnection),
    }
}

impl<
    T: Copy + Default + PointeeSizeExpressible + 'static + AsPrimitive<f32>,
    const SRC_LAYOUT: u8,
    const DST_LAYOUT: u8,
> TransformExecutor<T> for TransformGrayOneToOneExecutor<T, SRC_LAYOUT, DST_LAYOUT>
where
    u32: AsPrimitive<T>,
    f32: AsPrimitive<T>,
{
    /// Evaluates `linear_eval` then `gamma_eval` on every gray sample.
    ///
    /// The result is written to the first destination channel and replicated into
    /// the second and third for Rgb / Rgba. Alpha is copied from the source when it
    /// is GrayAlpha, otherwise set to `(1 << bit_depth) - 1`.
    ///
    /// # Errors
    ///
    /// * [`CmsError::LaneSizeMismatch`] when the pixel counts differ.
    /// * [`CmsError::LaneMultipleOfChannels`] when a length is not a multiple of
    ///   its channel count.
    fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
        let src_cn = Layout::from(SRC_LAYOUT);
        let dst_cn = Layout::from(DST_LAYOUT);
        let src_channels = src_cn.channels();
        let dst_channels = dst_cn.channels();

        if src.len() / src_channels != dst.len() / dst_channels {
            return Err(CmsError::LaneSizeMismatch);
        }
        if src.len() % src_channels != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }
        if dst.len() % dst_channels != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }

        let is_gray_alpha = src_cn == Layout::GrayAlpha;

        let max_value: T = ((1u32 << self.bit_depth as u32) - 1u32).as_();

        for (src, dst) in src
            .chunks_exact(src_channels)
            .zip(dst.chunks_exact_mut(dst_channels))
        {
            let linear_value = self.linear_eval.evaluate_value(src[0].as_());
            let g = self.gamma_eval.evaluate_value(linear_value).as_();
            let a = if is_gray_alpha { src[1] } else { max_value };

            dst[0] = g;
            if dst_cn == Layout::GrayAlpha {
                dst[1] = a;
            } else if dst_cn == Layout::Rgb {
                dst[1] = g;
                dst[2] = g;
            } else if dst_cn == Layout::Rgba {
                dst[1] = g;
                dst[2] = g;
                dst[3] = a;
            }
        }

        Ok(())
    }
}

/// Gray source executor that evaluates the linearization curve per sample and then
/// a tristimulus gamma evaluator, so each RGB channel may be encoded differently.
// NOTE(review): the evaluator trait-object bounds (`+ Send + Sync`) were
// reconstructed — confirm against `crate::trc`.
struct TransformGrayToRgbExtendedExecutor<T, const SRC_LAYOUT: u8, const DST_LAYOUT: u8> {
    linear_eval: Box<dyn ToneCurveEvaluator + Send + Sync>,
    gamma_eval: Box<dyn ToneCurveEvaluator + Send + Sync>,
    _phantom: PhantomData<T>,
    bit_depth: usize,
}

/// Builds an executor converting Gray / GrayAlpha input to Rgb / Rgba output by
/// evaluating `linear_eval` per sample and `gamma_eval` on the resulting triple.
///
/// # Errors
///
/// Returns [`CmsError::UnsupportedProfileConnection`] when `src_layout` is not
/// Gray / GrayAlpha or `dst_layout` is not Rgb / Rgba.
pub(crate) fn make_gray_to_rgb_extended<
    T: Copy + Default + PointeeSizeExpressible + 'static + Send + Sync + AsPrimitive<f32>,
>(
    src_layout: Layout,
    dst_layout: Layout,
    linear_eval: Box<dyn ToneCurveEvaluator + Send + Sync>,
    gamma_eval: Box<dyn ToneCurveEvaluator + Send + Sync>,
    bit_depth: usize,
) -> Result<Box<dyn TransformExecutor<T> + Sync + Send>, CmsError>
where
    u32: AsPrimitive<T>,
    f32: AsPrimitive<T>,
{
    if src_layout != Layout::Gray && src_layout != Layout::GrayAlpha {
        return Err(CmsError::UnsupportedProfileConnection);
    }
    if dst_layout != Layout::Rgb && dst_layout != Layout::Rgba {
        return Err(CmsError::UnsupportedProfileConnection);
    }

    // The const layouts must mirror the runtime layouts: the executor derives the
    // per-pixel channel counts of `src` and `dst` from them.
    match (src_layout, dst_layout) {
        (Layout::Gray, Layout::Rgb) => Ok(Box::new(TransformGrayToRgbExtendedExecutor::<
            T,
            { Layout::Gray as u8 },
            { Layout::Rgb as u8 },
        > {
            linear_eval,
            gamma_eval,
            _phantom: PhantomData,
            bit_depth,
        })),
        (Layout::Gray, Layout::Rgba) => Ok(Box::new(TransformGrayToRgbExtendedExecutor::<
            T,
            { Layout::Gray as u8 },
            { Layout::Rgba as u8 },
        > {
            linear_eval,
            gamma_eval,
            _phantom: PhantomData,
            bit_depth,
        })),
        (Layout::GrayAlpha, Layout::Rgb) => Ok(Box::new(TransformGrayToRgbExtendedExecutor::<
            T,
            { Layout::GrayAlpha as u8 },
            { Layout::Rgb as u8 },
        > {
            linear_eval,
            gamma_eval,
            _phantom: PhantomData,
            bit_depth,
        })),
        (Layout::GrayAlpha, Layout::Rgba) => Ok(Box::new(TransformGrayToRgbExtendedExecutor::<
            T,
            { Layout::GrayAlpha as u8 },
            { Layout::Rgba as u8 },
        > {
            linear_eval,
            gamma_eval,
            _phantom: PhantomData,
            bit_depth,
        })),
        _ => Err(CmsError::UnsupportedProfileConnection),
    }
}

impl<
    T: Copy + Default + PointeeSizeExpressible + 'static + AsPrimitive<f32>,
    const SRC_LAYOUT: u8,
    const DST_LAYOUT: u8,
> TransformExecutor<T> for TransformGrayToRgbExtendedExecutor<T, SRC_LAYOUT, DST_LAYOUT>
where
    u32: AsPrimitive<T>,
    f32: AsPrimitive<T>,
{
    /// Linearizes every gray sample, feeds the value as an equal R/G/B triple to
    /// `gamma_eval.evaluate_tristimulus` and stores the three results.
    ///
    /// Alpha is copied from the source when it is GrayAlpha, otherwise set to
    /// `(1 << bit_depth) - 1`.
    ///
    /// # Errors
    ///
    /// * [`CmsError::LaneSizeMismatch`] when the pixel counts differ.
    /// * [`CmsError::LaneMultipleOfChannels`] when a length is not a multiple of
    ///   its channel count.
    /// * [`CmsError::UnsupportedProfileConnection`] when a pixel is processed and
    ///   the destination layout is neither Rgb nor Rgba.
    fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
        let src_cn = Layout::from(SRC_LAYOUT);
        let dst_cn = Layout::from(DST_LAYOUT);
        let src_channels = src_cn.channels();
        let dst_channels = dst_cn.channels();

        if src.len() / src_channels != dst.len() / dst_channels {
            return Err(CmsError::LaneSizeMismatch);
        }
        if src.len() % src_channels != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }
        if dst.len() % dst_channels != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }

        let is_gray_alpha = src_cn == Layout::GrayAlpha;

        let max_value: T = ((1u32 << self.bit_depth as u32) - 1u32).as_();

        for (src, dst) in src
            .chunks_exact(src_channels)
            .zip(dst.chunks_exact_mut(dst_channels))
        {
            let linear_value = self.linear_eval.evaluate_value(src[0].as_());
            let a = if is_gray_alpha { src[1] } else { max_value };

            let tristimulus = self.gamma_eval.evaluate_tristimulus(Rgb::new(
                linear_value,
                linear_value,
                linear_value,
            ));

            let red_value = tristimulus.r.as_();
            let green_value = tristimulus.g.as_();
            let blue_value = tristimulus.b.as_();

            if dst_cn == Layout::Rgb {
                dst[0] = red_value;
                dst[1] = green_value;
                dst[2] = blue_value;
            } else if dst_cn == Layout::Rgba {
                dst[0] = red_value;
                dst[1] = green_value;
                dst[2] = blue_value;
                dst[3] = a;
            } else {
                return Err(CmsError::UnsupportedProfileConnection);
            }
        }

        Ok(())
    }
}
moxcms-0.7.7/src/conversions/interpolator.rs000064400000000000000000000453561046102023000174110ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved.
* // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/
#![allow(dead_code)]
use crate::conversions::lut_transforms::LUT_SAMPLING;
use crate::math::{FusedMultiplyAdd, FusedMultiplyNegAdd};
use crate::{Vector3f, Vector4f};
use std::ops::{Add, Mul, Sub};

// NOTE(review): angle-bracketed generic lists (e.g. `BarycentricWeight<..>`,
// `Fetcher<..>`, the const parameters `GRID_SIZE` / `BINS` / `SRC_BP`, operator
// trait arguments) appear to be missing from this listing. Tokens are kept as
// found and must be restored before this builds.

/// Marker type selecting tetrahedral interpolation.
#[cfg(feature = "options")]
pub(crate) struct Tetrahedral {}

/// Marker type selecting pyramidal interpolation.
#[cfg(feature = "options")]
pub(crate) struct Pyramidal {}

/// Marker type selecting prismatic interpolation.
#[cfg(feature = "options")]
pub(crate) struct Prismatic {}

/// Marker type selecting trilinear interpolation.
pub(crate) struct Trilinear {}

/// Precomputed position of one input level on a LUT grid axis.
#[derive(Debug, Copy, Clone, Default)]
pub(crate) struct BarycentricWeight {
    /// Lower grid index.
    pub x: i32,
    /// Upper neighbour index, clamped to `GRID_SIZE - 1`.
    pub x_n: i32,
    /// Fractional distance from `x` towards `x_n`.
    pub w: V,
}

/// Floating-point weights (`w` is stored as `f32`).
impl BarycentricWeight {
    /// Builds the 256-entry weight table; entry `index` maps
    /// `index / LUT_SAMPLING` onto the `GRID_SIZE` grid.
    pub(crate) fn create_ranged_256() -> Box<[BarycentricWeight; 256]> {
        let mut weights = Box::new([BarycentricWeight::default(); 256]);
        for (index, weight) in weights.iter_mut().enumerate() {
            const SCALE: f32 = 1.0 / LUT_SAMPLING as f32;
            let x: i32 = index as i32 * (GRID_SIZE as i32 - 1) / LUT_SAMPLING as i32;

            let x_n: i32 = (x + 1).min(GRID_SIZE as i32 - 1);

            let scale = (GRID_SIZE as i32 - 1) as f32 * SCALE;

            let dr = index as f32 * scale - x as f32;
            *weight = BarycentricWeight { x, x_n, w: dr };
        }
        weights
    }

    /// Builds a weight table with `BINS` populated entries. The allocation is
    /// always 65536 entries; entries at and beyond `BINS` keep their default value.
    #[cfg(feature = "options")]
    pub(crate) fn create_binned() -> Box<[BarycentricWeight; 65536]> {
        let mut weights = Box::new([BarycentricWeight::::default(); 65536]);
        let b_scale: f32 = 1.0 / (BINS - 1) as f32;
        for (index, weight) in weights.iter_mut().enumerate().take(BINS) {
            let x: i32 = (index as f32 * (GRID_SIZE as i32 - 1) as f32 * b_scale).floor() as i32;

            let x_n: i32 = (x + 1).min(GRID_SIZE as i32 - 1);

            let scale = (GRID_SIZE as i32 - 1) as f32 * b_scale;

            let dr = index as f32 * scale - x as f32;
            *weight = BarycentricWeight { x, x_n, w: dr };
        }
        weights
    }
}

/// Fixed-point weights: `w` is the fraction scaled by `2^15 - 1` and stored as `i16`.
#[allow(dead_code)]
impl BarycentricWeight {
    /// Builds the 256-entry weight table with quantised fractions.
    pub(crate) fn create_ranged_256() -> Box<[BarycentricWeight; 256]> {
        let mut weights = Box::new([BarycentricWeight::default(); 256]);
        for (index, weight) in weights.iter_mut().enumerate() {
            const SCALE: f32 = 1.0 / LUT_SAMPLING as f32;
            let x: i32 = index as i32 * (GRID_SIZE as i32 - 1) / LUT_SAMPLING as i32;

            let x_n: i32 = (x + 1).min(GRID_SIZE as i32 - 1);

            let scale = (GRID_SIZE as i32 - 1) as f32 * SCALE;

            const Q: f32 = ((1i32 << 15) - 1) as f32;

            let dr = ((index as f32 * scale - x as f32) * Q)
                .round()
                .min(i16::MAX as f32)
                .max(-i16::MAX as f32) as i16;
            *weight = BarycentricWeight { x, x_n, w: dr };
        }
        weights
    }

    /// Builds a weight table with `BINS` populated entries and quantised
    /// fractions. The allocation is always 65536 entries.
    #[cfg(feature = "options")]
    pub(crate) fn create_binned() -> Box<[BarycentricWeight; 65536]> {
        let mut weights = Box::new([BarycentricWeight::::default(); 65536]);
        let b_scale: f32 = 1.0 / (BINS - 1) as f32;
        for (index, weight) in weights.iter_mut().enumerate().take(BINS) {
            let x: i32 = (index as f32 * (GRID_SIZE as i32 - 1) as f32 * b_scale).floor() as i32;

            let x_n: i32 = (x + 1).min(GRID_SIZE as i32 - 1);

            let scale = (GRID_SIZE as i32 - 1) as f32 * b_scale;

            const Q: f32 = ((1i32 << 15) - 1) as f32;

            let dr = ((index as f32 * scale - x as f32) * Q)
                .round()
                .min(i16::MAX as f32)
                .max(-i16::MAX as f32) as i16;
            *weight = BarycentricWeight { x, x_n, w: dr };
        }
        weights
    }
}

/// Reads one lattice node of the cube at grid coordinates `(x, y, z)`.
trait Fetcher {
    fn fetch(&self, x: i32, y: i32, z: i32) -> T;
}

/// Fetcher over a cube storing three `f32` values per node.
struct TetrahedralFetchVector3f<'a, const GRID_SIZE: usize> {
    cube: &'a [f32],
}

/// Interpolation of a 3-input cube with either three (`inter3`) or four
/// (`inter4`) output values per node.
pub(crate) trait MultidimensionalInterpolation {
    fn inter3(
        &self,
        cube: &[f32],
        lut_r: &BarycentricWeight,
        lut_g: &BarycentricWeight,
        lut_b: &BarycentricWeight,
    ) -> Vector3f;
    fn inter4(
        &self,
        cube: &[f32],
        lut_r: &BarycentricWeight,
        lut_g: &BarycentricWeight,
        lut_b: &BarycentricWeight,
    ) -> Vector4f;
}

impl Fetcher for TetrahedralFetchVector3f<'_, GRID_SIZE> {
    #[inline(always)]
    fn fetch(&self, x: i32, y: i32, z: i32) -> Vector3f {
        // Node index is x-major, then y, then z; three floats per node.
        let offset = (x as u32 * (GRID_SIZE as u32 * GRID_SIZE as u32)
            + y as u32 * GRID_SIZE as u32
            + z as u32) as usize
            * 3;
        let jx = &self.cube[offset..offset + 3];
        Vector3f {
            v: [jx[0], jx[1], jx[2]],
        }
    }
}

/// Fetcher over a cube storing four `f32` values per node.
struct TetrahedralFetchVector4f<'a, const GRID_SIZE: usize> {
    cube: &'a [f32],
}

impl Fetcher for TetrahedralFetchVector4f<'_, GRID_SIZE> {
    #[inline(always)]
    fn fetch(&self, x: i32, y: i32, z: i32) -> Vector4f {
        // Node index is x-major, then y, then z; four floats per node.
        let offset = (x as u32 * (GRID_SIZE as u32 * GRID_SIZE as u32)
            + y as u32 * GRID_SIZE as u32
            + z as u32) as usize
            * 4;
        let jx = &self.cube[offset..offset + 4];
        Vector4f {
            v: [jx[0], jx[1], jx[2], jx[3]],
        }
    }
}

#[cfg(feature = "options")]
impl Tetrahedral {
    /// Tetrahedral interpolation: the ordering of the three fractions picks one
    /// of six tetrahedra, and the result is `c0 + c1*rx + c2*ry + c3*rz`.
    #[inline]
    fn interpolate<
        T: Copy
            + Sub
            + Mul
            + Mul
            + Add
            + From
            + FusedMultiplyAdd,
    >(
        &self,
        lut_r: &BarycentricWeight,
        lut_g: &BarycentricWeight,
        lut_b: &BarycentricWeight,
        r: impl Fetcher,
    ) -> T {
        let x: i32 = lut_r.x;
        let y: i32 = lut_g.x;
        let z: i32 = lut_b.x;

        let x_n: i32 = lut_r.x_n;
        let y_n: i32 = lut_g.x_n;
        let z_n: i32 = lut_b.x_n;

        let rx = lut_r.w;
        let ry = lut_g.w;
        let rz = lut_b.w;

        let c0 = r.fetch(x, y, z);
        let c2;
        let c1;
        let c3;
        if rx >= ry {
            if ry >= rz {
                //rx >= ry && ry >= rz
                c1 = r.fetch(x_n, y, z) - c0;
                c2 = r.fetch(x_n, y_n, z) - r.fetch(x_n, y, z);
                c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
            } else if rx >= rz {
                //rx >= rz && rz >= ry
                c1 = r.fetch(x_n, y, z) - c0;
                c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
                c3 = r.fetch(x_n, y, z_n) - r.fetch(x_n, y, z);
            } else {
                //rz > rx && rx >= ry
                c1 = r.fetch(x_n, y, z_n) - r.fetch(x, y, z_n);
                c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
                c3 = r.fetch(x, y, z_n) - c0;
            }
        } else if rx >= rz {
            //ry > rx && rx >= rz
            c1 = r.fetch(x_n, y_n, z) - r.fetch(x, y_n, z);
            c2 = r.fetch(x, y_n, z) - c0;
            c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
        } else if ry >= rz {
            //ry >= rz && rz > rx
            c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
            c2 = r.fetch(x, y_n, z) - c0;
            c3 = r.fetch(x, y_n, z_n) - r.fetch(x, y_n, z);
        } else {
            //rz > ry && ry > rx
            c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
            c2 = r.fetch(x, y_n, z_n) - r.fetch(x, y, z_n);
            c3 = r.fetch(x, y, z_n) - c0;
        }
        let s0 = c0.mla(c1, T::from(rx));
        let s1 = s0.mla(c2, T::from(ry));
        s1.mla(c3, T::from(rz))
    }
}

// Implements `MultidimensionalInterpolation` for an interpolator type by
// forwarding to its inherent `interpolate` with the 3- or 4-wide fetcher.
macro_rules! define_md_inter {
    ($interpolator: ident) => {
        impl MultidimensionalInterpolation for $interpolator {
            fn inter3(
                &self,
                cube: &[f32],
                lut_r: &BarycentricWeight,
                lut_g: &BarycentricWeight,
                lut_b: &BarycentricWeight,
            ) -> Vector3f {
                self.interpolate::(
                    lut_r,
                    lut_g,
                    lut_b,
                    TetrahedralFetchVector3f:: { cube },
                )
            }

            fn inter4(
                &self,
                cube: &[f32],
                lut_r: &BarycentricWeight,
                lut_g: &BarycentricWeight,
                lut_b: &BarycentricWeight,
            ) -> Vector4f {
                self.interpolate::(
                    lut_r,
                    lut_g,
                    lut_b,
                    TetrahedralFetchVector4f:: { cube },
                )
            }
        }
    };
}

#[cfg(feature = "options")]
define_md_inter!(Tetrahedral);
#[cfg(feature = "options")]
define_md_inter!(Pyramidal);
#[cfg(feature = "options")]
define_md_inter!(Prismatic);
define_md_inter!(Trilinear);

#[cfg(feature = "options")]
impl Pyramidal {
    /// Pyramidal interpolation: one of three pyramids is chosen by comparing the
    /// fractions, then three linear terms and one bilinear term are accumulated.
    #[inline]
    fn interpolate<
        T: Copy
            + Sub
            + Mul
            + Mul
            + Add
            + From
            + FusedMultiplyAdd,
    >(
        &self,
        lut_r: &BarycentricWeight,
        lut_g: &BarycentricWeight,
        lut_b: &BarycentricWeight,
        r: impl Fetcher,
    ) -> T {
        let x: i32 = lut_r.x;
        let y: i32 = lut_g.x;
        let z: i32 = lut_b.x;

        let x_n: i32 = lut_r.x_n;
        let y_n: i32 = lut_g.x_n;
        let z_n: i32 = lut_b.x_n;

        let dr = lut_r.w;
        let dg = lut_g.w;
        let db = lut_b.w;

        let c0 = r.fetch(x, y, z);

        if dr > db && dg > db {
            let x0 = r.fetch(x_n, y_n, z_n);
            let x1 = r.fetch(x_n, y_n, z);
            let x2 = r.fetch(x_n, y, z);
            let x3 = r.fetch(x, y_n, z);

            let c1 = x0 - x1;
            let c2 = x2 - c0;
            let c3 = x3 - c0;
            let c4 = c0 - x3 - x2 + x1;

            let s0 = c0.mla(c1, T::from(db));
            let s1 = s0.mla(c2, T::from(dr));
            let s2 = s1.mla(c3, T::from(dg));
            s2.mla(c4, T::from(dr * dg))
        } else if db > dr && dg > dr {
            let x0 = r.fetch(x, y, z_n);
            let x1 = r.fetch(x_n, y_n, z_n);
            let x2 = r.fetch(x, y_n, z_n);
            let x3 = r.fetch(x, y_n, z);

            let c1 = x0 - c0;
            let c2 = x1 - x2;
            let c3 = x3 - c0;
            let c4 = c0 - x3 - x0 + x2;

            let s0 = c0.mla(c1, T::from(db));
            let s1 = s0.mla(c2, T::from(dr));
            let s2 = s1.mla(c3, T::from(dg));
            s2.mla(c4, T::from(dg * db))
        } else {
            let x0 = r.fetch(x, y, z_n);
            let x1 = r.fetch(x_n, y, z);
            let x2 = r.fetch(x_n, y, z_n);
            let x3 = r.fetch(x_n, y_n, z_n);

            let c1 = x0 - c0;
            let c2 = x1 - c0;
            let c3 = x3 - x2;
            let c4 = c0 - x1 - x0 + x2;

            let s0 = c0.mla(c1, T::from(db));
            let s1 = s0.mla(c2, T::from(dr));
            let s2 = s1.mla(c3, T::from(dg));
            s2.mla(c4, T::from(db * dr))
        }
    }
}

#[cfg(feature = "options")]
impl Prismatic {
    /// Prismatic interpolation: one of two prisms is chosen by `db >= dr`, then
    /// three linear and two bilinear terms are accumulated.
    #[inline(always)]
    fn interpolate<
        T: Copy
            + Sub
            + Mul
            + Mul
            + Add
            + From
            + FusedMultiplyAdd,
    >(
        &self,
        lut_r: &BarycentricWeight,
        lut_g: &BarycentricWeight,
        lut_b: &BarycentricWeight,
        r: impl Fetcher,
    ) -> T {
        let x: i32 = lut_r.x;
        let y: i32 = lut_g.x;
        let z: i32 = lut_b.x;

        let x_n: i32 = lut_r.x_n;
        let y_n: i32 = lut_g.x_n;
        let z_n: i32 = lut_b.x_n;

        let dr = lut_r.w;
        let dg = lut_g.w;
        let db = lut_b.w;

        let c0 = r.fetch(x, y, z);

        if db >= dr {
            let x0 = r.fetch(x, y, z_n);
            let x1 = r.fetch(x_n, y, z_n);
            let x2 = r.fetch(x, y_n, z);
            let x3 = r.fetch(x, y_n, z_n);
            let x4 = r.fetch(x_n, y_n, z_n);

            let c1 = x0 - c0;
            let c2 = x1 - x0;
            let c3 = x2 - c0;
            let c4 = c0 - x2 - x0 + x3;
            let c5 = x0 - x3 - x1 + x4;

            let s0 = c0.mla(c1, T::from(db));
            let s1 = s0.mla(c2, T::from(dr));
            let s2 = s1.mla(c3, T::from(dg));
            let s3 = s2.mla(c4, T::from(dg * db));
            s3.mla(c5, T::from(dr * dg))
        } else {
            let x0 = r.fetch(x_n, y, z);
            let x1 = r.fetch(x_n, y, z_n);
            let x2 = r.fetch(x, y_n, z);
            let x3 = r.fetch(x_n, y_n, z);
            let x4 = r.fetch(x_n, y_n, z_n);

            let c1 = x1 - x0;
            let c2 = x0 - c0;
            let c3 = x2 - c0;
            let c4 = x0 - x3 - x1 + x4;
            let c5 = c0 - x2 - x0 + x3;

            let s0 = c0.mla(c1, T::from(db));
            let s1 = s0.mla(c2, T::from(dr));
            let s2 = s1.mla(c3, T::from(dg));
            let s3 = s2.mla(c4, T::from(dg * db));
            s3.mla(c5, T::from(dr * dg))
        }
    }
}

impl Trilinear {
    /// Trilinear interpolation over the eight corners of the enclosing cell:
    /// four blends along the first axis, two along the second, one along the third.
    #[inline(always)]
    fn interpolate<
        T: Copy
            + Sub
            + Mul
            + Mul
            + Add
            + From
            + FusedMultiplyAdd
            + FusedMultiplyNegAdd,
    >(
        &self,
        lut_r: &BarycentricWeight,
        lut_g: &BarycentricWeight,
        lut_b: &BarycentricWeight,
        r: impl Fetcher,
    ) -> T {
        let x: i32 = lut_r.x;
        let y: i32 = lut_g.x;
        let z: i32 = lut_b.x;

        let x_n: i32 = lut_r.x_n;
        let y_n: i32 = lut_g.x_n;
        let z_n: i32 = lut_b.x_n;

        let dr = lut_r.w;
        let dg = lut_g.w;
        let db = lut_b.w;

        let w0 = T::from(dr);
        let w1 = T::from(dg);
        let w2 = T::from(db);

        let c000 = r.fetch(x, y, z);
        let c100 = r.fetch(x_n, y, z);
        let c010 = r.fetch(x, y_n, z);
        let c110 = r.fetch(x_n, y_n, z);
        let c001 = r.fetch(x, y, z_n);
        let c101 = r.fetch(x_n, y, z_n);
        let c011 = r.fetch(x, y_n, z_n);
        let c111 = r.fetch(x_n, y_n, z_n);

        let dx = T::from(dr);

        let c00 = c000.neg_mla(c000, dx).mla(c100, w0);
        let c10 = c010.neg_mla(c010, dx).mla(c110, w0);
        let c01 = c001.neg_mla(c001, dx).mla(c101, w0);
        let c11 = c011.neg_mla(c011, dx).mla(c111, w0);

        let dy = T::from(dg);

        let c0 = c00.neg_mla(c00, dy).mla(c10, w1);
        let c1 = c01.neg_mla(c01, dy).mla(c11, w1);

        let dz = T::from(db);

        c0.neg_mla(c0, dz).mla(c1, w2)
    }
}

/// Converts a channel sample of one type into the integer type `U`.
/// Presumably the result indexes a barycentric weight table of 256 or `BINS`
/// entries (confirm against callers).
pub(crate) trait LutBarycentricReduction {
    fn reduce(v: T) -> U;
}

/// `u8 -> u8`: identity.
impl LutBarycentricReduction for () {
    #[inline(always)]
    fn reduce(v: u8) -> u8 {
        v
    }
}

/// `u8 -> u16`: byte replication, shifted down by two bits for 16384 bins.
/// Only `BINS == 65536` and `BINS == 16384` are implemented.
impl LutBarycentricReduction for () {
    #[inline(always)]
    fn reduce(v: u8) -> u16 {
        if BINS == 65536 {
            return u16::from_ne_bytes([v, v]);
        }
        if BINS == 16384 {
            return u16::from_ne_bytes([v, v]) >> 2;
        }
        unimplemented!()
    }
}

/// `f32 -> u8`: scales by 255, rounds and clamps to `0..=255`.
impl LutBarycentricReduction for () {
    #[inline(always)]
    fn reduce(v: f32) -> u8 {
        (v * 255.).round().min(255.).max(0.) as u8
    }
}

/// `f32 -> u16`: scales by `BINS - 1`, rounds and clamps to `0..=BINS - 1`.
impl LutBarycentricReduction for () {
    #[inline(always)]
    fn reduce(v: f32) -> u16 {
        let scale = (BINS - 1) as f32;
        (v * scale).round().min(scale).max(0.) as u16
    }
}

/// `f64 -> u8`: scales by 255, rounds and clamps to `0..=255`.
impl LutBarycentricReduction for () {
    #[inline(always)]
    fn reduce(v: f64) -> u8 {
        (v * 255.).round().min(255.).max(0.) as u8
    }
}

/// `f64 -> u16`: scales by `BINS - 1`, rounds and clamps to `0..=BINS - 1`.
impl LutBarycentricReduction for () {
    #[inline(always)]
    fn reduce(v: f64) -> u16 {
        let scale = (BINS - 1) as f64;
        (v * scale).round().min(scale).max(0.) as u16
    }
}

/// `u16 -> u16`: rescales a `SRC_BP`-bit sample to `0..=BINS - 1`.
impl LutBarycentricReduction for () {
    #[inline(always)]
    fn reduce(v: u16) -> u16 {
        let src_scale = 1. / ((1 << SRC_BP) - 1) as f32;
        let scale = src_scale * (BINS - 1) as f32;
        // Clamp to the highest bin. `scale` is only the conversion ratio, so
        // clamping to it would collapse almost every sample to a tiny value.
        let max_bin = (BINS - 1) as f32;
        (v as f32 * scale).round().min(max_bin).max(0.) as u16
    }
}

/// `u16 -> u8`: keeps the top eight bits of a `SRC_BP`-bit sample.
impl LutBarycentricReduction for () {
    #[inline(always)]
    fn reduce(v: u16) -> u8 {
        let shift = SRC_BP as u16 - 8;
        if SRC_BP == 16 {
            (v >> 8) as u8
        } else {
            (v >> shift).min(255) as u8
        }
    }
}
moxcms-0.7.7/src/conversions/katana/finalizers.rs000064400000000000000000000114451046102023000202640ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::katana::KatanaPostFinalizationStage;
use crate::{CmsError, DataColorSpace, Layout, PointeeSizeExpressible};
use num_traits::AsPrimitive;
use std::marker::PhantomData;

// NOTE(review): angle-bracketed generic lists (on `PhantomData`, `AsPrimitive`,
// the `impl` headers, ...) appear to be missing from this listing; tokens are
// kept exactly as found.

/// Post-finalization stage that writes a constant, fully opaque alpha value
/// into the destination.
pub(crate) struct InjectAlphaStage {
    pub(crate) dst_layout: Layout,
    pub(crate) target_color_space: DataColorSpace,
    pub(crate) _phantom: PhantomData,
    /// Used to derive the opaque value `(1 << bit_depth) - 1` when `T::FINITE`.
    pub(crate) bit_depth: usize,
}

/// Post-finalization stage that copies the alpha channel from the source
/// buffer into the destination buffer.
pub(crate) struct CopyAlphaStage {
    pub(crate) src_layout: Layout,
    pub(crate) dst_layout: Layout,
    pub(crate) target_color_space: DataColorSpace,
    pub(crate) _phantom: PhantomData,
}

impl + PointeeSizeExpressible + Send + Sync>
    KatanaPostFinalizationStage for InjectAlphaStage
where
    f32: AsPrimitive,
{
    /// Sets the alpha channel of every destination pixel to the opaque value.
    ///
    /// Only two combinations are handled: `Rgba` output in the `Rgb` colour
    /// space (channel 3) and `GrayAlpha` output in the `Gray` colour space
    /// (channel 1). Any other combination leaves `dst` untouched. The source
    /// slice is ignored. Always returns `Ok(())`.
    fn finalize(&self, _: &[T], dst: &mut [T]) -> Result<(), CmsError> {
        // Opaque alpha: the integer maximum when `T::FINITE`, otherwise 1.0.
        let norm_value: T = (if T::FINITE {
            ((1u32 << self.bit_depth) - 1) as f32
        } else {
            1.0
        })
        .as_();
        if self.dst_layout == Layout::Rgba && self.target_color_space == DataColorSpace::Rgb {
            for dst in dst.chunks_exact_mut(self.dst_layout.channels()) {
                dst[3] = norm_value;
            }
        } else if self.dst_layout == Layout::GrayAlpha
            && self.target_color_space == DataColorSpace::Gray
        {
            for dst in dst.chunks_exact_mut(self.dst_layout.channels()) {
                dst[1] = norm_value;
            }
        }
        Ok(())
    }
}

impl + PointeeSizeExpressible + Send + Sync>
    KatanaPostFinalizationStage for CopyAlphaStage
where
    f32: AsPrimitive,
{
    /// Copies alpha from `src` to `dst`, pixel by pixel.
    ///
    /// Handled destinations are `Rgba` in the `Rgb` colour space (alpha in
    /// channel 3) and `GrayAlpha` in the `Gray` colour space (alpha in
    /// channel 1). Handled sources are `Rgba` (alpha in channel 3) and
    /// `GrayAlpha` (alpha in channel 1). Any other combination leaves `dst`
    /// untouched. Buffer lengths are not validated here; the zipped chunk
    /// iterators stop at the shorter buffer. Always returns `Ok(())`.
    fn finalize(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
        if self.dst_layout == Layout::Rgba && self.target_color_space == DataColorSpace::Rgb {
            if self.src_layout == Layout::Rgba {
                for (src, dst) in src
                    .chunks_exact(self.src_layout.channels())
                    .zip(dst.chunks_exact_mut(self.dst_layout.channels()))
                {
                    dst[3] = src[3];
                }
            } else if self.src_layout == Layout::GrayAlpha {
                for (src, dst) in src
                    .chunks_exact(self.src_layout.channels())
                    .zip(dst.chunks_exact_mut(self.dst_layout.channels()))
                {
                    dst[3] = src[1];
                }
            }
        } else if self.dst_layout == Layout::GrayAlpha
            && self.target_color_space == DataColorSpace::Gray
        {
            if self.src_layout == Layout::Rgba {
                for (src, dst) in src
                    .chunks_exact(self.src_layout.channels())
                    .zip(dst.chunks_exact_mut(self.dst_layout.channels()))
                {
                    dst[1] = src[3];
                }
            } else if self.src_layout == Layout::GrayAlpha {
                for (src, dst) in src
                    .chunks_exact(self.src_layout.channels())
                    .zip(dst.chunks_exact_mut(self.dst_layout.channels()))
                {
                    dst[1] = src[1];
                }
            }
        }
        Ok(())
    }
}
moxcms-0.7.7/src/conversions/katana/md3x3.rs000064400000000000000000000424141046102023000170540ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ use crate::conversions::katana::{KatanaFinalStage, KatanaInitialStage}; use crate::mlaf::mlaf; use crate::safe_math::SafeMul; use crate::trc::lut_interp_linear_float; use crate::{ CmsError, Cube, DataColorSpace, InterpolationMethod, LutMultidimensionalType, MalformedSize, Matrix3d, Matrix3f, PointeeSizeExpressible, TransformOptions, Vector3d, Vector3f, }; use num_traits::AsPrimitive; use std::marker::PhantomData; #[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Debug)] pub(crate) enum MultidimensionalDirection { DeviceToPcs, PcsToDevice, } struct Multidimensional3x3< T: Copy + Default + AsPrimitive + PointeeSizeExpressible + Send + Sync, > { a_curves: Option; 3]>>, m_curves: Option; 3]>>, b_curves: Option; 3]>>, clut: Option>, matrix: Matrix3f, bias: Vector3f, direction: MultidimensionalDirection, options: TransformOptions, pcs: DataColorSpace, grid_size: [u8; 3], _phantom: PhantomData, bit_depth: usize, } impl + PointeeSizeExpressible + Send + Sync> Multidimensional3x3 { fn execute_matrix_stage(&self, dst: &mut [f32]) { let m = self.matrix; let b = self.bias; if !m.test_equality(Matrix3f::IDENTITY) || !b.eq(&Vector3f::default()) { for dst in dst.chunks_exact_mut(3) { let x = dst[0]; let y = dst[1]; let z = dst[2]; dst[0] = mlaf(mlaf(mlaf(b.v[0], x, m.v[0][0]), y, m.v[0][1]), z, m.v[0][2]); dst[1] = mlaf(mlaf(mlaf(b.v[1], x, m.v[1][0]), y, m.v[1][1]), z, m.v[1][2]); dst[2] = mlaf(mlaf(mlaf(b.v[2], x, m.v[2][0]), y, 
m.v[2][1]), z, m.v[2][2]); } } } fn execute_simple_curves(&self, dst: &mut [f32], curves: &[Vec; 3]) { let curve0 = &curves[0]; let curve1 = &curves[1]; let curve2 = &curves[2]; for dst in dst.chunks_exact_mut(3) { let a0 = dst[0]; let a1 = dst[1]; let a2 = dst[2]; let b0 = lut_interp_linear_float(a0, curve0); let b1 = lut_interp_linear_float(a1, curve1); let b2 = lut_interp_linear_float(a2, curve2); dst[0] = b0; dst[1] = b1; dst[2] = b2; } } fn to_pcs_impl Vector3f>( &self, input: &[T], dst: &mut [f32], fetch: Fetch, ) -> Result<(), CmsError> { let norm_value = if T::FINITE { 1.0 / ((1u32 << self.bit_depth) - 1) as f32 } else { 1.0 }; assert_eq!( self.direction, MultidimensionalDirection::DeviceToPcs, "PCS to device cannot be used on `to pcs` stage" ); // A -> B // OR B - A A - curves stage if let (Some(a_curves), Some(clut)) = (self.a_curves.as_ref(), self.clut.as_ref()) { if !clut.is_empty() { let curve0 = &a_curves[0]; let curve1 = &a_curves[1]; let curve2 = &a_curves[2]; for (src, dst) in input.chunks_exact(3).zip(dst.chunks_exact_mut(3)) { let b0 = lut_interp_linear_float(src[0].as_() * norm_value, curve0); let b1 = lut_interp_linear_float(src[1].as_() * norm_value, curve1); let b2 = lut_interp_linear_float(src[2].as_() * norm_value, curve2); let interpolated = fetch(b0, b1, b2); dst[0] = interpolated.v[0]; dst[1] = interpolated.v[1]; dst[2] = interpolated.v[2]; } } else { for (src, dst) in input.chunks_exact(3).zip(dst.chunks_exact_mut(3)) { dst[0] = src[0].as_() * norm_value; dst[1] = src[1].as_() * norm_value; dst[2] = src[2].as_() * norm_value; } } } else { for (src, dst) in input.chunks_exact(3).zip(dst.chunks_exact_mut(3)) { dst[0] = src[0].as_() * norm_value; dst[1] = src[1].as_() * norm_value; dst[2] = src[2].as_() * norm_value; } } // Matrix stage if let Some(m_curves) = self.m_curves.as_ref() { self.execute_simple_curves(dst, m_curves); self.execute_matrix_stage(dst); } // B-curves is mandatory if let Some(b_curves) = &self.b_curves.as_ref() { 
self.execute_simple_curves(dst, b_curves); } Ok(()) } } impl + PointeeSizeExpressible + Send + Sync> KatanaInitialStage for Multidimensional3x3 { fn to_pcs(&self, input: &[T]) -> Result, CmsError> { if input.len() % 3 != 0 { return Err(CmsError::LaneMultipleOfChannels); } let fixed_new_clut = Vec::new(); let new_clut = self.clut.as_ref().unwrap_or(&fixed_new_clut); let lut = Cube::new_cube(new_clut, self.grid_size); let mut new_dst = vec![0f32; input.len()]; // If PCS is LAB then linear interpolation should be used if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz { self.to_pcs_impl(input, &mut new_dst, |x, y, z| lut.trilinear_vec3(x, y, z))?; return Ok(new_dst); } match self.options.interpolation_method { #[cfg(feature = "options")] InterpolationMethod::Tetrahedral => { self.to_pcs_impl(input, &mut new_dst, |x, y, z| lut.tetra_vec3(x, y, z))?; } #[cfg(feature = "options")] InterpolationMethod::Pyramid => { self.to_pcs_impl(input, &mut new_dst, |x, y, z| lut.pyramid_vec3(x, y, z))?; } #[cfg(feature = "options")] InterpolationMethod::Prism => { self.to_pcs_impl(input, &mut new_dst, |x, y, z| lut.prism_vec3(x, y, z))?; } InterpolationMethod::Linear => { self.to_pcs_impl(input, &mut new_dst, |x, y, z| lut.trilinear_vec3(x, y, z))?; } } Ok(new_dst) } } impl + PointeeSizeExpressible + Send + Sync> Multidimensional3x3 where f32: AsPrimitive, { fn to_output_impl Vector3f>( &self, src: &mut [f32], dst: &mut [T], fetch: Fetch, ) -> Result<(), CmsError> { let norm_value = if T::FINITE { ((1u32 << self.bit_depth) - 1) as f32 } else { 1.0 }; assert_eq!( self.direction, MultidimensionalDirection::PcsToDevice, "Device to PCS cannot be used on `to output` stage" ); if let Some(b_curves) = &self.b_curves.as_ref() { self.execute_simple_curves(src, b_curves); } // Matrix stage if let Some(m_curves) = self.m_curves.as_ref() { self.execute_matrix_stage(src); self.execute_simple_curves(src, m_curves); } if let (Some(a_curves), Some(clut)) = (self.a_curves.as_ref(), 
self.clut.as_ref()) { if !clut.is_empty() { let curve0 = &a_curves[0]; let curve1 = &a_curves[1]; let curve2 = &a_curves[2]; for (src, dst) in src.chunks_exact(3).zip(dst.chunks_exact_mut(3)) { let b0 = lut_interp_linear_float(src[0], curve0); let b1 = lut_interp_linear_float(src[1], curve1); let b2 = lut_interp_linear_float(src[2], curve2); let interpolated = fetch(b0, b1, b2); if T::FINITE { dst[0] = (interpolated.v[0] * norm_value) .round() .max(0.0) .min(norm_value) .as_(); dst[1] = (interpolated.v[1] * norm_value) .round() .max(0.0) .min(norm_value) .as_(); dst[2] = (interpolated.v[2] * norm_value) .round() .max(0.0) .min(norm_value) .as_(); } else { dst[0] = interpolated.v[0].as_(); dst[1] = interpolated.v[1].as_(); dst[2] = interpolated.v[2].as_(); } } } else { for (src, dst) in src.chunks_exact(3).zip(dst.chunks_exact_mut(3)) { if T::FINITE { dst[0] = (src[0] * norm_value).round().max(0.0).min(norm_value).as_(); dst[1] = (src[1] * norm_value).round().max(0.0).min(norm_value).as_(); dst[2] = (src[2] * norm_value).round().max(0.0).min(norm_value).as_(); } else { dst[0] = src[0].as_(); dst[1] = src[1].as_(); dst[2] = src[2].as_(); } } } } else { for (src, dst) in src.chunks_exact(3).zip(dst.chunks_exact_mut(3)) { if T::FINITE { dst[0] = (src[0] * norm_value).round().max(0.0).min(norm_value).as_(); dst[1] = (src[1] * norm_value).round().max(0.0).min(norm_value).as_(); dst[2] = (src[2] * norm_value).round().max(0.0).min(norm_value).as_(); } else { dst[0] = src[0].as_(); dst[1] = src[1].as_(); dst[2] = src[2].as_(); } } } Ok(()) } } impl + PointeeSizeExpressible + Send + Sync> KatanaFinalStage for Multidimensional3x3 where f32: AsPrimitive, { fn to_output(&self, src: &mut [f32], dst: &mut [T]) -> Result<(), CmsError> { if src.len() % 3 != 0 { return Err(CmsError::LaneMultipleOfChannels); } if dst.len() % 3 != 0 { return Err(CmsError::LaneMultipleOfChannels); } if src.len() != dst.len() { return Err(CmsError::LaneSizeMismatch); } let fixed_new_clut = Vec::new(); 
let new_clut = self.clut.as_ref().unwrap_or(&fixed_new_clut); let lut = Cube::new_cube(new_clut, self.grid_size); // If PCS is LAB then linear interpolation should be used if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz { return self.to_output_impl(src, dst, |x, y, z| lut.trilinear_vec3(x, y, z)); } match self.options.interpolation_method { #[cfg(feature = "options")] InterpolationMethod::Tetrahedral => { self.to_output_impl(src, dst, |x, y, z| lut.tetra_vec3(x, y, z))?; } #[cfg(feature = "options")] InterpolationMethod::Pyramid => { self.to_output_impl(src, dst, |x, y, z| lut.pyramid_vec3(x, y, z))?; } #[cfg(feature = "options")] InterpolationMethod::Prism => { self.to_output_impl(src, dst, |x, y, z| lut.prism_vec3(x, y, z))?; } InterpolationMethod::Linear => { self.to_output_impl(src, dst, |x, y, z| lut.trilinear_vec3(x, y, z))?; } } Ok(()) } } fn make_multidimensional_3x3< T: Copy + Default + AsPrimitive + PointeeSizeExpressible + Send + Sync, >( mab: &LutMultidimensionalType, options: TransformOptions, pcs: DataColorSpace, direction: MultidimensionalDirection, bit_depth: usize, ) -> Result, CmsError> { if mab.num_input_channels != 3 && mab.num_output_channels != 3 { return Err(CmsError::UnsupportedProfileConnection); } if mab.b_curves.is_empty() || mab.b_curves.len() != 3 { return Err(CmsError::InvalidAtoBLut); } let grid_size = [mab.grid_points[0], mab.grid_points[1], mab.grid_points[2]]; let clut: Option> = if mab.a_curves.len() == 3 && mab.clut.is_some() { let clut = mab.clut.as_ref().map(|x| x.to_clut_f32()).unwrap(); let lut_grid = (mab.grid_points[0] as usize) .safe_mul(mab.grid_points[1] as usize)? .safe_mul(mab.grid_points[2] as usize)? 
.safe_mul(mab.num_output_channels as usize)?; if clut.len() != lut_grid { return Err(CmsError::MalformedCurveLutTable(MalformedSize { size: clut.len(), expected: lut_grid, })); } Some(clut) } else { None }; let a_curves: Option; 3]>> = if mab.a_curves.len() == 3 && mab.clut.is_some() { let mut arr = Box::<[Vec; 3]>::default(); for (a_curve, dst) in mab.a_curves.iter().zip(arr.iter_mut()) { *dst = a_curve.to_clut()?; } Some(arr) } else { None }; let b_curves: Option; 3]>> = if mab.b_curves.len() == 3 { let mut arr = Box::<[Vec; 3]>::default(); let all_curves_linear = mab.b_curves.iter().all(|curve| curve.is_linear()); if all_curves_linear { None } else { for (c_curve, dst) in mab.b_curves.iter().zip(arr.iter_mut()) { *dst = c_curve.to_clut()?; } Some(arr) } } else { return Err(CmsError::InvalidAtoBLut); }; let matrix = mab.matrix.to_f32(); let m_curves: Option; 3]>> = if mab.m_curves.len() == 3 { let all_curves_linear = mab.m_curves.iter().all(|curve| curve.is_linear()); if !all_curves_linear || !mab.matrix.test_equality(Matrix3d::IDENTITY) || mab.bias.ne(&Vector3d::default()) { let mut arr = Box::<[Vec; 3]>::default(); for (curve, dst) in mab.m_curves.iter().zip(arr.iter_mut()) { *dst = curve.to_clut()?; } Some(arr) } else { None } } else { None }; let bias = mab.bias.cast(); let transform = Multidimensional3x3:: { a_curves, b_curves, m_curves, matrix, direction, options, clut, pcs, grid_size, bias, _phantom: PhantomData, bit_depth, }; Ok(transform) } pub(crate) fn multi_dimensional_3x3_to_pcs< T: Copy + Default + AsPrimitive + PointeeSizeExpressible + Send + Sync, >( mab: &LutMultidimensionalType, options: TransformOptions, pcs: DataColorSpace, bit_depth: usize, ) -> Result + Send + Sync>, CmsError> { let transform = make_multidimensional_3x3::( mab, options, pcs, MultidimensionalDirection::DeviceToPcs, bit_depth, )?; Ok(Box::new(transform)) } pub(crate) fn multi_dimensional_3x3_to_device< T: Copy + Default + AsPrimitive + PointeeSizeExpressible + Send + Sync, >( 
mab: &LutMultidimensionalType, options: TransformOptions, pcs: DataColorSpace, bit_depth: usize, ) -> Result + Send + Sync>, CmsError> where f32: AsPrimitive, { let transform = make_multidimensional_3x3::( mab, options, pcs, MultidimensionalDirection::PcsToDevice, bit_depth, )?; Ok(Box::new(transform)) } moxcms-0.7.7/src/conversions/katana/md4x3.rs000064400000000000000000000262011046102023000170510ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ use crate::conversions::katana::KatanaInitialStage; use crate::conversions::katana::md3x3::MultidimensionalDirection; use crate::mlaf::mlaf; use crate::safe_math::SafeMul; use crate::trc::lut_interp_linear_float; use crate::{ CmsError, DataColorSpace, Hypercube, InterpolationMethod, LutMultidimensionalType, MalformedSize, Matrix3d, Matrix3f, PointeeSizeExpressible, TransformOptions, Vector3d, Vector3f, }; use num_traits::AsPrimitive; use std::marker::PhantomData; pub(crate) fn execute_simple_curves3(dst: &mut [f32], curves: &[Vec; 3]) { let curve0 = &curves[0]; let curve1 = &curves[1]; let curve2 = &curves[2]; for dst in dst.chunks_exact_mut(3) { let a0 = dst[0]; let a1 = dst[1]; let a2 = dst[2]; let b0 = lut_interp_linear_float(a0, curve0); let b1 = lut_interp_linear_float(a1, curve1); let b2 = lut_interp_linear_float(a2, curve2); dst[0] = b0; dst[1] = b1; dst[2] = b2; } } pub(crate) fn execute_matrix_stage3(matrix: Matrix3f, bias: Vector3f, dst: &mut [f32]) { let m = matrix; let b = bias; if !m.test_equality(Matrix3f::IDENTITY) || !b.eq(&Vector3f::default()) { for dst in dst.chunks_exact_mut(3) { let x = dst[0]; let y = dst[1]; let z = dst[2]; dst[0] = mlaf(mlaf(mlaf(b.v[0], x, m.v[0][0]), y, m.v[0][1]), z, m.v[0][2]); dst[1] = mlaf(mlaf(mlaf(b.v[1], x, m.v[1][0]), y, m.v[1][1]), z, m.v[1][2]); dst[2] = mlaf(mlaf(mlaf(b.v[2], x, m.v[2][0]), y, m.v[2][1]), z, m.v[2][2]); } } } struct Multidimensional4x3< T: Copy + Default + 
AsPrimitive + PointeeSizeExpressible + Send + Sync, > { a_curves: Option; 4]>>, m_curves: Option; 3]>>, b_curves: Option; 3]>>, clut: Option>, matrix: Matrix3f, bias: Vector3f, direction: MultidimensionalDirection, options: TransformOptions, pcs: DataColorSpace, grid_size: [u8; 4], _phantom: PhantomData, bit_depth: usize, } impl + PointeeSizeExpressible + Send + Sync> Multidimensional4x3 { fn to_pcs_impl Vector3f>( &self, input: &[T], dst: &mut [f32], fetch: Fetch, ) -> Result<(), CmsError> { let norm_value = if T::FINITE { 1.0 / ((1u32 << self.bit_depth) - 1) as f32 } else { 1.0 }; assert_eq!( self.direction, MultidimensionalDirection::DeviceToPcs, "PCS to device cannot be used on `to pcs` stage" ); // A -> B // OR B - A A - curves stage if let (Some(a_curves), Some(clut)) = (self.a_curves.as_ref(), self.clut.as_ref()) { if !clut.is_empty() { let curve0 = &a_curves[0]; let curve1 = &a_curves[1]; let curve2 = &a_curves[2]; let curve3 = &a_curves[3]; for (src, dst) in input.chunks_exact(4).zip(dst.chunks_exact_mut(3)) { let b0 = lut_interp_linear_float(src[0].as_() * norm_value, curve0); let b1 = lut_interp_linear_float(src[1].as_() * norm_value, curve1); let b2 = lut_interp_linear_float(src[2].as_() * norm_value, curve2); let b3 = lut_interp_linear_float(src[3].as_() * norm_value, curve3); let interpolated = fetch(b0, b1, b2, b3); dst[0] = interpolated.v[0]; dst[1] = interpolated.v[1]; dst[2] = interpolated.v[2]; } } } else { return Err(CmsError::InvalidAtoBLut); } // Matrix stage if let Some(m_curves) = self.m_curves.as_ref() { execute_simple_curves3(dst, m_curves); execute_matrix_stage3(self.matrix, self.bias, dst); } // B-curves is mandatory if let Some(b_curves) = &self.b_curves.as_ref() { execute_simple_curves3(dst, b_curves); } Ok(()) } } impl + PointeeSizeExpressible + Send + Sync> KatanaInitialStage for Multidimensional4x3 { fn to_pcs(&self, input: &[T]) -> Result, CmsError> { if input.len() % 4 != 0 { return Err(CmsError::LaneMultipleOfChannels); } let 
fixed_new_clut = Vec::new(); let new_clut = self.clut.as_ref().unwrap_or(&fixed_new_clut); let lut = Hypercube::new_hypercube(new_clut, self.grid_size); let mut new_dst = vec![0f32; (input.len() / 4) * 3]; // If PCS is LAB then linear interpolation should be used if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz { self.to_pcs_impl(input, &mut new_dst, |x, y, z, w| { lut.quadlinear_vec3(x, y, z, w) })?; return Ok(new_dst); } match self.options.interpolation_method { #[cfg(feature = "options")] InterpolationMethod::Tetrahedral => { self.to_pcs_impl(input, &mut new_dst, |x, y, z, w| lut.tetra_vec3(x, y, z, w))?; } #[cfg(feature = "options")] InterpolationMethod::Pyramid => { self.to_pcs_impl(input, &mut new_dst, |x, y, z, w| { lut.pyramid_vec3(x, y, z, w) })?; } #[cfg(feature = "options")] InterpolationMethod::Prism => { self.to_pcs_impl(input, &mut new_dst, |x, y, z, w| lut.prism_vec3(x, y, z, w))?; } InterpolationMethod::Linear => { self.to_pcs_impl(input, &mut new_dst, |x, y, z, w| { lut.quadlinear_vec3(x, y, z, w) })?; } } Ok(new_dst) } } fn make_multidimensional_4x3< T: Copy + Default + AsPrimitive + PointeeSizeExpressible + Send + Sync, >( mab: &LutMultidimensionalType, options: TransformOptions, pcs: DataColorSpace, direction: MultidimensionalDirection, bit_depth: usize, ) -> Result, CmsError> { if mab.num_input_channels != 4 && mab.num_output_channels != 3 { return Err(CmsError::UnsupportedProfileConnection); } if mab.b_curves.is_empty() || mab.b_curves.len() != 3 { return Err(CmsError::InvalidAtoBLut); } let grid_size = [ mab.grid_points[0], mab.grid_points[1], mab.grid_points[2], mab.grid_points[3], ]; let clut: Option> = if mab.a_curves.len() == 4 && mab.clut.is_some() { let clut = mab.clut.as_ref().map(|x| x.to_clut_f32()).unwrap(); let lut_grid = (mab.grid_points[0] as usize) .safe_mul(mab.grid_points[1] as usize)? .safe_mul(mab.grid_points[2] as usize)? .safe_mul(mab.grid_points[3] as usize)? 
.safe_mul(mab.num_output_channels as usize)?; if clut.len() != lut_grid { return Err(CmsError::MalformedCurveLutTable(MalformedSize { size: clut.len(), expected: lut_grid, })); } Some(clut) } else { return Err(CmsError::InvalidAtoBLut); }; let a_curves: Option; 4]>> = if mab.a_curves.len() == 4 && mab.clut.is_some() { let mut arr = Box::<[Vec; 4]>::default(); for (a_curve, dst) in mab.a_curves.iter().zip(arr.iter_mut()) { *dst = a_curve.to_clut()?; } Some(arr) } else { None }; let b_curves: Option; 3]>> = if mab.b_curves.len() == 3 { let mut arr = Box::<[Vec; 3]>::default(); let all_curves_linear = mab.b_curves.iter().all(|curve| curve.is_linear()); if all_curves_linear { None } else { for (c_curve, dst) in mab.b_curves.iter().zip(arr.iter_mut()) { *dst = c_curve.to_clut()?; } Some(arr) } } else { return Err(CmsError::InvalidAtoBLut); }; let matrix = mab.matrix.to_f32(); let m_curves: Option; 3]>> = if mab.m_curves.len() == 3 { let all_curves_linear = mab.m_curves.iter().all(|curve| curve.is_linear()); if !all_curves_linear || !mab.matrix.test_equality(Matrix3d::IDENTITY) || mab.bias.ne(&Vector3d::default()) { let mut arr = Box::<[Vec; 3]>::default(); for (curve, dst) in mab.m_curves.iter().zip(arr.iter_mut()) { *dst = curve.to_clut()?; } Some(arr) } else { None } } else { None }; let bias = mab.bias.cast(); let transform = Multidimensional4x3:: { a_curves, b_curves, m_curves, matrix, direction, options, clut, pcs, grid_size, bias, _phantom: PhantomData, bit_depth, }; Ok(transform) } pub(crate) fn multi_dimensional_4x3_to_pcs< T: Copy + Default + AsPrimitive + PointeeSizeExpressible + Send + Sync, >( mab: &LutMultidimensionalType, options: TransformOptions, pcs: DataColorSpace, bit_depth: usize, ) -> Result + Send + Sync>, CmsError> { let transform = make_multidimensional_4x3::( mab, options, pcs, MultidimensionalDirection::DeviceToPcs, bit_depth, )?; Ok(Box::new(transform)) } 
moxcms-0.7.7/src/conversions/katana/md_3xn.rs000064400000000000000000000226561046102023000173140ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ use crate::conversions::katana::KatanaFinalStage; use crate::conversions::katana::md3x3::MultidimensionalDirection; use crate::conversions::katana::md4x3::{execute_matrix_stage3, execute_simple_curves3}; use crate::conversions::md_lut::{MultidimensionalLut, tetra_3i_to_any_vec}; use crate::safe_math::SafeMul; use crate::trc::lut_interp_linear_float; use crate::{ CmsError, DataColorSpace, Layout, LutMultidimensionalType, MalformedSize, Matrix3d, Matrix3f, PointeeSizeExpressible, TransformOptions, Vector3d, Vector3f, }; use num_traits::AsPrimitive; use std::marker::PhantomData; struct Multidimensional3xN< T: Copy + Default + AsPrimitive + PointeeSizeExpressible + Send + Sync, > { a_curves: Option>>, m_curves: Option; 3]>>, b_curves: Option; 3]>>, clut: Option>, matrix: Matrix3f, bias: Vector3f, direction: MultidimensionalDirection, grid_size: [u8; 16], output_inks: usize, _phantom: PhantomData, dst_layout: Layout, bit_depth: usize, } impl + PointeeSizeExpressible + Send + Sync> Multidimensional3xN where f32: AsPrimitive, { fn to_output_impl(&self, src: &mut [f32], dst: &mut [T]) -> Result<(), CmsError> { let norm_value = if T::FINITE { ((1u32 << self.bit_depth) - 1) as f32 } else { 1.0 }; assert_eq!( self.direction, MultidimensionalDirection::PcsToDevice, "PCS to device cannot be used on `to pcs` stage" ); // B-curves is mandatory if let Some(b_curves) = &self.b_curves.as_ref() { execute_simple_curves3(src, b_curves); } // Matrix stage if let Some(m_curves) = self.m_curves.as_ref() { execute_matrix_stage3(self.matrix, self.bias, src); execute_simple_curves3(src, m_curves); } if let (Some(a_curves), Some(clut)) = (self.a_curves.as_ref(), self.clut.as_ref()) { let mut inks = vec![0.; self.output_inks]; if clut.is_empty() { return Err(CmsError::InvalidAtoBLut); } let md_lut = MultidimensionalLut::new(self.grid_size, 3, self.output_inks); for (src, dst) in src .chunks_exact(3) .zip(dst.chunks_exact_mut(self.dst_layout.channels())) { tetra_3i_to_any_vec( &md_lut, clut, 
src[0], src[1], src[2], &mut inks, self.output_inks, ); for (ink, curve) in inks.iter_mut().zip(a_curves.iter()) { *ink = lut_interp_linear_float(*ink, curve); } if T::FINITE { for (dst, ink) in dst.iter_mut().zip(inks.iter()) { *dst = (*ink * norm_value).round().max(0.).min(norm_value).as_(); } } else { for (dst, ink) in dst.iter_mut().zip(inks.iter()) { *dst = (*ink * norm_value).as_(); } } } } else { return Err(CmsError::InvalidAtoBLut); } Ok(()) } } impl + PointeeSizeExpressible + Send + Sync> KatanaFinalStage for Multidimensional3xN where f32: AsPrimitive, { fn to_output(&self, src: &mut [f32], dst: &mut [T]) -> Result<(), CmsError> { if src.len() % 3 != 0 { return Err(CmsError::LaneMultipleOfChannels); } if dst.len() % self.output_inks != 0 { return Err(CmsError::LaneMultipleOfChannels); } self.to_output_impl(src, dst)?; Ok(()) } } fn make_multidimensional_nx3< T: Copy + Default + AsPrimitive + PointeeSizeExpressible + Send + Sync, >( dst_layout: Layout, mab: &LutMultidimensionalType, _: TransformOptions, pcs: DataColorSpace, direction: MultidimensionalDirection, bit_depth: usize, ) -> Result, CmsError> { let real_inks = if pcs == DataColorSpace::Rgb { 3 } else { dst_layout.channels() }; if mab.num_output_channels != real_inks as u8 { return Err(CmsError::UnsupportedProfileConnection); } if mab.b_curves.is_empty() || mab.b_curves.len() != 3 { return Err(CmsError::InvalidAtoBLut); } let clut: Option> = if mab.a_curves.len() == mab.num_output_channels as usize && mab.clut.is_some() { let clut = mab.clut.as_ref().map(|x| x.to_clut_f32()).unwrap(); let mut lut_grid = 1usize; for grid in mab.grid_points.iter().take(mab.num_input_channels as usize) { lut_grid = lut_grid.safe_mul(*grid as usize)?; } let lut_grid = lut_grid.safe_mul(mab.num_output_channels as usize)?; if clut.len() != lut_grid { return Err(CmsError::MalformedCurveLutTable(MalformedSize { size: clut.len(), expected: lut_grid, })); } Some(clut) } else { return Err(CmsError::InvalidAtoBLut); }; let 
a_curves: Option>> = if mab.a_curves.len() == mab.num_output_channels as usize && mab.clut.is_some() { let mut arr = Vec::new(); for a_curve in mab.a_curves.iter() { arr.push(a_curve.to_clut()?); } Some(arr) } else { None }; let b_curves: Option; 3]>> = if mab.b_curves.len() == 3 { let mut arr = Box::<[Vec; 3]>::default(); let all_curves_linear = mab.b_curves.iter().all(|curve| curve.is_linear()); if all_curves_linear { None } else { for (c_curve, dst) in mab.b_curves.iter().zip(arr.iter_mut()) { *dst = c_curve.to_clut()?; } Some(arr) } } else { return Err(CmsError::InvalidAtoBLut); }; let matrix = mab.matrix.to_f32(); let m_curves: Option; 3]>> = if mab.m_curves.len() == 3 { let all_curves_linear = mab.m_curves.iter().all(|curve| curve.is_linear()); if !all_curves_linear || !mab.matrix.test_equality(Matrix3d::IDENTITY) || mab.bias.ne(&Vector3d::default()) { let mut arr = Box::<[Vec; 3]>::default(); for (curve, dst) in mab.m_curves.iter().zip(arr.iter_mut()) { *dst = curve.to_clut()?; } Some(arr) } else { None } } else { None }; let bias = mab.bias.cast(); let transform = Multidimensional3xN:: { a_curves, b_curves, m_curves, matrix, direction, clut, grid_size: mab.grid_points, bias, dst_layout, output_inks: real_inks, _phantom: PhantomData, bit_depth, }; Ok(transform) } pub(crate) fn katana_multi_dimensional_3xn_to_device< T: Copy + Default + AsPrimitive + PointeeSizeExpressible + Send + Sync, >( dst_layout: Layout, mab: &LutMultidimensionalType, options: TransformOptions, pcs: DataColorSpace, bit_depth: usize, ) -> Result + Send + Sync>, CmsError> where f32: AsPrimitive, { if mab.num_input_channels == 0 { return Err(CmsError::UnsupportedProfileConnection); } let transform = make_multidimensional_nx3::( dst_layout, mab, options, pcs, MultidimensionalDirection::PcsToDevice, bit_depth, )?; Ok(Box::new(transform)) } moxcms-0.7.7/src/conversions/katana/md_nx3.rs000064400000000000000000000246131046102023000173070ustar 00000000000000/* * // Copyright (c) Radzivon 
Bartoshyk 6/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ use crate::conversions::katana::KatanaInitialStage; use crate::conversions::katana::md3x3::MultidimensionalDirection; use crate::conversions::katana::md4x3::{execute_matrix_stage3, execute_simple_curves3}; use crate::conversions::md_lut::{ MultidimensionalLut, NVector, linear_1i_vec3f, linear_2i_vec3f_direct, linear_3i_vec3f_direct, linear_4i_vec3f, linear_5i_vec3f, linear_6i_vec3f, linear_7i_vec3f, linear_8i_vec3f, linear_9i_vec3f, linear_10i_vec3f, linear_11i_vec3f, linear_12i_vec3f, linear_13i_vec3f, linear_14i_vec3f, linear_15i_vec3f, }; use crate::safe_math::SafeMul; use crate::trc::lut_interp_linear_float; use crate::{ CmsError, DataColorSpace, Layout, LutMultidimensionalType, MalformedSize, Matrix3d, Matrix3f, PointeeSizeExpressible, TransformOptions, Vector3d, Vector3f, }; use num_traits::AsPrimitive; use std::marker::PhantomData; struct MultidimensionalNx3< T: Copy + Default + AsPrimitive + PointeeSizeExpressible + Send + Sync, > { a_curves: Option>>, m_curves: Option; 3]>>, b_curves: Option; 3]>>, clut: Option>, matrix: Matrix3f, bias: Vector3f, direction: MultidimensionalDirection, grid_size: [u8; 16], input_inks: usize, _phantom: PhantomData, bit_depth: usize, } #[inline(never)] pub(crate) fn interpolate_out_function( layout: Layout, ) -> fn(lut: &MultidimensionalLut, arr: &[f32], inputs: &[f32]) -> NVector { const OUT: usize = 3; match layout { Layout::Rgb => linear_3i_vec3f_direct::, Layout::Rgba => linear_4i_vec3f::, Layout::Gray => linear_1i_vec3f::, Layout::GrayAlpha => linear_2i_vec3f_direct::, Layout::Inks5 => linear_5i_vec3f::, Layout::Inks6 => linear_6i_vec3f::, Layout::Inks7 => linear_7i_vec3f::, Layout::Inks8 => linear_8i_vec3f::, Layout::Inks9 => linear_9i_vec3f::, Layout::Inks10 => linear_10i_vec3f::, Layout::Inks11 => linear_11i_vec3f::, Layout::Inks12 => linear_12i_vec3f::, Layout::Inks13 => linear_13i_vec3f::, Layout::Inks14 => linear_14i_vec3f::, Layout::Inks15 => linear_15i_vec3f::, } } impl + PointeeSizeExpressible + Send + Sync> 
MultidimensionalNx3 { fn to_pcs_impl(&self, input: &[T], dst: &mut [f32]) -> Result<(), CmsError> { let norm_value = if T::FINITE { 1.0 / ((1u32 << self.bit_depth) - 1) as f32 } else { 1.0 }; assert_eq!( self.direction, MultidimensionalDirection::DeviceToPcs, "PCS to device cannot be used on `to pcs` stage" ); // A -> B // OR B - A A - curves stage if let (Some(a_curves), Some(clut)) = (self.a_curves.as_ref(), self.clut.as_ref()) { let layout = Layout::from_inks(self.input_inks); let mut inks = vec![0.; self.input_inks]; if clut.is_empty() { return Err(CmsError::InvalidAtoBLut); } let fetcher = interpolate_out_function(layout); let md_lut = MultidimensionalLut::new(self.grid_size, self.input_inks, 3); for (src, dst) in input .chunks_exact(layout.channels()) .zip(dst.chunks_exact_mut(3)) { for ((ink, src_ink), curve) in inks.iter_mut().zip(src).zip(a_curves.iter()) { *ink = lut_interp_linear_float(src_ink.as_() * norm_value, curve); } let interpolated = fetcher(&md_lut, clut, &inks); dst[0] = interpolated.v[0]; dst[1] = interpolated.v[1]; dst[2] = interpolated.v[2]; } } else { return Err(CmsError::InvalidAtoBLut); } // Matrix stage if let Some(m_curves) = self.m_curves.as_ref() { execute_simple_curves3(dst, m_curves); execute_matrix_stage3(self.matrix, self.bias, dst); } // B-curves is mandatory if let Some(b_curves) = &self.b_curves.as_ref() { execute_simple_curves3(dst, b_curves); } Ok(()) } } impl + PointeeSizeExpressible + Send + Sync> KatanaInitialStage for MultidimensionalNx3 { fn to_pcs(&self, input: &[T]) -> Result, CmsError> { if input.len() % self.input_inks != 0 { return Err(CmsError::LaneMultipleOfChannels); } let mut new_dst = vec![0f32; (input.len() / self.input_inks) * 3]; self.to_pcs_impl(input, &mut new_dst)?; Ok(new_dst) } } fn make_multidimensional_nx3< T: Copy + Default + AsPrimitive + PointeeSizeExpressible + Send + Sync, >( mab: &LutMultidimensionalType, _: TransformOptions, _: DataColorSpace, direction: MultidimensionalDirection, bit_depth: 
usize, ) -> Result, CmsError> { if mab.num_output_channels != 3 { return Err(CmsError::UnsupportedProfileConnection); } if mab.b_curves.is_empty() || mab.b_curves.len() != 3 { return Err(CmsError::InvalidAtoBLut); } let clut: Option> = if mab.a_curves.len() == mab.num_input_channels as usize && mab.clut.is_some() { let clut = mab.clut.as_ref().map(|x| x.to_clut_f32()).unwrap(); let mut lut_grid = 1usize; for grid in mab.grid_points.iter().take(mab.num_input_channels as usize) { lut_grid = lut_grid.safe_mul(*grid as usize)?; } let lut_grid = lut_grid.safe_mul(mab.num_output_channels as usize)?; if clut.len() != lut_grid { return Err(CmsError::MalformedCurveLutTable(MalformedSize { size: clut.len(), expected: lut_grid, })); } Some(clut) } else { return Err(CmsError::InvalidAtoBLut); }; let a_curves: Option>> = if mab.a_curves.len() == mab.num_input_channels as usize && mab.clut.is_some() { let mut arr = Vec::new(); for a_curve in mab.a_curves.iter() { arr.push(a_curve.to_clut()?); } Some(arr) } else { None }; let b_curves: Option; 3]>> = if mab.b_curves.len() == 3 { let mut arr = Box::<[Vec; 3]>::default(); let all_curves_linear = mab.b_curves.iter().all(|curve| curve.is_linear()); if all_curves_linear { None } else { for (c_curve, dst) in mab.b_curves.iter().zip(arr.iter_mut()) { *dst = c_curve.to_clut()?; } Some(arr) } } else { return Err(CmsError::InvalidAtoBLut); }; let matrix = mab.matrix.to_f32(); let m_curves: Option; 3]>> = if mab.m_curves.len() == 3 { let all_curves_linear = mab.m_curves.iter().all(|curve| curve.is_linear()); if !all_curves_linear || !mab.matrix.test_equality(Matrix3d::IDENTITY) || mab.bias.ne(&Vector3d::default()) { let mut arr = Box::<[Vec; 3]>::default(); for (curve, dst) in mab.m_curves.iter().zip(arr.iter_mut()) { *dst = curve.to_clut()?; } Some(arr) } else { None } } else { None }; let bias = mab.bias.cast(); let transform = MultidimensionalNx3:: { a_curves, b_curves, m_curves, matrix, direction, clut, grid_size: mab.grid_points, bias, 
input_inks: mab.num_input_channels as usize, _phantom: PhantomData, bit_depth, }; Ok(transform) } pub(crate) fn katana_multi_dimensional_nx3_to_pcs< T: Copy + Default + AsPrimitive + PointeeSizeExpressible + Send + Sync, >( src_layout: Layout, mab: &LutMultidimensionalType, options: TransformOptions, pcs: DataColorSpace, bit_depth: usize, ) -> Result + Send + Sync>, CmsError> { if pcs == DataColorSpace::Rgb { if mab.num_input_channels != 3 { return Err(CmsError::InvalidAtoBLut); } if src_layout != Layout::Rgba && src_layout != Layout::Rgb { return Err(CmsError::InvalidInksCountForProfile); } } else if mab.num_input_channels != src_layout.channels() as u8 { return Err(CmsError::InvalidInksCountForProfile); } let transform = make_multidimensional_nx3::( mab, options, pcs, MultidimensionalDirection::DeviceToPcs, bit_depth, )?; Ok(Box::new(transform)) } moxcms-0.7.7/src/conversions/katana/md_pipeline.rs000064400000000000000000000324501046102023000204020ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. 
* // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ use crate::conversions::katana::md_nx3::interpolate_out_function; use crate::conversions::katana::{KatanaFinalStage, KatanaInitialStage}; use crate::conversions::md_lut::{MultidimensionalLut, tetra_3i_to_any_vec}; use crate::profile::LutDataType; use crate::safe_math::{SafeMul, SafePowi}; use crate::trc::lut_interp_linear_float; use crate::{ CmsError, DataColorSpace, Layout, MalformedSize, PointeeSizeExpressible, TransformOptions, }; use num_traits::AsPrimitive; use std::array::from_fn; use std::marker::PhantomData; #[derive(Default)] struct KatanaLutNx3 { linearization: Vec>, clut: Vec, grid_size: u8, input_inks: usize, output: [Vec; 3], _phantom: PhantomData, bit_depth: usize, } struct KatanaLut3xN { linearization: [Vec; 3], clut: Vec, grid_size: u8, output_inks: usize, output: Vec>, dst_layout: Layout, target_color_space: DataColorSpace, _phantom: PhantomData, bit_depth: usize, } impl> KatanaLutNx3 { fn to_pcs_impl(&self, input: &[T]) -> Result, CmsError> { if input.len() % self.input_inks != 0 { return Err(CmsError::LaneMultipleOfChannels); } let norm_value = if T::FINITE { 1.0 / ((1u32 << self.bit_depth) - 1) as f32 } else { 1.0 }; let grid_sizes: [u8; 16] = 
from_fn(|i| { if i < self.input_inks { self.grid_size } else { 0 } }); let md_lut = MultidimensionalLut::new(grid_sizes, self.input_inks, 3); let layout = Layout::from_inks(self.input_inks); let mut inks = vec![0.; self.input_inks]; let mut dst = vec![0.; (input.len() / layout.channels()) * 3]; let fetcher = interpolate_out_function(layout); for (dest, src) in dst .chunks_exact_mut(3) .zip(input.chunks_exact(layout.channels())) { for ((ink, src_ink), curve) in inks.iter_mut().zip(src).zip(self.linearization.iter()) { *ink = lut_interp_linear_float(src_ink.as_() * norm_value, curve); } let clut = fetcher(&md_lut, &self.clut, &inks); let pcs_x = lut_interp_linear_float(clut.v[0], &self.output[0]); let pcs_y = lut_interp_linear_float(clut.v[1], &self.output[1]); let pcs_z = lut_interp_linear_float(clut.v[2], &self.output[2]); dest[0] = pcs_x; dest[1] = pcs_y; dest[2] = pcs_z; } Ok(dst) } } impl> KatanaInitialStage for KatanaLutNx3 { fn to_pcs(&self, input: &[T]) -> Result, CmsError> { if input.len() % self.input_inks != 0 { return Err(CmsError::LaneMultipleOfChannels); } self.to_pcs_impl(input) } } impl> KatanaFinalStage for KatanaLut3xN where f32: AsPrimitive, { fn to_output(&self, src: &mut [f32], dst: &mut [T]) -> Result<(), CmsError> { if src.len() % 3 != 0 { return Err(CmsError::LaneMultipleOfChannels); } let grid_sizes: [u8; 16] = from_fn(|i| { if i < self.output_inks { self.grid_size } else { 0 } }); let md_lut = MultidimensionalLut::new(grid_sizes, 3, self.output_inks); let scale_value = if T::FINITE { ((1u32 << self.bit_depth) - 1) as f32 } else { 1.0 }; let mut working = vec![0.; self.output_inks]; for (dest, src) in dst .chunks_exact_mut(self.dst_layout.channels()) .zip(src.chunks_exact(3)) { let x = lut_interp_linear_float(src[0], &self.linearization[0]); let y = lut_interp_linear_float(src[1], &self.linearization[1]); let z = lut_interp_linear_float(src[2], &self.linearization[2]); tetra_3i_to_any_vec(&md_lut, &self.clut, x, y, z, &mut working, 
self.output_inks); for (ink, curve) in working.iter_mut().zip(self.output.iter()) { *ink = lut_interp_linear_float(*ink, curve); } if T::FINITE { for (dst, ink) in dest.iter_mut().zip(working.iter()) { *dst = (*ink * scale_value).round().max(0.).min(scale_value).as_(); } } else { for (dst, ink) in dest.iter_mut().zip(working.iter()) { *dst = (*ink * scale_value).as_(); } } } if self.dst_layout == Layout::Rgba && self.target_color_space == DataColorSpace::Rgb { for dst in dst.chunks_exact_mut(self.dst_layout.channels()) { dst[3] = scale_value.as_(); } } Ok(()) } } fn katana_make_lut_nx3>( inks: usize, lut: &LutDataType, _: TransformOptions, _: DataColorSpace, bit_depth: usize, ) -> Result, CmsError> { if inks != lut.num_input_channels as usize { return Err(CmsError::UnsupportedProfileConnection); } if lut.num_output_channels != 3 { return Err(CmsError::UnsupportedProfileConnection); } let clut_length: usize = (lut.num_clut_grid_points as usize) .safe_powi(lut.num_input_channels as u32)? .safe_mul(lut.num_output_channels as usize)?; let clut_table = lut.clut_table.to_clut_f32(); if clut_table.len() != clut_length { return Err(CmsError::MalformedClut(MalformedSize { size: clut_table.len(), expected: clut_length, })); } let linearization_table = lut.input_table.to_clut_f32(); if linearization_table.len() < lut.num_input_table_entries as usize * inks { return Err(CmsError::MalformedCurveLutTable(MalformedSize { size: linearization_table.len(), expected: lut.num_input_table_entries as usize * inks, })); } let linearization = (0..inks) .map(|x| { linearization_table[x * lut.num_input_table_entries as usize ..(x + 1) * lut.num_input_table_entries as usize] .to_vec() }) .collect::<_>(); let gamma_table = lut.output_table.to_clut_f32(); if gamma_table.len() < lut.num_output_table_entries as usize * 3 { return Err(CmsError::MalformedCurveLutTable(MalformedSize { size: gamma_table.len(), expected: lut.num_output_table_entries as usize * 3, })); } let gamma_curve0 = 
gamma_table[..lut.num_output_table_entries as usize].to_vec(); let gamma_curve1 = gamma_table [lut.num_output_table_entries as usize..lut.num_output_table_entries as usize * 2] .to_vec(); let gamma_curve2 = gamma_table [lut.num_output_table_entries as usize * 2..lut.num_output_table_entries as usize * 3] .to_vec(); let transform = KatanaLutNx3:: { linearization, clut: clut_table, grid_size: lut.num_clut_grid_points, output: [gamma_curve0, gamma_curve1, gamma_curve2], input_inks: inks, _phantom: PhantomData, bit_depth, }; Ok(transform) } fn katana_make_lut_3xn>( inks: usize, dst_layout: Layout, lut: &LutDataType, _: TransformOptions, target_color_space: DataColorSpace, bit_depth: usize, ) -> Result, CmsError> { if lut.num_input_channels as usize != 3 { return Err(CmsError::UnsupportedProfileConnection); } if target_color_space == DataColorSpace::Rgb { if lut.num_output_channels != 3 || lut.num_output_channels != 4 { return Err(CmsError::InvalidInksCountForProfile); } if dst_layout != Layout::Rgb || dst_layout != Layout::Rgba { return Err(CmsError::InvalidInksCountForProfile); } } else if lut.num_output_channels as usize != dst_layout.channels() { return Err(CmsError::InvalidInksCountForProfile); } let clut_length: usize = (lut.num_clut_grid_points as usize) .safe_powi(lut.num_input_channels as u32)? 
.safe_mul(lut.num_output_channels as usize)?; let clut_table = lut.clut_table.to_clut_f32(); if clut_table.len() != clut_length { return Err(CmsError::MalformedClut(MalformedSize { size: clut_table.len(), expected: clut_length, })); } let linearization_table = lut.input_table.to_clut_f32(); if linearization_table.len() < lut.num_input_table_entries as usize * 3 { return Err(CmsError::MalformedCurveLutTable(MalformedSize { size: linearization_table.len(), expected: lut.num_input_table_entries as usize * 3, })); } let linear_curve0 = linearization_table[..lut.num_input_table_entries as usize].to_vec(); let linear_curve1 = linearization_table [lut.num_input_table_entries as usize..lut.num_input_table_entries as usize * 2] .to_vec(); let linear_curve2 = linearization_table [lut.num_input_table_entries as usize * 2..lut.num_input_table_entries as usize * 3] .to_vec(); let gamma_table = lut.output_table.to_clut_f32(); if gamma_table.len() < lut.num_output_table_entries as usize * inks { return Err(CmsError::MalformedCurveLutTable(MalformedSize { size: gamma_table.len(), expected: lut.num_output_table_entries as usize * inks, })); } let gamma = (0..inks) .map(|x| { gamma_table[x * lut.num_output_table_entries as usize ..(x + 1) * lut.num_output_table_entries as usize] .to_vec() }) .collect::<_>(); let transform = KatanaLut3xN:: { linearization: [linear_curve0, linear_curve1, linear_curve2], clut: clut_table, grid_size: lut.num_clut_grid_points, output: gamma, output_inks: inks, _phantom: PhantomData, target_color_space, dst_layout, bit_depth, }; Ok(transform) } pub(crate) fn katana_input_make_lut_nx3< T: Copy + PointeeSizeExpressible + AsPrimitive + Send + Sync, >( src_layout: Layout, inks: usize, lut: &LutDataType, options: TransformOptions, pcs: DataColorSpace, bit_depth: usize, ) -> Result + Send + Sync>, CmsError> { if pcs == DataColorSpace::Rgb { if lut.num_input_channels != 3 { return Err(CmsError::InvalidAtoBLut); } if src_layout != Layout::Rgba && src_layout != 
Layout::Rgb { return Err(CmsError::InvalidInksCountForProfile); } } else if lut.num_input_channels != src_layout.channels() as u8 { return Err(CmsError::InvalidInksCountForProfile); } let z0 = katana_make_lut_nx3::(inks, lut, options, pcs, bit_depth)?; Ok(Box::new(z0)) } pub(crate) fn katana_output_make_lut_3xn< T: Copy + PointeeSizeExpressible + AsPrimitive + Send + Sync, >( dst_layout: Layout, lut: &LutDataType, options: TransformOptions, target_color_space: DataColorSpace, bit_depth: usize, ) -> Result + Send + Sync>, CmsError> where f32: AsPrimitive, { let real_inks = if target_color_space == DataColorSpace::Rgb { 3 } else { dst_layout.channels() }; let z0 = katana_make_lut_3xn::( real_inks, dst_layout, lut, options, target_color_space, bit_depth, )?; Ok(Box::new(z0)) } moxcms-0.7.7/src/conversions/katana/mod.rs000064400000000000000000000051151046102023000166720ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ mod finalizers; mod md3x3; mod md4x3; mod md_3xn; mod md_nx3; mod md_pipeline; mod pcs_stages; mod rgb_xyz; mod stages; mod xyz_lab; mod xyz_rgb; pub(crate) use finalizers::{CopyAlphaStage, InjectAlphaStage}; pub(crate) use md_3xn::katana_multi_dimensional_3xn_to_device; pub(crate) use md_nx3::katana_multi_dimensional_nx3_to_pcs; pub(crate) use md_pipeline::{katana_input_make_lut_nx3, katana_output_make_lut_3xn}; pub(crate) use md3x3::{multi_dimensional_3x3_to_device, multi_dimensional_3x3_to_pcs}; pub(crate) use md4x3::multi_dimensional_4x3_to_pcs; pub(crate) use pcs_stages::{ KatanaDefaultIntermediate, katana_pcs_lab_v2_to_v4, katana_pcs_lab_v4_to_v2, }; pub(crate) use rgb_xyz::katana_create_rgb_lin_lut; pub(crate) use stages::{ Katana, KatanaFinalStage, KatanaInitialStage, KatanaIntermediateStage, KatanaPostFinalizationStage, }; pub(crate) use xyz_lab::{KatanaStageLabToXyz, KatanaStageXyzToLab}; pub(crate) use xyz_rgb::katana_prepare_inverse_lut_rgb_xyz; moxcms-0.7.7/src/conversions/katana/pcs_stages.rs000064400000000000000000000077621046102023000202600ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. 
Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/
use crate::conversions::katana::KatanaIntermediateStage;
use crate::conversions::katana::stages::BlackholeIntermediateStage;
use crate::mlaf::mlaf;
use crate::{CmsError, ColorProfile, DataColorSpace, Matrix3f, ProfileVersion};
use std::marker::PhantomData;

/// Intermediate stage that multiplies every triple of the working buffer by
/// each stored 3x3 matrix, in order.
pub(crate) struct KatanaMatrixStage {
    pub(crate) matrices: Vec<Matrix3f>,
}

impl KatanaMatrixStage {
    /// Creates a stage that applies a single matrix.
    pub(crate) fn new(matrix: Matrix3f) -> Self {
        Self {
            matrices: vec![matrix],
        }
    }
}

/// Type-erased intermediate stage operating on `f32` working data.
pub(crate) type KatanaDefaultIntermediate = dyn KatanaIntermediateStage<f32> + Send + Sync;

impl KatanaIntermediateStage<f32> for KatanaMatrixStage {
    /// Applies the matrices in place and hands the buffer back, leaving
    /// `input` empty.
    ///
    /// # Errors
    /// Returns [`CmsError::LaneMultipleOfChannels`] when the buffer length is
    /// not a multiple of 3.
    fn stage(&self, input: &mut Vec<f32>) -> Result<Vec<f32>, CmsError> {
        if input.len() % 3 != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }

        for m in self.matrices.iter() {
            for px in input.chunks_exact_mut(3) {
                // Read all three components first: each output row needs the old values.
                let (x, y, z) = (px[0], px[1], px[2]);
                px[0] = mlaf(mlaf(x * m.v[0][0], y, m.v[0][1]), z, m.v[0][2]);
                px[1] = mlaf(mlaf(x * m.v[1][0], y, m.v[1][1]), z, m.v[1][2]);
                px[2] = mlaf(mlaf(x * m.v[2][0], y, m.v[2][1]), z, m.v[2][2]);
            }
        }

        Ok(std::mem::take(input))
    }
}

/// Returns a uniform scaling stage with factor `scale` when `profile` has a
/// Lab PCS and a version not newer than 4.0; otherwise a pass-through stage.
fn katana_lab_scale_stage(profile: &ColorProfile, scale: f32) -> Box<KatanaDefaultIntermediate> {
    if profile.pcs == DataColorSpace::Lab && profile.version_internal <= ProfileVersion::V4_0 {
        return Box::new(KatanaMatrixStage::new(Matrix3f {
            v: [[scale, 0., 0.], [0., scale, 0.], [0., 0., scale]],
        }));
    }
    Box::new(BlackholeIntermediateStage {
        _phantom: PhantomData,
    })
}

/// Stage scaling the working values by `65280 / 65535` for Lab profiles up to
/// version 4.0; a pass-through stage for every other profile.
pub(crate) fn katana_pcs_lab_v4_to_v2(profile: &ColorProfile) -> Box<KatanaDefaultIntermediate> {
    katana_lab_scale_stage(profile, 65280.0 / 65535.0)
}

/// Stage scaling the working values by `65535 / 65280` for Lab profiles up to
/// version 4.0; a pass-through stage for every other profile.
pub(crate) fn katana_pcs_lab_v2_to_v4(profile: &ColorProfile) -> Box<KatanaDefaultIntermediate> {
    katana_lab_scale_stage(profile, 65535.0 / 65280.0)
}
moxcms-0.7.7/src/conversions/katana/rgb_xyz.rs000064400000000000000000000144501046102023000176010ustar 00000000000000/* * 
// Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ use crate::conversions::katana::pcs_stages::KatanaMatrixStage; use crate::conversions::katana::{KatanaInitialStage, KatanaIntermediateStage}; use crate::err::try_vec; use crate::{CmsError, ColorProfile, Layout, Matrix3f, PointeeSizeExpressible, TransformOptions}; use num_traits::AsPrimitive; use std::marker::PhantomData; struct KatanaRgbLinearizationStage { r_lin: Box<[f32; LINEAR_CAP]>, g_lin: Box<[f32; LINEAR_CAP]>, b_lin: Box<[f32; LINEAR_CAP]>, linear_cap: usize, bit_depth: usize, _phantom: PhantomData, } impl< T: Clone + AsPrimitive + PointeeSizeExpressible, const LAYOUT: u8, const LINEAR_CAP: usize, > KatanaInitialStage for KatanaRgbLinearizationStage { fn to_pcs(&self, input: &[T]) -> Result, CmsError> { let src_layout = Layout::from(LAYOUT); if input.len() % src_layout.channels() != 0 { return Err(CmsError::LaneMultipleOfChannels); } let mut dst = try_vec![0.; input.len() / src_layout.channels() * 3]; let scale = if T::FINITE { (self.linear_cap as f32 - 1.) / ((1 << self.bit_depth) - 1) as f32 } else { (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32 }; let cap_value = if T::FINITE { ((1 << self.bit_depth) - 1) as f32 } else { (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32 }; for (src, dst) in input .chunks_exact(src_layout.channels()) .zip(dst.chunks_exact_mut(3)) { let j_r = src[0].as_() * scale; let j_g = src[1].as_() * scale; let j_b = src[2].as_() * scale; dst[0] = self.r_lin[(j_r.round().min(cap_value).max(0.) as u16) as usize]; dst[1] = self.g_lin[(j_g.round().min(cap_value).max(0.) as u16) as usize]; dst[2] = self.b_lin[(j_b.round().min(cap_value).max(0.) 
as u16) as usize]; } Ok(dst) } } pub(crate) struct KatanaRgbLinearizationState { pub(crate) stages: Vec + Send + Sync>>, pub(crate) initial_stage: Box + Send + Sync>, } pub(crate) fn katana_create_rgb_lin_lut< T: Copy + Default + AsPrimitive + Send + Sync + AsPrimitive + PointeeSizeExpressible, const BIT_DEPTH: usize, const LINEAR_CAP: usize, >( layout: Layout, source: &ColorProfile, opts: TransformOptions, ) -> Result, CmsError> where u32: AsPrimitive, f32: AsPrimitive, { let lin_r = source.build_r_linearize_table::(opts.allow_use_cicp_transfer)?; let lin_g = source.build_g_linearize_table::(opts.allow_use_cicp_transfer)?; let lin_b = source.build_b_linearize_table::(opts.allow_use_cicp_transfer)?; let lin_stage: Box + Send + Sync> = match layout { Layout::Rgb => { Box::new( KatanaRgbLinearizationStage:: { r_lin: lin_r, g_lin: lin_g, b_lin: lin_b, bit_depth: BIT_DEPTH, linear_cap: LINEAR_CAP, _phantom: PhantomData, }, ) } Layout::Rgba => { Box::new( KatanaRgbLinearizationStage:: { r_lin: lin_r, g_lin: lin_g, b_lin: lin_b, bit_depth: BIT_DEPTH, linear_cap: LINEAR_CAP, _phantom: PhantomData, }, ) } Layout::Gray => unimplemented!("Gray should not be called on Rgb/Rgba execution path"), Layout::GrayAlpha => { unimplemented!("GrayAlpha should not be called on Rgb/Rgba execution path") } _ => unreachable!(), }; let xyz_to_rgb = source.rgb_to_xyz_matrix(); let matrices: Vec + Send + Sync>> = vec![Box::new(KatanaMatrixStage { matrices: vec![ xyz_to_rgb.to_f32(), Matrix3f { v: [ [32768.0 / 65535.0, 0.0, 0.0], [0.0, 32768.0 / 65535.0, 0.0], [0.0, 0.0, 32768.0 / 65535.0], ], }, ], })]; Ok(KatanaRgbLinearizationState { stages: matrices, initial_stage: lin_stage, }) } moxcms-0.7.7/src/conversions/katana/stages.rs000064400000000000000000000070241046102023000174020ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved. 
* // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ use crate::{CmsError, TransformExecutor}; use std::marker::PhantomData; /// W storage working data type /// I input/output data type pub(crate) trait KatanaInitialStage { fn to_pcs(&self, input: &[I]) -> Result, CmsError>; } /// W storage working data type /// I input/output data type pub(crate) trait KatanaFinalStage { fn to_output(&self, src: &mut [W], dst: &mut [I]) -> Result<(), CmsError>; } /// W storage working data type pub(crate) trait KatanaIntermediateStage { fn stage(&self, input: &mut Vec) -> Result, CmsError>; } pub(crate) struct BlackholeIntermediateStage { pub(crate) _phantom: PhantomData, } impl KatanaIntermediateStage for BlackholeIntermediateStage { fn stage(&self, input: &mut Vec) -> Result, CmsError> { Ok(std::mem::take(input)) } } /// I input/output data type pub(crate) trait KatanaPostFinalizationStage { fn finalize(&self, src: &[I], dst: &mut [I]) -> Result<(), CmsError>; } /// W storage working data type /// I input/output data type pub(crate) struct Katana { pub(crate) initial_stage: Box + Send + Sync>, pub(crate) final_stage: Box + Sync + Send>, pub(crate) stages: Vec + Send + Sync>>, pub(crate) post_finalization: Vec + Send + Sync>>, } impl TransformExecutor for Katana { fn transform(&self, src: &[I], dst: &mut [I]) -> Result<(), CmsError> { let mut working_vec = self.initial_stage.to_pcs(src)?; for stage in self.stages.iter() { working_vec = stage.stage(&mut working_vec)?; } self.final_stage.to_output(&mut working_vec, dst)?; for finalization in self.post_finalization.iter() { finalization.finalize(src, dst)?; } Ok(()) } } moxcms-0.7.7/src/conversions/katana/xyz_lab.rs000064400000000000000000000052121046102023000175610ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. 
Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/
use crate::conversions::katana::KatanaIntermediateStage;
use crate::{CmsError, Lab, Xyz};

/// Intermediate stage converting every Lab triple of the working buffer to
/// PCS XYZ in place.
#[derive(Default)]
pub(crate) struct KatanaStageLabToXyz {}

impl KatanaIntermediateStage<f32> for KatanaStageLabToXyz {
    /// Converts in place and hands the buffer back, leaving `input` empty.
    ///
    /// # Errors
    /// Returns [`CmsError::LaneMultipleOfChannels`] when the buffer length is
    /// not a multiple of 3 (same contract as the matrix stage) instead of
    /// silently leaving a trailing partial triple unconverted.
    fn stage(&self, input: &mut Vec<f32>) -> Result<Vec<f32>, CmsError> {
        if input.len() % 3 != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }
        for px in input.chunks_exact_mut(3) {
            let xyz = Lab::new(px[0], px[1], px[2]).to_pcs_xyz();
            px[0] = xyz.x;
            px[1] = xyz.y;
            px[2] = xyz.z;
        }
        Ok(std::mem::take(input))
    }
}

/// Intermediate stage converting every PCS XYZ triple of the working buffer
/// to Lab in place.
#[derive(Default)]
pub(crate) struct KatanaStageXyzToLab {}

impl KatanaIntermediateStage<f32> for KatanaStageXyzToLab {
    /// Converts in place and hands the buffer back, leaving `input` empty.
    ///
    /// # Errors
    /// Returns [`CmsError::LaneMultipleOfChannels`] when the buffer length is
    /// not a multiple of 3 (same contract as the matrix stage) instead of
    /// silently leaving a trailing partial triple unconverted.
    fn stage(&self, input: &mut Vec<f32>) -> Result<Vec<f32>, CmsError> {
        if input.len() % 3 != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }
        for px in input.chunks_exact_mut(3) {
            let lab = Lab::from_pcs_xyz(Xyz::new(px[0], px[1], px[2]));
            px[0] = lab.l;
            px[1] = lab.a;
            px[2] = lab.b;
        }
        Ok(std::mem::take(input))
    }
}
moxcms-0.7.7/src/conversions/katana/xyz_rgb.rs000064400000000000000000000205741046102023000176050ustar 00000000000000/*
 * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
 * //
 * // Redistribution and use in source and binary forms, with or without modification,
 * // are permitted provided that the following conditions are met:
 * //
 * // 1. Redistributions of source code must retain the above copyright notice, this
 * // list of conditions and the following disclaimer.
 * //
 * // 2. Redistributions in binary form must reproduce the above copyright notice,
 * // this list of conditions and the following disclaimer in the documentation
 * // and/or other materials provided with the distribution.
 * //
 * // 3. Neither the name of the copyright holder nor the names of its
 * // contributors may be used to endorse or promote products derived from
 * // this software without specific prior written permission.
* // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ use crate::conversions::katana::pcs_stages::KatanaMatrixStage; use crate::conversions::katana::{ KatanaDefaultIntermediate, KatanaFinalStage, KatanaIntermediateStage, }; use crate::mlaf::mlaf; use crate::{ CmsError, ColorProfile, GammaLutInterpolate, Layout, Matrix3f, PointeeSizeExpressible, RenderingIntent, Rgb, TransformOptions, filmlike_clip, }; use num_traits::AsPrimitive; pub(crate) struct KatanaXyzToRgbStage { pub(crate) r_gamma: Box<[T; 65536]>, pub(crate) g_gamma: Box<[T; 65536]>, pub(crate) b_gamma: Box<[T; 65536]>, pub(crate) intent: RenderingIntent, pub(crate) bit_depth: usize, pub(crate) gamma_lut: usize, } impl + PointeeSizeExpressible, const LAYOUT: u8> KatanaFinalStage for KatanaXyzToRgbStage where u32: AsPrimitive, f32: AsPrimitive, { fn to_output(&self, src: &mut [f32], dst: &mut [T]) -> Result<(), CmsError> { let dst_cn = Layout::from(LAYOUT); let dst_channels = dst_cn.channels(); if src.len() % 3 != 0 { return Err(CmsError::LaneMultipleOfChannels); } if dst.len() % dst_channels != 0 { return Err(CmsError::LaneMultipleOfChannels); } let src_chunks = src.len() / 3; let dst_chunks = dst.len() / dst_channels; if src_chunks != dst_chunks { return 
Err(CmsError::LaneSizeMismatch); } let max_colors: T = (if T::FINITE { ((1u32 << self.bit_depth) - 1) as f32 } else { 1.0 }) .as_(); let lut_cap = (self.gamma_lut - 1) as f32; if self.intent != RenderingIntent::AbsoluteColorimetric { for (src, dst) in src.chunks_exact(3).zip(dst.chunks_exact_mut(dst_channels)) { let mut rgb = Rgb::new(src[0], src[1], src[2]); if rgb.is_out_of_gamut() { rgb = filmlike_clip(rgb); } let r = mlaf(0.5, rgb.r, lut_cap).min(lut_cap).max(0.) as u16; let g = mlaf(0.5, rgb.g, lut_cap).min(lut_cap).max(0.) as u16; let b = mlaf(0.5, rgb.b, lut_cap).min(lut_cap).max(0.) as u16; dst[0] = self.r_gamma[r as usize]; dst[1] = self.g_gamma[g as usize]; dst[2] = self.b_gamma[b as usize]; if dst_cn == Layout::Rgba { dst[3] = max_colors; } } } else { for (src, dst) in src.chunks_exact(3).zip(dst.chunks_exact_mut(dst_channels)) { let rgb = Rgb::new(src[0], src[1], src[2]); let r = mlaf(0.5, rgb.r, lut_cap).min(lut_cap).max(0.) as u16; let g = mlaf(0.5, rgb.g, lut_cap).min(lut_cap).max(0.) as u16; let b = mlaf(0.5, rgb.b, lut_cap).min(lut_cap).max(0.) 
as u16; dst[0] = self.r_gamma[r as usize]; dst[1] = self.g_gamma[g as usize]; dst[2] = self.b_gamma[b as usize]; if dst_cn == Layout::Rgba { dst[3] = max_colors; } } } Ok(()) } } pub(crate) struct KatanaXyzRgbState { pub(crate) stages: Vec + Send + Sync>>, pub(crate) final_stage: Box + Send + Sync>, } pub(crate) fn katana_prepare_inverse_lut_rgb_xyz< T: Copy + Default + AsPrimitive + Send + Sync + AsPrimitive + PointeeSizeExpressible + GammaLutInterpolate, const BIT_DEPTH: usize, const GAMMA_LUT: usize, >( dest: &ColorProfile, dest_layout: Layout, options: TransformOptions, ) -> Result, CmsError> where f32: AsPrimitive, u32: AsPrimitive, { // if !T::FINITE { // if let Some(extended_gamma) = dest.try_extended_gamma_evaluator() { // let xyz_to_rgb = dest.rgb_to_xyz_matrix().inverse(); // // let mut matrices = vec![Matrix3f { // v: [ // [65535.0 / 32768.0, 0.0, 0.0], // [0.0, 65535.0 / 32768.0, 0.0], // [0.0, 0.0, 65535.0 / 32768.0], // ], // }]; // // matrices.push(xyz_to_rgb.to_f32()); // let xyz_to_rgb_stage = XyzToRgbStageExtended:: { // gamma_evaluator: extended_gamma, // matrices, // phantom_data: PhantomData, // }; // xyz_to_rgb_stage.transform(lut)?; // return Ok(()); // } // } let gamma_map_r = dest.build_gamma_table::( &dest.red_trc, options.allow_use_cicp_transfer, )?; let gamma_map_g = dest.build_gamma_table::( &dest.green_trc, options.allow_use_cicp_transfer, )?; let gamma_map_b = dest.build_gamma_table::( &dest.blue_trc, options.allow_use_cicp_transfer, )?; let xyz_to_rgb = dest.rgb_to_xyz_matrix().inverse(); let mut matrices: Vec> = vec![Box::new(KatanaMatrixStage::new(Matrix3f { v: [ [65535.0 / 32768.0, 0.0, 0.0], [0.0, 65535.0 / 32768.0, 0.0], [0.0, 0.0, 65535.0 / 32768.0], ], }))]; matrices.push(Box::new(KatanaMatrixStage::new(xyz_to_rgb.to_f32()))); match dest_layout { Layout::Rgb => { let xyz_to_rgb_stage = KatanaXyzToRgbStage:: { r_gamma: gamma_map_r, g_gamma: gamma_map_g, b_gamma: gamma_map_b, intent: options.rendering_intent, bit_depth: 
BIT_DEPTH, gamma_lut: GAMMA_LUT, }; Ok(KatanaXyzRgbState { stages: matrices, final_stage: Box::new(xyz_to_rgb_stage), }) } Layout::Rgba => { let xyz_to_rgb_stage = KatanaXyzToRgbStage:: { r_gamma: gamma_map_r, g_gamma: gamma_map_g, b_gamma: gamma_map_b, intent: options.rendering_intent, bit_depth: BIT_DEPTH, gamma_lut: GAMMA_LUT, }; Ok(KatanaXyzRgbState { stages: matrices, final_stage: Box::new(xyz_to_rgb_stage), }) } Layout::Gray => unreachable!("Gray layout must not be called on Rgb/Rgba path"), Layout::GrayAlpha => unreachable!("Gray layout must not be called on Rgb/Rgba path"), _ => unreachable!( "layout {:?} should not be called on xyz->rgb path", dest_layout ), } } moxcms-0.7.7/src/conversions/lut3x3.rs000064400000000000000000000357171046102023000160310ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ use crate::conversions::katana::{KatanaFinalStage, KatanaInitialStage}; use crate::err::{MalformedSize, try_vec}; use crate::profile::LutDataType; use crate::safe_math::{SafeMul, SafePowi}; use crate::trc::lut_interp_linear_float; use crate::{ CmsError, Cube, DataColorSpace, InterpolationMethod, PointeeSizeExpressible, Stage, TransformOptions, Vector3f, }; use num_traits::AsPrimitive; #[derive(Default)] struct Lut3x3 { input: [Vec; 3], clut: Vec, grid_size: u8, gamma: [Vec; 3], interpolation_method: InterpolationMethod, pcs: DataColorSpace, } #[derive(Default)] struct KatanaLut3x3 { input: [Vec; 3], clut: Vec, grid_size: u8, gamma: [Vec; 3], interpolation_method: InterpolationMethod, pcs: DataColorSpace, _phantom: std::marker::PhantomData, bit_depth: usize, } fn make_lut_3x3( lut: &LutDataType, options: TransformOptions, pcs: DataColorSpace, ) -> Result { let clut_length: usize = (lut.num_clut_grid_points as usize) .safe_powi(lut.num_input_channels as u32)? 
.safe_mul(lut.num_output_channels as usize)?; let lin_table = lut.input_table.to_clut_f32(); if lin_table.len() < lut.num_input_table_entries as usize * 3 { return Err(CmsError::MalformedCurveLutTable(MalformedSize { size: lin_table.len(), expected: lut.num_input_table_entries as usize * 3, })); } let lin_curve0 = lin_table[..lut.num_input_table_entries as usize].to_vec(); let lin_curve1 = lin_table [lut.num_input_table_entries as usize..lut.num_input_table_entries as usize * 2] .to_vec(); let lin_curve2 = lin_table [lut.num_input_table_entries as usize * 2..lut.num_input_table_entries as usize * 3] .to_vec(); let clut_table = lut.clut_table.to_clut_f32(); if clut_table.len() != clut_length { return Err(CmsError::MalformedClut(MalformedSize { size: clut_table.len(), expected: clut_length, })); } let gamma_curves = lut.output_table.to_clut_f32(); if gamma_curves.len() < lut.num_output_table_entries as usize * 3 { return Err(CmsError::MalformedCurveLutTable(MalformedSize { size: gamma_curves.len(), expected: lut.num_output_table_entries as usize * 3, })); } let gamma_curve0 = gamma_curves[..lut.num_output_table_entries as usize].to_vec(); let gamma_curve1 = gamma_curves [lut.num_output_table_entries as usize..lut.num_output_table_entries as usize * 2] .to_vec(); let gamma_curve2 = gamma_curves [lut.num_output_table_entries as usize * 2..lut.num_output_table_entries as usize * 3] .to_vec(); let transform = Lut3x3 { input: [lin_curve0, lin_curve1, lin_curve2], gamma: [gamma_curve0, gamma_curve1, gamma_curve2], interpolation_method: options.interpolation_method, clut: clut_table, grid_size: lut.num_clut_grid_points, pcs, }; Ok(transform) } fn stage_lut_3x3( lut: &LutDataType, options: TransformOptions, pcs: DataColorSpace, ) -> Result, CmsError> { let lut = make_lut_3x3(lut, options, pcs)?; let transform = Lut3x3 { input: lut.input, gamma: lut.gamma, interpolation_method: lut.interpolation_method, clut: lut.clut, grid_size: lut.grid_size, pcs: lut.pcs, }; 
Ok(Box::new(transform)) } pub(crate) fn katana_input_stage_lut_3x3< T: Copy + Default + AsPrimitive + PointeeSizeExpressible + Send + Sync, >( lut: &LutDataType, options: TransformOptions, pcs: DataColorSpace, bit_depth: usize, ) -> Result + Send + Sync>, CmsError> where f32: AsPrimitive, { let lut = make_lut_3x3(lut, options, pcs)?; let transform = KatanaLut3x3:: { input: lut.input, gamma: lut.gamma, interpolation_method: lut.interpolation_method, clut: lut.clut, grid_size: lut.grid_size, pcs: lut.pcs, _phantom: std::marker::PhantomData, bit_depth, }; Ok(Box::new(transform)) } pub(crate) fn katana_output_stage_lut_3x3< T: Copy + Default + AsPrimitive + PointeeSizeExpressible + Send + Sync, >( lut: &LutDataType, options: TransformOptions, pcs: DataColorSpace, bit_depth: usize, ) -> Result + Send + Sync>, CmsError> where f32: AsPrimitive, { let lut = make_lut_3x3(lut, options, pcs)?; let transform = KatanaLut3x3:: { input: lut.input, gamma: lut.gamma, interpolation_method: lut.interpolation_method, clut: lut.clut, grid_size: lut.grid_size, pcs: lut.pcs, _phantom: std::marker::PhantomData, bit_depth, }; Ok(Box::new(transform)) } impl Lut3x3 { fn transform_impl Vector3f>( &self, src: &[f32], dst: &mut [f32], fetch: Fetch, ) -> Result<(), CmsError> { let linearization_0 = &self.input[0]; let linearization_1 = &self.input[1]; let linearization_2 = &self.input[2]; for (dest, src) in dst.chunks_exact_mut(3).zip(src.chunks_exact(3)) { debug_assert!(self.grid_size as i32 >= 1); let linear_x = lut_interp_linear_float(src[0], linearization_0); let linear_y = lut_interp_linear_float(src[1], linearization_1); let linear_z = lut_interp_linear_float(src[2], linearization_2); let clut = fetch(linear_x, linear_y, linear_z); let pcs_x = lut_interp_linear_float(clut.v[0], &self.gamma[0]); let pcs_y = lut_interp_linear_float(clut.v[1], &self.gamma[1]); let pcs_z = lut_interp_linear_float(clut.v[2], &self.gamma[2]); dest[0] = pcs_x; dest[1] = pcs_y; dest[2] = pcs_z; } Ok(()) } } impl 
Stage for Lut3x3 { fn transform(&self, src: &[f32], dst: &mut [f32]) -> Result<(), CmsError> { let l_tbl = Cube::new(&self.clut, self.grid_size as usize); // If PCS is LAB then linear interpolation should be used if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz { return self.transform_impl(src, dst, |x, y, z| l_tbl.trilinear_vec3(x, y, z)); } match self.interpolation_method { #[cfg(feature = "options")] InterpolationMethod::Tetrahedral => { self.transform_impl(src, dst, |x, y, z| l_tbl.tetra_vec3(x, y, z))?; } #[cfg(feature = "options")] InterpolationMethod::Pyramid => { self.transform_impl(src, dst, |x, y, z| l_tbl.pyramid_vec3(x, y, z))?; } #[cfg(feature = "options")] InterpolationMethod::Prism => { self.transform_impl(src, dst, |x, y, z| l_tbl.prism_vec3(x, y, z))?; } InterpolationMethod::Linear => { self.transform_impl(src, dst, |x, y, z| l_tbl.trilinear_vec3(x, y, z))?; } } Ok(()) } } impl> KatanaLut3x3 where f32: AsPrimitive, { fn to_pcs_impl Vector3f>( &self, input: &[T], fetch: Fetch, ) -> Result, CmsError> { if input.len() % 3 != 0 { return Err(CmsError::LaneMultipleOfChannels); } let normalizing_value = if T::FINITE { 1.0 / ((1u32 << self.bit_depth) - 1) as f32 } else { 1.0 }; let mut dst = try_vec![0.; input.len()]; let linearization_0 = &self.input[0]; let linearization_1 = &self.input[1]; let linearization_2 = &self.input[2]; for (dest, src) in dst.chunks_exact_mut(3).zip(input.chunks_exact(3)) { let linear_x = lut_interp_linear_float(src[0].as_() * normalizing_value, linearization_0); let linear_y = lut_interp_linear_float(src[1].as_() * normalizing_value, linearization_1); let linear_z = lut_interp_linear_float(src[2].as_() * normalizing_value, linearization_2); let clut = fetch(linear_x, linear_y, linear_z); let pcs_x = lut_interp_linear_float(clut.v[0], &self.gamma[0]); let pcs_y = lut_interp_linear_float(clut.v[1], &self.gamma[1]); let pcs_z = lut_interp_linear_float(clut.v[2], &self.gamma[2]); dest[0] = pcs_x; dest[1] = 
pcs_y; dest[2] = pcs_z; } Ok(dst) } fn to_output Vector3f>( &self, src: &[f32], dst: &mut [T], fetch: Fetch, ) -> Result<(), CmsError> { if src.len() % 3 != 0 { return Err(CmsError::LaneMultipleOfChannels); } if dst.len() % 3 != 0 { return Err(CmsError::LaneMultipleOfChannels); } if dst.len() != src.len() { return Err(CmsError::LaneSizeMismatch); } let norm_value = if T::FINITE { ((1u32 << self.bit_depth) - 1) as f32 } else { 1.0 }; let linearization_0 = &self.input[0]; let linearization_1 = &self.input[1]; let linearization_2 = &self.input[2]; for (dest, src) in dst.chunks_exact_mut(3).zip(src.chunks_exact(3)) { let linear_x = lut_interp_linear_float(src[0], linearization_0); let linear_y = lut_interp_linear_float(src[1], linearization_1); let linear_z = lut_interp_linear_float(src[2], linearization_2); let clut = fetch(linear_x, linear_y, linear_z); let pcs_x = lut_interp_linear_float(clut.v[0], &self.gamma[0]); let pcs_y = lut_interp_linear_float(clut.v[1], &self.gamma[1]); let pcs_z = lut_interp_linear_float(clut.v[2], &self.gamma[2]); if T::FINITE { dest[0] = (pcs_x * norm_value).round().max(0.0).min(norm_value).as_(); dest[1] = (pcs_y * norm_value).round().max(0.0).min(norm_value).as_(); dest[2] = (pcs_z * norm_value).round().max(0.0).min(norm_value).as_(); } else { dest[0] = pcs_x.as_(); dest[1] = pcs_y.as_(); dest[2] = pcs_z.as_(); } } Ok(()) } } impl> KatanaInitialStage for KatanaLut3x3 where f32: AsPrimitive, { fn to_pcs(&self, input: &[T]) -> Result, CmsError> { let l_tbl = Cube::new(&self.clut, self.grid_size as usize); // If PCS is LAB then linear interpolation should be used if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz { return self.to_pcs_impl(input, |x, y, z| l_tbl.trilinear_vec3(x, y, z)); } match self.interpolation_method { #[cfg(feature = "options")] InterpolationMethod::Tetrahedral => { self.to_pcs_impl(input, |x, y, z| l_tbl.tetra_vec3(x, y, z)) } #[cfg(feature = "options")] InterpolationMethod::Pyramid => { 
self.to_pcs_impl(input, |x, y, z| l_tbl.pyramid_vec3(x, y, z)) } #[cfg(feature = "options")] InterpolationMethod::Prism => { self.to_pcs_impl(input, |x, y, z| l_tbl.prism_vec3(x, y, z)) } InterpolationMethod::Linear => { self.to_pcs_impl(input, |x, y, z| l_tbl.trilinear_vec3(x, y, z)) } } } } impl> KatanaFinalStage for KatanaLut3x3 where f32: AsPrimitive, { fn to_output(&self, src: &mut [f32], dst: &mut [T]) -> Result<(), CmsError> { let l_tbl = Cube::new(&self.clut, self.grid_size as usize); // If PCS is LAB then linear interpolation should be used if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz { return self.to_output(src, dst, |x, y, z| l_tbl.trilinear_vec3(x, y, z)); } match self.interpolation_method { #[cfg(feature = "options")] InterpolationMethod::Tetrahedral => { self.to_output(src, dst, |x, y, z| l_tbl.tetra_vec3(x, y, z)) } #[cfg(feature = "options")] InterpolationMethod::Pyramid => { self.to_output(src, dst, |x, y, z| l_tbl.pyramid_vec3(x, y, z)) } #[cfg(feature = "options")] InterpolationMethod::Prism => { self.to_output(src, dst, |x, y, z| l_tbl.prism_vec3(x, y, z)) } InterpolationMethod::Linear => { self.to_output(src, dst, |x, y, z| l_tbl.trilinear_vec3(x, y, z)) } } } } pub(crate) fn create_lut3x3( lut: &LutDataType, src: &[f32], options: TransformOptions, pcs: DataColorSpace, ) -> Result, CmsError> { if lut.num_input_channels != 3 || lut.num_output_channels != 3 { return Err(CmsError::UnsupportedProfileConnection); } let mut dest = try_vec![0.; src.len()]; let lut_stage = stage_lut_3x3(lut, options, pcs)?; lut_stage.transform(src, &mut dest)?; Ok(dest) } moxcms-0.7.7/src/conversions/lut3x4.rs000064400000000000000000000214721046102023000160230ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. 
Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ use crate::err::try_vec; use crate::profile::LutDataType; use crate::safe_math::{SafeMul, SafePowi}; use crate::trc::lut_interp_linear_float; use crate::{ CmsError, Cube, DataColorSpace, InterpolationMethod, MalformedSize, Stage, TransformOptions, Vector4f, }; use num_traits::AsPrimitive; #[derive(Default)] struct Lut3x4 { input: [Vec; 3], clut: Vec, grid_size: u8, gamma: [Vec; 4], interpolation_method: InterpolationMethod, pcs: DataColorSpace, } fn make_lut_3x4( lut: &LutDataType, options: TransformOptions, pcs: DataColorSpace, ) -> Result { let clut_length: usize = (lut.num_clut_grid_points as usize) .safe_powi(lut.num_input_channels as u32)? .safe_mul(lut.num_output_channels as usize)?; let clut_table = lut.clut_table.to_clut_f32(); if clut_table.len() != clut_length { return Err(CmsError::MalformedClut(MalformedSize { size: clut_table.len(), expected: clut_length, })); } let linearization_table = lut.input_table.to_clut_f32(); if linearization_table.len() < lut.num_input_table_entries as usize * 3 { return Err(CmsError::MalformedCurveLutTable(MalformedSize { size: linearization_table.len(), expected: lut.num_input_table_entries as usize * 3, })); } let linear_curve0 = linearization_table[..lut.num_input_table_entries as usize].to_vec(); let linear_curve1 = linearization_table [lut.num_input_table_entries as usize..lut.num_input_table_entries as usize * 2] .to_vec(); let linear_curve2 = linearization_table [lut.num_input_table_entries as usize * 2..lut.num_input_table_entries as usize * 3] .to_vec(); let gamma_table = lut.output_table.to_clut_f32(); if gamma_table.len() < lut.num_output_table_entries as usize * 4 { return Err(CmsError::MalformedCurveLutTable(MalformedSize { size: gamma_table.len(), expected: lut.num_output_table_entries as usize * 4, })); } let gamma_curve0 = gamma_table[..lut.num_output_table_entries as usize].to_vec(); let gamma_curve1 = gamma_table [lut.num_output_table_entries as usize..lut.num_output_table_entries as usize * 2] 
.to_vec(); let gamma_curve2 = gamma_table [lut.num_output_table_entries as usize * 2..lut.num_output_table_entries as usize * 3] .to_vec(); let gamma_curve3 = gamma_table [lut.num_output_table_entries as usize * 3..lut.num_output_table_entries as usize * 4] .to_vec(); let transform = Lut3x4 { input: [linear_curve0, linear_curve1, linear_curve2], interpolation_method: options.interpolation_method, clut: clut_table, grid_size: lut.num_clut_grid_points, pcs, gamma: [gamma_curve0, gamma_curve1, gamma_curve2, gamma_curve3], }; Ok(transform) } fn stage_lut_3x4( lut: &LutDataType, options: TransformOptions, pcs: DataColorSpace, ) -> Result, CmsError> { let lut = make_lut_3x4(lut, options, pcs)?; let transform = Lut3x4 { input: lut.input, interpolation_method: lut.interpolation_method, clut: lut.clut, grid_size: lut.grid_size, pcs: lut.pcs, gamma: lut.gamma, }; Ok(Box::new(transform)) } impl Lut3x4 { fn transform_impl Vector4f>( &self, src: &[f32], dst: &mut [f32], fetch: Fetch, ) -> Result<(), CmsError> { let linearization_0 = &self.input[0]; let linearization_1 = &self.input[1]; let linearization_2 = &self.input[2]; for (dest, src) in dst.chunks_exact_mut(4).zip(src.chunks_exact(3)) { debug_assert!(self.grid_size as i32 >= 1); let linear_x = lut_interp_linear_float(src[0], linearization_0); let linear_y = lut_interp_linear_float(src[1], linearization_1); let linear_z = lut_interp_linear_float(src[2], linearization_2); let clut = fetch(linear_x, linear_y, linear_z); let pcs_x = lut_interp_linear_float(clut.v[0], &self.gamma[0]); let pcs_y = lut_interp_linear_float(clut.v[1], &self.gamma[1]); let pcs_z = lut_interp_linear_float(clut.v[2], &self.gamma[2]); let pcs_w = lut_interp_linear_float(clut.v[3], &self.gamma[3]); dest[0] = pcs_x; dest[1] = pcs_y; dest[2] = pcs_z; dest[3] = pcs_w; } Ok(()) } } impl Stage for Lut3x4 { fn transform(&self, src: &[f32], dst: &mut [f32]) -> Result<(), CmsError> { let l_tbl = Cube::new(&self.clut, self.grid_size as usize); // If PCS is LAB 
then linear interpolation should be used if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz { return self.transform_impl(src, dst, |x, y, z| l_tbl.trilinear_vec4(x, y, z)); } match self.interpolation_method { #[cfg(feature = "options")] InterpolationMethod::Tetrahedral => { self.transform_impl(src, dst, |x, y, z| l_tbl.tetra_vec4(x, y, z))?; } #[cfg(feature = "options")] InterpolationMethod::Pyramid => { self.transform_impl(src, dst, |x, y, z| l_tbl.pyramid_vec4(x, y, z))?; } #[cfg(feature = "options")] InterpolationMethod::Prism => { self.transform_impl(src, dst, |x, y, z| l_tbl.prism_vec4(x, y, z))?; } InterpolationMethod::Linear => { self.transform_impl(src, dst, |x, y, z| l_tbl.trilinear_vec4(x, y, z))?; } } Ok(()) } } pub(crate) fn create_lut3_samples() -> Vec where u32: AsPrimitive, { let lut_size: u32 = (3 * SAMPLES * SAMPLES * SAMPLES) as u32; assert!(SAMPLES >= 1); let mut src = Vec::with_capacity(lut_size as usize); for x in 0..SAMPLES as u32 { for y in 0..SAMPLES as u32 { for z in 0..SAMPLES as u32 { src.push(x.as_()); src.push(y.as_()); src.push(z.as_()); } } } src } pub(crate) fn create_lut3_samples_norm() -> Vec { let lut_size: u32 = (3 * SAMPLES * SAMPLES * SAMPLES) as u32; assert!(SAMPLES >= 1); let scale = 1. 
/ (SAMPLES as f32 - 1.0); let mut src = Vec::with_capacity(lut_size as usize); for x in 0..SAMPLES as u32 { for y in 0..SAMPLES as u32 { for z in 0..SAMPLES as u32 { src.push(x as f32 * scale); src.push(y as f32 * scale); src.push(z as f32 * scale); } } } src } pub(crate) fn create_lut3x4( lut: &LutDataType, src: &[f32], options: TransformOptions, pcs: DataColorSpace, ) -> Result, CmsError> { if lut.num_input_channels != 3 || lut.num_output_channels != 4 { return Err(CmsError::UnsupportedProfileConnection); } let mut dest = try_vec![0.; (src.len() / 3) * 4]; let lut_stage = stage_lut_3x4(lut, options, pcs)?; lut_stage.transform(src, &mut dest)?; Ok(dest) } moxcms-0.7.7/src/conversions/lut4.rs000064400000000000000000000327641046102023000155560ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ use crate::conversions::katana::KatanaInitialStage; use crate::err::try_vec; use crate::profile::LutDataType; use crate::safe_math::{SafeMul, SafePowi}; use crate::trc::lut_interp_linear_float; use crate::{ CmsError, DataColorSpace, Hypercube, InterpolationMethod, MalformedSize, PointeeSizeExpressible, Stage, TransformOptions, Vector3f, }; use num_traits::AsPrimitive; use std::marker::PhantomData; #[allow(unused)] #[derive(Default)] struct Lut4x3 { linearization: [Vec; 4], clut: Vec, grid_size: u8, output: [Vec; 3], interpolation_method: InterpolationMethod, pcs: DataColorSpace, } #[allow(unused)] #[derive(Default)] struct KatanaLut4x3> { linearization: [Vec; 4], clut: Vec, grid_size: u8, output: [Vec; 3], interpolation_method: InterpolationMethod, pcs: DataColorSpace, _phantom: PhantomData, bit_depth: usize, } #[allow(unused)] impl Lut4x3 { fn transform_impl Vector3f>( &self, src: &[f32], dst: &mut [f32], fetch: Fetch, ) -> Result<(), CmsError> { let linearization_0 = &self.linearization[0]; let linearization_1 = &self.linearization[1]; let linearization_2 = &self.linearization[2]; let linearization_3 = &self.linearization[3]; for (dest, src) in dst.chunks_exact_mut(3).zip(src.chunks_exact(4)) { debug_assert!(self.grid_size as i32 >= 1); let linear_x = lut_interp_linear_float(src[0], linearization_0); let linear_y = lut_interp_linear_float(src[1], linearization_1); let linear_z = lut_interp_linear_float(src[2], 
linearization_2); let linear_w = lut_interp_linear_float(src[3], linearization_3); let clut = fetch(linear_x, linear_y, linear_z, linear_w); let pcs_x = lut_interp_linear_float(clut.v[0], &self.output[0]); let pcs_y = lut_interp_linear_float(clut.v[1], &self.output[1]); let pcs_z = lut_interp_linear_float(clut.v[2], &self.output[2]); dest[0] = pcs_x; dest[1] = pcs_y; dest[2] = pcs_z; } Ok(()) } } macro_rules! define_lut4_dispatch { ($dispatcher: ident) => { impl Stage for $dispatcher { fn transform(&self, src: &[f32], dst: &mut [f32]) -> Result<(), CmsError> { let l_tbl = Hypercube::new(&self.clut, self.grid_size as usize); // If Source PCS is LAB trilinear should be used if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz { return self .transform_impl(src, dst, |x, y, z, w| l_tbl.quadlinear_vec3(x, y, z, w)); } match self.interpolation_method { #[cfg(feature = "options")] InterpolationMethod::Tetrahedral => { self.transform_impl(src, dst, |x, y, z, w| l_tbl.tetra_vec3(x, y, z, w))?; } #[cfg(feature = "options")] InterpolationMethod::Pyramid => { self.transform_impl(src, dst, |x, y, z, w| l_tbl.pyramid_vec3(x, y, z, w))?; } #[cfg(feature = "options")] InterpolationMethod::Prism => { self.transform_impl(src, dst, |x, y, z, w| l_tbl.prism_vec3(x, y, z, w))? } InterpolationMethod::Linear => { self.transform_impl(src, dst, |x, y, z, w| { l_tbl.quadlinear_vec3(x, y, z, w) })? 
} } Ok(()) } } }; } impl> KatanaLut4x3 { fn to_pcs_impl Vector3f>( &self, input: &[T], fetch: Fetch, ) -> Result, CmsError> { if input.len() % 4 != 0 { return Err(CmsError::LaneMultipleOfChannels); } let norm_value = if T::FINITE { 1.0 / ((1u32 << self.bit_depth) - 1) as f32 } else { 1.0 }; let mut dst = try_vec![0.; (input.len() / 4) * 3]; let linearization_0 = &self.linearization[0]; let linearization_1 = &self.linearization[1]; let linearization_2 = &self.linearization[2]; let linearization_3 = &self.linearization[3]; for (dest, src) in dst.chunks_exact_mut(3).zip(input.chunks_exact(4)) { let linear_x = lut_interp_linear_float(src[0].as_() * norm_value, linearization_0); let linear_y = lut_interp_linear_float(src[1].as_() * norm_value, linearization_1); let linear_z = lut_interp_linear_float(src[2].as_() * norm_value, linearization_2); let linear_w = lut_interp_linear_float(src[3].as_() * norm_value, linearization_3); let clut = fetch(linear_x, linear_y, linear_z, linear_w); let pcs_x = lut_interp_linear_float(clut.v[0], &self.output[0]); let pcs_y = lut_interp_linear_float(clut.v[1], &self.output[1]); let pcs_z = lut_interp_linear_float(clut.v[2], &self.output[2]); dest[0] = pcs_x; dest[1] = pcs_y; dest[2] = pcs_z; } Ok(dst) } } impl> KatanaInitialStage for KatanaLut4x3 { fn to_pcs(&self, input: &[T]) -> Result, CmsError> { if input.len() % 4 != 0 { return Err(CmsError::LaneMultipleOfChannels); } let l_tbl = Hypercube::new(&self.clut, self.grid_size as usize); // If Source PCS is LAB trilinear should be used if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz { return self.to_pcs_impl(input, |x, y, z, w| l_tbl.quadlinear_vec3(x, y, z, w)); } match self.interpolation_method { #[cfg(feature = "options")] InterpolationMethod::Tetrahedral => { self.to_pcs_impl(input, |x, y, z, w| l_tbl.tetra_vec3(x, y, z, w)) } #[cfg(feature = "options")] InterpolationMethod::Pyramid => { self.to_pcs_impl(input, |x, y, z, w| l_tbl.pyramid_vec3(x, y, z, w)) } 
#[cfg(feature = "options")] InterpolationMethod::Prism => { self.to_pcs_impl(input, |x, y, z, w| l_tbl.prism_vec3(x, y, z, w)) } InterpolationMethod::Linear => { self.to_pcs_impl(input, |x, y, z, w| l_tbl.quadlinear_vec3(x, y, z, w)) } } } } define_lut4_dispatch!(Lut4x3); fn make_lut_4x3( lut: &LutDataType, options: TransformOptions, pcs: DataColorSpace, ) -> Result { // There is 4 possible cases: // - All curves are non-linear // - Linearization curves are non-linear, but gamma is linear // - Gamma curves are non-linear, but linearization is linear // - All curves linear let clut_length: usize = (lut.num_clut_grid_points as usize) .safe_powi(lut.num_input_channels as u32)? .safe_mul(lut.num_output_channels as usize)?; let clut_table = lut.clut_table.to_clut_f32(); if clut_table.len() != clut_length { return Err(CmsError::MalformedClut(MalformedSize { size: clut_table.len(), expected: clut_length, })); } let linearization_table = lut.input_table.to_clut_f32(); if linearization_table.len() < lut.num_input_table_entries as usize * 4 { return Err(CmsError::MalformedCurveLutTable(MalformedSize { size: linearization_table.len(), expected: lut.num_input_table_entries as usize * 4, })); } let lin_curve0 = linearization_table[0..lut.num_input_table_entries as usize].to_vec(); let lin_curve1 = linearization_table [lut.num_input_table_entries as usize..lut.num_input_table_entries as usize * 2] .to_vec(); let lin_curve2 = linearization_table [lut.num_input_table_entries as usize * 2..lut.num_input_table_entries as usize * 3] .to_vec(); let lin_curve3 = linearization_table [lut.num_input_table_entries as usize * 3..lut.num_input_table_entries as usize * 4] .to_vec(); let gamma_table = lut.output_table.to_clut_f32(); if gamma_table.len() < lut.num_output_table_entries as usize * 3 { return Err(CmsError::MalformedCurveLutTable(MalformedSize { size: gamma_table.len(), expected: lut.num_output_table_entries as usize * 3, })); } let gamma_curve0 = 
gamma_table[..lut.num_output_table_entries as usize].to_vec(); let gamma_curve1 = gamma_table [lut.num_output_table_entries as usize..lut.num_output_table_entries as usize * 2] .to_vec(); let gamma_curve2 = gamma_table [lut.num_output_table_entries as usize * 2..lut.num_output_table_entries as usize * 3] .to_vec(); let transform = Lut4x3 { linearization: [lin_curve0, lin_curve1, lin_curve2, lin_curve3], interpolation_method: options.interpolation_method, pcs, clut: clut_table, grid_size: lut.num_clut_grid_points, output: [gamma_curve0, gamma_curve1, gamma_curve2], }; Ok(transform) } fn stage_lut_4x3( lut: &LutDataType, options: TransformOptions, pcs: DataColorSpace, ) -> Result, CmsError> { let lut = make_lut_4x3(lut, options, pcs)?; let transform = Lut4x3 { linearization: lut.linearization, interpolation_method: lut.interpolation_method, pcs: lut.pcs, clut: lut.clut, grid_size: lut.grid_size, output: lut.output, }; Ok(Box::new(transform)) } pub(crate) fn katana_input_stage_lut_4x3< T: Copy + PointeeSizeExpressible + AsPrimitive + Send + Sync, >( lut: &LutDataType, options: TransformOptions, pcs: DataColorSpace, bit_depth: usize, ) -> Result + Send + Sync>, CmsError> { // There is 4 possible cases: // - All curves are non-linear // - Linearization curves are non-linear, but gamma is linear // - Gamma curves are non-linear, but linearization is linear // - All curves linear let lut = make_lut_4x3(lut, options, pcs)?; let transform = KatanaLut4x3:: { linearization: lut.linearization, interpolation_method: lut.interpolation_method, pcs: lut.pcs, clut: lut.clut, grid_size: lut.grid_size, output: lut.output, _phantom: PhantomData, bit_depth, }; Ok(Box::new(transform)) } pub(crate) fn create_lut4_norm_samples() -> Vec { let lut_size: u32 = (4 * SAMPLES * SAMPLES * SAMPLES * SAMPLES) as u32; let mut src = Vec::with_capacity(lut_size as usize); let recpeq = 1f32 / (SAMPLES - 1) as f32; for k in 0..SAMPLES { for c in 0..SAMPLES { for m in 0..SAMPLES { for y in 0..SAMPLES { 
src.push(c as f32 * recpeq); src.push(m as f32 * recpeq); src.push(y as f32 * recpeq); src.push(k as f32 * recpeq); } } } } src } pub(crate) fn create_lut4( lut: &LutDataType, options: TransformOptions, pcs: DataColorSpace, ) -> Result, CmsError> { if lut.num_input_channels != 4 { return Err(CmsError::UnsupportedProfileConnection); } let lut_size: u32 = (4 * SAMPLES * SAMPLES * SAMPLES * SAMPLES) as u32; let src = create_lut4_norm_samples::(); let mut dest = try_vec![0.; (lut_size as usize) / 4 * 3]; let lut_stage = stage_lut_4x3(lut, options, pcs)?; lut_stage.transform(&src, &mut dest)?; Ok(dest) } moxcms-0.7.7/src/conversions/lut_transforms.rs000064400000000000000000000763771046102023000177600ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ use crate::conversions::lut3x3::{ create_lut3x3, katana_input_stage_lut_3x3, katana_output_stage_lut_3x3, }; use crate::conversions::lut3x4::{create_lut3_samples_norm, create_lut3x4}; use crate::conversions::lut4::{create_lut4, create_lut4_norm_samples, katana_input_stage_lut_4x3}; use crate::conversions::mab::{prepare_mab_3x3, prepare_mba_3x3}; use crate::conversions::transform_lut3_to_4::make_transform_3x4; use crate::mlaf::mlaf; use crate::{ CmsError, ColorProfile, DataColorSpace, InPlaceStage, Layout, LutWarehouse, Matrix3f, ProfileVersion, TransformExecutor, TransformOptions, }; use num_traits::AsPrimitive; pub(crate) struct MatrixStage { pub(crate) matrices: Vec, } impl InPlaceStage for MatrixStage { fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> { if !self.matrices.is_empty() { let m = self.matrices[0]; for dst in dst.chunks_exact_mut(3) { let x = dst[0]; let y = dst[1]; let z = dst[2]; dst[0] = mlaf(mlaf(x * m.v[0][0], y, m.v[0][1]), z, m.v[0][2]); dst[1] = mlaf(mlaf(x * m.v[1][0], y, m.v[1][1]), z, m.v[1][2]); dst[2] = mlaf(mlaf(x * m.v[2][0], y, m.v[2][1]), z, m.v[2][2]); } } for m in self.matrices.iter().skip(1) { for dst in dst.chunks_exact_mut(3) { let x = dst[0]; let y = dst[1]; let z = dst[2]; dst[0] = mlaf(mlaf(x * m.v[0][0], y, m.v[0][1]), z, m.v[0][2]); dst[1] = mlaf(mlaf(x * m.v[1][0], y, m.v[1][1]), z, m.v[1][2]); dst[2] = mlaf(mlaf(x * m.v[2][0], y, m.v[2][1]), z, m.v[2][2]); } } Ok(()) } } 
/// Sampling constant shared with other modules; it is not referenced in this file.
pub(crate) const LUT_SAMPLING: u16 = 255;

/// Factory for executors that apply a pre-sampled three-input LUT.
///
/// Implemented per backend; the concrete type is chosen by the cfg-gated
/// `make_transform_3x3_fn!` invocations below.
pub(crate) trait Lut3x3Factory {
    /// Builds an executor from the sampled `lut`.
    ///
    /// `SRC_LAYOUT` / `DST_LAYOUT` carry `Layout` discriminants as `u8`
    /// (see the `{ Layout::Rgb as u8 }` arguments in `make_transform_3x3_fn!`).
    fn make_transform_3x3<
        T: Copy + AsPrimitive + Default + PointeeSizeExpressible + 'static + Send + Sync,
        const SRC_LAYOUT: u8,
        const DST_LAYOUT: u8,
        const GRID_SIZE: usize,
        const BIT_DEPTH: usize,
    >(
        lut: Vec,
        options: TransformOptions,
        color_space: DataColorSpace,
        is_linear: bool,
    ) -> Box + Send + Sync>
    where
        f32: AsPrimitive,
        u32: AsPrimitive,
        (): LutBarycentricReduction,
        (): LutBarycentricReduction;
}

/// Factory for executors that apply a pre-sampled four-input LUT.
///
/// Only the destination layout is a const parameter here.
pub(crate) trait Lut4x3Factory {
    /// Builds an executor from the sampled `lut`; `LAYOUT` is the destination
    /// `Layout` discriminant as `u8` (see `make_transform_4x3_fn!`).
    fn make_transform_4x3<
        T: Copy + AsPrimitive + Default + PointeeSizeExpressible + 'static + Send + Sync,
        const LAYOUT: u8,
        const GRID_SIZE: usize,
        const BIT_DEPTH: usize,
    >(
        lut: Vec,
        options: TransformOptions,
        color_space: DataColorSpace,
        is_linear: bool,
    ) -> Box + Sync + Send>
    where
        f32: AsPrimitive,
        u32: AsPrimitive,
        (): LutBarycentricReduction,
        (): LutBarycentricReduction;
}

/// Scales every component of `lut` by 65280/65535 (0xFF00/0xFFFF) in place.
///
/// Applied only when the profile PCS is Lab, `version_internal <= V4_0`, and
/// the buffer length is a multiple of three; otherwise `lut` is left untouched.
fn pcs_lab_v4_to_v2(profile: &ColorProfile, lut: &mut [f32]) {
    if profile.pcs == DataColorSpace::Lab
        && profile.version_internal <= ProfileVersion::V4_0
        && lut.len() % 3 == 0
    {
        // Cannot fire: the enclosing condition already guarantees a multiple of 3.
        assert_eq!(
            lut.len() % 3,
            0,
            "Lut {:?} is not a multiple of 3, this should not happen for lab",
            lut.len()
        );
        let v_mat = vec![Matrix3f {
            v: [
                [65280.0 / 65535.0, 0f32, 0f32],
                [0f32, 65280.0 / 65535.0, 0f32],
                [0f32, 0f32, 65280.0 / 65535.0f32],
            ],
        }];
        let stage = MatrixStage { matrices: v_mat };
        // `MatrixStage::transform` only ever returns `Ok`, so this cannot panic.
        stage.transform(lut).unwrap();
    }
}

/// Scales every component of `lut` by 65535/65280 (0xFFFF/0xFF00) in place.
///
/// Inverse scaling of `pcs_lab_v4_to_v2`, guarded by the same three conditions.
fn pcs_lab_v2_to_v4(profile: &ColorProfile, lut: &mut [f32]) {
    if profile.pcs == DataColorSpace::Lab
        && profile.version_internal <= ProfileVersion::V4_0
        && lut.len() % 3 == 0
    {
        // Cannot fire: the enclosing condition already guarantees a multiple of 3.
        assert_eq!(
            lut.len() % 3,
            0,
            "Lut {:?} is not a multiple of 3, this should not happen for lab",
            lut.len()
        );
        let v_mat = vec![Matrix3f {
            v: [
                [65535.0 / 65280.0f32, 0f32, 0f32],
                [0f32, 65535.0f32 / 65280.0f32, 0f32],
                [0f32, 0f32, 65535.0f32 / 65280.0f32],
            ],
        }];
        let stage = MatrixStage { matrices: v_mat };
        // `MatrixStage::transform` only ever returns `Ok`, so this cannot panic.
        stage.transform(lut).unwrap();
    }
}

// Generates `fn $method_name(src_layout, dst_layout, lut, ...)`, which turns the
// runtime `Layout` pair into the const layout parameters of
// `$exec_impl::make_transform_3x3`. Only Rgb and Rgba are handled on either
// side; any other layout hits `unimplemented!()` and panics.
macro_rules! make_transform_3x3_fn {
    ($method_name: ident, $exec_impl: ident) => {
        fn $method_name<
            T: Copy
                + Default
                + AsPrimitive
                + Send
                + Sync
                + AsPrimitive
                + PointeeSizeExpressible,
            const GRID_SIZE: usize,
            const BIT_DEPTH: usize,
        >(
            src_layout: Layout,
            dst_layout: Layout,
            lut: Vec,
            options: TransformOptions,
            color_space: DataColorSpace,
            is_linear: bool,
        ) -> Box + Send + Sync>
        where
            f32: AsPrimitive,
            u32: AsPrimitive,
            (): LutBarycentricReduction,
            (): LutBarycentricReduction,
        {
            match src_layout {
                Layout::Rgb => match dst_layout {
                    Layout::Rgb => $exec_impl::make_transform_3x3::<
                        T,
                        { Layout::Rgb as u8 },
                        { Layout::Rgb as u8 },
                        GRID_SIZE,
                        BIT_DEPTH,
                    >(lut, options, color_space, is_linear),
                    Layout::Rgba => $exec_impl::make_transform_3x3::<
                        T,
                        { Layout::Rgb as u8 },
                        { Layout::Rgba as u8 },
                        GRID_SIZE,
                        BIT_DEPTH,
                    >(lut, options, color_space, is_linear),
                    _ => unimplemented!(),
                },
                Layout::Rgba => match dst_layout {
                    Layout::Rgb => $exec_impl::make_transform_3x3::<
                        T,
                        { Layout::Rgba as u8 },
                        { Layout::Rgb as u8 },
                        GRID_SIZE,
                        BIT_DEPTH,
                    >(lut, options, color_space, is_linear),
                    Layout::Rgba => $exec_impl::make_transform_3x3::<
                        T,
                        { Layout::Rgba as u8 },
                        { Layout::Rgba as u8 },
                        GRID_SIZE,
                        BIT_DEPTH,
                    >(lut, options, color_space, is_linear),
                    _ => unimplemented!(),
                },
                _ => unimplemented!(),
            }
        }
    };
}

// Generates `fn $method_name(dst_layout, lut, ...)`, which turns the runtime
// destination `Layout` into the const layout parameter of
// `$exec_name::make_transform_4x3`. Only Rgb and Rgba destinations are
// handled; any other layout hits `unimplemented!()` and panics.
macro_rules! make_transform_4x3_fn {
    ($method_name: ident, $exec_name: ident) => {
        fn $method_name<
            T: Copy
                + Default
                + AsPrimitive
                + Send
                + Sync
                + AsPrimitive
                + PointeeSizeExpressible,
            const GRID_SIZE: usize,
            const BIT_DEPTH: usize,
        >(
            dst_layout: Layout,
            lut: Vec,
            options: TransformOptions,
            data_color_space: DataColorSpace,
            is_linear: bool,
        ) -> Box + Send + Sync>
        where
            f32: AsPrimitive,
            u32: AsPrimitive,
            (): LutBarycentricReduction,
            (): LutBarycentricReduction,
        {
            match dst_layout {
                Layout::Rgb => $exec_name::make_transform_4x3::<
                    T,
                    { Layout::Rgb as u8 },
                    GRID_SIZE,
                    BIT_DEPTH,
                >(lut, options, data_color_space, is_linear),
                Layout::Rgba => $exec_name::make_transform_4x3::<
                    T,
                    { Layout::Rgba as u8 },
                    GRID_SIZE,
                    BIT_DEPTH,
                >(lut, options, data_color_space, is_linear),
                _ => unimplemented!(),
            }
        }
    };
}

// `make_transformer_3x3` is the baseline 3x3 builder: NEON-backed on aarch64
// with the `neon` feature and target feature enabled, the default factory
// everywhere else. Exactly one of the two definitions is compiled.
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
use crate::conversions::neon::NeonLut3x3Factory;
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
make_transform_3x3_fn!(make_transformer_3x3, NeonLut3x3Factory);

#[cfg(not(all(target_arch = "aarch64", target_feature = "neon", feature = "neon")))]
use crate::conversions::transform_lut3_to_3::DefaultLut3x3Factory;
#[cfg(not(all(target_arch = "aarch64", target_feature = "neon", feature = "neon")))]
make_transform_3x3_fn!(make_transformer_3x3, DefaultLut3x3Factory);

// Optional x86 builders; `make_lut_transform` picks them after a runtime
// CPU-feature check.
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
use crate::conversions::avx::AvxLut3x3Factory;
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
make_transform_3x3_fn!(make_transformer_3x3_avx_fma, AvxLut3x3Factory);

#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
use crate::conversions::sse::SseLut3x3Factory;
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
make_transform_3x3_fn!(make_transformer_3x3_sse41, SseLut3x3Factory);

#[cfg(all(target_arch = "x86_64", feature = "avx"))]
use crate::conversions::avx::AvxLut4x3Factory;

use crate::conversions::interpolator::LutBarycentricReduction;
use crate::conversions::katana::{
    Katana, KatanaDefaultIntermediate, KatanaInitialStage, KatanaPostFinalizationStage,
    KatanaStageLabToXyz, KatanaStageXyzToLab, katana_create_rgb_lin_lut, katana_pcs_lab_v2_to_v4,
    katana_pcs_lab_v4_to_v2, katana_prepare_inverse_lut_rgb_xyz, multi_dimensional_3x3_to_device,
    multi_dimensional_3x3_to_pcs, multi_dimensional_4x3_to_pcs,
};
use crate::conversions::mab4x3::prepare_mab_4x3;
use crate::conversions::mba3x4::prepare_mba_3x4;
use crate::conversions::md_luts_factory::{do_any_to_any, prepare_alpha_finalizer};
// use crate::conversions::bpc::compensate_bpc_in_lut;
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
make_transform_4x3_fn!(make_transformer_4x3_avx_fma, AvxLut4x3Factory);

#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
use crate::conversions::sse::SseLut4x3Factory;
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
make_transform_4x3_fn!(make_transformer_4x3_sse41, SseLut4x3Factory);

// `make_transformer_4x3` is the baseline 4x3 builder, selected with the same
// NEON / default split as `make_transformer_3x3`.
#[cfg(not(all(target_arch = "aarch64", target_feature = "neon", feature = "neon")))]
use crate::conversions::transform_lut4_to_3::DefaultLut4x3Factory;
#[cfg(not(all(target_arch = "aarch64", target_feature = "neon", feature = "neon")))]
make_transform_4x3_fn!(make_transformer_4x3, DefaultLut4x3Factory);

#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
use crate::conversions::neon::NeonLut4x3Factory;
use crate::conversions::prelude_lut_xyz_rgb::{create_rgb_lin_lut, prepare_inverse_lut_rgb_xyz};
use crate::conversions::xyz_lab::{StageLabToXyz, StageXyzToLab};
use crate::transform::PointeeSizeExpressible;
use crate::trc::GammaLutInterpolate;
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
make_transform_4x3_fn!(make_transformer_4x3, NeonLut4x3Factory);

/// Builds a LUT-based transform executor from `source` to `dest`.
///
/// Dispatches on the device colour spaces of the two profiles:
///
/// 1. Cmyk/Color4 source to Rgb/Lab destination: 4-input LUT on a 17-point grid.
/// 2. Rgb/Lab source to Cmyk/Color4 destination: 3-input LUT on a 33-point grid
///    with four output channels.
/// 3. Both sides three-channel: 3-input LUT on a 33-point grid.
/// 4. Anything else is delegated to `do_any_to_any`.
///
/// Branches 1 and 3 first ask the profile LUTs whether they `is_katana_required()`.
/// If so, a staged `Katana` executor is returned instead of a pre-sampled LUT.
///
/// # Errors
///
/// * Whatever `check_layout` returns for a layout the colour space rejects.
/// * `CmsError::UnsupportedProfileConnection` for a PCS other than Xyz/Lab where
///   it is checked, or when a required LUT / matrix shaper is missing.
/// * `CmsError::UnsupportedLutRenderingIntent` when the source has no
///   device-to-PCS LUT for `options.rendering_intent`.
/// * Any error propagated from the stage and LUT builders.
///
/// # Panics
///
/// The generated `make_transformer_*` helpers hit `unimplemented!()` for
/// layouts other than Rgb/Rgba.
#[inline(never)]
#[cold]
pub(crate) fn make_lut_transform<
    T: Copy
        + Default
        + AsPrimitive
        + Send
        + Sync
        + AsPrimitive
        + PointeeSizeExpressible
        + GammaLutInterpolate,
    const BIT_DEPTH: usize,
    const LINEAR_CAP: usize,
    const GAMMA_LUT: usize,
>(
    src_layout: Layout,
    source: &ColorProfile,
    dst_layout: Layout,
    dest: &ColorProfile,
    options: TransformOptions,
) -> Result + Send + Sync>, CmsError>
where
    f32: AsPrimitive,
    u32: AsPrimitive,
    (): LutBarycentricReduction,
    (): LutBarycentricReduction,
{
    // ---- Branch 1: four-channel source -> three-channel destination ----
    if (source.color_space == DataColorSpace::Cmyk || source.color_space == DataColorSpace::Color4)
        && (dest.color_space == DataColorSpace::Rgb || dest.color_space == DataColorSpace::Lab)
    {
        source.color_space.check_layout(src_layout)?;
        dest.color_space.check_layout(dst_layout)?;
        if source.pcs != DataColorSpace::Xyz && source.pcs != DataColorSpace::Lab {
            return Err(CmsError::UnsupportedProfileConnection);
        }
        if dest.pcs != DataColorSpace::Lab && dest.pcs != DataColorSpace::Xyz {
            return Err(CmsError::UnsupportedProfileConnection);
        }

        const GRID_SIZE: usize = 17;

        // NOTE(review): the lookup uses `options.rendering_intent` but the error
        // payload reports `source.rendering_intent` — confirm this is intended.
        let is_katana_required_for_source = source
            .get_device_to_pcs(options.rendering_intent)
            .ok_or(CmsError::UnsupportedLutRenderingIntent(
                source.rendering_intent,
            ))
            .map(|x| x.is_katana_required())?;

        let is_katana_required_for_destination =
            if dest.is_matrix_shaper() || dest.pcs == DataColorSpace::Xyz {
                false
            } else if dest.pcs == DataColorSpace::Lab {
                dest.get_pcs_to_device(options.rendering_intent)
                    .ok_or(CmsError::UnsupportedProfileConnection)
                    .map(|x| x.is_katana_required())?
            } else {
                return Err(CmsError::UnsupportedProfileConnection);
            };

        // Staged executor: device -> PCS stage, PCS fix-up stages, PCS -> device stage.
        if is_katana_required_for_source || is_katana_required_for_destination {
            let initial_stage: Box + Send + Sync> =
                match source.get_device_to_pcs(options.rendering_intent).ok_or(
                    CmsError::UnsupportedLutRenderingIntent(source.rendering_intent),
                )? {
                    LutWarehouse::Lut(lut) => {
                        katana_input_stage_lut_4x3::(lut, options, source.pcs, BIT_DEPTH)?
                    }
                    LutWarehouse::Multidimensional(mab) => {
                        multi_dimensional_4x3_to_pcs::(mab, options, source.pcs, BIT_DEPTH)?
                    }
                };

            let mut stages = Vec::new();

            // Intermediate stages, in order: source Lab v2->v4 fix-up, Lab->XYZ
            // when the source PCS is Lab, XYZ->Lab when the destination PCS is
            // Lab, destination Lab v4->v2 fix-up.
            stages.push(katana_pcs_lab_v2_to_v4(source));
            if source.pcs == DataColorSpace::Lab {
                stages.push(Box::new(KatanaStageLabToXyz::default()));
            }
            if dest.pcs == DataColorSpace::Lab {
                stages.push(Box::new(KatanaStageXyzToLab::default()));
            }
            stages.push(katana_pcs_lab_v4_to_v2(dest));

            let final_stage = if dest.has_pcs_to_device_lut() {
                let pcs_to_device = dest
                    .get_pcs_to_device(options.rendering_intent)
                    .ok_or(CmsError::UnsupportedProfileConnection)?;
                match pcs_to_device {
                    LutWarehouse::Lut(lut) => {
                        katana_output_stage_lut_3x3::(lut, options, dest.pcs, BIT_DEPTH)?
                    }
                    LutWarehouse::Multidimensional(mab) => {
                        multi_dimensional_3x3_to_device::(mab, options, dest.pcs, BIT_DEPTH)?
                    }
                }
            } else if dest.is_matrix_shaper() {
                let state = katana_prepare_inverse_lut_rgb_xyz::(
                    dest, dst_layout, options,
                )?;
                // The matrix-shaper path contributes its own intermediate stages.
                stages.extend(state.stages);
                state.final_stage
            } else {
                return Err(CmsError::UnsupportedProfileConnection);
            };

            let mut post_finalization: Vec + Send + Sync>> =
                Vec::new();
            if let Some(stage) =
                prepare_alpha_finalizer::(src_layout, source, dst_layout, dest, BIT_DEPTH)
            {
                post_finalization.push(stage);
            }

            return Ok(Box::new(Katana:: {
                initial_stage,
                final_stage,
                stages,
                post_finalization,
            }));
        }

        // Pre-sampled path: evaluate the source LUT on the normalised grid.
        let mut lut = match source.get_device_to_pcs(options.rendering_intent).ok_or(
            CmsError::UnsupportedLutRenderingIntent(source.rendering_intent),
        )? {
            LutWarehouse::Lut(lut) => create_lut4::(lut, options, source.pcs)?,
            LutWarehouse::Multidimensional(m_curves) => {
                let mut samples = create_lut4_norm_samples::();
                prepare_mab_4x3(m_curves, &mut samples, options, source.pcs)?
            }
        };

        pcs_lab_v2_to_v4(source, &mut lut);

        if source.pcs == DataColorSpace::Lab {
            let lab_to_xyz_stage = StageLabToXyz::default();
            lab_to_xyz_stage.transform(&mut lut)?;
        }

        // if source.color_space == DataColorSpace::Cmyk
        //     && (options.rendering_intent == RenderingIntent::Perceptual
        //         || options.rendering_intent == RenderingIntent::RelativeColorimetric)
        //     && options.black_point_compensation
        // {
        //     if let (Some(src_bp), Some(dst_bp)) = (
        //         source.detect_black_point::(&lut),
        //         dest.detect_black_point::(&lut),
        //     ) {
        //         compensate_bpc_in_lut(&mut lut, src_bp, dst_bp);
        //     }
        // }

        if dest.pcs == DataColorSpace::Lab {
            // Despite the variable name, this is the XYZ -> Lab stage.
            let lab_to_xyz_stage = StageXyzToLab::default();
            lab_to_xyz_stage.transform(&mut lut)?;
        }

        pcs_lab_v4_to_v2(dest, &mut lut);

        // Push the PCS samples through the destination's PCS -> device side.
        if dest.pcs == DataColorSpace::Xyz {
            if dest.is_matrix_shaper() {
                prepare_inverse_lut_rgb_xyz::(dest, &mut lut, options)?;
            } else {
                return Err(CmsError::UnsupportedProfileConnection);
            }
        } else if dest.pcs == DataColorSpace::Lab {
            let pcs_to_device = dest
                .get_pcs_to_device(options.rendering_intent)
                .ok_or(CmsError::UnsupportedProfileConnection)?;
            match pcs_to_device {
                LutWarehouse::Lut(lut_data_type) => {
                    lut = create_lut3x3(lut_data_type, &lut, options, dest.pcs)?
                }
                LutWarehouse::Multidimensional(mab) => {
                    prepare_mba_3x3(mab, &mut lut, options, dest.pcs)?
                }
            }
        }

        let is_dest_linear_profile = dest.color_space == DataColorSpace::Rgb
            && dest.is_matrix_shaper()
            && dest.is_linear_matrix_shaper();

        // Backend choice: AVX2+FMA, then SSE4.1 (both checked at run time),
        // then the compile-time baseline.
        #[cfg(all(target_arch = "x86_64", feature = "avx"))]
        if std::arch::is_x86_feature_detected!("avx2") && std::arch::is_x86_feature_detected!("fma")
        {
            return Ok(make_transformer_4x3_avx_fma::(
                dst_layout,
                lut,
                options,
                dest.color_space,
                is_dest_linear_profile,
            ));
        }
        #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
        if std::arch::is_x86_feature_detected!("sse4.1") {
            return Ok(make_transformer_4x3_sse41::(
                dst_layout,
                lut,
                options,
                dest.color_space,
                is_dest_linear_profile,
            ));
        }

        Ok(make_transformer_4x3::(
            dst_layout,
            lut,
            options,
            dest.color_space,
            is_dest_linear_profile,
        ))
    // ---- Branch 2: three-channel source -> four-channel destination ----
    } else if (source.color_space == DataColorSpace::Rgb
        || source.color_space == DataColorSpace::Lab)
        && (dest.color_space == DataColorSpace::Cmyk || dest.color_space == DataColorSpace::Color4)
    {
        source.color_space.check_layout(src_layout)?;
        dest.color_space.check_layout(dst_layout)?;
        if source.pcs != DataColorSpace::Xyz && source.pcs != DataColorSpace::Lab {
            return Err(CmsError::UnsupportedProfileConnection);
        }

        const GRID_SIZE: usize = 33;

        let mut lut: Vec;

        // Source side: prefer the profile's device -> PCS LUT, fall back to the
        // matrix shaper.
        if source.has_device_to_pcs_lut() {
            let device_to_pcs = source
                .get_device_to_pcs(options.rendering_intent)
                .ok_or(CmsError::UnsupportedProfileConnection)?;
            lut = create_lut3_samples_norm::();

            match device_to_pcs {
                LutWarehouse::Lut(lut_data_type) => {
                    lut = create_lut3x3(lut_data_type, &lut, options, source.pcs)?;
                }
                LutWarehouse::Multidimensional(mab) => {
                    prepare_mab_3x3(mab, &mut lut, options, source.pcs)?
                }
            }
        } else if source.is_matrix_shaper() {
            lut = create_rgb_lin_lut::(source, options)?;
        } else {
            return Err(CmsError::UnsupportedProfileConnection);
        }

        pcs_lab_v2_to_v4(source, &mut lut);

        // Convert between PCS encodings only when the two profiles disagree.
        if source.pcs == DataColorSpace::Xyz && dest.pcs == DataColorSpace::Lab {
            let xyz_to_lab = StageXyzToLab::default();
            xyz_to_lab.transform(&mut lut)?;
        } else if source.pcs == DataColorSpace::Lab && dest.pcs == DataColorSpace::Xyz {
            let lab_to_xyz_stage = StageLabToXyz::default();
            lab_to_xyz_stage.transform(&mut lut)?;
        }

        pcs_lab_v4_to_v2(dest, &mut lut);

        // Destination side: a PCS -> device LUT is mandatory in this branch.
        let lut = match dest
            .get_pcs_to_device(options.rendering_intent)
            .ok_or(CmsError::UnsupportedProfileConnection)?
        {
            LutWarehouse::Lut(lut_type) => create_lut3x4(lut_type, &lut, options, dest.pcs)?,
            LutWarehouse::Multidimensional(m_curves) => {
                prepare_mba_3x4(m_curves, &mut lut, options, dest.pcs)?
            }
        };

        let is_dest_linear_profile = dest.color_space == DataColorSpace::Rgb
            && dest.is_matrix_shaper()
            && dest.is_linear_matrix_shaper();

        Ok(make_transform_3x4::(
            src_layout,
            lut,
            options,
            dest.color_space,
            is_dest_linear_profile,
        ))
    // ---- Branch 3: three-channel source -> three-channel destination ----
    } else if (source.color_space.is_three_channels()) && (dest.color_space.is_three_channels()) {
        source.color_space.check_layout(src_layout)?;
        dest.color_space.check_layout(dst_layout)?;

        const GRID_SIZE: usize = 33;

        let is_katana_required_for_source = if source.is_matrix_shaper() {
            false
        } else {
            source
                .get_device_to_pcs(options.rendering_intent)
                .ok_or(CmsError::UnsupportedLutRenderingIntent(
                    source.rendering_intent,
                ))
                .map(|x| x.is_katana_required())?
        };

        // NOTE(review): this tests `source.is_matrix_shaper()`, whereas the
        // four-channel branch above tests `dest.is_matrix_shaper()` in the same
        // position. As written, a matrix-shaper source never triggers the
        // destination katana check. Confirm whether `dest` was intended.
        let is_katana_required_for_destination =
            if source.is_matrix_shaper() || dest.pcs == DataColorSpace::Xyz {
                false
            } else if dest.pcs == DataColorSpace::Lab {
                dest.get_pcs_to_device(options.rendering_intent)
                    .ok_or(CmsError::UnsupportedProfileConnection)
                    .map(|x| x.is_katana_required())?
            } else {
                return Err(CmsError::UnsupportedProfileConnection);
            };

        let mut stages: Vec> = Vec::new();

        // Slow and accurate fallback if anything not acceptable is detected by curve analysis
        if is_katana_required_for_source || is_katana_required_for_destination {
            let source_stage: Box + Send + Sync> =
                if source.is_matrix_shaper() {
                    let state = katana_create_rgb_lin_lut::(
                        src_layout, source, options,
                    )?;
                    stages.extend(state.stages);
                    state.initial_stage
                } else {
                    match source.get_device_to_pcs(options.rendering_intent).ok_or(
                        CmsError::UnsupportedLutRenderingIntent(source.rendering_intent),
                    )? {
                        LutWarehouse::Lut(lut) => {
                            katana_input_stage_lut_3x3::(lut, options, source.pcs, BIT_DEPTH)?
                        }
                        LutWarehouse::Multidimensional(mab) => {
                            multi_dimensional_3x3_to_pcs::(mab, options, source.pcs, BIT_DEPTH)?
                        }
                    }
                };

            // Same intermediate stage sequence as the four-channel katana path.
            stages.push(katana_pcs_lab_v2_to_v4(source));
            if source.pcs == DataColorSpace::Lab {
                stages.push(Box::new(KatanaStageLabToXyz::default()));
            }
            if dest.pcs == DataColorSpace::Lab {
                stages.push(Box::new(KatanaStageXyzToLab::default()));
            }
            stages.push(katana_pcs_lab_v4_to_v2(dest));

            let final_stage = if dest.has_pcs_to_device_lut() {
                let pcs_to_device = dest
                    .get_pcs_to_device(options.rendering_intent)
                    .ok_or(CmsError::UnsupportedProfileConnection)?;
                match pcs_to_device {
                    LutWarehouse::Lut(lut) => {
                        katana_output_stage_lut_3x3::(lut, options, dest.pcs, BIT_DEPTH)?
                    }
                    LutWarehouse::Multidimensional(mab) => {
                        multi_dimensional_3x3_to_device::(mab, options, dest.pcs, BIT_DEPTH)?
                    }
                }
            } else if dest.is_matrix_shaper() {
                let state = katana_prepare_inverse_lut_rgb_xyz::(
                    dest, dst_layout, options,
                )?;
                stages.extend(state.stages);
                state.final_stage
            } else {
                return Err(CmsError::UnsupportedProfileConnection);
            };

            let mut post_finalization: Vec + Send + Sync>> =
                Vec::new();
            if let Some(stage) =
                prepare_alpha_finalizer::(src_layout, source, dst_layout, dest, BIT_DEPTH)
            {
                post_finalization.push(stage);
            }

            return Ok(Box::new(Katana:: {
                initial_stage: source_stage,
                final_stage,
                stages,
                post_finalization,
            }));
        }

        let mut lut: Vec;

        // Source side: prefer the profile's device -> PCS LUT, fall back to the
        // matrix shaper.
        if source.has_device_to_pcs_lut() {
            let device_to_pcs = source
                .get_device_to_pcs(options.rendering_intent)
                .ok_or(CmsError::UnsupportedProfileConnection)?;
            lut = create_lut3_samples_norm::();

            match device_to_pcs {
                LutWarehouse::Lut(lut_data_type) => {
                    lut = create_lut3x3(lut_data_type, &lut, options, source.pcs)?;
                }
                LutWarehouse::Multidimensional(mab) => {
                    prepare_mab_3x3(mab, &mut lut, options, source.pcs)?
                }
            }
        } else if source.is_matrix_shaper() {
            lut = create_rgb_lin_lut::(source, options)?;
        } else {
            return Err(CmsError::UnsupportedProfileConnection);
        }

        pcs_lab_v2_to_v4(source, &mut lut);

        // Convert between PCS encodings only when the two profiles disagree.
        if source.pcs == DataColorSpace::Xyz && dest.pcs == DataColorSpace::Lab {
            let xyz_to_lab = StageXyzToLab::default();
            xyz_to_lab.transform(&mut lut)?;
        } else if source.pcs == DataColorSpace::Lab && dest.pcs == DataColorSpace::Xyz {
            let lab_to_xyz_stage = StageLabToXyz::default();
            lab_to_xyz_stage.transform(&mut lut)?;
        }

        pcs_lab_v4_to_v2(dest, &mut lut);

        // Destination side: PCS -> device LUT if present, else inverse matrix shaper.
        if dest.has_pcs_to_device_lut() {
            let pcs_to_device = dest
                .get_pcs_to_device(options.rendering_intent)
                .ok_or(CmsError::UnsupportedProfileConnection)?;
            match pcs_to_device {
                LutWarehouse::Lut(lut_data_type) => {
                    lut = create_lut3x3(lut_data_type, &lut, options, dest.pcs)?;
                }
                LutWarehouse::Multidimensional(mab) => {
                    prepare_mba_3x3(mab, &mut lut, options, dest.pcs)?
                }
            }
        } else if dest.is_matrix_shaper() {
            prepare_inverse_lut_rgb_xyz::(dest, &mut lut, options)?;
        } else {
            return Err(CmsError::UnsupportedProfileConnection);
        }

        let is_dest_linear_profile = dest.color_space == DataColorSpace::Rgb
            && dest.is_matrix_shaper()
            && dest.is_linear_matrix_shaper();

        // Backend choice: AVX2+FMA, then SSE4.1 (both checked at run time),
        // then the compile-time baseline.
        #[cfg(all(feature = "avx", target_arch = "x86_64"))]
        if std::arch::is_x86_feature_detected!("avx2") && std::is_x86_feature_detected!("fma") {
            return Ok(make_transformer_3x3_avx_fma::(
                src_layout,
                dst_layout,
                lut,
                options,
                dest.color_space,
                is_dest_linear_profile,
            ));
        }
        #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
        if std::arch::is_x86_feature_detected!("sse4.1") {
            return Ok(make_transformer_3x3_sse41::(
                src_layout,
                dst_layout,
                lut,
                options,
                dest.color_space,
                is_dest_linear_profile,
            ));
        }

        Ok(make_transformer_3x3::(
            src_layout,
            dst_layout,
            lut,
            options,
            dest.color_space,
            is_dest_linear_profile,
        ))
    // ---- Branch 4: every other colour-space combination ----
    } else {
        do_any_to_any::(
            src_layout, source, dst_layout, dest, options,
        )
    }
}
moxcms-0.7.7/src/conversions/mab.rs000064400000000000000000000454421046102023000154220ustar 00000000000000/*
 * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
 * //
 * // Redistribution and use in source and binary forms, with or without modification,
 * // are permitted provided that the following conditions are met:
 * //
 * // 1.  Redistributions of source code must retain the above copyright notice, this
 * // list of conditions and the following disclaimer.
 * //
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
 * // this list of conditions and the following disclaimer in the documentation
 * // and/or other materials provided with the distribution.
 * //
 * // 3.  Neither the name of the copyright holder nor the names of its
 * // contributors may be used to endorse or promote products derived from
 * // this software without specific prior written permission.
* // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ use crate::mlaf::mlaf; use crate::safe_math::SafeMul; use crate::{ CmsError, Cube, DataColorSpace, InPlaceStage, InterpolationMethod, LutMultidimensionalType, MalformedSize, Matrix3d, Matrix3f, TransformOptions, Vector3d, Vector3f, }; #[allow(unused)] struct ACurves3<'a> { curve0: Box<[f32; 65536]>, curve1: Box<[f32; 65536]>, curve2: Box<[f32; 65536]>, clut: &'a [f32], grid_size: [u8; 3], interpolation_method: InterpolationMethod, pcs: DataColorSpace, depth: usize, } #[allow(unused)] struct ACurves3Optimized<'a> { clut: &'a [f32], grid_size: [u8; 3], interpolation_method: InterpolationMethod, pcs: DataColorSpace, } #[allow(unused)] impl ACurves3<'_> { fn transform_impl Vector3f>( &self, dst: &mut [f32], fetch: Fetch, ) -> Result<(), CmsError> { let scale_value = (self.depth - 1) as f32; for dst in dst.chunks_exact_mut(3) { let a0 = (dst[0] * scale_value).round().min(scale_value) as u16; let a1 = (dst[1] * scale_value).round().min(scale_value) as u16; let a2 = (dst[2] * scale_value).round().min(scale_value) as u16; let b0 = self.curve0[a0 as usize]; let b1 = self.curve1[a1 as usize]; let b2 = self.curve2[a2 as usize]; let interpolated = fetch(b0, b1, b2); dst[0] = 
interpolated.v[0]; dst[1] = interpolated.v[1]; dst[2] = interpolated.v[2]; } Ok(()) } } #[allow(unused)] impl ACurves3Optimized<'_> { fn transform_impl Vector3f>( &self, dst: &mut [f32], fetch: Fetch, ) -> Result<(), CmsError> { for dst in dst.chunks_exact_mut(3) { let a0 = dst[0]; let a1 = dst[1]; let a2 = dst[2]; let interpolated = fetch(a0, a1, a2); dst[0] = interpolated.v[0]; dst[1] = interpolated.v[1]; dst[2] = interpolated.v[2]; } Ok(()) } } impl InPlaceStage for ACurves3<'_> { fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> { let lut = Cube::new_cube(self.clut, self.grid_size); // If PCS is LAB then linear interpolation should be used if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz { return self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z)); } match self.interpolation_method { #[cfg(feature = "options")] InterpolationMethod::Tetrahedral => { self.transform_impl(dst, |x, y, z| lut.tetra_vec3(x, y, z))?; } #[cfg(feature = "options")] InterpolationMethod::Pyramid => { self.transform_impl(dst, |x, y, z| lut.pyramid_vec3(x, y, z))?; } #[cfg(feature = "options")] InterpolationMethod::Prism => { self.transform_impl(dst, |x, y, z| lut.prism_vec3(x, y, z))?; } InterpolationMethod::Linear => { self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z))?; } } Ok(()) } } impl InPlaceStage for ACurves3Optimized<'_> { fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> { let lut = Cube::new_cube(self.clut, self.grid_size); // If PCS is LAB then linear interpolation should be used if self.pcs == DataColorSpace::Lab { return self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z)); } match self.interpolation_method { #[cfg(feature = "options")] InterpolationMethod::Tetrahedral => { self.transform_impl(dst, |x, y, z| lut.tetra_vec3(x, y, z))?; } #[cfg(feature = "options")] InterpolationMethod::Pyramid => { self.transform_impl(dst, |x, y, z| lut.pyramid_vec3(x, y, z))?; } #[cfg(feature = "options")] 
InterpolationMethod::Prism => { self.transform_impl(dst, |x, y, z| lut.prism_vec3(x, y, z))?; } InterpolationMethod::Linear => { self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z))?; } } Ok(()) } } #[allow(unused)] struct ACurves3Inverse<'a> { curve0: Box<[f32; 65536]>, curve1: Box<[f32; 65536]>, curve2: Box<[f32; 65536]>, clut: &'a [f32], grid_size: [u8; 3], interpolation_method: InterpolationMethod, pcs: DataColorSpace, depth: usize, } #[allow(unused)] impl ACurves3Inverse<'_> { fn transform_impl Vector3f>( &self, dst: &mut [f32], fetch: Fetch, ) -> Result<(), CmsError> { let scale_value = (self.depth as u32 - 1u32) as f32; for dst in dst.chunks_exact_mut(3) { let interpolated = fetch(dst[0], dst[1], dst[2]); let a0 = (interpolated.v[0] * scale_value).round().min(scale_value) as u16; let a1 = (interpolated.v[1] * scale_value).round().min(scale_value) as u16; let a2 = (interpolated.v[2] * scale_value).round().min(scale_value) as u16; let b0 = self.curve0[a0 as usize]; let b1 = self.curve1[a1 as usize]; let b2 = self.curve2[a2 as usize]; dst[0] = b0; dst[1] = b1; dst[2] = b2; } Ok(()) } } impl InPlaceStage for ACurves3Inverse<'_> { fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> { let lut = Cube::new_cube(self.clut, self.grid_size); // If PCS is LAB then linear interpolation should be used if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz { return self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z)); } match self.interpolation_method { #[cfg(feature = "options")] InterpolationMethod::Tetrahedral => { self.transform_impl(dst, |x, y, z| lut.tetra_vec3(x, y, z))?; } #[cfg(feature = "options")] InterpolationMethod::Pyramid => { self.transform_impl(dst, |x, y, z| lut.pyramid_vec3(x, y, z))?; } #[cfg(feature = "options")] InterpolationMethod::Prism => { self.transform_impl(dst, |x, y, z| lut.prism_vec3(x, y, z))?; } InterpolationMethod::Linear => { self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z))?; } 
} Ok(()) } } pub(crate) struct MCurves3 { pub(crate) curve0: Box<[f32; 65536]>, pub(crate) curve1: Box<[f32; 65536]>, pub(crate) curve2: Box<[f32; 65536]>, pub(crate) matrix: Matrix3f, pub(crate) bias: Vector3f, pub(crate) inverse: bool, pub(crate) depth: usize, } impl MCurves3 { fn execute_matrix_stage(&self, dst: &mut [f32]) { let m = self.matrix; let b = self.bias; if !m.test_equality(Matrix3f::IDENTITY) || !b.eq(&Vector3f::default()) { for dst in dst.chunks_exact_mut(3) { let x = dst[0]; let y = dst[1]; let z = dst[2]; dst[0] = mlaf(mlaf(mlaf(b.v[0], x, m.v[0][0]), y, m.v[0][1]), z, m.v[0][2]); dst[1] = mlaf(mlaf(mlaf(b.v[1], x, m.v[1][0]), y, m.v[1][1]), z, m.v[1][2]); dst[2] = mlaf(mlaf(mlaf(b.v[2], x, m.v[2][0]), y, m.v[2][1]), z, m.v[2][2]); } } } } impl InPlaceStage for MCurves3 { fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> { let scale_value = (self.depth - 1) as f32; if self.inverse { self.execute_matrix_stage(dst); } for dst in dst.chunks_exact_mut(3) { let a0 = (dst[0] * scale_value).round().min(scale_value) as u16; let a1 = (dst[1] * scale_value).round().min(scale_value) as u16; let a2 = (dst[2] * scale_value).round().min(scale_value) as u16; let b0 = self.curve0[a0 as usize]; let b1 = self.curve1[a1 as usize]; let b2 = self.curve2[a2 as usize]; dst[0] = b0; dst[1] = b1; dst[2] = b2; } if !self.inverse { self.execute_matrix_stage(dst); } Ok(()) } } pub(crate) struct BCurves3 { pub(crate) curve0: Box<[f32; 65536]>, pub(crate) curve1: Box<[f32; 65536]>, pub(crate) curve2: Box<[f32; 65536]>, } impl InPlaceStage for BCurves3 { fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> { let scale_value = (DEPTH - 1) as f32; for dst in dst.chunks_exact_mut(3) { let a0 = (dst[0] * scale_value).round().min(scale_value) as u16; let a1 = (dst[1] * scale_value).round().min(scale_value) as u16; let a2 = (dst[2] * scale_value).round().min(scale_value) as u16; let b0 = self.curve0[a0 as usize]; let b1 = self.curve1[a1 as usize]; let b2 = 
self.curve2[a2 as usize]; dst[0] = b0; dst[1] = b1; dst[2] = b2; } Ok(()) } } pub(crate) fn prepare_mab_3x3( mab: &LutMultidimensionalType, lut: &mut [f32], options: TransformOptions, pcs: DataColorSpace, ) -> Result<(), CmsError> { const LERP_DEPTH: usize = 65536; const BP: usize = 13; const DEPTH: usize = 8192; if mab.num_input_channels != 3 && mab.num_output_channels != 3 { return Err(CmsError::UnsupportedProfileConnection); } if mab.a_curves.len() == 3 && mab.clut.is_some() { let clut = &mab.clut.as_ref().map(|x| x.to_clut_f32()).unwrap(); let lut_grid = (mab.grid_points[0] as usize) .safe_mul(mab.grid_points[1] as usize)? .safe_mul(mab.grid_points[2] as usize)? .safe_mul(mab.num_output_channels as usize)?; if clut.len() != lut_grid { return Err(CmsError::MalformedCurveLutTable(MalformedSize { size: clut.len(), expected: lut_grid, })); } let all_curves_linear = mab.a_curves.iter().all(|curve| curve.is_linear()); let grid_size = [mab.grid_points[0], mab.grid_points[1], mab.grid_points[2]]; if all_curves_linear { let l = ACurves3Optimized { clut, grid_size, interpolation_method: options.interpolation_method, pcs, }; l.transform(lut)?; } else { let curves: Result, _> = mab .a_curves .iter() .map(|c| { c.build_linearize_table::() .ok_or(CmsError::InvalidTrcCurve) }) .collect(); let [curve0, curve1, curve2] = curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?; let l = ACurves3 { curve0, curve1, curve2, clut, grid_size, interpolation_method: options.interpolation_method, pcs, depth: DEPTH, }; l.transform(lut)?; } } if mab.m_curves.len() == 3 { let all_curves_linear = mab.m_curves.iter().all(|curve| curve.is_linear()); if !all_curves_linear || !mab.matrix.test_equality(Matrix3d::IDENTITY) || mab.bias.ne(&Vector3d::default()) { let curves: Result, _> = mab .m_curves .iter() .map(|c| { c.build_linearize_table::() .ok_or(CmsError::InvalidTrcCurve) }) .collect(); let [curve0, curve1, curve2] = curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?; let matrix 
= mab.matrix.to_f32(); let bias: Vector3f = mab.bias.cast(); let m_curves = MCurves3 { curve0, curve1, curve2, matrix, bias, inverse: false, depth: DEPTH, }; m_curves.transform(lut)?; } } if mab.b_curves.len() == 3 { const LERP_DEPTH: usize = 65536; const BP: usize = 13; const DEPTH: usize = 8192; let all_curves_linear = mab.b_curves.iter().all(|curve| curve.is_linear()); if !all_curves_linear { let curves: Result, _> = mab .b_curves .iter() .map(|c| { c.build_linearize_table::() .ok_or(CmsError::InvalidTrcCurve) }) .collect(); let [curve0, curve1, curve2] = curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?; let b_curves = BCurves3:: { curve0, curve1, curve2, }; b_curves.transform(lut)?; } } else { return Err(CmsError::InvalidAtoBLut); } Ok(()) } pub(crate) fn prepare_mba_3x3( mab: &LutMultidimensionalType, lut: &mut [f32], options: TransformOptions, pcs: DataColorSpace, ) -> Result<(), CmsError> { if mab.num_input_channels != 3 && mab.num_output_channels != 3 { return Err(CmsError::UnsupportedProfileConnection); } const LERP_DEPTH: usize = 65536; const BP: usize = 13; const DEPTH: usize = 8192; if mab.b_curves.len() == 3 { let all_curves_linear = mab.b_curves.iter().all(|curve| curve.is_linear()); if !all_curves_linear { let curves: Result, _> = mab .b_curves .iter() .map(|c| { c.build_linearize_table::() .ok_or(CmsError::InvalidTrcCurve) }) .collect(); let [curve0, curve1, curve2] = curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?; let b_curves = BCurves3:: { curve0, curve1, curve2, }; b_curves.transform(lut)?; } } else { return Err(CmsError::InvalidAtoBLut); } if mab.m_curves.len() == 3 { let all_curves_linear = mab.m_curves.iter().all(|curve| curve.is_linear()); if !all_curves_linear || !mab.matrix.test_equality(Matrix3d::IDENTITY) || mab.bias.ne(&Vector3d::default()) { let curves: Result, _> = mab .m_curves .iter() .map(|c| { c.build_linearize_table::() .ok_or(CmsError::InvalidTrcCurve) }) .collect(); let [curve0, curve1, curve2] = 
curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?; let matrix = mab.matrix.to_f32(); let bias: Vector3f = mab.bias.cast(); let m_curves = MCurves3 { curve0, curve1, curve2, matrix, bias, inverse: true, depth: DEPTH, }; m_curves.transform(lut)?; } } if mab.a_curves.len() == 3 && mab.clut.is_some() { let clut = &mab.clut.as_ref().map(|x| x.to_clut_f32()).unwrap(); let lut_grid = (mab.grid_points[0] as usize) .safe_mul(mab.grid_points[1] as usize)? .safe_mul(mab.grid_points[2] as usize)? .safe_mul(mab.num_output_channels as usize)?; if clut.len() != lut_grid { return Err(CmsError::MalformedCurveLutTable(MalformedSize { size: clut.len(), expected: lut_grid, })); } let all_curves_linear = mab.a_curves.iter().all(|curve| curve.is_linear()); let grid_size = [mab.grid_points[0], mab.grid_points[1], mab.grid_points[2]]; if all_curves_linear { let l = ACurves3Optimized { clut, grid_size, interpolation_method: options.interpolation_method, pcs, }; l.transform(lut)?; } else { let curves: Result, _> = mab .a_curves .iter() .map(|c| { c.build_linearize_table::() .ok_or(CmsError::InvalidTrcCurve) }) .collect(); let [curve0, curve1, curve2] = curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?; let l = ACurves3Inverse { curve0, curve1, curve2, clut, grid_size, interpolation_method: options.interpolation_method, pcs, depth: DEPTH, }; l.transform(lut)?; } } Ok(()) } moxcms-0.7.7/src/conversions/mab4x3.rs000064400000000000000000000254141046102023000157560ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. 
Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ use crate::conversions::mab::{BCurves3, MCurves3}; use crate::err::try_vec; use crate::safe_math::SafeMul; use crate::{ CmsError, DataColorSpace, Hypercube, InPlaceStage, InterpolationMethod, LutMultidimensionalType, MalformedSize, Matrix3d, Stage, TransformOptions, Vector3d, Vector3f, }; #[allow(dead_code)] struct ACurves4x3<'a> { curve0: Box<[f32; 65536]>, curve1: Box<[f32; 65536]>, curve2: Box<[f32; 65536]>, curve3: Box<[f32; 65536]>, clut: &'a [f32], grid_size: [u8; 4], interpolation_method: InterpolationMethod, pcs: DataColorSpace, depth: usize, } #[allow(dead_code)] struct ACurves4x3Optimized<'a> { clut: &'a [f32], grid_size: [u8; 4], interpolation_method: InterpolationMethod, pcs: DataColorSpace, } #[allow(dead_code)] impl ACurves4x3<'_> { fn transform_impl Vector3f>( &self, src: &[f32], dst: &mut [f32], fetch: Fetch, ) -> Result<(), CmsError> { let scale_value = (self.depth - 1) as f32; assert_eq!(src.len() / 4, dst.len() / 3); for (src, dst) in src.chunks_exact(4).zip(dst.chunks_exact_mut(3)) { let a0 = (src[0] * scale_value).round().min(scale_value) as u16; let a1 = (src[1] * scale_value).round().min(scale_value) as u16; let a2 = (src[2] * scale_value).round().min(scale_value) as u16; let a3 = (src[3] * scale_value).round().min(scale_value) as u16; let c = self.curve0[a0 as usize]; let m = self.curve1[a1 as usize]; let y = self.curve2[a2 as usize]; let k = self.curve3[a3 as usize]; let r = fetch(c, m, y, k); dst[0] = r.v[0]; dst[1] = r.v[1]; dst[2] = r.v[2]; } Ok(()) } } #[allow(dead_code)] impl ACurves4x3Optimized<'_> { fn transform_impl Vector3f>( &self, src: &[f32], dst: &mut [f32], fetch: Fetch, ) -> Result<(), CmsError> { assert_eq!(src.len() / 4, dst.len() / 3); for (src, dst) in src.chunks_exact(4).zip(dst.chunks_exact_mut(3)) { let c = src[0]; let m = src[1]; let y = src[2]; let k = src[3]; let r = fetch(c, m, y, k); dst[0] = r.v[0]; dst[1] = r.v[1]; dst[2] = r.v[2]; } Ok(()) } } impl Stage for ACurves4x3<'_> { fn transform(&self, src: &[f32], 
dst: &mut [f32]) -> Result<(), CmsError> { let lut = Hypercube::new_hypercube(self.clut, self.grid_size); // If PCS is LAB then linear interpolation should be used if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz { return self.transform_impl(src, dst, |x, y, z, w| lut.quadlinear_vec3(x, y, z, w)); } match self.interpolation_method { #[cfg(feature = "options")] InterpolationMethod::Tetrahedral => { self.transform_impl(src, dst, |x, y, z, w| lut.tetra_vec3(x, y, z, w))?; } #[cfg(feature = "options")] InterpolationMethod::Pyramid => { self.transform_impl(src, dst, |x, y, z, w| lut.pyramid_vec3(x, y, z, w))?; } #[cfg(feature = "options")] InterpolationMethod::Prism => { self.transform_impl(src, dst, |x, y, z, w| lut.prism_vec3(x, y, z, w))?; } InterpolationMethod::Linear => { self.transform_impl(src, dst, |x, y, z, w| lut.quadlinear_vec3(x, y, z, w))?; } } Ok(()) } } impl Stage for ACurves4x3Optimized<'_> { fn transform(&self, src: &[f32], dst: &mut [f32]) -> Result<(), CmsError> { let lut = Hypercube::new_hypercube(self.clut, self.grid_size); // If PCS is LAB then linear interpolation should be used if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz { return self.transform_impl(src, dst, |x, y, z, w| lut.quadlinear_vec3(x, y, z, w)); } match self.interpolation_method { #[cfg(feature = "options")] InterpolationMethod::Tetrahedral => { self.transform_impl(src, dst, |x, y, z, w| lut.tetra_vec3(x, y, z, w))?; } #[cfg(feature = "options")] InterpolationMethod::Pyramid => { self.transform_impl(src, dst, |x, y, z, w| lut.pyramid_vec3(x, y, z, w))?; } #[cfg(feature = "options")] InterpolationMethod::Prism => { self.transform_impl(src, dst, |x, y, z, w| lut.prism_vec3(x, y, z, w))?; } InterpolationMethod::Linear => { self.transform_impl(src, dst, |x, y, z, w| lut.quadlinear_vec3(x, y, z, w))?; } } Ok(()) } } pub(crate) fn prepare_mab_4x3( mab: &LutMultidimensionalType, lut: &mut [f32], options: TransformOptions, pcs: DataColorSpace, ) -> 
Result, CmsError> {
    // Body of `prepare_mab_4x3`: runs a 4-in / 3-out "A to B" element over `lut`
    // (4 floats per sample) and writes the 3-channel result into a new buffer.
    // Stage order: A curves + 4D CLUT (mandatory), then M curves + matrix + bias,
    // then B curves (three are mandatory).
    //
    // NOTE(review): generic argument lists on `Result` and `build_linearize_table`
    // are missing in this copy of the source and are kept exactly as found.
    const LERP_DEPTH: usize = 65536;
    const BP: usize = 13;
    const DEPTH: usize = 8192;
    // Reject the element unless it is exactly 4 -> 3. The previous `&&` only
    // rejected when BOTH counts were wrong, so e.g. a 4 -> 4 element slipped through.
    if mab.num_input_channels != 4 || mab.num_output_channels != 3 {
        return Err(CmsError::UnsupportedProfileConnection);
    }
    // One 3-channel output sample per 4-channel input sample.
    let mut new_lut = try_vec![0f32; (lut.len() / 4) * 3];
    if mab.a_curves.len() == 4 && mab.clut.is_some() {
        let clut = &mab.clut.as_ref().map(|x| x.to_clut_f32()).unwrap();
        // Expected CLUT length: 4D grid volume times the number of output channels.
        let lut_grid = (mab.grid_points[0] as usize)
            .safe_mul(mab.grid_points[1] as usize)?
            .safe_mul(mab.grid_points[2] as usize)?
            .safe_mul(mab.grid_points[3] as usize)?
            .safe_mul(mab.num_output_channels as usize)?;
        if clut.len() != lut_grid {
            return Err(CmsError::MalformedClut(MalformedSize {
                size: clut.len(),
                expected: lut_grid,
            }));
        }
        let all_curves_linear = mab.a_curves.iter().all(|curve| curve.is_linear());
        let grid_size = [
            mab.grid_points[0],
            mab.grid_points[1],
            mab.grid_points[2],
            mab.grid_points[3],
        ];
        if all_curves_linear {
            // Linear A curves are a no-op, so only the CLUT lookup is performed.
            let l = ACurves4x3Optimized {
                clut,
                grid_size,
                interpolation_method: options.interpolation_method,
                pcs,
            };
            l.transform(lut, &mut new_lut)?;
        } else {
            let curves: Result, _> = mab
                .a_curves
                .iter()
                .map(|c| {
                    c.build_linearize_table::()
                        .ok_or(CmsError::InvalidTrcCurve)
                })
                .collect();
            let [curve0, curve1, curve2, curve3] =
                curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?;
            let l = ACurves4x3 {
                curve0,
                curve1,
                curve2,
                curve3,
                clut,
                grid_size,
                interpolation_method: options.interpolation_method,
                pcs,
                depth: DEPTH,
            };
            l.transform(lut, &mut new_lut)?;
        }
    } else {
        // Not supported
        return Err(CmsError::UnsupportedProfileConnection);
    }
    if mab.m_curves.len() == 3 {
        let all_curves_linear = mab.m_curves.iter().all(|curve| curve.is_linear());
        if !all_curves_linear
            || !mab.matrix.test_equality(Matrix3d::IDENTITY)
            || mab.bias.ne(&Vector3d::default())
        {
            let curves: Result, _> = mab
                .m_curves
                .iter()
                .map(|c| {
                    c.build_linearize_table::()
                        .ok_or(CmsError::InvalidTrcCurve)
                })
                .collect();
            let [curve0, curve1, curve2] = curves?.try_into().map_err(|_|
CmsError::InvalidTrcCurve)?; let matrix = mab.matrix.to_f32(); let bias: Vector3f = mab.bias.cast(); let m_curves = MCurves3 { curve0, curve1, curve2, matrix, bias, inverse: false, depth: DEPTH, }; m_curves.transform(&mut new_lut)?; } } if mab.b_curves.len() == 3 { let all_curves_linear = mab.b_curves.iter().all(|curve| curve.is_linear()); if !all_curves_linear { let curves: Result, _> = mab .b_curves .iter() .map(|c| { c.build_linearize_table::() .ok_or(CmsError::InvalidTrcCurve) }) .collect(); let [curve0, curve1, curve2] = curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?; let b_curves = BCurves3:: { curve0, curve1, curve2, }; b_curves.transform(&mut new_lut)?; } } else { return Err(CmsError::InvalidAtoBLut); } Ok(new_lut) } moxcms-0.7.7/src/conversions/mba3x4.rs000064400000000000000000000253501046102023000157550ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ use crate::conversions::mab::{BCurves3, MCurves3}; use crate::err::try_vec; use crate::safe_math::SafeMul; use crate::{ CmsError, Cube, DataColorSpace, InPlaceStage, InterpolationMethod, LutMultidimensionalType, MalformedSize, Matrix3d, Stage, TransformOptions, Vector3d, Vector4f, }; struct ACurves3x4Inverse<'a> { curve0: Box<[f32; 65536]>, curve1: Box<[f32; 65536]>, curve2: Box<[f32; 65536]>, curve3: Box<[f32; 65536]>, clut: &'a [f32], grid_size: [u8; 3], interpolation_method: InterpolationMethod, pcs: DataColorSpace, depth: usize, } struct ACurves3x4InverseOptimized<'a> { clut: &'a [f32], grid_size: [u8; 3], interpolation_method: InterpolationMethod, pcs: DataColorSpace, } impl ACurves3x4Inverse<'_> { fn transform_impl Vector4f>( &self, src: &[f32], dst: &mut [f32], fetch: Fetch, ) -> Result<(), CmsError> { let scale_value = (self.depth as u32 - 1u32) as f32; assert_eq!(src.len() / 3, dst.len() / 4); for (src, dst) in src.chunks_exact(3).zip(dst.chunks_exact_mut(4)) { let interpolated = fetch(src[0], src[1], src[2]); let a0 = (interpolated.v[0] * scale_value).round().min(scale_value) as u16; let a1 = (interpolated.v[1] * scale_value).round().min(scale_value) as u16; let a2 = (interpolated.v[2] * scale_value).round().min(scale_value) as u16; let a3 = (interpolated.v[3] * scale_value).round().min(scale_value) as u16; let b0 = self.curve0[a0 as usize]; let b1 = self.curve1[a1 as usize]; let b2 = self.curve2[a2 as usize]; let b3 
= self.curve3[a3 as usize]; dst[0] = b0; dst[1] = b1; dst[2] = b2; dst[3] = b3; } Ok(()) } } impl ACurves3x4InverseOptimized<'_> { fn transform_impl Vector4f>( &self, src: &[f32], dst: &mut [f32], fetch: Fetch, ) -> Result<(), CmsError> { assert_eq!(src.len() / 3, dst.len() / 4); for (src, dst) in src.chunks_exact(3).zip(dst.chunks_exact_mut(4)) { let interpolated = fetch(src[0], src[1], src[2]); let b0 = interpolated.v[0]; let b1 = interpolated.v[1]; let b2 = interpolated.v[2]; let b3 = interpolated.v[3]; dst[0] = b0; dst[1] = b1; dst[2] = b2; dst[3] = b3; } Ok(()) } } impl Stage for ACurves3x4Inverse<'_> { fn transform(&self, src: &[f32], dst: &mut [f32]) -> Result<(), CmsError> { let lut = Cube::new_cube(self.clut, self.grid_size); // If PCS is LAB then linear interpolation should be used if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz { return self.transform_impl(src, dst, |x, y, z| lut.trilinear_vec4(x, y, z)); } match self.interpolation_method { #[cfg(feature = "options")] InterpolationMethod::Tetrahedral => { self.transform_impl(src, dst, |x, y, z| lut.tetra_vec4(x, y, z))?; } #[cfg(feature = "options")] InterpolationMethod::Pyramid => { self.transform_impl(src, dst, |x, y, z| lut.pyramid_vec4(x, y, z))?; } #[cfg(feature = "options")] InterpolationMethod::Prism => { self.transform_impl(src, dst, |x, y, z| lut.prism_vec4(x, y, z))?; } InterpolationMethod::Linear => { self.transform_impl(src, dst, |x, y, z| lut.trilinear_vec4(x, y, z))?; } } Ok(()) } } impl Stage for ACurves3x4InverseOptimized<'_> { fn transform(&self, src: &[f32], dst: &mut [f32]) -> Result<(), CmsError> { let lut = Cube::new_cube(self.clut, self.grid_size); // If PCS is LAB then linear interpolation should be used if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz { return self.transform_impl(src, dst, |x, y, z| lut.trilinear_vec4(x, y, z)); } match self.interpolation_method { #[cfg(feature = "options")] InterpolationMethod::Tetrahedral => { 
self.transform_impl(src, dst, |x, y, z| lut.tetra_vec4(x, y, z))?;
            }
            #[cfg(feature = "options")]
            InterpolationMethod::Pyramid => {
                self.transform_impl(src, dst, |x, y, z| lut.pyramid_vec4(x, y, z))?;
            }
            #[cfg(feature = "options")]
            InterpolationMethod::Prism => {
                self.transform_impl(src, dst, |x, y, z| lut.prism_vec4(x, y, z))?;
            }
            InterpolationMethod::Linear => {
                self.transform_impl(src, dst, |x, y, z| lut.trilinear_vec4(x, y, z))?;
            }
        }
        Ok(())
    }
}

/// Runs a 3-in / 4-out multi-dimensional "B to A" element over `lut`.
///
/// `lut` holds 3 floats per sample. The B curves and the M curves + matrix stage
/// are applied to it in place; the final CLUT + A-curve stage then expands every
/// sample to 4 channels in a newly allocated buffer, which is returned.
///
/// # Errors
/// * `CmsError::UnsupportedProfileConnection` when the element is not exactly
///   3 -> 4, or when the 4 A curves / CLUT are missing.
/// * `CmsError::InvalidAtoBLut` when there are not exactly three B curves.
/// * `CmsError::InvalidTrcCurve` when a curve cannot be turned into a lookup table.
/// * `CmsError::MalformedClut` when the CLUT length does not match the grid.
///
/// NOTE(review): generic argument lists on `Result`, `build_linearize_table` and
/// `BCurves3` are missing in this copy of the source and are kept exactly as found.
pub(crate) fn prepare_mba_3x4(
    mab: &LutMultidimensionalType,
    lut: &mut [f32],
    options: TransformOptions,
    pcs: DataColorSpace,
) -> Result, CmsError> {
    // Reject the element unless it is exactly 3 -> 4. The previous `&&` only
    // rejected when BOTH counts were wrong, so e.g. a 3 -> 3 element slipped through.
    if mab.num_input_channels != 3 || mab.num_output_channels != 4 {
        return Err(CmsError::UnsupportedProfileConnection);
    }
    const LERP_DEPTH: usize = 65536;
    const BP: usize = 13;
    const DEPTH: usize = 8192;
    if mab.b_curves.len() == 3 {
        let all_curves_linear = mab.b_curves.iter().all(|curve| curve.is_linear());
        if !all_curves_linear {
            let curves: Result, _> = mab
                .b_curves
                .iter()
                .map(|c| {
                    c.build_linearize_table::()
                        .ok_or(CmsError::InvalidTrcCurve)
                })
                .collect();
            let [curve0, curve1, curve2] =
                curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?;
            let b_curves = BCurves3:: {
                curve0,
                curve1,
                curve2,
            };
            b_curves.transform(lut)?;
        }
    } else {
        return Err(CmsError::InvalidAtoBLut);
    }
    if mab.m_curves.len() == 3 {
        let all_curves_linear = mab.m_curves.iter().all(|curve| curve.is_linear());
        if !all_curves_linear
            || !mab.matrix.test_equality(Matrix3d::IDENTITY)
            || mab.bias.ne(&Vector3d::default())
        {
            let curves: Result, _> = mab
                .m_curves
                .iter()
                .map(|c| {
                    c.build_linearize_table::()
                        .ok_or(CmsError::InvalidTrcCurve)
                })
                .collect();
            let [curve0, curve1, curve2] =
                curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?;
            let matrix = mab.matrix.to_f32();
            let bias = mab.bias.cast();
            let m_curves = MCurves3 {
                curve0,
                curve1,
                curve2,
                matrix,
                bias,
                inverse: true,
                depth: DEPTH,
            };
            m_curves.transform(lut)?;
        }
    }
    // One 4-channel output sample per 3-channel input sample.
    let mut new_lut = try_vec![0f32; (lut.len() / 3) * 4];
    if mab.a_curves.len() ==
4 && mab.clut.is_some() { let clut = &mab.clut.as_ref().map(|x| x.to_clut_f32()).unwrap(); let lut_grid = (mab.grid_points[0] as usize) .safe_mul(mab.grid_points[1] as usize)? .safe_mul(mab.grid_points[2] as usize)? .safe_mul(mab.num_output_channels as usize)?; if clut.len() != lut_grid { return Err(CmsError::MalformedClut(MalformedSize { size: clut.len(), expected: lut_grid, })); } let grid_size = [mab.grid_points[0], mab.grid_points[1], mab.grid_points[2]]; let all_curves_linear = mab.a_curves.iter().all(|curve| curve.is_linear()); if all_curves_linear { let a_curves = ACurves3x4InverseOptimized { clut, grid_size: [mab.grid_points[0], mab.grid_points[1], mab.grid_points[2]], interpolation_method: options.interpolation_method, pcs, }; a_curves.transform(lut, &mut new_lut)?; } else { let curves: Result, _> = mab .a_curves .iter() .map(|c| { c.build_linearize_table::() .ok_or(CmsError::InvalidTrcCurve) }) .collect(); let [curve0, curve1, curve2, curve3] = curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?; let a_curves = ACurves3x4Inverse { curve0, curve1, curve2, curve3, clut, grid_size, interpolation_method: options.interpolation_method, depth: DEPTH, pcs, }; a_curves.transform(lut, &mut new_lut)?; } } else { return Err(CmsError::UnsupportedProfileConnection); } Ok(new_lut) } moxcms-0.7.7/src/conversions/md_lut.rs000064400000000000000000000531671046102023000161520ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. 
Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ use crate::math::{FusedMultiplyAdd, FusedMultiplyNegAdd}; use crate::mlaf::{mlaf, neg_mlaf}; use crate::nd_array::{ArrayFetch, lerp}; use crate::{Vector3f, Vector3i}; use num_traits::MulAdd; use std::array::from_fn; use std::marker::PhantomData; use std::ops::{Add, Mul, Neg, Sub}; pub(crate) struct MultidimensionalLut { pub(crate) grid_strides: [u32; 16], pub(crate) grid_filling_size: [u32; 16], pub(crate) grid_scale: [f32; 16], pub(crate) output_inks: usize, } struct FastCube> { fetch: F, _phantom: PhantomData, } struct ArrayFetchVectorN<'a> { array: &'a [f32], x_stride: u32, y_stride: u32, z_stride: u32, output_inks: usize, } #[repr(transparent)] #[derive(Copy, Clone, Debug)] pub(crate) struct NVector { pub(crate) v: [T; N], } impl NVector { pub(crate) fn from_slice(v: &[T; N]) -> Self { Self { v: *v } } } impl From for NVector { #[inline] fn from(value: T) -> Self { Self { v: [value; N] } } } impl + Mul + MulAdd, const N: usize> FusedMultiplyAdd> for NVector { #[inline] fn mla(&self, b: NVector, c: NVector) -> NVector { Self { v: from_fn(|i| mlaf(self.v[i], b.v[i], c.v[i])), } } } impl< T: Copy + Add + Mul + MulAdd + Neg, const N: usize, > FusedMultiplyNegAdd> for NVector { #[inline] fn neg_mla(&self, b: NVector, c: NVector) -> NVector { Self { v: from_fn(|i| neg_mlaf(self.v[i], b.v[i], c.v[i])), } } } impl + Default + Copy, const N: usize> Sub> for NVector { type Output = Self; #[inline] fn sub(self, rhs: NVector) -> Self::Output { Self { v: from_fn(|i| self.v[i] - rhs.v[i]), } } } impl + Default + Copy, const N: usize> Add> for NVector { type Output = Self; #[inline] fn add(self, rhs: NVector) -> Self::Output { Self { v: from_fn(|i| self.v[i] + rhs.v[i]), } } } impl + Default + Copy, const N: usize> Mul> for NVector { type Output = Self; #[inline] fn mul(self, rhs: NVector) -> Self::Output { Self { v: from_fn(|i| self.v[i] * rhs.v[i]), } } } impl ArrayFetch> for ArrayFetchVectorN<'_> { #[inline(always)] fn fetch(&self, x: i32, y: i32, z: i32) -> NVector { 
let start = (x as u32 * self.x_stride + y as u32 * self.y_stride + z as u32 * self.z_stride) as usize * self.output_inks; let k = &self.array[start..start + N]; NVector::::from_slice(k.try_into().unwrap()) } } impl> FastCube where T: Copy + From + Sub + Mul + Add + FusedMultiplyNegAdd + FusedMultiplyAdd, { #[inline(never)] fn tetra(&self, src: Vector3i, src_next: Vector3i, w: Vector3f) -> T { let x = src.v[0]; let y = src.v[1]; let z = src.v[2]; let x_n = src_next.v[0]; let y_n = src_next.v[1]; let z_n = src_next.v[2]; let rx = w.v[0]; let ry = w.v[1]; let rz = w.v[2]; let c0 = self.fetch.fetch(x, y, z); let c2; let c1; let c3; if rx >= ry { if ry >= rz { //rx >= ry && ry >= rz c1 = self.fetch.fetch(x_n, y, z) - c0; c2 = self.fetch.fetch(x_n, y_n, z) - self.fetch.fetch(x_n, y, z); c3 = self.fetch.fetch(x_n, y_n, z_n) - self.fetch.fetch(x_n, y_n, z); } else if rx >= rz { //rx >= rz && rz >= ry c1 = self.fetch.fetch(x_n, y, z) - c0; c2 = self.fetch.fetch(x_n, y_n, z_n) - self.fetch.fetch(x_n, y, z_n); c3 = self.fetch.fetch(x_n, y, z_n) - self.fetch.fetch(x_n, y, z); } else { //rz > rx && rx >= ry c1 = self.fetch.fetch(x_n, y, z_n) - self.fetch.fetch(x, y, z_n); c2 = self.fetch.fetch(x_n, y_n, z_n) - self.fetch.fetch(x_n, y, z_n); c3 = self.fetch.fetch(x, y, z_n) - c0; } } else if rx >= rz { //ry > rx && rx >= rz c1 = self.fetch.fetch(x_n, y_n, z) - self.fetch.fetch(x, y_n, z); c2 = self.fetch.fetch(x, y_n, z) - c0; c3 = self.fetch.fetch(x_n, y_n, z_n) - self.fetch.fetch(x_n, y_n, z); } else if ry >= rz { //ry >= rz && rz > rx c1 = self.fetch.fetch(x_n, y_n, z_n) - self.fetch.fetch(x, y_n, z_n); c2 = self.fetch.fetch(x, y_n, z) - c0; c3 = self.fetch.fetch(x, y_n, z_n) - self.fetch.fetch(x, y_n, z); } else { //rz > ry && ry > rx c1 = self.fetch.fetch(x_n, y_n, z_n) - self.fetch.fetch(x, y_n, z_n); c2 = self.fetch.fetch(x, y_n, z_n) - self.fetch.fetch(x, y, z_n); c3 = self.fetch.fetch(x, y, z_n) - c0; } let s0 = c0.mla(c1, T::from(rx)); let s1 = s0.mla(c2, T::from(ry)); 
s1.mla(c3, T::from(rz))
    }
}

impl MultidimensionalLut {
    /// Precomputes the stride, slab-size and scale tables for a CLUT with
    /// `input_inks` input channels and `output_inks` values per grid node.
    ///
    /// For input channel `i` (with `k = input_inks - i - 1`):
    /// * `grid_strides[i]` is the product of the first `k` entries of `grid_size`;
    ///   it is filled for the first `input_inks - 1` channels only, the remaining
    ///   entries stay at `1`.
    /// * `grid_filling_size[i]` is the same product multiplied by `output_inks`.
    /// * `grid_scale[i]` is `grid_size[i] - 1` as `f32`.
    ///
    /// # Panics
    /// Panics when `input_inks > 16`.
    pub(crate) fn new(grid_size: [u8; 16], input_inks: usize, output_inks: usize) -> Self {
        assert!(input_inks <= 16);

        // `seed` multiplied by the first `dims` grid dimensions, in order.
        let scaled_product = |seed: u32, dims: usize| -> u32 {
            grid_size
                .iter()
                .take(dims)
                .fold(seed, |acc, &points| acc * points as u32)
        };

        let mut grid_strides = [1u32; 16];
        // `saturating_sub` keeps `input_inks == 0` from underflowing `usize`
        // (a panic in debug builds); no strides are written in that case.
        for (ink, dst_stride) in grid_strides
            .iter_mut()
            .take(input_inks.saturating_sub(1))
            .enumerate()
        {
            let how_many = input_inks.saturating_sub(ink).saturating_sub(1);
            *dst_stride = scaled_product(1u32, how_many);
        }

        let mut grid_filling_size = [1u32; 16];
        for (ink, dst_stride) in grid_filling_size.iter_mut().take(input_inks).enumerate() {
            let how_many = input_inks.saturating_sub(ink).saturating_sub(1);
            *dst_stride = scaled_product(output_inks as u32, how_many);
        }

        let mut grid_scale = [0f32; 16];
        for (dst, src) in grid_scale
            .iter_mut()
            .zip(grid_size.iter())
            .take(input_inks)
        {
            // A zero grid dimension would underflow `u8` with a plain `- 1`
            // (panic in debug, 255 in release); clamp the scale to 0 instead.
            *dst = src.saturating_sub(1) as f32;
        }

        Self {
            grid_strides,
            grid_scale,
            grid_filling_size,
            output_inks,
        }
    }
}

pub(crate) fn linear_4i_vec3f_direct(
    lut: &MultidimensionalLut,
    arr: &[f32],
    lx: f32,
    ly: f32,
    lz: f32,
    lw: f32,
) -> NVector {
    let lin_x = lx.max(0.0).min(1.0);
    let lin_y = ly.max(0.0).min(1.0);
    let lin_z = lz.max(0.0).min(1.0);
    let lin_w = lw.max(0.0).min(1.0);
    let scale_x = lut.grid_scale[0];
    let scale_y = lut.grid_scale[1];
    let scale_z = lut.grid_scale[2];
    let scale_w = lut.grid_scale[3];
    let lx = lin_x * scale_x;
    let ly = lin_y * scale_y;
    let lz = lin_z * scale_z;
    let lw = lin_w * scale_w;
    let x = lx.floor() as i32;
    let y = ly.floor() as i32;
    let z = lz.floor() as i32;
    let w = lw.floor() as i32;
    let src_x = Vector3i { v: [x, y, z] };
    let x_n = lx.ceil() as i32;
    let y_n = ly.ceil() as i32;
    let z_n = lz.ceil() as i32;
    let w_n = lw.ceil() as i32;
    let src_next = Vector3i { v: [x_n, y_n, z_n] };
    let x_w = lx - x as f32;
    let y_w = ly - y as f32;
    let z_w = lz - z as f32;
    let w_w = lw - w as f32;
    let weights = Vector3f { v:
[x_w, y_w, z_w] }; let cube0 = &arr[(w as usize * lut.grid_filling_size[3] as usize)..]; let cube1 = &arr[(w_n as usize * lut.grid_filling_size[3] as usize)..]; let fast_cube0 = FastCube { fetch: ArrayFetchVectorN { array: cube0, x_stride: lut.grid_strides[0], y_stride: lut.grid_strides[1], z_stride: lut.grid_strides[2], output_inks: lut.output_inks, }, _phantom: PhantomData, }; let fast_cube1 = FastCube { fetch: ArrayFetchVectorN { array: cube1, x_stride: lut.grid_strides[0], y_stride: lut.grid_strides[1], z_stride: lut.grid_strides[2], output_inks: lut.output_inks, }, _phantom: PhantomData, }; let w0 = fast_cube0.tetra(src_x, src_next, weights); let w1 = fast_cube1.tetra(src_x, src_next, weights); lerp(w0, w1, NVector::::from(w_w)) } pub(crate) fn linear_3i_vec3f_direct( lut: &MultidimensionalLut, arr: &[f32], inputs: &[f32], ) -> NVector { linear_3i_vec3f(lut, arr, inputs[0], inputs[1], inputs[2]) } fn linear_3i_vec3f( lut: &MultidimensionalLut, arr: &[f32], x: f32, y: f32, z: f32, ) -> NVector { let lin_x = x.max(0.0).min(1.0); let lin_y = y.max(0.0).min(1.0); let lin_z = z.max(0.0).min(1.0); let scale_x = lut.grid_scale[0]; let scale_y = lut.grid_scale[1]; let scale_z = lut.grid_scale[2]; let lx = lin_x * scale_x; let ly = lin_y * scale_y; let lz = lin_z * scale_z; let x = lx.floor() as i32; let y = ly.floor() as i32; let z = lz.floor() as i32; let src_x = Vector3i { v: [x, y, z] }; let x_n = lx.ceil() as i32; let y_n = ly.ceil() as i32; let z_n = lz.ceil() as i32; let src_next = Vector3i { v: [x_n, y_n, z_n] }; let x_w = lx - x as f32; let y_w = ly - y as f32; let z_w = lz - z as f32; let weights = Vector3f { v: [x_w, y_w, z_w] }; let fast_cube = FastCube { fetch: ArrayFetchVectorN { array: arr, x_stride: lut.grid_strides[0], y_stride: lut.grid_strides[1], z_stride: lut.grid_strides[2], output_inks: lut.output_inks, }, _phantom: PhantomData, }; fast_cube.tetra(src_x, src_next, weights) } pub(crate) fn linear_1i_vec3f( lut: &MultidimensionalLut, arr: &[f32], 
inputs: &[f32], ) -> NVector { let lin_x = inputs[0].max(0.0).min(1.0); let scale_x = lut.grid_scale[0]; let lx = lin_x * scale_x; let x = lx.floor() as i32; let x_n = lx.ceil() as i32; let x_w = lx - x as f32; let x_stride = lut.grid_strides[0]; let offset = |xi: i32| -> usize { (xi as u32 * x_stride) as usize * lut.output_inks }; // Sample 2 corners let a = NVector::::from_slice(&arr[offset(x)..][..N].try_into().unwrap()); let b = NVector::::from_slice(&arr[offset(x_n)..][..N].try_into().unwrap()); a * NVector::::from(1.0 - x_w) + b * NVector::::from(x_w) } /* Slice-input entry point for the 2-input case: forwards inputs[0] and inputs[1] to linear_2i_vec3f. Panics if `inputs` has fewer than two elements (plain indexing). */ pub(crate) fn linear_2i_vec3f_direct( lut: &MultidimensionalLut, arr: &[f32], inputs: &[f32], ) -> NVector { linear_2i_vec3f(lut, arr, inputs[0], inputs[1]) } /* Bilinear interpolation over a 2-input table. x and y are clamped to [0, 1] and multiplied by lut.grid_scale[0..2]; floor/ceil give the lower and upper node on each axis and the fractional part is the blend weight. Node offsets are (xi * grid_strides[0] + yi * grid_strides[1]) * output_inks, and N values are read at each of the four corners. The blend runs along x first (a-b and c-d), then along y. Slice indexing and try_into().unwrap() panic if `arr` is too short. */ fn linear_2i_vec3f( lut: &MultidimensionalLut, arr: &[f32], x: f32, y: f32, ) -> NVector { let lin_x = x.max(0.0).min(1.0); let lin_y = y.max(0.0).min(1.0); let scale_x = lut.grid_scale[0]; let scale_y = lut.grid_scale[1]; let lx = lin_x * scale_x; let ly = lin_y * scale_y; let x = lx.floor() as i32; let y = ly.floor() as i32; let x_n = lx.ceil() as i32; let y_n = ly.ceil() as i32; let x_w = lx - x as f32; let y_w = ly - y as f32; let x_stride = lut.grid_strides[0]; let y_stride = lut.grid_strides[1]; let offset = |xi: i32, yi: i32| -> usize { (xi as u32 * x_stride + yi as u32 * y_stride) as usize * lut.output_inks }; // Sample 4 corners let a = NVector::::from_slice(&arr[offset(x, y)..][..N].try_into().unwrap()); let b = NVector::::from_slice(&arr[offset(x_n, y)..][..N].try_into().unwrap()); let c = NVector::::from_slice(&arr[offset(x, y_n)..][..N].try_into().unwrap()); let d = NVector::::from_slice(&arr[offset(x_n, y_n)..][..N].try_into().unwrap()); let ab = a * NVector::::from(1.0 - x_w) + b * NVector::::from(x_w); let cd = c * NVector::::from(1.0 - x_w) + d * NVector::::from(x_w); ab * NVector::::from(1.0 - y_w) + cd * NVector::::from(y_w) } /* Slice-input entry point for the 4-input case: forwards inputs[0], inputs[1], inputs[2] and inputs[3] to linear_4i_vec3f_direct. Panics if `inputs` has fewer than four elements. */ pub(crate) fn linear_4i_vec3f( lut: &MultidimensionalLut, arr: &[f32], inputs: &[f32], ) -> NVector { 
linear_4i_vec3f_direct(lut, arr, inputs[0], inputs[1], inputs[2], inputs[3]) } /* Function-pointer shape shared by the slice-input interpolators: (lut, table, inputs) -> NVector. */ type FHandle = fn(&MultidimensionalLut, &[f32], &[f32]) -> NVector; /* Adds one input dimension on top of `handle`. inputs[I] is clamped to [0, 1] and multiplied by lut.grid_scale[I]; w0 is that value truncated to an index and w1 the next index, both capped at scale_p. The two sub-tables start at w0 and w1 times lut.grid_filling_size[I] inside `arr`. `handle` is evaluated on each sub-table with inputs[0..I], and the two results are combined with lerp using the fractional part of the scaled coordinate. */ #[inline(never)] pub(crate) fn linear_n_i_vec3f< const N: usize, const I: usize, Handle: Fn(&MultidimensionalLut, &[f32], &[f32]) -> NVector, >( lut: &MultidimensionalLut, arr: &[f32], inputs: &[f32], handle: Handle, ) -> NVector { let lin_w = inputs[I]; let w_c = lin_w.max(0.).min(1.); let scale_p = lut.grid_scale[I]; let wf = w_c * scale_p; let w0 = wf.min(scale_p) as usize; let w1 = (wf + 1.).min(scale_p) as usize; let w = wf - w0 as f32; let cube0 = &arr[(w0 * lut.grid_filling_size[I] as usize)..]; let cube1 = &arr[(w1 * lut.grid_filling_size[I] as usize)..]; let inputs_sliced = &inputs[0..I]; let w0 = handle(lut, cube0, inputs_sliced); let w1 = handle(lut, cube1, inputs_sliced); lerp(w0, w1, NVector::::from(w)) } /* 5-input interpolation. Same scheme as linear_n_i_vec3f, written out for dimension index 4: the two neighbouring 4-input sub-tables are each evaluated with linear_4i_vec3f_direct on inputs[0..=3] and the results are lerped by the fractional part of the scaled inputs[4]. */ #[inline(never)] pub(crate) fn linear_5i_vec3f( lut: &MultidimensionalLut, arr: &[f32], inputs: &[f32], ) -> NVector { let lin_w = inputs[4]; let w_c = lin_w.max(0.).min(1.); let scale_p = lut.grid_scale[4]; let wf = w_c * scale_p; let w0 = wf.min(scale_p) as usize; let w1 = (wf + 1.).min(scale_p) as usize; let w = wf - w0 as f32; let cube0 = &arr[(w0 * lut.grid_filling_size[4] as usize)..]; let cube1 = &arr[(w1 * lut.grid_filling_size[4] as usize)..]; let w0 = linear_4i_vec3f_direct(lut, cube0, inputs[0], inputs[1], inputs[2], inputs[3]); let w1 = linear_4i_vec3f_direct(lut, cube1, inputs[0], inputs[1], inputs[2], inputs[3]); lerp(w0, w1, NVector::::from(w)) } /* 6-input interpolation: linear_n_i_vec3f layered over linear_5i_vec3f. */ #[inline(never)] pub(crate) fn linear_6i_vec3f( lut: &MultidimensionalLut, arr: &[f32], inputs: &[f32], ) -> NVector { let f = linear_5i_vec3f::; linear_n_i_vec3f::>(lut, arr, inputs, f) } /* 7-input interpolation: linear_n_i_vec3f layered over linear_6i_vec3f. */ #[inline(never)] pub(crate) fn linear_7i_vec3f( lut: &MultidimensionalLut, arr: &[f32], inputs: &[f32], ) -> NVector { let f = linear_6i_vec3f::; linear_n_i_vec3f::>(lut, arr, inputs, f) } /* 8-input interpolation: linear_n_i_vec3f layered over linear_7i_vec3f. */ #[inline(never)] pub(crate) fn linear_8i_vec3f( lut: &MultidimensionalLut, 
arr: &[f32], inputs: &[f32], ) -> NVector { let f = linear_7i_vec3f::; linear_n_i_vec3f::>(lut, arr, inputs, f) } /* 9-input interpolation: linear_n_i_vec3f layered over linear_8i_vec3f. */ #[inline(never)] pub(crate) fn linear_9i_vec3f( lut: &MultidimensionalLut, arr: &[f32], inputs: &[f32], ) -> NVector { let f = linear_8i_vec3f::; linear_n_i_vec3f::>(lut, arr, inputs, f) } /* 10-input interpolation: linear_n_i_vec3f layered over linear_9i_vec3f. */ #[inline(never)] pub(crate) fn linear_10i_vec3f( lut: &MultidimensionalLut, arr: &[f32], inputs: &[f32], ) -> NVector { let f = linear_9i_vec3f::; linear_n_i_vec3f::>(lut, arr, inputs, f) } /* 11-input interpolation: linear_n_i_vec3f layered over linear_10i_vec3f. */ #[inline(never)] pub(crate) fn linear_11i_vec3f( lut: &MultidimensionalLut, arr: &[f32], inputs: &[f32], ) -> NVector { let f = linear_10i_vec3f::; linear_n_i_vec3f::>(lut, arr, inputs, f) } /* 12-input interpolation: linear_n_i_vec3f layered over linear_11i_vec3f. */ #[inline(never)] pub(crate) fn linear_12i_vec3f( lut: &MultidimensionalLut, arr: &[f32], inputs: &[f32], ) -> NVector { let f = linear_11i_vec3f::; linear_n_i_vec3f::>(lut, arr, inputs, f) } /* 13-input interpolation: linear_n_i_vec3f layered over linear_12i_vec3f. */ #[inline(never)] pub(crate) fn linear_13i_vec3f( lut: &MultidimensionalLut, arr: &[f32], inputs: &[f32], ) -> NVector { let f = linear_12i_vec3f::; linear_n_i_vec3f::>(lut, arr, inputs, f) } /* 14-input interpolation: linear_n_i_vec3f layered over linear_13i_vec3f. */ #[inline(never)] pub(crate) fn linear_14i_vec3f( lut: &MultidimensionalLut, arr: &[f32], inputs: &[f32], ) -> NVector { let f = linear_13i_vec3f::; linear_n_i_vec3f::>(lut, arr, inputs, f) } /* 15-input interpolation: linear_n_i_vec3f layered over linear_14i_vec3f. */ #[inline(never)] pub(crate) fn linear_15i_vec3f( lut: &MultidimensionalLut, arr: &[f32], inputs: &[f32], ) -> NVector { let f = linear_14i_vec3f::; linear_n_i_vec3f::>(lut, arr, inputs, f) } /* Runtime-to-const dispatch for 3-input interpolation: `inks` (1 to 15) selects the matching linear_3i_vec3f instantiation and the result is copied into `dst`. For inks == 1 the value is stored with dst[0] (panics on an empty dst); for the other counts the copy goes through zip, so it stops at the shorter of dst and the result. Any other `inks` reaches unreachable!() and panics. NOTE(review): the function is named tetra_* but every arm calls linear_3i_vec3f; confirm the name is intentional. */ #[inline(never)] pub(crate) fn tetra_3i_to_any_vec( lut: &MultidimensionalLut, arr: &[f32], x: f32, y: f32, z: f32, dst: &mut [f32], inks: usize, ) { match inks { 1 => { let vec3 = linear_3i_vec3f::<1>(lut, arr, x, y, z); dst[0] = vec3.v[0]; } 2 => { let vec3 = linear_3i_vec3f::<2>(lut, arr, x, y, z); for (dst, src) in dst.iter_mut().zip(vec3.v.iter()) { *dst = *src; } } 3 => { let vec3 = linear_3i_vec3f::<3>(lut, arr, x, y, z); for (dst, src) in dst.iter_mut().zip(vec3.v.iter()) { *dst = *src; } } 4 => { let vec3 = linear_3i_vec3f::<4>(lut, 
arr, x, y, z); for (dst, src) in dst.iter_mut().zip(vec3.v.iter()) { *dst = *src; } } 5 => { let vec3 = linear_3i_vec3f::<5>(lut, arr, x, y, z); for (dst, src) in dst.iter_mut().zip(vec3.v.iter()) { *dst = *src; } } 6 => { let vec3 = linear_3i_vec3f::<6>(lut, arr, x, y, z); for (dst, src) in dst.iter_mut().zip(vec3.v.iter()) { *dst = *src; } } 7 => { let vec3 = linear_3i_vec3f::<7>(lut, arr, x, y, z); for (dst, src) in dst.iter_mut().zip(vec3.v.iter()) { *dst = *src; } } 8 => { let vec3 = linear_3i_vec3f::<8>(lut, arr, x, y, z); for (dst, src) in dst.iter_mut().zip(vec3.v.iter()) { *dst = *src; } } 9 => { let vec3 = linear_3i_vec3f::<9>(lut, arr, x, y, z); for (dst, src) in dst.iter_mut().zip(vec3.v.iter()) { *dst = *src; } } 10 => { let vec3 = linear_3i_vec3f::<10>(lut, arr, x, y, z); for (dst, src) in dst.iter_mut().zip(vec3.v.iter()) { *dst = *src; } } 11 => { let vec3 = linear_3i_vec3f::<11>(lut, arr, x, y, z); for (dst, src) in dst.iter_mut().zip(vec3.v.iter()) { *dst = *src; } } 12 => { let vec3 = linear_3i_vec3f::<12>(lut, arr, x, y, z); for (dst, src) in dst.iter_mut().zip(vec3.v.iter()) { *dst = *src; } } 13 => { let vec3 = linear_3i_vec3f::<13>(lut, arr, x, y, z); for (dst, src) in dst.iter_mut().zip(vec3.v.iter()) { *dst = *src; } } 14 => { let vec3 = linear_3i_vec3f::<14>(lut, arr, x, y, z); for (dst, src) in dst.iter_mut().zip(vec3.v.iter()) { *dst = *src; } } 15 => { let vec3 = linear_3i_vec3f::<15>(lut, arr, x, y, z); for (dst, src) in dst.iter_mut().zip(vec3.v.iter()) { *dst = *src; } } _ => unreachable!(), } } moxcms-0.7.7/src/conversions/md_luts_factory.rs000064400000000000000000000155721046102023000200620ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. 
Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ use crate::conversions::LutBarycentricReduction; use crate::conversions::katana::{ CopyAlphaStage, InjectAlphaStage, Katana, KatanaInitialStage, KatanaIntermediateStage, KatanaPostFinalizationStage, KatanaStageLabToXyz, KatanaStageXyzToLab, katana_create_rgb_lin_lut, katana_input_make_lut_nx3, katana_multi_dimensional_3xn_to_device, katana_multi_dimensional_nx3_to_pcs, katana_output_make_lut_3xn, katana_pcs_lab_v2_to_v4, katana_pcs_lab_v4_to_v2, katana_prepare_inverse_lut_rgb_xyz, }; use crate::{ CmsError, ColorProfile, DataColorSpace, GammaLutInterpolate, Layout, LutWarehouse, PointeeSizeExpressible, TransformExecutor, TransformOptions, }; use num_traits::AsPrimitive; /* Assembles a Katana executor for an arbitrary source/destination profile pair. Source side: a matrix-shaper profile goes through katana_create_rgb_lin_lut (its extra stages are appended to `stages`); otherwise the device-to-PCS table for options.rendering_intent is required and is turned into the initial stage by katana_input_make_lut_nx3 or katana_multi_dimensional_nx3_to_pcs, depending on the LutWarehouse variant. Middle: katana_pcs_lab_v2_to_v4(source), a Lab-to-XYZ stage when source.pcs is Lab, an XYZ-to-Lab stage when dest.pcs is Lab, then katana_pcs_lab_v4_to_v2(dest). Destination side: the PCS-to-device table when dest.has_pcs_to_device_lut(), else the inverse matrix-shaper path when dest.is_matrix_shaper(), else Err(UnsupportedProfileConnection). The stage returned by prepare_alpha_finalizer, if any, becomes the only post-finalization stage. Errors from the katana_* builders are propagated with `?`. NOTE(review): the missing-table error carries source.rendering_intent while the lookup used options.rendering_intent; confirm which one the error should report. */ pub(crate) fn do_any_to_any< T: Copy + Default + AsPrimitive + Send + Sync + AsPrimitive + PointeeSizeExpressible + GammaLutInterpolate, const BIT_DEPTH: usize, const LINEAR_CAP: usize, const GAMMA_LUT: usize, >( src_layout: Layout, source: &ColorProfile, dst_layout: Layout, dest: &ColorProfile, options: TransformOptions, ) -> Result + Send + Sync>, CmsError> where f32: AsPrimitive, u32: AsPrimitive, (): LutBarycentricReduction, (): LutBarycentricReduction, { let mut stages: Vec + Send + Sync>> = Vec::new(); let initial_stage: Box + Send + Sync> = match source .is_matrix_shaper() { true => { let state = katana_create_rgb_lin_lut::(src_layout, source, options)?; stages.extend(state.stages); state.initial_stage } false => match source.get_device_to_pcs(options.rendering_intent).ok_or( CmsError::UnsupportedLutRenderingIntent(source.rendering_intent), )? 
{ LutWarehouse::Lut(lut) => katana_input_make_lut_nx3::( src_layout, src_layout.channels(), lut, options, source.pcs, BIT_DEPTH, )?, LutWarehouse::Multidimensional(mab) => katana_multi_dimensional_nx3_to_pcs::( src_layout, mab, options, source.pcs, BIT_DEPTH, )?, }, }; stages.push(katana_pcs_lab_v2_to_v4(source)); if source.pcs == DataColorSpace::Lab { stages.push(Box::new(KatanaStageLabToXyz::default())); } if dest.pcs == DataColorSpace::Lab { stages.push(Box::new(KatanaStageXyzToLab::default())); } stages.push(katana_pcs_lab_v4_to_v2(dest)); let final_stage = if dest.has_pcs_to_device_lut() { let pcs_to_device = dest .get_pcs_to_device(options.rendering_intent) .ok_or(CmsError::UnsupportedProfileConnection)?; match pcs_to_device { LutWarehouse::Lut(lut) => katana_output_make_lut_3xn::( dst_layout, lut, options, dest.color_space, BIT_DEPTH, )?, LutWarehouse::Multidimensional(mab) => katana_multi_dimensional_3xn_to_device::( dst_layout, mab, options, dest.pcs, BIT_DEPTH, )?, } } else if dest.is_matrix_shaper() { let state = katana_prepare_inverse_lut_rgb_xyz::( dest, dst_layout, options, )?; stages.extend(state.stages); state.final_stage } else { return Err(CmsError::UnsupportedProfileConnection); }; let mut post_finalization: Vec + Send + Sync>> = Vec::new(); if let Some(stage) = prepare_alpha_finalizer::(src_layout, source, dst_layout, dest, BIT_DEPTH) { post_finalization.push(stage); } Ok(Box::new(Katana:: { initial_stage, final_stage, stages, post_finalization, })) } /* Chooses the post-finalization alpha stage, if any. Returns None when the destination test fails. When it passes, the same test is applied to the source: if that passes, a CopyAlphaStage built from both layouts and dest.color_space is returned; otherwise an InjectAlphaStage built from dst_layout, dest.color_space and bit_depth. NOTE(review): in both tests the gray clause joins layout and colour space with `&&` while the Rgba/Rgb clause joins them with `||`, so the RGB clause is true for any layout on an RGB profile and for Rgba on any profile. Confirm this asymmetry is intended. */ pub(crate) fn prepare_alpha_finalizer< T: Copy + Default + AsPrimitive + Send + Sync + AsPrimitive + PointeeSizeExpressible + GammaLutInterpolate, >( src_layout: Layout, source: &ColorProfile, dst_layout: Layout, dest: &ColorProfile, bit_depth: usize, ) -> Option + Send + Sync>> where f32: AsPrimitive, { if (dst_layout == Layout::GrayAlpha && dest.color_space == DataColorSpace::Gray) || (dst_layout == Layout::Rgba || dest.color_space == DataColorSpace::Rgb) { return if (src_layout == 
Layout::GrayAlpha && source.color_space == DataColorSpace::Gray) || (src_layout == Layout::Rgba || source.color_space == DataColorSpace::Rgb) { Some(Box::new(CopyAlphaStage { src_layout, dst_layout, target_color_space: dest.color_space, _phantom: Default::default(), })) } else { Some(Box::new(InjectAlphaStage { dst_layout, target_color_space: dest.color_space, _phantom: Default::default(), bit_depth, })) }; } None } moxcms-0.7.7/src/conversions/mod.rs000064400000000000000000000060211046102023000154300ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #[cfg(all(target_arch = "x86_64", feature = "avx"))] mod avx; #[cfg(all(target_arch = "x86_64", feature = "avx512"))] mod avx512; mod bpc; mod gray2rgb; mod gray2rgb_extended; mod interpolator; mod katana; mod lut3x3; mod lut3x4; mod lut4; mod lut_transforms; mod mab; mod mab4x3; mod mba3x4; mod md_lut; mod md_luts_factory; #[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))] mod neon; mod prelude_lut_xyz_rgb; mod rgb2gray; mod rgb2gray_extended; mod rgb_xyz_factory; mod rgbxyz; mod rgbxyz_fixed; mod rgbxyz_float; #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))] mod sse; mod transform_lut3_to_3; mod transform_lut3_to_4; mod transform_lut4_to_3; mod xyz_lab; pub(crate) use gray2rgb::{make_gray_to_unfused, make_gray_to_x}; pub(crate) use gray2rgb_extended::{make_gray_to_one_trc_extended, make_gray_to_rgb_extended}; pub(crate) use interpolator::LutBarycentricReduction; pub(crate) use lut_transforms::make_lut_transform; pub(crate) use rgb_xyz_factory::{RgbXyzFactory, RgbXyzFactoryOpt}; pub(crate) use rgb2gray::{ToneReproductionRgbToGray, make_rgb_to_gray}; pub(crate) use rgb2gray_extended::make_rgb_to_gray_extended; pub(crate) use rgbxyz::{TransformMatrixShaper, TransformMatrixShaperOptimized}; pub(crate) use rgbxyz_float::{ TransformShaperFloatInOut, TransformShaperRgbFloat, make_rgb_xyz_rgb_transform_float, make_rgb_xyz_rgb_transform_float_in_out, }; 
moxcms-0.7.7/src/conversions/neon/interpolator.rs000064400000000000000000000646021046102023000203430ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #![allow(dead_code)] use crate::conversions::interpolator::BarycentricWeight; use crate::conversions::neon::rgb_xyz::NeonAlignedF32; use crate::math::{FusedMultiplyAdd, FusedMultiplyNegAdd}; use std::arch::aarch64::*; use std::ops::{Add, Mul, Sub}; /* Field-less marker types, one per 3-D interpolation scheme. The *Double variants interpolate two tables in one pass and return a pair of vectors. */ pub(crate) struct TetrahedralNeon {} pub(crate) struct PyramidalNeon {} pub(crate) struct TrilinearNeon {} pub(crate) struct PyramidalNeonDouble {} pub(crate) struct PrismaticNeonDouble {} pub(crate) struct TrilinearNeonDouble {} pub(crate) struct TetrahedralNeonDouble {} pub(crate) struct PrismaticNeon {} /* Reads the table node at integer grid coordinates (x, y, z). */ trait Fetcher { fn fetch(&self, x: i32, y: i32, z: i32) -> T; } /* Fetcher state for a single table of NeonAlignedF32 nodes. */ struct TetrahedralNeonFetchVector<'a, const GRID_SIZE: usize> { cube: &'a [NeonAlignedF32], } /* Fetcher state for two tables that are read at the same offset. */ struct TetrahedralNeonFetchVectorDouble<'a, const GRID_SIZE: usize> { cube0: &'a [NeonAlignedF32], cube1: &'a [NeonAlignedF32], } /* Wrapper around one float32x4_t register. */ #[derive(Copy, Clone)] pub(crate) struct NeonVector { pub(crate) v: float32x4_t, } /* Wrapper around two float32x4_t registers, one per table. */ #[derive(Copy, Clone)] pub(crate) struct NeonVectorDouble { pub(crate) v0: float32x4_t, pub(crate) v1: float32x4_t, } /* Broadcast constructors: the scalar is duplicated into every lane with vdupq_n_f32 (into both registers for the double variant). */ impl From for NeonVector { #[inline(always)] fn from(v: f32) -> Self { NeonVector { v: unsafe { vdupq_n_f32(v) }, } } } impl From for NeonVectorDouble { #[inline(always)] fn from(v: f32) -> Self { NeonVectorDouble { v0: unsafe { vdupq_n_f32(v) }, v1: unsafe { vdupq_n_f32(v) }, } } } /* Lane-wise arithmetic operators; the double variants apply the same intrinsic to v0 and v1 independently. */ impl Sub for NeonVector { type Output = Self; #[inline(always)] fn sub(self, rhs: NeonVector) -> Self::Output { NeonVector { v: unsafe { vsubq_f32(self.v, rhs.v) }, } } } impl Mul for NeonVector { type Output = Self; #[inline(always)] fn mul(self, rhs: NeonVector) -> Self::Output { NeonVector { v: unsafe { vmulq_f32(self.v, rhs.v) }, } } } impl Sub for NeonVectorDouble { type Output = Self; #[inline(always)] fn sub(self, rhs: NeonVectorDouble) -> Self::Output { NeonVectorDouble { v0: unsafe { vsubq_f32(self.v0, rhs.v0) }, v1: unsafe { vsubq_f32(self.v1, rhs.v1) }, } } } impl Mul for NeonVectorDouble { type Output = Self; #[inline(always)] fn mul(self, rhs: 
NeonVectorDouble) -> Self::Output { NeonVectorDouble { v0: unsafe { vmulq_f32(self.v0, rhs.v0) }, v1: unsafe { vmulq_f32(self.v1, rhs.v1) }, } } } impl Add for NeonVector { type Output = Self; #[inline(always)] fn add(self, rhs: NeonVector) -> Self::Output { NeonVector { v: unsafe { vaddq_f32(self.v, rhs.v) }, } } } impl Add for NeonVectorDouble { type Output = Self; #[inline(always)] fn add(self, rhs: NeonVectorDouble) -> Self::Output { NeonVectorDouble { v0: unsafe { vaddq_f32(self.v0, rhs.v0) }, v1: unsafe { vaddq_f32(self.v1, rhs.v1) }, } } } /* mla: self + b * c through vfmaq_f32. */ impl FusedMultiplyAdd for NeonVector { #[inline(always)] fn mla(&self, b: NeonVector, c: NeonVector) -> NeonVector { NeonVector { v: unsafe { vfmaq_f32(self.v, b.v, c.v) }, } } } /* neg_mla: self - b * c through vfmsq_f32. */ impl FusedMultiplyNegAdd for NeonVector { #[inline(always)] fn neg_mla(&self, b: NeonVector, c: NeonVector) -> NeonVector { NeonVector { v: unsafe { vfmsq_f32(self.v, b.v, c.v) }, } } } /* Inherent neg_mla for the two-register vector: self - b * c on each half. */ impl NeonVectorDouble { #[inline(always)] fn neg_mla(&self, b: NeonVectorDouble, c: NeonVectorDouble) -> NeonVectorDouble { NeonVectorDouble { v0: unsafe { vfmsq_f32(self.v0, b.v0, c.v0) }, v1: unsafe { vfmsq_f32(self.v1, b.v1, c.v1) }, } } } /* mla here takes the multiplier `c` as a single NeonVector and applies it to both halves; split() hands the two halves back as separate NeonVector values. */ impl NeonVectorDouble { #[inline(always)] fn mla(&self, b: NeonVectorDouble, c: NeonVector) -> NeonVectorDouble { NeonVectorDouble { v0: unsafe { vfmaq_f32(self.v0, b.v0, c.v) }, v1: unsafe { vfmaq_f32(self.v1, b.v1, c.v) }, } } #[inline(always)] pub(crate) fn split(self) -> (NeonVector, NeonVector) { (NeonVector { v: self.v0 }, NeonVector { v: self.v1 }) } } /* Single-table fetch. The node index is x * GRID_SIZE * GRID_SIZE + y * GRID_SIZE + z and the node is loaded with vld1q_f32. The slice access is get_unchecked, so nothing here validates the coordinates; callers must supply in-range values. */ impl Fetcher for TetrahedralNeonFetchVector<'_, GRID_SIZE> { fn fetch(&self, x: i32, y: i32, z: i32) -> NeonVector { let offset = (x as u32 * (GRID_SIZE as u32 * GRID_SIZE as u32) + y as u32 * GRID_SIZE as u32 + z as u32) as usize; let jx = unsafe { self.cube.get_unchecked(offset..) 
}; NeonVector { v: unsafe { vld1q_f32(jx.as_ptr() as *const f32) }, } } } /* Two-table fetch: same index computation as the single-table fetcher, applied to cube0 and cube1. Also unchecked. */ impl Fetcher for TetrahedralNeonFetchVectorDouble<'_, GRID_SIZE> { fn fetch(&self, x: i32, y: i32, z: i32) -> NeonVectorDouble { let offset = (x as u32 * (GRID_SIZE as u32 * GRID_SIZE as u32) + y as u32 * GRID_SIZE as u32 + z as u32) as usize; let jx0 = unsafe { self.cube0.get_unchecked(offset..) }; let jx1 = unsafe { self.cube1.get_unchecked(offset..) }; NeonVectorDouble { v0: unsafe { vld1q_f32(jx0.as_ptr() as *const f32) }, v1: unsafe { vld1q_f32(jx1.as_ptr() as *const f32) }, } } } /* Interface of the single-table interpolators: in_r/in_g/in_b index into `lut`, the per-channel BarycentricWeight table. */ pub(crate) trait NeonMdInterpolation { fn inter3_neon( &self, cube: &[NeonAlignedF32], in_r: usize, in_g: usize, in_b: usize, lut: &[BarycentricWeight], ) -> NeonVector; } /* Interface of the two-table interpolators; returns one vector per table. */ pub(crate) trait NeonMdInterpolationDouble { fn inter3_neon( &self, table0: &[NeonAlignedF32], table1: &[NeonAlignedF32], in_r: usize, in_g: usize, in_b: usize, lut: &[BarycentricWeight], ) -> (NeonVector, NeonVector); } /* Tetrahedral interpolation. The three weight entries are read without bounds checks; each supplies a lower node (x), an upper node (x_n) and a fractional weight (w). The ordering of rx, ry, rz selects one of six branches, each of which forms three node differences c1, c2, c3. The result is c0 + c1 * rx + c2 * ry + c3 * rz, accumulated with mla. */ impl TetrahedralNeon { #[inline] fn interpolate( &self, in_r: usize, in_g: usize, in_b: usize, lut: &[BarycentricWeight], r: impl Fetcher, ) -> NeonVector { let lut_r = unsafe { *lut.get_unchecked(in_r) }; let lut_g = unsafe { *lut.get_unchecked(in_g) }; let lut_b = unsafe { *lut.get_unchecked(in_b) }; let x: i32 = lut_r.x; let y: i32 = lut_g.x; let z: i32 = lut_b.x; let x_n: i32 = lut_r.x_n; let y_n: i32 = lut_g.x_n; let z_n: i32 = lut_b.x_n; let rx = lut_r.w; let ry = lut_g.w; let rz = lut_b.w; let c0 = r.fetch(x, y, z); let c2; let c1; let c3; if rx >= ry { if ry >= rz { //rx >= ry && ry >= rz c1 = r.fetch(x_n, y, z) - c0; c2 = r.fetch(x_n, y_n, z) - r.fetch(x_n, y, z); c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z); } else if rx >= rz { //rx >= rz && rz >= ry c1 = r.fetch(x_n, y, z) - c0; c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n); c3 = r.fetch(x_n, y, z_n) - r.fetch(x_n, y, z); } else { //rz > rx && rx >= ry c1 = r.fetch(x_n, y, z_n) - r.fetch(x, y, z_n); c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n); c3 
= r.fetch(x, y, z_n) - c0; } } else if rx >= rz { //ry > rx && rx >= rz c1 = r.fetch(x_n, y_n, z) - r.fetch(x, y_n, z); c2 = r.fetch(x, y_n, z) - c0; c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z); } else if ry >= rz { //ry >= rz && rz > rx c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n); c2 = r.fetch(x, y_n, z) - c0; c3 = r.fetch(x, y_n, z_n) - r.fetch(x, y_n, z); } else { //rz > ry && ry > rx c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n); c2 = r.fetch(x, y_n, z_n) - r.fetch(x, y, z_n); c3 = r.fetch(x, y, z_n) - c0; } let s0 = c0.mla(c1, NeonVector::from(rx)); let s1 = s0.mla(c2, NeonVector::from(ry)); s1.mla(c3, NeonVector::from(rz)) } } /* Two-table tetrahedral interpolation: same branch selection and formula as TetrahedralNeon::interpolate, evaluated on NeonVectorDouble and split into the per-table results at the end. */ impl TetrahedralNeonDouble { #[inline] fn interpolate( &self, in_r: usize, in_g: usize, in_b: usize, lut: &[BarycentricWeight], r: impl Fetcher, ) -> (NeonVector, NeonVector) { let lut_r = unsafe { *lut.get_unchecked(in_r) }; let lut_g = unsafe { *lut.get_unchecked(in_g) }; let lut_b = unsafe { *lut.get_unchecked(in_b) }; let x: i32 = lut_r.x; let y: i32 = lut_g.x; let z: i32 = lut_b.x; let x_n: i32 = lut_r.x_n; let y_n: i32 = lut_g.x_n; let z_n: i32 = lut_b.x_n; let rx = lut_r.w; let ry = lut_g.w; let rz = lut_b.w; let c0 = r.fetch(x, y, z); let c2; let c1; let c3; if rx >= ry { if ry >= rz { //rx >= ry && ry >= rz c1 = r.fetch(x_n, y, z) - c0; c2 = r.fetch(x_n, y_n, z) - r.fetch(x_n, y, z); c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z); } else if rx >= rz { //rx >= rz && rz >= ry c1 = r.fetch(x_n, y, z) - c0; c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n); c3 = r.fetch(x_n, y, z_n) - r.fetch(x_n, y, z); } else { //rz > rx && rx >= ry c1 = r.fetch(x_n, y, z_n) - r.fetch(x, y, z_n); c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n); c3 = r.fetch(x, y, z_n) - c0; } } else if rx >= rz { //ry > rx && rx >= rz c1 = r.fetch(x_n, y_n, z) - r.fetch(x, y_n, z); c2 = r.fetch(x, y_n, z) - c0; c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z); } else if ry >= rz { //ry >= rz && rz > rx c1 = r.fetch(x_n, y_n, z_n) - 
r.fetch(x, y_n, z_n); c2 = r.fetch(x, y_n, z) - c0; c3 = r.fetch(x, y_n, z_n) - r.fetch(x, y_n, z); } else { //rz > ry && ry > rx c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n); c2 = r.fetch(x, y_n, z_n) - r.fetch(x, y, z_n); c3 = r.fetch(x, y, z_n) - c0; } let s0 = c0.mla(c1, NeonVector::from(rx)); let s1 = s0.mla(c2, NeonVector::from(ry)); s1.mla(c3, NeonVector::from(rz)).split() } } /* Implements NeonMdInterpolation for a marker type by wrapping `cube` in a TetrahedralNeonFetchVector and delegating to that type's inherent interpolate(). */ macro_rules! define_md_inter_neon { ($interpolator: ident) => { impl NeonMdInterpolation for $interpolator { fn inter3_neon( &self, cube: &[NeonAlignedF32], in_r: usize, in_g: usize, in_b: usize, lut: &[BarycentricWeight], ) -> NeonVector { self.interpolate( in_r, in_g, in_b, lut, TetrahedralNeonFetchVector:: { cube }, ) } } }; } /* Two-table counterpart of define_md_inter_neon: wraps table0/table1 in a TetrahedralNeonFetchVectorDouble and delegates to interpolate(). */ macro_rules! define_md_inter_neon_d { ($interpolator: ident) => { impl NeonMdInterpolationDouble for $interpolator { fn inter3_neon( &self, table0: &[NeonAlignedF32], table1: &[NeonAlignedF32], in_r: usize, in_g: usize, in_b: usize, lut: &[BarycentricWeight], ) -> (NeonVector, NeonVector) { self.interpolate( in_r, in_g, in_b, lut, TetrahedralNeonFetchVectorDouble:: { cube0: table0, cube1: table1, }, ) } } }; } define_md_inter_neon!(TetrahedralNeon); define_md_inter_neon!(PyramidalNeon); define_md_inter_neon!(PrismaticNeon); define_md_inter_neon!(TrilinearNeon); define_md_inter_neon_d!(PrismaticNeonDouble); define_md_inter_neon_d!(PyramidalNeonDouble); define_md_inter_neon_d!(TetrahedralNeonDouble); define_md_inter_neon_d!(TrilinearNeonDouble); /* Pyramidal interpolation. Three cases are chosen by comparing the weights (dr, dg both above db; db, dg both above dr; otherwise). Each case fetches four extra nodes, forms c1..c4, and returns c0 + c1 * db + c2 * dr + c3 * dg + c4 * p, where p is dr * dg, dg * db or db * dr for the respective case. Weight lookups are unchecked. */ impl PyramidalNeon { #[inline] fn interpolate( &self, in_r: usize, in_g: usize, in_b: usize, lut: &[BarycentricWeight], r: impl Fetcher, ) -> NeonVector { let lut_r = unsafe { *lut.get_unchecked(in_r) }; let lut_g = unsafe { *lut.get_unchecked(in_g) }; let lut_b = unsafe { *lut.get_unchecked(in_b) }; let x: i32 = lut_r.x; let y: i32 = lut_g.x; let z: i32 = lut_b.x; let x_n: i32 = lut_r.x_n; let y_n: i32 = lut_g.x_n; let z_n: i32 = lut_b.x_n; let dr = lut_r.w; let dg = lut_g.w; let db = lut_b.w; let c0 = r.fetch(x, 
y, z); if dr > db && dg > db { let x0 = r.fetch(x_n, y_n, z_n); let x1 = r.fetch(x_n, y_n, z); let x2 = r.fetch(x_n, y, z); let x3 = r.fetch(x, y_n, z); let c1 = x0 - x1; let c2 = x2 - c0; let c3 = x3 - c0; let c4 = c0 - x3 - x2 + x1; let s0 = c0.mla(c1, NeonVector::from(db)); let s1 = s0.mla(c2, NeonVector::from(dr)); let s2 = s1.mla(c3, NeonVector::from(dg)); s2.mla(c4, NeonVector::from(dr * dg)) } else if db > dr && dg > dr { let x0 = r.fetch(x, y, z_n); let x1 = r.fetch(x_n, y_n, z_n); let x2 = r.fetch(x, y_n, z_n); let x3 = r.fetch(x, y_n, z); let c1 = x0 - c0; let c2 = x1 - x2; let c3 = x3 - c0; let c4 = c0 - x3 - x0 + x2; let s0 = c0.mla(c1, NeonVector::from(db)); let s1 = s0.mla(c2, NeonVector::from(dr)); let s2 = s1.mla(c3, NeonVector::from(dg)); s2.mla(c4, NeonVector::from(dg * db)) } else { let x0 = r.fetch(x, y, z_n); let x1 = r.fetch(x_n, y, z); let x2 = r.fetch(x_n, y, z_n); let x3 = r.fetch(x_n, y_n, z_n); let c1 = x0 - c0; let c2 = x1 - c0; let c3 = x3 - x2; let c4 = c0 - x1 - x0 + x2; let s0 = c0.mla(c1, NeonVector::from(db)); let s1 = s0.mla(c2, NeonVector::from(dr)); let s2 = s1.mla(c3, NeonVector::from(dg)); s2.mla(c4, NeonVector::from(db * dr)) } } } /* Two-table pyramidal interpolation: same three cases and formula as PyramidalNeon::interpolate, with the broadcast weights w0 (db), w1 (dr), w2 (dg) built once up front and the result split per table. */ impl PyramidalNeonDouble { #[inline] fn interpolate( &self, in_r: usize, in_g: usize, in_b: usize, lut: &[BarycentricWeight], r: impl Fetcher, ) -> (NeonVector, NeonVector) { let lut_r = unsafe { *lut.get_unchecked(in_r) }; let lut_g = unsafe { *lut.get_unchecked(in_g) }; let lut_b = unsafe { *lut.get_unchecked(in_b) }; let x: i32 = lut_r.x; let y: i32 = lut_g.x; let z: i32 = lut_b.x; let x_n: i32 = lut_r.x_n; let y_n: i32 = lut_g.x_n; let z_n: i32 = lut_b.x_n; let dr = lut_r.w; let dg = lut_g.w; let db = lut_b.w; let c0 = r.fetch(x, y, z); let w0 = NeonVector::from(db); let w1 = NeonVector::from(dr); let w2 = NeonVector::from(dg); if dr > db && dg > db { let x0 = r.fetch(x_n, y_n, z_n); let x1 = r.fetch(x_n, y_n, z); let x2 = r.fetch(x_n, y, z); let x3 = r.fetch(x, y_n, z); let c1 = x0 - x1; let 
c2 = x2 - c0; let c3 = x3 - c0; let c4 = c0 - x3 - x2 + x1; let w3 = NeonVector::from(dr * dg); let s0 = c0.mla(c1, w0); let s1 = s0.mla(c2, w1); let s2 = s1.mla(c3, w2); s2.mla(c4, w3).split() } else if db > dr && dg > dr { let x0 = r.fetch(x, y, z_n); let x1 = r.fetch(x_n, y_n, z_n); let x2 = r.fetch(x, y_n, z_n); let x3 = r.fetch(x, y_n, z); let c1 = x0 - c0; let c2 = x1 - x2; let c3 = x3 - c0; let c4 = c0 - x3 - x0 + x2; let w3 = NeonVector::from(dg * db); let s0 = c0.mla(c1, w0); let s1 = s0.mla(c2, w1); let s2 = s1.mla(c3, w2); s2.mla(c4, w3).split() } else { let x0 = r.fetch(x, y, z_n); let x1 = r.fetch(x_n, y, z); let x2 = r.fetch(x_n, y, z_n); let x3 = r.fetch(x_n, y_n, z_n); let c1 = x0 - c0; let c2 = x1 - c0; let c3 = x3 - x2; let c4 = c0 - x1 - x0 + x2; let w3 = NeonVector::from(db * dr); let s0 = c0.mla(c1, w0); let s1 = s0.mla(c2, w1); let s2 = s1.mla(c3, w2); s2.mla(c4, w3).split() } } } /* Prismatic interpolation. The comparison db > dr selects one of two cases; each fetches five extra nodes, forms c1..c5, and returns c0 + c1 * db + c2 * dr + c3 * dg + c4 * (dg * db) + c5 * (dr * dg). Weight lookups are unchecked. */ impl PrismaticNeon { #[inline] fn interpolate( &self, in_r: usize, in_g: usize, in_b: usize, lut: &[BarycentricWeight], r: impl Fetcher, ) -> NeonVector { let lut_r = unsafe { *lut.get_unchecked(in_r) }; let lut_g = unsafe { *lut.get_unchecked(in_g) }; let lut_b = unsafe { *lut.get_unchecked(in_b) }; let x: i32 = lut_r.x; let y: i32 = lut_g.x; let z: i32 = lut_b.x; let x_n: i32 = lut_r.x_n; let y_n: i32 = lut_g.x_n; let z_n: i32 = lut_b.x_n; let dr = lut_r.w; let dg = lut_g.w; let db = lut_b.w; let c0 = r.fetch(x, y, z); if db > dr { let x0 = r.fetch(x, y, z_n); let x1 = r.fetch(x_n, y, z_n); let x2 = r.fetch(x, y_n, z); let x3 = r.fetch(x, y_n, z_n); let x4 = r.fetch(x_n, y_n, z_n); let c1 = x0 - c0; let c2 = x1 - x0; let c3 = x2 - c0; let c4 = c0 - x2 - x0 + x3; let c5 = x0 - x3 - x1 + x4; let s0 = c0.mla(c1, NeonVector::from(db)); let s1 = s0.mla(c2, NeonVector::from(dr)); let s2 = s1.mla(c3, NeonVector::from(dg)); let s3 = s2.mla(c4, NeonVector::from(dg * db)); s3.mla(c5, NeonVector::from(dr * dg)) } else { let x0 = r.fetch(x_n, y, z); let x1 = r.fetch(x_n, y, 
z_n); let x2 = r.fetch(x, y_n, z); let x3 = r.fetch(x_n, y_n, z); let x4 = r.fetch(x_n, y_n, z_n); let c1 = x1 - x0; let c2 = x0 - c0; let c3 = x2 - c0; let c4 = x0 - x3 - x1 + x4; let c5 = c0 - x2 - x0 + x3; let s0 = c0.mla(c1, NeonVector::from(db)); let s1 = s0.mla(c2, NeonVector::from(dr)); let s2 = s1.mla(c3, NeonVector::from(dg)); let s3 = s2.mla(c4, NeonVector::from(dg * db)); s3.mla(c5, NeonVector::from(dr * dg)) } } } /* Two-table prismatic interpolation: same two cases and formula as PrismaticNeon::interpolate, with all five broadcast weights built before the branch and the result split per table. */ impl PrismaticNeonDouble { #[inline] fn interpolate( &self, in_r: usize, in_g: usize, in_b: usize, lut: &[BarycentricWeight], rv: impl Fetcher, ) -> (NeonVector, NeonVector) { let lut_r = unsafe { *lut.get_unchecked(in_r) }; let lut_g = unsafe { *lut.get_unchecked(in_g) }; let lut_b = unsafe { *lut.get_unchecked(in_b) }; let x: i32 = lut_r.x; let y: i32 = lut_g.x; let z: i32 = lut_b.x; let x_n: i32 = lut_r.x_n; let y_n: i32 = lut_g.x_n; let z_n: i32 = lut_b.x_n; let dr = lut_r.w; let dg = lut_g.w; let db = lut_b.w; let c0 = rv.fetch(x, y, z); let w0 = NeonVector::from(db); let w1 = NeonVector::from(dr); let w2 = NeonVector::from(dg); let w3 = NeonVector::from(dg * db); let w4 = NeonVector::from(dr * dg); if db > dr { let x0 = rv.fetch(x, y, z_n); let x1 = rv.fetch(x_n, y, z_n); let x2 = rv.fetch(x, y_n, z); let x3 = rv.fetch(x, y_n, z_n); let x4 = rv.fetch(x_n, y_n, z_n); let c1 = x0 - c0; let c2 = x1 - x0; let c3 = x2 - c0; let c4 = c0 - x2 - x0 + x3; let c5 = x0 - x3 - x1 + x4; let s0 = c0.mla(c1, w0); let s1 = s0.mla(c2, w1); let s2 = s1.mla(c3, w2); let s3 = s2.mla(c4, w3); s3.mla(c5, w4).split() } else { let x0 = rv.fetch(x_n, y, z); let x1 = rv.fetch(x_n, y, z_n); let x2 = rv.fetch(x, y_n, z); let x3 = rv.fetch(x_n, y_n, z); let x4 = rv.fetch(x_n, y_n, z_n); let c1 = x1 - x0; let c2 = x0 - c0; let c3 = x2 - c0; let c4 = x0 - x3 - x1 + x4; let c5 = c0 - x2 - x0 + x3; let s0 = c0.mla(c1, w0); let s1 = s0.mla(c2, w1); let s2 = s1.mla(c3, w2); let s3 = s2.mla(c4, w3); s3.mla(c5, w4).split() } } } /* Two-table trilinear interpolation. All eight cell corners are fetched; each blend is written as a.neg_mla(a, d).mla(b, w), i.e. a - a * d + b * w with d and w holding the same weight, applied along x (dr), then y (dg), then z (db). The result is split per table. */ impl TrilinearNeonDouble { #[inline] fn 
interpolate( &self, in_r: usize, in_g: usize, in_b: usize, lut: &[BarycentricWeight], r: impl Fetcher, ) -> (NeonVector, NeonVector) { let lut_r = unsafe { *lut.get_unchecked(in_r) }; let lut_g = unsafe { *lut.get_unchecked(in_g) }; let lut_b = unsafe { *lut.get_unchecked(in_b) }; let x: i32 = lut_r.x; let y: i32 = lut_g.x; let z: i32 = lut_b.x; let x_n: i32 = lut_r.x_n; let y_n: i32 = lut_g.x_n; let z_n: i32 = lut_b.x_n; let dr = lut_r.w; let dg = lut_g.w; let db = lut_b.w; let w0 = NeonVector::from(dr); let w1 = NeonVector::from(dg); let w2 = NeonVector::from(db); let c000 = r.fetch(x, y, z); let c100 = r.fetch(x_n, y, z); let c010 = r.fetch(x, y_n, z); let c110 = r.fetch(x_n, y_n, z); let c001 = r.fetch(x, y, z_n); let c101 = r.fetch(x_n, y, z_n); let c011 = r.fetch(x, y_n, z_n); let c111 = r.fetch(x_n, y_n, z_n); let dx = NeonVectorDouble::from(dr); let c00 = c000.neg_mla(c000, dx).mla(c100, w0); let c10 = c010.neg_mla(c010, dx).mla(c110, w0); let c01 = c001.neg_mla(c001, dx).mla(c101, w0); let c11 = c011.neg_mla(c011, dx).mla(c111, w0); let dy = NeonVectorDouble::from(dg); let c0 = c00.neg_mla(c00, dy).mla(c10, w1); let c1 = c01.neg_mla(c01, dy).mla(c11, w1); let dz = NeonVectorDouble::from(db); c0.neg_mla(c0, dz).mla(c1, w2).split() } } /* Single-table trilinear interpolation: eight corner fetches, then a - a * d + b * d blends along x, y and z in that order. Weight lookups are unchecked. */ impl TrilinearNeon { #[inline] fn interpolate( &self, in_r: usize, in_g: usize, in_b: usize, lut: &[BarycentricWeight], r: impl Fetcher, ) -> NeonVector { let lut_r = unsafe { *lut.get_unchecked(in_r) }; let lut_g = unsafe { *lut.get_unchecked(in_g) }; let lut_b = unsafe { *lut.get_unchecked(in_b) }; let x: i32 = lut_r.x; let y: i32 = lut_g.x; let z: i32 = lut_b.x; let x_n: i32 = lut_r.x_n; let y_n: i32 = lut_g.x_n; let z_n: i32 = lut_b.x_n; let dr = lut_r.w; let dg = lut_g.w; let db = lut_b.w; let w0 = NeonVector::from(dr); let w1 = NeonVector::from(dg); let w2 = NeonVector::from(db); let c000 = r.fetch(x, y, z); let c100 = r.fetch(x_n, y, z); let c010 = r.fetch(x, y_n, z); let c110 = r.fetch(x_n, y_n, z); let c001 = 
r.fetch(x, y, z_n); let c101 = r.fetch(x_n, y, z_n); let c011 = r.fetch(x, y_n, z_n); let c111 = r.fetch(x_n, y_n, z_n); let dx = NeonVector::from(dr); let c00 = c000.neg_mla(c000, dx).mla(c100, w0); let c10 = c010.neg_mla(c010, dx).mla(c110, w0); let c01 = c001.neg_mla(c001, dx).mla(c101, w0); let c11 = c011.neg_mla(c011, dx).mla(c111, w0); let dy = NeonVector::from(dg); let c0 = c00.neg_mla(c00, dy).mla(c10, w1); let c1 = c01.neg_mla(c01, dy).mla(c11, w1); let dz = NeonVector::from(db); c0.neg_mla(c0, dz).mla(c1, w2) } } moxcms-0.7.7/src/conversions/neon/interpolator_q0_15.rs000064400000000000000000000700331046102023000212430ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
// NOTE(review): in this copy of the file several generic parameter lists look
// like they were lost in extraction (for example `Fetcher::fetch` returns an
// undeclared `T`, the `From`/`Fetcher`/`FusedMultiplyAdd` impls name no type
// argument, and turbofish sites read `:: {`). Confirm against the upstream
// source before relying on the signatures exactly as shown here.
use crate::conversions::interpolator::BarycentricWeight;
use crate::math::FusedMultiplyAdd;
use std::arch::aarch64::*;
use std::ops::{Add, Mul, Sub};

/// One LUT node: four `i16` lanes, 8-byte aligned, C layout.
#[repr(align(8), C)]
pub(crate) struct NeonAlignedI16x4(pub(crate) [i16; 4]);

/// Tetrahedral interpolator over a single cube (marker type, no state).
#[cfg(feature = "options")]
pub(crate) struct TetrahedralNeonQ0_15 {}

/// Pyramidal interpolator over a single cube (marker type, no state).
#[cfg(feature = "options")]
pub(crate) struct PyramidalNeonQ0_15 {}

/// Trilinear interpolator over a single cube (marker type, no state).
pub(crate) struct TrilinearNeonQ0_15 {}

/// Prismatic interpolator over a single cube (marker type, no state).
#[cfg(feature = "options")]
pub(crate) struct PrismaticNeonQ0_15 {}

/// Pyramidal interpolator evaluating two cubes at once (marker type, no state).
#[cfg(feature = "options")]
pub(crate) struct PyramidalNeonQ0_15Double {}

/// Prismatic interpolator evaluating two cubes at once (marker type, no state).
#[cfg(feature = "options")]
pub(crate) struct PrismaticNeonQ0_15Double {}

/// Trilinear interpolator evaluating two cubes at once (marker type, no state).
pub(crate) struct TrilinearNeonQ0_15Double {}

/// Tetrahedral interpolator evaluating two cubes at once (marker type, no state).
#[cfg(feature = "options")]
pub(crate) struct TetrahedralNeonQ0_15Double {}

/// Abstraction over "read the LUT node at grid coordinates `(x, y, z)`".
trait Fetcher {
    fn fetch(&self, x: i32, y: i32, z: i32) -> T;
}

/// Fetcher that reads one node from a single cube.
struct TetrahedralNeonQ0_15FetchVector<'a, const GRID_SIZE: usize> {
    cube: &'a [NeonAlignedI16x4],
}

/// Fetcher that reads the same node from two cubes and packs both into one
/// 8-lane vector (`cube0` in the low half, `cube1` in the high half).
struct TetrahedralNeonQ0_15FetchVectorDouble<'a, const GRID_SIZE: usize> {
    cube0: &'a [NeonAlignedI16x4],
    cube1: &'a [NeonAlignedI16x4],
}

/// Thin wrapper over a 4-lane `i16` NEON register.
#[derive(Copy, Clone)]
pub(crate) struct NeonVectorQ0_15 {
    pub(crate) v: int16x4_t,
}

/// Thin wrapper over an 8-lane `i16` NEON register (two 4-lane values).
#[derive(Copy, Clone)]
pub(crate) struct NeonVectorQ0_15Double {
    pub(crate) v: int16x8_t,
}

impl From for NeonVectorQ0_15 {
    /// Broadcasts `v` to all four lanes.
    #[inline(always)]
    fn from(v: i16) -> Self {
        NeonVectorQ0_15 {
            v: unsafe { vdup_n_s16(v) },
        }
    }
}

impl From for NeonVectorQ0_15Double {
    /// Broadcasts `v` to all eight lanes.
    #[inline(always)]
    fn from(v: i16) -> Self {
        NeonVectorQ0_15Double {
            v: unsafe { vdupq_n_s16(v) },
        }
    }
}

impl Sub for NeonVectorQ0_15 {
    type Output = Self;
    /// Lane-wise subtraction via `vsub_s16` (the non-saturating intrinsic).
    #[inline(always)]
    fn sub(self, rhs: NeonVectorQ0_15) -> Self::Output {
        NeonVectorQ0_15 {
            v: unsafe { vsub_s16(self.v, rhs.v) },
        }
    }
}

impl Mul for NeonVectorQ0_15 {
    type Output = Self;
    /// Lane-wise product via `vqrdmulh_s16` (saturating rounding doubling
    /// multiply, high half), i.e. a fixed-point multiply rather than an
    /// integer one.
    #[inline(always)]
    fn mul(self, rhs: NeonVectorQ0_15) -> Self::Output {
        NeonVectorQ0_15 {
            v: unsafe { vqrdmulh_s16(self.v, rhs.v) },
        }
    }
}

impl Sub for NeonVectorQ0_15Double {
    type Output = Self;
    /// Lane-wise subtraction via `vsubq_s16` (the non-saturating intrinsic).
    #[inline(always)]
    fn sub(self, rhs: NeonVectorQ0_15Double) -> Self::Output {
        NeonVectorQ0_15Double {
            v: unsafe { vsubq_s16(self.v, rhs.v) },
        }
    }
}

impl Mul for NeonVectorQ0_15Double {
    type Output = Self;
    /// Lane-wise fixed-point product via `vqrdmulhq_s16`.
    #[inline(always)]
    fn mul(self, rhs: NeonVectorQ0_15Double) -> Self::Output {
        NeonVectorQ0_15Double {
            v: unsafe { vqrdmulhq_s16(self.v, rhs.v) },
        }
    }
}

impl Add for NeonVectorQ0_15 {
    type Output = Self;
    /// Lane-wise addition via `vadd_s16` (the non-saturating intrinsic).
    #[inline(always)]
    fn add(self, rhs: NeonVectorQ0_15) -> Self::Output {
        NeonVectorQ0_15 {
            v: unsafe { vadd_s16(self.v, rhs.v) },
        }
    }
}

impl Add for NeonVectorQ0_15Double {
    type Output = Self;
    /// Lane-wise addition via `vaddq_s16` (the non-saturating intrinsic).
    #[inline(always)]
    fn add(self, rhs: NeonVectorQ0_15Double) -> Self::Output {
        NeonVectorQ0_15Double {
            v: unsafe { vaddq_s16(self.v, rhs.v) },
        }
    }
}

impl FusedMultiplyAdd for NeonVectorQ0_15 {
    /// Returns `self + b * c` using `vqrdmlah_s16` (the product is the same
    /// rounding doubling high-half multiply as `Mul`, accumulated with
    /// saturation).
    #[inline(always)]
    fn mla(&self, b: NeonVectorQ0_15, c: NeonVectorQ0_15) -> NeonVectorQ0_15 {
        NeonVectorQ0_15 {
            v: unsafe { vqrdmlah_s16(self.v, b.v, c.v) },
        }
    }
}

impl NeonVectorQ0_15 {
    /// Returns `self - b * c` using `vqrdmlsh_s16`.
    #[inline(always)]
    fn neg_mla(&self, b: NeonVectorQ0_15, c: NeonVectorQ0_15) -> NeonVectorQ0_15 {
        NeonVectorQ0_15 {
            v: unsafe { vqrdmlsh_s16(self.v, b.v, c.v) },
        }
    }
}

impl NeonVectorQ0_15Double {
    /// Returns `self - b * c` using `vqrdmlshq_s16`.
    #[inline(always)]
    fn neg_mla(&self, b: NeonVectorQ0_15Double, c: NeonVectorQ0_15Double) -> NeonVectorQ0_15Double {
        NeonVectorQ0_15Double {
            v: unsafe { vqrdmlshq_s16(self.v, b.v, c.v) },
        }
    }
}

impl NeonVectorQ0_15Double {
    /// Returns `self + b * c` using `vqrdmlahq_s16`, where the 4-lane weight
    /// `c` is duplicated into both halves so the same weight applies to both
    /// packed values.
    #[inline(always)]
    fn mla(&self, b: NeonVectorQ0_15Double, c: NeonVectorQ0_15) -> NeonVectorQ0_15Double {
        NeonVectorQ0_15Double {
            v: unsafe { vqrdmlahq_s16(self.v, b.v, vcombine_s16(c.v, c.v)) },
        }
    }

    /// Splits the 8-lane value into its `(low, high)` 4-lane halves.
    #[inline(always)]
    pub(crate) fn split(self) -> (NeonVectorQ0_15, NeonVectorQ0_15) {
        unsafe {
            (
                NeonVectorQ0_15 {
                    v: vget_low_s16(self.v),
                },
                NeonVectorQ0_15 {
                    v: vget_high_s16(self.v),
                },
            )
        }
    }
}

impl Fetcher for TetrahedralNeonQ0_15FetchVector<'_, GRID_SIZE> {
    /// Loads the four `i16` lanes stored at grid node `(x, y, z)`.
    fn fetch(&self, x: i32, y: i32, z: i32) -> NeonVectorQ0_15 {
        // Flattened node index: x * GRID_SIZE^2 + y * GRID_SIZE + z, computed in u32.
        let offset = (x as u32 * (GRID_SIZE as u32 * GRID_SIZE as u32)
            + y as u32 * GRID_SIZE as u32
            + z as u32) as usize;
        // No bounds check happens here; the caller must supply in-range coordinates.
        let jx = unsafe { self.cube.get_unchecked(offset..) };
        NeonVectorQ0_15 {
            v: unsafe { vld1_s16(jx.as_ptr() as *const i16) },
        }
    }
}

impl Fetcher for TetrahedralNeonQ0_15FetchVectorDouble<'_, GRID_SIZE> {
    /// Loads grid node `(x, y, z)` from both cubes and packs them as
    /// `(cube0, cube1)` into the low and high halves of one vector.
    fn fetch(&self, x: i32, y: i32, z: i32) -> NeonVectorQ0_15Double {
        // Flattened node index: x * GRID_SIZE^2 + y * GRID_SIZE + z, computed in u32.
        let offset = (x as u32 * (GRID_SIZE as u32 * GRID_SIZE as u32)
            + y as u32 * GRID_SIZE as u32
            + z as u32) as usize;
        // No bounds check happens here; the caller must supply in-range coordinates.
        let jx0 = unsafe { self.cube0.get_unchecked(offset..) };
        let jx1 = unsafe { self.cube1.get_unchecked(offset..) };
        NeonVectorQ0_15Double {
            v: unsafe {
                vcombine_s16(
                    vld1_s16(jx0.as_ptr() as *const i16),
                    vld1_s16(jx1.as_ptr() as *const i16),
                )
            },
        }
    }
}

/// 3D interpolation of one cube at the position described by the weight
/// records `lut[in_r]`, `lut[in_g]`, `lut[in_b]`.
pub(crate) trait NeonMdInterpolationQ0_15 {
    fn inter3_neon(
        &self,
        cube: &[NeonAlignedI16x4],
        in_r: usize,
        in_g: usize,
        in_b: usize,
        lut: &[BarycentricWeight],
    ) -> NeonVectorQ0_15;
}

/// Same as [`NeonMdInterpolationQ0_15`] but evaluates two cubes in one pass and
/// returns one result per cube, in `(table0, table1)` order.
pub(crate) trait NeonMdInterpolationQ0_15Double {
    fn inter3_neon(
        &self,
        table0: &[NeonAlignedI16x4],
        table1: &[NeonAlignedI16x4],
        in_r: usize,
        in_g: usize,
        in_b: usize,
        lut: &[BarycentricWeight],
    ) -> (NeonVectorQ0_15, NeonVectorQ0_15);
}

#[cfg(feature = "options")]
impl TetrahedralNeonQ0_15 {
    /// Tetrahedral interpolation: picks one of six tetrahedra from the ordering
    /// of the three fractional weights and accumulates
    /// `c0 + c1 * rx + c2 * ry + c3 * rz`.
    ///
    /// # Safety
    ///
    /// Requires the `rdm` target feature. `in_r`, `in_g`, `in_b` are read from
    /// `lut` without bounds checks, so they must be valid indices.
    #[target_feature(enable = "rdm")]
    unsafe fn interpolate(
        &self,
        in_r: usize,
        in_g: usize,
        in_b: usize,
        lut: &[BarycentricWeight],
        r: impl Fetcher,
    ) -> NeonVectorQ0_15 {
        let lut_r = unsafe { *lut.get_unchecked(in_r) };
        let lut_g = unsafe { *lut.get_unchecked(in_g) };
        let lut_b = unsafe { *lut.get_unchecked(in_b) };

        // Lower grid coordinate per axis.
        let x: i32 = lut_r.x;
        let y: i32 = lut_g.x;
        let z: i32 = lut_b.x;

        // Neighbouring grid coordinate per axis.
        let x_n: i32 = lut_r.x_n;
        let y_n: i32 = lut_g.x_n;
        let z_n: i32 = lut_b.x_n;

        // Per-axis weights.
        let rx = lut_r.w;
        let ry = lut_g.w;
        let rz = lut_b.w;

        let c0 = r.fetch(x, y, z);

        let c2;
        let c1;
        let c3;
        if rx >= ry {
            if ry >= rz {
                //rx >= ry && ry >= rz
                c1 = r.fetch(x_n, y, z) - c0;
                c2 = r.fetch(x_n, y_n, z) - r.fetch(x_n, y, z);
                c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
            } else if rx >= rz {
                //rx >= rz && rz >= ry
                c1 = r.fetch(x_n, y, z) - c0;
                c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
                c3 = r.fetch(x_n, y, z_n) - r.fetch(x_n, y, z);
            } else {
                //rz > rx && rx >= ry
                c1 = r.fetch(x_n, y, z_n) - r.fetch(x, y, z_n);
                c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
                c3 = r.fetch(x, y, z_n) - c0;
            }
        } else if rx >= rz {
            //ry > rx && rx >= rz
            c1 = r.fetch(x_n, y_n, z) - r.fetch(x, y_n, z);
            c2 = r.fetch(x, y_n, z) - c0;
            c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
        } else if ry >= rz {
            //ry >= rz && rz > rx
            c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
            c2 = r.fetch(x, y_n, z) - c0;
            c3 = r.fetch(x, y_n, z_n) - r.fetch(x, y_n, z);
        } else {
            //rz > ry && ry > rx
            c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
            c2 = r.fetch(x, y_n, z_n) - r.fetch(x, y, z_n);
            c3 = r.fetch(x, y, z_n) - c0;
        }
        let s0 = c0.mla(c1, NeonVectorQ0_15::from(rx));
        let s1 = s0.mla(c2, NeonVectorQ0_15::from(ry));
        s1.mla(c3, NeonVectorQ0_15::from(rz))
    }
}

#[cfg(feature = "options")]
impl TetrahedralNeonQ0_15Double {
    /// Two-cube variant of the tetrahedral interpolation; the fetcher yields
    /// both cubes packed in one vector and the result is split per cube.
    ///
    /// # Safety
    ///
    /// Requires the `rdm` target feature. `in_r`, `in_g`, `in_b` are read from
    /// `lut` without bounds checks, so they must be valid indices.
    #[target_feature(enable = "rdm")]
    unsafe fn interpolate(
        &self,
        in_r: usize,
        in_g: usize,
        in_b: usize,
        lut: &[BarycentricWeight],
        r: impl Fetcher,
    ) -> (NeonVectorQ0_15, NeonVectorQ0_15) {
        let lut_r = unsafe { *lut.get_unchecked(in_r) };
        let lut_g = unsafe { *lut.get_unchecked(in_g) };
        let lut_b = unsafe { *lut.get_unchecked(in_b) };

        // Lower grid coordinate per axis.
        let x: i32 = lut_r.x;
        let y: i32 = lut_g.x;
        let z: i32 = lut_b.x;

        // Neighbouring grid coordinate per axis.
        let x_n: i32 = lut_r.x_n;
        let y_n: i32 = lut_g.x_n;
        let z_n: i32 = lut_b.x_n;

        // Per-axis weights.
        let rx = lut_r.w;
        let ry = lut_g.w;
        let rz = lut_b.w;

        let c0 = r.fetch(x, y, z);

        let c2;
        let c1;
        let c3;
        if rx >= ry {
            if ry >= rz {
                //rx >= ry && ry >= rz
                c1 = r.fetch(x_n, y, z) - c0;
                c2 = r.fetch(x_n, y_n, z) - r.fetch(x_n, y, z);
                c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
            } else if rx >= rz {
                //rx >= rz && rz >= ry
                c1 = r.fetch(x_n, y, z) - c0;
                c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
                c3 = r.fetch(x_n, y, z_n) - r.fetch(x_n, y, z);
            } else {
                //rz > rx && rx >= ry
                c1 = r.fetch(x_n, y, z_n) - r.fetch(x, y, z_n);
                c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
                c3 = r.fetch(x, y, z_n) - c0;
            }
        } else if rx >= rz {
            //ry > rx && rx >= rz
            c1 = r.fetch(x_n, y_n, z) - r.fetch(x, y_n, z);
            c2 = r.fetch(x, y_n, z) - c0;
            c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
        } else if ry >= rz {
            //ry >= rz && rz > rx
            c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
            c2 = r.fetch(x, y_n, z) - c0;
            c3 = r.fetch(x, y_n, z_n) - r.fetch(x, y_n, z);
        } else {
            //rz > ry && ry > rx
            c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
            c2 = r.fetch(x, y_n, z_n) - r.fetch(x, y, z_n);
            c3 = r.fetch(x, y, z_n) - c0;
        }
        let s0 = c0.mla(c1, NeonVectorQ0_15::from(rx));
        let s1 = s0.mla(c2, NeonVectorQ0_15::from(ry));
        s1.mla(c3, NeonVectorQ0_15::from(rz)).split()
    }
}

// Implements `NeonMdInterpolationQ0_15` for an interpolator type by wrapping
// the cube in a single-cube fetcher and forwarding to its `interpolate`.
macro_rules! define_md_inter_neon {
    ($interpolator: ident) => {
        impl NeonMdInterpolationQ0_15 for $interpolator {
            fn inter3_neon(
                &self,
                cube: &[NeonAlignedI16x4],
                in_r: usize,
                in_g: usize,
                in_b: usize,
                lut: &[BarycentricWeight],
            ) -> NeonVectorQ0_15 {
                // `interpolate` is an `unsafe fn` gated on `rdm`; this wrapper
                // itself performs no feature check.
                unsafe {
                    self.interpolate(
                        in_r,
                        in_g,
                        in_b,
                        lut,
                        TetrahedralNeonQ0_15FetchVector:: { cube },
                    )
                }
            }
        }
    };
}

// Implements `NeonMdInterpolationQ0_15Double` for an interpolator type by
// wrapping both tables in a two-cube fetcher and forwarding to `interpolate`.
macro_rules! define_md_inter_neon_d {
    ($interpolator: ident) => {
        impl NeonMdInterpolationQ0_15Double for $interpolator {
            fn inter3_neon(
                &self,
                table0: &[NeonAlignedI16x4],
                table1: &[NeonAlignedI16x4],
                in_r: usize,
                in_g: usize,
                in_b: usize,
                lut: &[BarycentricWeight],
            ) -> (NeonVectorQ0_15, NeonVectorQ0_15) {
                // `interpolate` is an `unsafe fn` gated on `rdm`; this wrapper
                // itself performs no feature check.
                unsafe {
                    self.interpolate(
                        in_r,
                        in_g,
                        in_b,
                        lut,
                        TetrahedralNeonQ0_15FetchVectorDouble:: {
                            cube0: table0,
                            cube1: table1,
                        },
                    )
                }
            }
        }
    };
}

#[cfg(feature = "options")]
define_md_inter_neon!(TetrahedralNeonQ0_15);
#[cfg(feature = "options")]
define_md_inter_neon!(PyramidalNeonQ0_15);
#[cfg(feature = "options")]
define_md_inter_neon!(PrismaticNeonQ0_15);
define_md_inter_neon!(TrilinearNeonQ0_15);
#[cfg(feature = "options")]
define_md_inter_neon_d!(PrismaticNeonQ0_15Double);
#[cfg(feature = "options")]
define_md_inter_neon_d!(PyramidalNeonQ0_15Double);
#[cfg(feature = "options")]
define_md_inter_neon_d!(TetrahedralNeonQ0_15Double);
define_md_inter_neon_d!(TrilinearNeonQ0_15Double);

#[cfg(feature = "options")]
impl PyramidalNeonQ0_15 {
    /// Pyramidal interpolation: chooses one of three pyramids from the weight
    /// ordering and accumulates
    /// `c0 + c1 * db + c2 * dr + c3 * dg + c4 * (pairwise weight product)`.
    ///
    /// # Safety
    ///
    /// Requires the `rdm` target feature. `in_r`, `in_g`, `in_b` are read from
    /// `lut` without bounds checks, so they must be valid indices.
    #[target_feature(enable = "rdm")]
    unsafe fn interpolate(
        &self,
        in_r: usize,
        in_g: usize,
        in_b: usize,
        lut: &[BarycentricWeight],
        r: impl Fetcher,
    ) -> NeonVectorQ0_15 {
        let lut_r = unsafe { *lut.get_unchecked(in_r) };
        let lut_g = unsafe { *lut.get_unchecked(in_g) };
        let lut_b = unsafe { *lut.get_unchecked(in_b) };

        let x: i32 = lut_r.x;
        let y: i32 = lut_g.x;
        let z: i32 = lut_b.x;

        let x_n: i32 = lut_r.x_n;
        let y_n: i32 = lut_g.x_n;
        let z_n: i32 = lut_b.x_n;

        let dr = lut_r.w;
        let dg = lut_g.w;
        let db = lut_b.w;

        let c0 = r.fetch(x, y, z);

        // Note the order: w0 is the blue weight, w1 red, w2 green.
        let w0 = NeonVectorQ0_15::from(db);
        let w1 = NeonVectorQ0_15::from(dr);
        let w2 = NeonVectorQ0_15::from(dg);

        if dr > db && dg > db {
            let x0 = r.fetch(x_n, y_n, z_n);
            let x1 = r.fetch(x_n, y_n, z);
            let x2 = r.fetch(x_n, y, z);
            let x3 = r.fetch(x, y_n, z);

            // Cross term weight: dr * dg.
            let w3 = w1 * w2;

            let c1 = x0 - x1;
            let c2 = x2 - c0;
            let c3 = x3 - c0;
            let c4 = c0 - x3 - x2 + x1;

            let s0 = c0.mla(c1, w0);
            let s1 = s0.mla(c2, w1);
            let s2 = s1.mla(c3, w2);
            s2.mla(c4, w3)
        } else if db > dr && dg > dr {
            let x0 = r.fetch(x, y, z_n);
            let x1 = r.fetch(x_n, y_n, z_n);
            let x2 = r.fetch(x, y_n, z_n);
            let x3 = r.fetch(x, y_n, z);

            // Cross term weight: dg * db.
            let w3 = w2 * w0;

            let c1 = x0 - c0;
            let c2 = x1 - x2;
            let c3 = x3 - c0;
            let c4 = c0 - x3 - x0 + x2;

            let s0 = c0.mla(c1, w0);
            let s1 = s0.mla(c2, w1);
            let s2 = s1.mla(c3, w2);
            s2.mla(c4, w3)
        } else {
            let x0 = r.fetch(x, y, z_n);
            let x1 = r.fetch(x_n, y, z);
            let x2 = r.fetch(x_n, y, z_n);
            let x3 = r.fetch(x_n, y_n, z_n);

            // Cross term weight: db * dr.
            let w3 = w0 * w1;

            let c1 = x0 - c0;
            let c2 = x1 - c0;
            let c3 = x3 - x2;
            let c4 = c0 - x1 - x0 + x2;

            let s0 = c0.mla(c1, w0);
            let s1 = s0.mla(c2, w1);
            let s2 = s1.mla(c3, w2);
            s2.mla(c4, w3)
        }
    }
}

#[cfg(feature = "options")]
impl PyramidalNeonQ0_15Double {
    /// Two-cube variant of the pyramidal interpolation; the result is split
    /// into one vector per cube.
    ///
    /// # Safety
    ///
    /// Requires the `rdm` target feature. `in_r`, `in_g`, `in_b` are read from
    /// `lut` without bounds checks, so they must be valid indices.
    #[target_feature(enable = "rdm")]
    unsafe fn interpolate(
        &self,
        in_r: usize,
        in_g: usize,
        in_b: usize,
        lut: &[BarycentricWeight],
        r: impl Fetcher,
    ) -> (NeonVectorQ0_15, NeonVectorQ0_15) {
        let lut_r = unsafe { *lut.get_unchecked(in_r) };
        let lut_g = unsafe { *lut.get_unchecked(in_g) };
        let lut_b = unsafe { *lut.get_unchecked(in_b) };

        let x: i32 = lut_r.x;
        let y: i32 = lut_g.x;
        let z: i32 = lut_b.x;

        let x_n: i32 = lut_r.x_n;
        let y_n: i32 = lut_g.x_n;
        let z_n: i32 = lut_b.x_n;

        let dr = lut_r.w;
        let dg = lut_g.w;
        let db = lut_b.w;

        let c0 = r.fetch(x, y, z);

        // Note the order: w0 is the blue weight, w1 red, w2 green.
        let w0 = NeonVectorQ0_15::from(db);
        let w1 = NeonVectorQ0_15::from(dr);
        let w2 = NeonVectorQ0_15::from(dg);

        if dr > db && dg > db {
            // Cross term weight: dr * dg.
            let w3 = NeonVectorQ0_15::from(dr) * NeonVectorQ0_15::from(dg);

            let x0 = r.fetch(x_n, y_n, z_n);
            let x1 = r.fetch(x_n, y_n, z);
            let x2 = r.fetch(x_n, y, z);
            let x3 = r.fetch(x, y_n, z);

            let c1 = x0 - x1;
            let c2 = x2 - c0;
            let c3 = x3 - c0;
            let c4 = c0 - x3 - x2 + x1;

            let s0 = c0.mla(c1, w0);
            let s1 = s0.mla(c2, w1);
            let s2 = s1.mla(c3, w2);
            s2.mla(c4, w3).split()
        } else if db > dr && dg > dr {
            // Cross term weight: dg * db.
            let w3 = NeonVectorQ0_15::from(dg) * NeonVectorQ0_15::from(db);

            let x0 = r.fetch(x, y, z_n);
            let x1 = r.fetch(x_n, y_n, z_n);
            let x2 = r.fetch(x, y_n, z_n);
            let x3 = r.fetch(x, y_n, z);

            let c1 = x0 - c0;
            let c2 = x1 - x2;
            let c3 = x3 - c0;
            let c4 = c0 - x3 - x0 + x2;

            let s0 = c0.mla(c1, w0);
            let s1 = s0.mla(c2, w1);
            let s2 = s1.mla(c3, w2);
            s2.mla(c4, w3).split()
        } else {
            // Cross term weight: db * dr.
            let w3 = NeonVectorQ0_15::from(db) * NeonVectorQ0_15::from(dr);

            let x0 = r.fetch(x, y, z_n);
            let x1 = r.fetch(x_n, y, z);
            let x2 = r.fetch(x_n, y, z_n);
            let x3 = r.fetch(x_n, y_n, z_n);

            let c1 = x0 - c0;
            let c2 = x1 - c0;
            let c3 = x3 - x2;
            let c4 = c0 - x1 - x0 + x2;

            let s0 = c0.mla(c1, w0);
            let s1 = s0.mla(c2, w1);
            let s2 = s1.mla(c3, w2);
            s2.mla(c4, w3).split()
        }
    }
}

#[cfg(feature = "options")]
impl PrismaticNeonQ0_15 {
    /// Prismatic interpolation: chooses one of two prisms by comparing `db`
    /// with `dr` and accumulates
    /// `c0 + c1 * db + c2 * dr + c3 * dg + c4 * (dg * db) + c5 * (dr * dg)`.
    ///
    /// # Safety
    ///
    /// Requires the `rdm` target feature. `in_r`, `in_g`, `in_b` are read from
    /// `lut` without bounds checks, so they must be valid indices.
    #[target_feature(enable = "rdm")]
    unsafe fn interpolate(
        &self,
        in_r: usize,
        in_g: usize,
        in_b: usize,
        lut: &[BarycentricWeight],
        r: impl Fetcher,
    ) -> NeonVectorQ0_15 {
        let lut_r = unsafe { *lut.get_unchecked(in_r) };
        let lut_g = unsafe { *lut.get_unchecked(in_g) };
        let lut_b = unsafe { *lut.get_unchecked(in_b) };

        let x: i32 = lut_r.x;
        let y: i32 = lut_g.x;
        let z: i32 = lut_b.x;

        let x_n: i32 = lut_r.x_n;
        let y_n: i32 = lut_g.x_n;
        let z_n: i32 = lut_b.x_n;

        let dr = lut_r.w;
        let dg = lut_g.w;
        let db = lut_b.w;

        let c0 = r.fetch(x, y, z);

        // Note the order: w0 is the blue weight, w1 red, w2 green.
        let w0 = NeonVectorQ0_15::from(db);
        let w1 = NeonVectorQ0_15::from(dr);
        let w2 = NeonVectorQ0_15::from(dg);

        if db > dr {
            let w3 = w2 * w0;
            let w4 = w1 * w2;

            let x0 = r.fetch(x, y, z_n);
            let x1 = r.fetch(x_n, y, z_n);
            let x2 = r.fetch(x, y_n, z);
            let x3 = r.fetch(x, y_n, z_n);
            let x4 = r.fetch(x_n, y_n, z_n);

            let c1 = x0 - c0;
            let c2 = x1 - x0;
            let c3 = x2 - c0;
            let c4 = c0 - x2 - x0 + x3;
            let c5 = x0 - x3 - x1 + x4;

            let s0 = c0.mla(c1, w0);
            let s1 = s0.mla(c2, w1);
            let s2 = s1.mla(c3, w2);
            let s3 = s2.mla(c4, w3);
            s3.mla(c5, w4)
        } else {
            let w3 = w2 * w0;
            let w4 = w1 * w2;

            let x0 = r.fetch(x_n, y, z);
            let x1 = r.fetch(x_n, y, z_n);
            let x2 = r.fetch(x, y_n, z);
            let x3 = r.fetch(x_n, y_n, z);
            let x4 = r.fetch(x_n, y_n, z_n);

            let c1 = x1 - x0;
            let c2 = x0 - c0;
            let c3 = x2 - c0;
            let c4 = x0 - x3 - x1 + x4;
            let c5 = c0 - x2 - x0 + x3;

            let s0 = c0.mla(c1, w0);
            let s1 = s0.mla(c2, w1);
            let s2 = s1.mla(c3, w2);
            let s3 = s2.mla(c4, w3);
            s3.mla(c5, w4)
        }
    }
}

#[cfg(feature = "options")]
impl PrismaticNeonQ0_15Double {
    /// Two-cube variant of the prismatic interpolation; the result is split
    /// into one vector per cube.
    ///
    /// # Safety
    ///
    /// Requires the `rdm` target feature. `in_r`, `in_g`, `in_b` are read from
    /// `lut` without bounds checks, so they must be valid indices.
    #[target_feature(enable = "rdm")]
    unsafe fn interpolate(
        &self,
        in_r: usize,
        in_g: usize,
        in_b: usize,
        lut: &[BarycentricWeight],
        rv: impl Fetcher,
    ) -> (NeonVectorQ0_15, NeonVectorQ0_15) {
        let lut_r = unsafe { *lut.get_unchecked(in_r) };
        let lut_g = unsafe { *lut.get_unchecked(in_g) };
        let lut_b = unsafe { *lut.get_unchecked(in_b) };

        let x: i32 = lut_r.x;
        let y: i32 = lut_g.x;
        let z: i32 = lut_b.x;

        let x_n: i32 = lut_r.x_n;
        let y_n: i32 = lut_g.x_n;
        let z_n: i32 = lut_b.x_n;

        let dr = lut_r.w;
        let dg = lut_g.w;
        let db = lut_b.w;

        let c0 = rv.fetch(x, y, z);

        // Note the order: w0 is the blue weight, w1 red, w2 green.
        let w0 = NeonVectorQ0_15::from(db);
        let w1 = NeonVectorQ0_15::from(dr);
        let w2 = NeonVectorQ0_15::from(dg);
        // Cross term weights, shared by both branches below.
        let w3 = NeonVectorQ0_15::from(dg) * NeonVectorQ0_15::from(db);
        let w4 = NeonVectorQ0_15::from(dr) * NeonVectorQ0_15::from(dg);

        if db > dr {
            let x0 = rv.fetch(x, y, z_n);
            let x1 = rv.fetch(x_n, y, z_n);
            let x2 = rv.fetch(x, y_n, z);
            let x3 = rv.fetch(x, y_n, z_n);
            let x4 = rv.fetch(x_n, y_n, z_n);

            let c1 = x0 - c0;
            let c2 = x1 - x0;
            let c3 = x2 - c0;
            let c4 = c0 - x2 - x0 + x3;
            let c5 = x0 - x3 - x1 + x4;

            let s0 = c0.mla(c1, w0);
            let s1 = s0.mla(c2, w1);
            let s2 = s1.mla(c3, w2);
            let s3 = s2.mla(c4, w3);
            s3.mla(c5, w4).split()
        } else {
            let x0 = rv.fetch(x_n, y, z);
            let x1 = rv.fetch(x_n, y, z_n);
            let x2 = rv.fetch(x, y_n, z);
            let x3 = rv.fetch(x_n, y_n, z);
            let x4 = rv.fetch(x_n, y_n, z_n);

            let c1 = x1 - x0;
            let c2 = x0 - c0;
            let c3 = x2 - c0;
            let c4 = x0 - x3 - x1 + x4;
            let c5 = c0 - x2 - x0 + x3;

            let s0 = c0.mla(c1, w0);
            let s1 = s0.mla(c2, w1);
            let s2 = s1.mla(c3, w2);
            let s3 = s2.mla(c4, w3);
            s3.mla(c5, w4).split()
        }
    }
}

impl TrilinearNeonQ0_15Double {
    /// Trilinear interpolation over two cubes at once: fetches the eight
    /// surrounding nodes and blends along x, then y, then z. Each blend is
    /// `a - a * w + b * w`, expressed as `a.neg_mla(a, w).mla(b, w)`.
    ///
    /// # Safety
    ///
    /// Requires the `rdm` target feature. `in_r`, `in_g`, `in_b` are read from
    /// `lut` without bounds checks, so they must be valid indices.
    #[target_feature(enable = "rdm")]
    unsafe fn interpolate(
        &self,
        in_r: usize,
        in_g: usize,
        in_b: usize,
        lut: &[BarycentricWeight],
        r: impl Fetcher,
    ) -> (NeonVectorQ0_15, NeonVectorQ0_15) {
        let lut_r = unsafe { *lut.get_unchecked(in_r) };
        let lut_g = unsafe { *lut.get_unchecked(in_g) };
        let lut_b = unsafe { *lut.get_unchecked(in_b) };

        let x: i32 = lut_r.x;
        let y: i32 = lut_g.x;
        let z: i32 = lut_b.x;

        let x_n: i32 = lut_r.x_n;
        let y_n: i32 = lut_g.x_n;
        let z_n: i32 = lut_b.x_n;

        let dr = lut_r.w;
        let dg = lut_g.w;
        let db = lut_b.w;

        // 4-lane weights for `mla`, which widens them internally.
        let w0 = NeonVectorQ0_15::from(dr);
        let w1 = NeonVectorQ0_15::from(dg);
        let w2 = NeonVectorQ0_15::from(db);

        let c000 = r.fetch(x, y, z);
        let c100 = r.fetch(x_n, y, z);
        let c010 = r.fetch(x, y_n, z);
        let c110 = r.fetch(x_n, y_n, z);
        let c001 = r.fetch(x, y, z_n);
        let c101 = r.fetch(x_n, y, z_n);
        let c011 = r.fetch(x, y_n, z_n);
        let c111 = r.fetch(x_n, y_n, z_n);

        // Blend along x.
        let dx = NeonVectorQ0_15Double::from(dr);

        let c00 = c000.neg_mla(c000, dx).mla(c100, w0);
        let c10 = c010.neg_mla(c010, dx).mla(c110, w0);
        let c01 = c001.neg_mla(c001, dx).mla(c101, w0);
        let c11 = c011.neg_mla(c011, dx).mla(c111, w0);

        // Blend along y.
        let dy = NeonVectorQ0_15Double::from(dg);

        let c0 = c00.neg_mla(c00, dy).mla(c10, w1);
        let c1 = c01.neg_mla(c01, dy).mla(c11, w1);

        // Blend along z and hand back one result per cube.
        let dz = NeonVectorQ0_15Double::from(db);

        c0.neg_mla(c0, dz).mla(c1, w2).split()
    }
}

impl TrilinearNeonQ0_15 {
    /// Trilinear interpolation over one cube: fetches the eight surrounding
    /// nodes and blends along x, then y, then z. Each blend is
    /// `a - a * w + b * w`, expressed as `a.neg_mla(a, w).mla(b, w)`.
    ///
    /// # Safety
    ///
    /// Requires the `rdm` target feature. `in_r`, `in_g`, `in_b` are read from
    /// `lut` without bounds checks, so they must be valid indices.
    #[target_feature(enable = "rdm")]
    unsafe fn interpolate(
        &self,
        in_r: usize,
        in_g: usize,
        in_b: usize,
        lut: &[BarycentricWeight],
        r: impl Fetcher,
    ) -> NeonVectorQ0_15 {
        let lut_r = unsafe { *lut.get_unchecked(in_r) };
        let lut_g = unsafe { *lut.get_unchecked(in_g) };
        let lut_b = unsafe { *lut.get_unchecked(in_b) };

        let x: i32 = lut_r.x;
        let y: i32 = lut_g.x;
        let z: i32 = lut_b.x;

        let x_n: i32 = lut_r.x_n;
        let y_n: i32 = lut_g.x_n;
        let z_n: i32 = lut_b.x_n;

        let dr = lut_r.w;
        let dg = lut_g.w;
        let db = lut_b.w;

        let w0 = NeonVectorQ0_15::from(dr);
        let w1 = NeonVectorQ0_15::from(dg);
        let w2 = NeonVectorQ0_15::from(db);

        let c000 = r.fetch(x, y, z);
        let c100 = r.fetch(x_n, y, z);
        let c010 = r.fetch(x, y_n, z);
        let c110 = r.fetch(x_n, y_n, z);
        let c001 = r.fetch(x, y, z_n);
        let c101 = r.fetch(x_n, y, z_n);
        let c011 = r.fetch(x, y_n, z_n);
        let c111 = r.fetch(x_n, y_n, z_n);

        // Blend along x.
        let dx = NeonVectorQ0_15::from(dr);

        let c00 = c000.neg_mla(c000, dx).mla(c100, w0);
        let c10 = c010.neg_mla(c010, dx).mla(c110, w0);
        let c01 = c001.neg_mla(c001, dx).mla(c101, w0);
        let c11 = c011.neg_mla(c011, dx).mla(c111, w0);

        // Blend along y.
        let dy = NeonVectorQ0_15::from(dg);

        let c0 = c00.neg_mla(c00, dy).mla(c10, w1);
        let c1 = c01.neg_mla(c01, dy).mla(c11, w1);

        // Blend along z.
        let dz = NeonVectorQ0_15::from(db);

        c0.neg_mla(c0, dz).mla(c1, w2)
    }
}
moxcms-0.7.7/src/conversions/neon/lut4_to_3.rs000064400000000000000000000302051046102023000174250ustar 00000000000000/*
 * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
 * //
 * // Redistribution and use in source and binary forms, with or without modification,
 * // are permitted provided that the following conditions are met:
 * //
 * // 1. Redistributions of source code must retain the above copyright notice, this
 * // list of conditions and the following disclaimer.
 * //
 * // 2. Redistributions in binary form must reproduce the above copyright notice,
 * // this list of conditions and the following disclaimer in the documentation
 * // and/or other materials provided with the distribution.
 * //
 * // 3.
Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/
// NOTE(review): in this copy of the file several generic parameter lists look
// like they were lost in extraction (for example `lut: Vec`, `PhantomData`,
// `interpolator: Box`, `AsPrimitive` bounds without a target type, and
// turbofish sites that read `::(` or `:: {`). Confirm against the upstream
// source before relying on the signatures exactly as shown here.
use crate::conversions::LutBarycentricReduction;
use crate::conversions::interpolator::BarycentricWeight;
use crate::conversions::lut_transforms::Lut4x3Factory;
use crate::conversions::neon::interpolator::*;
use crate::conversions::neon::interpolator_q0_15::NeonAlignedI16x4;
use crate::conversions::neon::lut4_to_3_q0_15::TransformLut4To3NeonQ0_15;
use crate::conversions::neon::rgb_xyz::NeonAlignedF32;
use crate::transform::PointeeSizeExpressible;
use crate::{
    BarycentricWeightScale, CmsError, DataColorSpace, InterpolationMethod, Layout,
    TransformExecutor, TransformOptions,
};
use num_traits::AsPrimitive;
use std::arch::aarch64::*;
use std::marker::PhantomData;

/// Floating-point NEON executor for a 4-channel-input, 3-channel-output LUT.
///
/// `lut` is indexed as a sequence of 3D cubes (one per grid step of the fourth
/// input channel); `weights` maps a reduced channel value to its grid
/// coordinates and blend weight.
struct TransformLut4To3Neon<
    T,
    U,
    const LAYOUT: u8,
    const GRID_SIZE: usize,
    const BIT_DEPTH: usize,
    const BINS: usize,
    const BARYCENTRIC_BINS: usize,
> {
    lut: Vec,
    _phantom: PhantomData,
    _phantom1: PhantomData,
    interpolation_method: InterpolationMethod,
    weights: Box<[BarycentricWeight; BINS]>,
    color_space: DataColorSpace,
    is_linear: bool,
}

impl<
    T: Copy + AsPrimitive + Default + PointeeSizeExpressible,
    U: AsPrimitive,
    const LAYOUT: u8,
    const GRID_SIZE: usize,
    const BIT_DEPTH: usize,
    const BINS: usize,
    const BARYCENTRIC_BINS: usize,
> TransformLut4To3Neon
where
    f32: AsPrimitive,
    u32: AsPrimitive,
    (): LutBarycentricReduction,
{
    /// Converts every 4-sample source pixel into one destination pixel.
    ///
    /// For each pixel the first three channels are interpolated inside the two
    /// cubes selected by the fourth channel, and the two results are blended
    /// with the fourth channel's weight. When the layout has four channels the
    /// alpha slot is filled with the maximum value for `BIT_DEPTH`.
    ///
    /// Lengths are not validated here; `transform` does that before calling.
    #[allow(unused_unsafe)]
    #[inline(never)]
    fn transform_chunk(
        &self,
        src: &[T],
        dst: &mut [T],
        interpolator: Box,
    ) {
        let cn = Layout::from(LAYOUT);
        let channels = cn.channels();
        let grid_size = GRID_SIZE as i32;
        // Number of nodes in one 3D cube.
        let grid_size3 = grid_size * grid_size * grid_size;

        let value_scale = unsafe { vdupq_n_f32(((1 << BIT_DEPTH) - 1) as f32) };
        let max_value = ((1 << BIT_DEPTH) - 1u32).as_();

        for (src, dst) in src.chunks_exact(4).zip(dst.chunks_exact_mut(channels)) {
            // Reduce each input channel to its bin index.
            let c = <() as LutBarycentricReduction>::reduce::(
                src[0],
            );
            let m = <() as LutBarycentricReduction>::reduce::(
                src[1],
            );
            let y = <() as LutBarycentricReduction>::reduce::(
                src[2],
            );
            let k = <() as LutBarycentricReduction>::reduce::(
                src[3],
            );

            // The fourth channel picks the two neighbouring cubes and their blend weight.
            let k_weights = self.weights[k.as_()];

            let w: i32 = k_weights.x;
            let w_n: i32 = k_weights.x_n;
            let t: f32 = k_weights.w;

            let table1 = &self.lut[(w * grid_size3) as usize..];
            let table2 = &self.lut[(w_n * grid_size3) as usize..];

            let (a0, b0) = interpolator.inter3_neon(
                table1,
                table2,
                c.as_(),
                m.as_(),
                y.as_(),
                self.weights.as_slice(),
            );
            let (a0, b0) = (a0.v, b0.v);

            if T::FINITE {
                unsafe {
                    let t0 = vdupq_n_f32(t);
                    // a0 - a0 * t + b0 * t
                    let hp = vfmsq_f32(a0, a0, t0);
                    let mut v = vfmaq_f32(hp, b0, t0);
                    // Scale to the integer range and clamp the upper bound.
                    v = vmulq_f32(v, value_scale);
                    v = vminq_f32(v, value_scale);
                    let jvx = vcvtaq_u32_f32(v);

                    dst[cn.r_i()] = vgetq_lane_u32::<0>(jvx).as_();
                    dst[cn.g_i()] = vgetq_lane_u32::<1>(jvx).as_();
                    dst[cn.b_i()] = vgetq_lane_u32::<2>(jvx).as_();
                }
            } else {
                unsafe {
                    let t0 = vdupq_n_f32(t);
                    // a0 - a0 * t + b0 * t, written out without scaling or clamping.
                    let hp = vfmsq_f32(a0, a0, t0);
                    let v = vfmaq_f32(hp, b0, t0);

                    dst[cn.r_i()] = vgetq_lane_f32::<0>(v).as_();
                    dst[cn.g_i()] = vgetq_lane_f32::<1>(v).as_();
                    dst[cn.b_i()] = vgetq_lane_f32::<2>(v).as_();
                }
            }
            if channels == 4 {
                dst[cn.a_i()] = max_value;
            }
        }
    }
}

impl<
    T: Copy + AsPrimitive + Default + PointeeSizeExpressible,
    U: AsPrimitive,
    const LAYOUT: u8,
    const GRID_SIZE: usize,
    const BIT_DEPTH: usize,
    const BINS: usize,
    const BARYCENTRIC_BINS: usize,
> TransformExecutor for TransformLut4To3Neon
where
    f32: AsPrimitive,
    u32: AsPrimitive,
    (): LutBarycentricReduction,
{
    /// Validates the buffer lengths and runs the conversion.
    ///
    /// # Errors
    ///
    /// * `CmsError::LaneMultipleOfChannels` if `src.len()` is not a multiple of
    ///   4 or `dst.len()` is not a multiple of the layout's channel count.
    /// * `CmsError::LaneSizeMismatch` if the two buffers hold a different
    ///   number of pixels.
    fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
        let cn = Layout::from(LAYOUT);
        let channels = cn.channels();
        if src.len() % 4 != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }
        if dst.len() % channels != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }
        let src_chunks = src.len() / 4;
        let dst_chunks = dst.len() / channels;
        if src_chunks != dst_chunks {
            return Err(CmsError::LaneSizeMismatch);
        }

        // Lab, XYZ and linear RGB always use trilinear interpolation, regardless
        // of the configured method.
        if self.color_space == DataColorSpace::Lab
            || (self.is_linear && self.color_space == DataColorSpace::Rgb)
            || self.color_space == DataColorSpace::Xyz
        {
            self.transform_chunk(src, dst, Box::new(TrilinearNeonDouble:: {}));
        } else {
            match self.interpolation_method {
                #[cfg(feature = "options")]
                InterpolationMethod::Tetrahedral => {
                    self.transform_chunk(src, dst, Box::new(TetrahedralNeonDouble:: {}));
                }
                #[cfg(feature = "options")]
                InterpolationMethod::Pyramid => {
                    self.transform_chunk(src, dst, Box::new(PyramidalNeonDouble:: {}));
                }
                #[cfg(feature = "options")]
                InterpolationMethod::Prism => {
                    self.transform_chunk(src, dst, Box::new(PrismaticNeonDouble:: {}));
                }
                InterpolationMethod::Linear => {
                    self.transform_chunk(src, dst, Box::new(TrilinearNeonDouble:: {}));
                }
            }
        }

        Ok(())
    }
}

/// Factory that builds the NEON 4-to-3 LUT executors.
pub(crate) struct NeonLut4x3Factory {}

impl Lut4x3Factory for NeonLut4x3Factory {
    /// Builds an executor for `lut`, which is consumed three values per node.
    ///
    /// The fixed-point executor is chosen when `options.prefer_fixed_point` is
    /// set, `BIT_DEPTH < 16`, and the CPU reports the `rdm` feature at runtime;
    /// otherwise the floating-point executor is used. Within each path,
    /// `options.barycentric_weight_scale` selects the weight table resolution.
    fn make_transform_4x3<
        T: Copy + AsPrimitive + Default + PointeeSizeExpressible + 'static + Send + Sync,
        const LAYOUT: u8,
        const GRID_SIZE: usize,
        const BIT_DEPTH: usize,
    >(
        lut: Vec,
        options: TransformOptions,
        color_space: DataColorSpace,
        is_linear: bool,
    ) -> Box + Sync + Send>
    where
        f32: AsPrimitive,
        u32: AsPrimitive,
        (): LutBarycentricReduction,
        (): LutBarycentricReduction,
    {
        if options.prefer_fixed_point
            && BIT_DEPTH < 16
            && std::arch::is_aarch64_feature_detected!("rdm")
        {
            // Quantisation scale: the full integer range for finite types,
            // otherwise a fixed 14-bit range.
            let q: f32 = if T::FINITE {
                ((1i32 << BIT_DEPTH as i32) - 1) as f32
            } else {
                ((1i32 << 14i32) - 1) as f32
            };
            // Pack each node into four i16 lanes; the fourth lane is padding.
            let lut = lut
                .chunks_exact(3)
                .map(|x| {
                    NeonAlignedI16x4([
                        (x[0] * q).round() as i16,
                        (x[1] * q).round() as i16,
                        (x[2] * q).round() as i16,
                        0,
                    ])
                })
                .collect::>();
            return match options.barycentric_weight_scale {
                BarycentricWeightScale::Low => Box::new(TransformLut4To3NeonQ0_15::<
                    T,
                    u8,
                    LAYOUT,
                    GRID_SIZE,
                    BIT_DEPTH,
                    256,
                    256,
                > {
                    lut,
                    _phantom: PhantomData,
                    _phantom1: PhantomData,
                    interpolation_method: options.interpolation_method,
                    weights: BarycentricWeight::::create_ranged_256::(),
                    color_space,
                    is_linear,
                }),
                #[cfg(feature = "options")]
                BarycentricWeightScale::High => Box::new(TransformLut4To3NeonQ0_15::<
                    T,
                    u16,
                    LAYOUT,
                    GRID_SIZE,
                    BIT_DEPTH,
                    65536,
                    65536,
                > {
                    lut,
                    _phantom: PhantomData,
                    _phantom1: PhantomData,
                    interpolation_method: options.interpolation_method,
                    weights: BarycentricWeight::::create_binned::(),
                    color_space,
                    is_linear,
                }),
            };
        }
        // Floating-point path: pack each node into four f32 lanes; the fourth
        // lane is padding.
        let lut = lut
            .chunks_exact(3)
            .map(|x| NeonAlignedF32([x[0], x[1], x[2], 0f32]))
            .collect::>();
        match options.barycentric_weight_scale {
            BarycentricWeightScale::Low => {
                Box::new(
                    TransformLut4To3Neon:: {
                        lut,
                        _phantom: PhantomData,
                        _phantom1: PhantomData,
                        interpolation_method: options.interpolation_method,
                        weights: BarycentricWeight::::create_ranged_256::(),
                        color_space,
                        is_linear,
                    },
                )
            }
            #[cfg(feature = "options")]
            BarycentricWeightScale::High => {
                Box::new(
                    TransformLut4To3Neon:: {
                        lut,
                        _phantom: PhantomData,
                        _phantom1: PhantomData,
                        interpolation_method: options.interpolation_method,
                        weights: BarycentricWeight::::create_binned::(),
                        color_space,
                        is_linear,
                    },
                )
            }
        }
    }
}
moxcms-0.7.7/src/conversions/neon/lut4_to_3_q0_15.rs000064400000000000000000000212641046102023000203370ustar 00000000000000/*
 * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
 * //
 * // Redistribution and use in source and binary forms, with or without modification,
 * // are permitted provided that the following conditions are met:
 * //
 * // 1. Redistributions of source code must retain the above copyright notice, this
 * // list of conditions and the following disclaimer.
 * //
 * // 2. Redistributions in binary form must reproduce the above copyright notice,
 * // this list of conditions and the following disclaimer in the documentation
 * // and/or other materials provided with the distribution.
 * //
 * // 3. Neither the name of the copyright holder nor the names of its
 * // contributors may be used to endorse or promote products derived from
 * // this software without specific prior written permission.
* // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ use crate::conversions::LutBarycentricReduction; use crate::conversions::interpolator::BarycentricWeight; use crate::conversions::neon::interpolator_q0_15::*; use crate::transform::PointeeSizeExpressible; use crate::{CmsError, DataColorSpace, InterpolationMethod, Layout, TransformExecutor}; use num_traits::AsPrimitive; use std::arch::aarch64::*; use std::marker::PhantomData; pub(crate) struct TransformLut4To3NeonQ0_15< T, U, const LAYOUT: u8, const GRID_SIZE: usize, const BIT_DEPTH: usize, const BINS: usize, const BARYCENTRIC_BINS: usize, > { pub(crate) lut: Vec, pub(crate) _phantom: PhantomData, pub(crate) _phantom1: PhantomData, pub(crate) interpolation_method: InterpolationMethod, pub(crate) weights: Box<[BarycentricWeight; BINS]>, pub(crate) color_space: DataColorSpace, pub(crate) is_linear: bool, } impl< T: Copy + AsPrimitive + Default + PointeeSizeExpressible, U: AsPrimitive, const LAYOUT: u8, const GRID_SIZE: usize, const BIT_DEPTH: usize, const BINS: usize, const BARYCENTRIC_BINS: usize, > TransformLut4To3NeonQ0_15 where f32: AsPrimitive, u32: AsPrimitive, (): LutBarycentricReduction, { #[allow(unused_unsafe)] #[target_feature(enable = "rdm")] 
#[inline(never)] unsafe fn transform_chunk( &self, src: &[T], dst: &mut [T], interpolator: Box, ) { unsafe { let cn = Layout::from(LAYOUT); let channels = cn.channels(); let grid_size = GRID_SIZE as i32; let grid_size3 = grid_size * grid_size * grid_size; let f_value_scale = vdupq_n_f32(1. / ((1 << 14i32) - 1) as f32); let max_value = ((1u32 << BIT_DEPTH) - 1).as_(); let v_max_scale = if T::FINITE { vdup_n_s16(((1i32 << BIT_DEPTH) - 1) as i16) } else { vdup_n_s16(((1i32 << 14i32) - 1) as i16) }; for (src, dst) in src.chunks_exact(4).zip(dst.chunks_exact_mut(channels)) { let c = <() as LutBarycentricReduction>::reduce::( src[0], ); let m = <() as LutBarycentricReduction>::reduce::( src[1], ); let y = <() as LutBarycentricReduction>::reduce::( src[2], ); let k = <() as LutBarycentricReduction>::reduce::( src[3], ); let k_weights = self.weights[k.as_()]; let w: i32 = k_weights.x; let w_n: i32 = k_weights.x_n; let t: i16 = k_weights.w; let table1 = &self.lut[(w * grid_size3) as usize..]; let table2 = &self.lut[(w_n * grid_size3) as usize..]; let (a0, b0) = interpolator.inter3_neon( table1, table2, c.as_(), m.as_(), y.as_(), self.weights.as_slice(), ); let (a0, b0) = (a0.v, b0.v); let t0 = vdup_n_s16(t); let hp = vqrdmlsh_s16(a0, a0, t0); let mut v = vqrdmlah_s16(hp, b0, t0); if T::FINITE { v = vmax_s16(v, vdup_n_s16(0)); v = vmin_s16(v, v_max_scale); dst[cn.r_i()] = (vget_lane_s16::<0>(v) as u32).as_(); dst[cn.g_i()] = (vget_lane_s16::<1>(v) as u32).as_(); dst[cn.b_i()] = (vget_lane_s16::<2>(v) as u32).as_(); } else { let o = vcvtq_f32_s32(vmovl_s16(v)); let r = vmulq_f32(o, f_value_scale); dst[cn.r_i()] = vgetq_lane_f32::<0>(r).as_(); dst[cn.g_i()] = vgetq_lane_f32::<1>(r).as_(); dst[cn.b_i()] = vgetq_lane_f32::<2>(r).as_(); } if channels == 4 { dst[cn.a_i()] = max_value; } } } } } impl< T: Copy + AsPrimitive + Default + PointeeSizeExpressible, U: AsPrimitive, const LAYOUT: u8, const GRID_SIZE: usize, const BIT_DEPTH: usize, const BINS: usize, const BARYCENTRIC_BINS: 
usize, > TransformExecutor for TransformLut4To3NeonQ0_15 where f32: AsPrimitive, u32: AsPrimitive, (): LutBarycentricReduction, { /* Checks that `src` holds whole 4-sample pixels, that `dst` holds whole `channels`-sample pixels (LaneMultipleOfChannels otherwise) and that both hold the same number of pixels (LaneSizeMismatch otherwise), then runs `transform_chunk`. Lab, XYZ and linear RGB colour spaces always use the trilinear interpolator; every other case follows `interpolation_method`, whose non-linear variants are only compiled in with the "options" feature. */ fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> { let cn = Layout::from(LAYOUT); let channels = cn.channels(); if src.len() % 4 != 0 { return Err(CmsError::LaneMultipleOfChannels); } if dst.len() % channels != 0 { return Err(CmsError::LaneMultipleOfChannels); } let src_chunks = src.len() / 4; let dst_chunks = dst.len() / channels; if src_chunks != dst_chunks { return Err(CmsError::LaneSizeMismatch); } unsafe { if self.color_space == DataColorSpace::Lab || (self.is_linear && self.color_space == DataColorSpace::Rgb) || self.color_space == DataColorSpace::Xyz { self.transform_chunk(src, dst, Box::new(TrilinearNeonQ0_15Double:: {})); } else { match self.interpolation_method { #[cfg(feature = "options")] InterpolationMethod::Tetrahedral => { self.transform_chunk( src, dst, Box::new(TetrahedralNeonQ0_15Double:: {}), ); } #[cfg(feature = "options")] InterpolationMethod::Pyramid => { self.transform_chunk( src, dst, Box::new(PyramidalNeonQ0_15Double:: {}), ); } #[cfg(feature = "options")] InterpolationMethod::Prism => { self.transform_chunk( src, dst, Box::new(PrismaticNeonQ0_15Double:: {}), ); } InterpolationMethod::Linear => { self.transform_chunk( src, dst, Box::new(TrilinearNeonQ0_15Double:: {}), ); } } } } Ok(()) } } moxcms-0.7.7/src/conversions/neon/mod.rs000064400000000000000000000043031046102023000163700ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. 
Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ mod interpolator; mod interpolator_q0_15; mod lut4_to_3; mod lut4_to_3_q0_15; mod rgb_xyz; mod rgb_xyz_opt; mod rgb_xyz_q1_30_opt; mod rgb_xyz_q2_13; mod rgb_xyz_q2_13_opt; mod t_lut3_to_3; mod t_lut3_to_3_q0_15; pub(crate) use lut4_to_3::NeonLut4x3Factory; pub(crate) use rgb_xyz::TransformShaperRgbNeon; pub(crate) use rgb_xyz_opt::TransformShaperRgbOptNeon; pub(crate) use rgb_xyz_q1_30_opt::TransformShaperQ1_30NeonOpt; pub(crate) use rgb_xyz_q2_13::TransformShaperQ2_13Neon; pub(crate) use rgb_xyz_q2_13_opt::TransformShaperQ2_13NeonOpt; pub(crate) use t_lut3_to_3::NeonLut3x3Factory; moxcms-0.7.7/src/conversions/neon/rgb_xyz.rs000064400000000000000000000462661046102023000173130ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. 
All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ use crate::conversions::neon::rgb_xyz_q2_13::{split_by_twos, split_by_twos_mut}; use crate::conversions::rgbxyz::TransformMatrixShaperV; use crate::transform::PointeeSizeExpressible; use crate::{CmsError, Layout, TransformExecutor}; use num_traits::AsPrimitive; use std::arch::aarch64::*; /* Eight u16 with 16-byte alignment. The executor below uses it as scratch space: a u32x4 result is stored through a cast pointer and elements 0, 2 and 4 are read back as the three channel indices. */ #[repr(align(16), C)] pub(crate) struct NeonAlignedU16(pub(crate) [u16; 8]); /* Four f32 with 16-byte alignment. */ #[repr(align(16), C)] pub(crate) struct NeonAlignedF32(pub(crate) [f32; 4]); /* NEON matrix-shaper executor with separate per-channel tables: `profile` supplies the r/g/b linearization and gamma tables and the adaptation matrix, `bit_depth` gives the largest sample value (2^bit_depth - 1), and `gamma_lut - 1` is the scale and upper clamp applied before indexing the gamma tables. */ pub(crate) struct TransformShaperRgbNeon< T: Clone + PointeeSizeExpressible + Copy + Default + 'static, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, > { pub(crate) profile: TransformMatrixShaperV, pub(crate) bit_depth: usize, pub(crate) gamma_lut: usize, } impl< T: Clone + PointeeSizeExpressible + Copy + Default + 'static, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, > TransformExecutor for TransformShaperRgbNeon where u32: AsPrimitive, { /* Converts `src` pixels to `dst` pixels. Returns LaneSizeMismatch when the two buffers hold different pixel counts and LaneMultipleOfChannels when either length is not a multiple of its channel count. Each pixel is linearized through the r/g/b `*_linear` tables, multiplied by the transposed adaptation matrix, scaled by `gamma_lut - 1` with 0.5 added, clamped from above, converted to u32 and used to index the `*_gamma` tables. Alpha is taken from the source when it has four channels (otherwise `2^bit_depth - 1`) and written only when the destination has four channels. The main loop handles four pixels per iteration, two from each half of the split buffers, finishing the pixels loaded on the previous iteration while loading the next; the `if let` after it flushes the last loaded set, and the final loop converts the remainder one pixel at a time. Panics if a linearization table is shorter than the asserted minimum. */ fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> { let src_cn = Layout::from(SRC_LAYOUT); let dst_cn = Layout::from(DST_LAYOUT); let src_channels = src_cn.channels(); let dst_channels = dst_cn.channels(); let mut temporary0 = NeonAlignedU16([0; 8]); let mut temporary1 = NeonAlignedU16([0; 8]); let mut temporary2 = NeonAlignedU16([0; 8]); let mut temporary3 = NeonAlignedU16([0; 8]); if src.len() / src_channels != dst.len() / dst_channels { return Err(CmsError::LaneSizeMismatch); } if src.len() % src_channels != 0 { return Err(CmsError::LaneMultipleOfChannels); } if dst.len() % dst_channels != 0 { return Err(CmsError::LaneMultipleOfChannels); } let t = self.profile.adaptation_matrix.transpose(); let scale = (self.gamma_lut - 1) as f32; let max_colors: T = ((1 << self.bit_depth) - 1).as_(); /* NOTE(review): the lookups below call get_unchecked with the raw sample as index and a full-scale sample equals `cap`, so `len() >= cap` looks one element short of the bound that is actually needed - confirm against the real table sizes. */ // safety precondition for linearization table if T::FINITE { let cap = (1 << self.bit_depth) - 1; assert!(self.profile.r_linear.len() >= cap); assert!(self.profile.g_linear.len() >= cap); assert!(self.profile.b_linear.len() >= cap); } else { assert!(self.profile.r_linear.len() >= 
T::NOT_FINITE_LINEAR_TABLE_SIZE); assert!(self.profile.g_linear.len() >= T::NOT_FINITE_LINEAR_TABLE_SIZE); assert!(self.profile.b_linear.len() >= T::NOT_FINITE_LINEAR_TABLE_SIZE); } let r_lin = &self.profile.r_linear; let g_lin = &self.profile.g_linear; let b_lin = &self.profile.b_linear; let (src_chunks, src_remainder) = split_by_twos(src, src_channels); let (dst_chunks, dst_remainder) = split_by_twos_mut(dst, dst_channels); unsafe { let m0 = vld1q_f32([t.v[0][0], t.v[0][1], t.v[0][2], 0.].as_ptr()); let m1 = vld1q_f32([t.v[1][0], t.v[1][1], t.v[1][2], 0.].as_ptr()); let m2 = vld1q_f32([t.v[2][0], t.v[2][1], t.v[2][2], 0.].as_ptr()); let v_scale = vdupq_n_f32(scale); let rnd = vdupq_n_f32(0.5); if !src_chunks.is_empty() { let (src0, src1) = src_chunks.split_at(src_chunks.len() / 2); let (dst0, dst1) = dst_chunks.split_at_mut(dst_chunks.len() / 2); let mut src_iter0 = src0.chunks_exact(src_channels * 2); let mut src_iter1 = src1.chunks_exact(src_channels * 2); let (mut r0, mut g0, mut b0, mut a0); let (mut r1, mut g1, mut b1, mut a1); let (mut r2, mut g2, mut b2, mut a2); let (mut r3, mut g3, mut b3, mut a3); if let (Some(src0), Some(src1)) = (src_iter0.next(), src_iter1.next()) { let r0p = r_lin.get_unchecked(src0[src_cn.r_i()]._as_usize()); let g0p = g_lin.get_unchecked(src0[src_cn.g_i()]._as_usize()); let b0p = b_lin.get_unchecked(src0[src_cn.b_i()]._as_usize()); let r1p = r_lin.get_unchecked(src0[src_cn.r_i() + src_channels]._as_usize()); let g1p = g_lin.get_unchecked(src0[src_cn.g_i() + src_channels]._as_usize()); let b1p = b_lin.get_unchecked(src0[src_cn.b_i() + src_channels]._as_usize()); let r2p = r_lin.get_unchecked(src1[src_cn.r_i()]._as_usize()); let g2p = g_lin.get_unchecked(src1[src_cn.g_i()]._as_usize()); let b2p = b_lin.get_unchecked(src1[src_cn.b_i()]._as_usize()); let r3p = r_lin.get_unchecked(src1[src_cn.r_i() + src_channels]._as_usize()); let g3p = g_lin.get_unchecked(src1[src_cn.g_i() + src_channels]._as_usize()); let b3p = 
b_lin.get_unchecked(src1[src_cn.b_i() + src_channels]._as_usize()); r0 = vld1q_dup_f32(r0p); g0 = vld1q_dup_f32(g0p); b0 = vld1q_dup_f32(b0p); r1 = vld1q_dup_f32(r1p); g1 = vld1q_dup_f32(g1p); b1 = vld1q_dup_f32(b1p); r2 = vld1q_dup_f32(r2p); g2 = vld1q_dup_f32(g2p); b2 = vld1q_dup_f32(b2p); r3 = vld1q_dup_f32(r3p); g3 = vld1q_dup_f32(g3p); b3 = vld1q_dup_f32(b3p); a0 = if src_channels == 4 { src0[src_cn.a_i()] } else { max_colors }; a1 = if src_channels == 4 { src0[src_cn.a_i() + src_channels] } else { max_colors }; a2 = if src_channels == 4 { src1[src_cn.a_i()] } else { max_colors }; a3 = if src_channels == 4 { src1[src_cn.a_i() + src_channels] } else { max_colors }; } else { r0 = vdupq_n_f32(0.); g0 = vdupq_n_f32(0.); b0 = vdupq_n_f32(0.); r1 = vdupq_n_f32(0.); g1 = vdupq_n_f32(0.); b1 = vdupq_n_f32(0.); r2 = vdupq_n_f32(0.); g2 = vdupq_n_f32(0.); b2 = vdupq_n_f32(0.); r3 = vdupq_n_f32(0.); g3 = vdupq_n_f32(0.); b3 = vdupq_n_f32(0.); a0 = max_colors; a1 = max_colors; a2 = max_colors; a3 = max_colors; } for (((src0, src1), dst0), dst1) in src_iter0 .zip(src_iter1) .zip(dst0.chunks_exact_mut(dst_channels * 2)) .zip(dst1.chunks_exact_mut(dst_channels * 2)) { let v0_0 = vmulq_f32(r0, m0); let v0_1 = vmulq_f32(r1, m0); let v0_2 = vmulq_f32(r2, m0); let v0_3 = vmulq_f32(r3, m0); let v1_0 = vfmaq_f32(v0_0, g0, m1); let v1_1 = vfmaq_f32(v0_1, g1, m1); let v1_2 = vfmaq_f32(v0_2, g2, m1); let v1_3 = vfmaq_f32(v0_3, g3, m1); let mut vr0 = vfmaq_f32(v1_0, b0, m2); let mut vr1 = vfmaq_f32(v1_1, b1, m2); let mut vr2 = vfmaq_f32(v1_2, b2, m2); let mut vr3 = vfmaq_f32(v1_3, b3, m2); vr0 = vfmaq_f32(rnd, vr0, v_scale); vr1 = vfmaq_f32(rnd, vr1, v_scale); vr2 = vfmaq_f32(rnd, vr2, v_scale); vr3 = vfmaq_f32(rnd, vr3, v_scale); vr0 = vminq_f32(vr0, v_scale); vr1 = vminq_f32(vr1, v_scale); vr2 = vminq_f32(vr2, v_scale); vr3 = vminq_f32(vr3, v_scale); let zx0 = vcvtq_u32_f32(vr0); let zx1 = vcvtq_u32_f32(vr1); let zx2 = vcvtq_u32_f32(vr2); let zx3 = vcvtq_u32_f32(vr3); 
vst1q_u32(temporary0.0.as_mut_ptr() as *mut _, zx0); vst1q_u32(temporary1.0.as_mut_ptr() as *mut _, zx1); vst1q_u32(temporary2.0.as_mut_ptr() as *mut _, zx2); vst1q_u32(temporary3.0.as_mut_ptr() as *mut _, zx3); let r0p = r_lin.get_unchecked(src0[src_cn.r_i()]._as_usize()); let g0p = g_lin.get_unchecked(src0[src_cn.g_i()]._as_usize()); let b0p = b_lin.get_unchecked(src0[src_cn.b_i()]._as_usize()); let r1p = r_lin.get_unchecked(src0[src_cn.r_i() + src_channels]._as_usize()); let g1p = g_lin.get_unchecked(src0[src_cn.g_i() + src_channels]._as_usize()); let b1p = b_lin.get_unchecked(src0[src_cn.b_i() + src_channels]._as_usize()); let r2p = r_lin.get_unchecked(src1[src_cn.r_i()]._as_usize()); let g2p = g_lin.get_unchecked(src1[src_cn.g_i()]._as_usize()); let b2p = b_lin.get_unchecked(src1[src_cn.b_i()]._as_usize()); let r3p = r_lin.get_unchecked(src1[src_cn.r_i() + src_channels]._as_usize()); let g3p = g_lin.get_unchecked(src1[src_cn.g_i() + src_channels]._as_usize()); let b3p = b_lin.get_unchecked(src1[src_cn.b_i() + src_channels]._as_usize()); r0 = vld1q_dup_f32(r0p); g0 = vld1q_dup_f32(g0p); b0 = vld1q_dup_f32(b0p); r1 = vld1q_dup_f32(r1p); g1 = vld1q_dup_f32(g1p); b1 = vld1q_dup_f32(b1p); r2 = vld1q_dup_f32(r2p); g2 = vld1q_dup_f32(g2p); b2 = vld1q_dup_f32(b2p); r3 = vld1q_dup_f32(r3p); g3 = vld1q_dup_f32(g3p); b3 = vld1q_dup_f32(b3p); dst0[dst_cn.r_i()] = self.profile.r_gamma[temporary0.0[0] as usize]; dst0[dst_cn.g_i()] = self.profile.g_gamma[temporary0.0[2] as usize]; dst0[dst_cn.b_i()] = self.profile.b_gamma[temporary0.0[4] as usize]; if dst_channels == 4 { dst0[dst_cn.a_i()] = a0; } dst0[dst_cn.r_i() + dst_channels] = self.profile.r_gamma[temporary1.0[0] as usize]; dst0[dst_cn.g_i() + dst_channels] = self.profile.g_gamma[temporary1.0[2] as usize]; dst0[dst_cn.b_i() + dst_channels] = self.profile.b_gamma[temporary1.0[4] as usize]; if dst_channels == 4 { dst0[dst_cn.a_i() + dst_channels] = a1; } dst1[dst_cn.r_i()] = self.profile.r_gamma[temporary2.0[0] as 
usize]; dst1[dst_cn.g_i()] = self.profile.g_gamma[temporary2.0[2] as usize]; dst1[dst_cn.b_i()] = self.profile.b_gamma[temporary2.0[4] as usize]; if dst_channels == 4 { dst1[dst_cn.a_i()] = a2; } dst1[dst_cn.r_i() + dst_channels] = self.profile.r_gamma[temporary3.0[0] as usize]; dst1[dst_cn.g_i() + dst_channels] = self.profile.g_gamma[temporary3.0[2] as usize]; dst1[dst_cn.b_i() + dst_channels] = self.profile.b_gamma[temporary3.0[4] as usize]; if dst_channels == 4 { dst1[dst_cn.a_i() + dst_channels] = a3; } a0 = if src_channels == 4 { src0[src_cn.a_i()] } else { max_colors }; a1 = if src_channels == 4 { src0[src_cn.a_i() + src_channels] } else { max_colors }; a2 = if src_channels == 4 { src1[src_cn.a_i()] } else { max_colors }; a3 = if src_channels == 4 { src1[src_cn.a_i() + src_channels] } else { max_colors }; } if let (Some(dst0), Some(dst1)) = ( dst0.chunks_exact_mut(dst_channels * 2).last(), dst1.chunks_exact_mut(dst_channels * 2).last(), ) { let v0_0 = vmulq_f32(r0, m0); let v0_1 = vmulq_f32(r1, m0); let v0_2 = vmulq_f32(r2, m0); let v0_3 = vmulq_f32(r3, m0); let v1_0 = vfmaq_f32(v0_0, g0, m1); let v1_1 = vfmaq_f32(v0_1, g1, m1); let v1_2 = vfmaq_f32(v0_2, g2, m1); let v1_3 = vfmaq_f32(v0_3, g3, m1); let mut vr0 = vfmaq_f32(v1_0, b0, m2); let mut vr1 = vfmaq_f32(v1_1, b1, m2); let mut vr2 = vfmaq_f32(v1_2, b2, m2); let mut vr3 = vfmaq_f32(v1_3, b3, m2); vr0 = vfmaq_f32(rnd, vr0, v_scale); vr1 = vfmaq_f32(rnd, vr1, v_scale); vr2 = vfmaq_f32(rnd, vr2, v_scale); vr3 = vfmaq_f32(rnd, vr3, v_scale); vr0 = vminq_f32(vr0, v_scale); vr1 = vminq_f32(vr1, v_scale); vr2 = vminq_f32(vr2, v_scale); vr3 = vminq_f32(vr3, v_scale); let zx0 = vcvtq_u32_f32(vr0); let zx1 = vcvtq_u32_f32(vr1); let zx2 = vcvtq_u32_f32(vr2); let zx3 = vcvtq_u32_f32(vr3); vst1q_u32(temporary0.0.as_mut_ptr() as *mut _, zx0); vst1q_u32(temporary1.0.as_mut_ptr() as *mut _, zx1); vst1q_u32(temporary2.0.as_mut_ptr() as *mut _, zx2); vst1q_u32(temporary3.0.as_mut_ptr() as *mut _, zx3); dst0[dst_cn.r_i()] 
= self.profile.r_gamma[temporary0.0[0] as usize]; dst0[dst_cn.g_i()] = self.profile.g_gamma[temporary0.0[2] as usize]; dst0[dst_cn.b_i()] = self.profile.b_gamma[temporary0.0[4] as usize]; if dst_channels == 4 { dst0[dst_cn.a_i()] = a0; } dst0[dst_cn.r_i() + dst_channels] = self.profile.r_gamma[temporary1.0[0] as usize]; dst0[dst_cn.g_i() + dst_channels] = self.profile.g_gamma[temporary1.0[2] as usize]; dst0[dst_cn.b_i() + dst_channels] = self.profile.b_gamma[temporary1.0[4] as usize]; if dst_channels == 4 { dst0[dst_cn.a_i() + dst_channels] = a1; } dst1[dst_cn.r_i()] = self.profile.r_gamma[temporary2.0[0] as usize]; dst1[dst_cn.g_i()] = self.profile.g_gamma[temporary2.0[2] as usize]; dst1[dst_cn.b_i()] = self.profile.b_gamma[temporary2.0[4] as usize]; if dst_channels == 4 { dst1[dst_cn.a_i()] = a2; } dst1[dst_cn.r_i() + dst_channels] = self.profile.r_gamma[temporary3.0[0] as usize]; dst1[dst_cn.g_i() + dst_channels] = self.profile.g_gamma[temporary3.0[2] as usize]; dst1[dst_cn.b_i() + dst_channels] = self.profile.b_gamma[temporary3.0[4] as usize]; if dst_channels == 4 { dst1[dst_cn.a_i() + dst_channels] = a3; } } } /* Remainder: whatever split_by_twos did not place in the paired chunks is converted one pixel at a time. */ for (src, dst) in src_remainder .chunks_exact(src_channels) .zip(dst_remainder.chunks_exact_mut(dst_channels)) { let rp = r_lin.get_unchecked(src[src_cn.r_i()]._as_usize()); let gp = g_lin.get_unchecked(src[src_cn.g_i()]._as_usize()); let bp = b_lin.get_unchecked(src[src_cn.b_i()]._as_usize()); let r = vld1q_dup_f32(rp); let g = vld1q_dup_f32(gp); let b = vld1q_dup_f32(bp); let a = if src_channels == 4 { src[src_cn.a_i()] } else { max_colors }; let v0 = vmulq_f32(r, m0); let v1 = vfmaq_f32(v0, g, m1); let mut v = vfmaq_f32(v1, b, m2); v = vfmaq_f32(rnd, v, v_scale); v = vminq_f32(v, v_scale); let zx = vcvtq_u32_f32(v); vst1q_u32(temporary0.0.as_mut_ptr() as *mut _, zx); dst[dst_cn.r_i()] = self.profile.r_gamma[temporary0.0[0] as usize]; dst[dst_cn.g_i()] = self.profile.g_gamma[temporary0.0[2] as usize]; dst[dst_cn.b_i()] = 
self.profile.b_gamma[temporary0.0[4] as usize]; if dst_channels == 4 { dst[dst_cn.a_i()] = a; } } } Ok(()) } } moxcms-0.7.7/src/conversions/neon/rgb_xyz_opt.rs000064400000000000000000000453611046102023000201700ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ use crate::conversions::neon::rgb_xyz::NeonAlignedU16; use crate::conversions::neon::rgb_xyz_q2_13::{split_by_twos, split_by_twos_mut}; use crate::conversions::rgbxyz::TransformMatrixShaperOptimizedV; use crate::transform::PointeeSizeExpressible; use crate::{CmsError, Layout, TransformExecutor}; use num_traits::AsPrimitive; use std::arch::aarch64::*; /* NEON matrix-shaper executor that uses one `linear` table and one `gamma` table for all three channels; `bit_depth` gives the largest sample value (2^bit_depth - 1) and `gamma_lut - 1` is the scale and upper clamp applied before indexing the gamma table. */ pub(crate) struct TransformShaperRgbOptNeon< T: Clone + PointeeSizeExpressible + Copy + Default + 'static, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, > { pub(crate) profile: TransformMatrixShaperOptimizedV, pub(crate) bit_depth: usize, pub(crate) gamma_lut: usize, } impl< T: Clone + PointeeSizeExpressible + Copy + Default + 'static, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, > TransformExecutor for TransformShaperRgbOptNeon where u32: AsPrimitive, { /* Converts `src` pixels to `dst` pixels. Returns LaneSizeMismatch when the two buffers hold different pixel counts and LaneMultipleOfChannels when either length is not a multiple of its channel count. Each channel is linearized through the shared `linear` table, the pixel is multiplied by the transposed adaptation matrix, scaled by `gamma_lut - 1` with 0.5 added, clamped from above, converted to u32 and used to index the shared `gamma` table. Alpha is taken from the source when it has four channels (otherwise `2^bit_depth - 1`) and written only when the destination has four channels. The main loop handles four pixels per iteration, two from each half of the split buffers, finishing the pixels loaded on the previous iteration while loading the next; the `if let` after it flushes the last loaded set, and the final loop converts the remainder one pixel at a time. Panics if the linearization table is shorter than the asserted minimum. */ fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> { let src_cn = Layout::from(SRC_LAYOUT); let dst_cn = Layout::from(DST_LAYOUT); let src_channels = src_cn.channels(); let dst_channels = dst_cn.channels(); let mut temporary0 = NeonAlignedU16([0; 8]); let mut temporary1 = NeonAlignedU16([0; 8]); let mut temporary2 = NeonAlignedU16([0; 8]); let mut temporary3 = NeonAlignedU16([0; 8]); if src.len() / src_channels != dst.len() / dst_channels { return Err(CmsError::LaneSizeMismatch); } if src.len() % src_channels != 0 { return Err(CmsError::LaneMultipleOfChannels); } if dst.len() % dst_channels != 0 { return Err(CmsError::LaneMultipleOfChannels); } let t = self.profile.adaptation_matrix.transpose(); let scale = (self.gamma_lut - 1) as f32; let max_colors: T = ((1 << self.bit_depth) - 1).as_(); /* NOTE(review): the lookups below call get_unchecked with the raw sample as index and a full-scale sample equals `cap`, so `len() >= cap` looks one element short of the bound that is actually needed - confirm against the real table size. */ // safety precondition for linearization table if T::FINITE { let cap = (1 << self.bit_depth) - 1; assert!(self.profile.linear.len() >= cap); } else { assert!(self.profile.linear.len() >= T::NOT_FINITE_LINEAR_TABLE_SIZE); } let lut_lin = &self.profile.linear; let (src_chunks, src_remainder) = split_by_twos(src, src_channels); let (dst_chunks, dst_remainder) = 
split_by_twos_mut(dst, dst_channels); unsafe { let m0 = vld1q_f32([t.v[0][0], t.v[0][1], t.v[0][2], 0.].as_ptr()); let m1 = vld1q_f32([t.v[1][0], t.v[1][1], t.v[1][2], 0.].as_ptr()); let m2 = vld1q_f32([t.v[2][0], t.v[2][1], t.v[2][2], 0.].as_ptr()); let v_scale = vdupq_n_f32(scale); let rnd = vdupq_n_f32(0.5); if !src_chunks.is_empty() { let (src0, src1) = src_chunks.split_at(src_chunks.len() / 2); let (dst0, dst1) = dst_chunks.split_at_mut(dst_chunks.len() / 2); let mut src_iter0 = src0.chunks_exact(src_channels * 2); let mut src_iter1 = src1.chunks_exact(src_channels * 2); let (mut r0, mut g0, mut b0, mut a0); let (mut r1, mut g1, mut b1, mut a1); let (mut r2, mut g2, mut b2, mut a2); let (mut r3, mut g3, mut b3, mut a3); if let (Some(src0), Some(src1)) = (src_iter0.next(), src_iter1.next()) { let r0p = lut_lin.get_unchecked(src0[src_cn.r_i()]._as_usize()); let g0p = lut_lin.get_unchecked(src0[src_cn.g_i()]._as_usize()); let b0p = lut_lin.get_unchecked(src0[src_cn.b_i()]._as_usize()); let r1p = lut_lin.get_unchecked(src0[src_cn.r_i() + src_channels]._as_usize()); let g1p = lut_lin.get_unchecked(src0[src_cn.g_i() + src_channels]._as_usize()); let b1p = lut_lin.get_unchecked(src0[src_cn.b_i() + src_channels]._as_usize()); let r2p = lut_lin.get_unchecked(src1[src_cn.r_i()]._as_usize()); let g2p = lut_lin.get_unchecked(src1[src_cn.g_i()]._as_usize()); let b2p = lut_lin.get_unchecked(src1[src_cn.b_i()]._as_usize()); let r3p = lut_lin.get_unchecked(src1[src_cn.r_i() + src_channels]._as_usize()); let g3p = lut_lin.get_unchecked(src1[src_cn.g_i() + src_channels]._as_usize()); let b3p = lut_lin.get_unchecked(src1[src_cn.b_i() + src_channels]._as_usize()); r0 = vld1q_dup_f32(r0p); g0 = vld1q_dup_f32(g0p); b0 = vld1q_dup_f32(b0p); r1 = vld1q_dup_f32(r1p); g1 = vld1q_dup_f32(g1p); b1 = vld1q_dup_f32(b1p); r2 = vld1q_dup_f32(r2p); g2 = vld1q_dup_f32(g2p); b2 = vld1q_dup_f32(b2p); r3 = vld1q_dup_f32(r3p); g3 = vld1q_dup_f32(g3p); b3 = vld1q_dup_f32(b3p); a0 = if src_channels 
== 4 { src0[src_cn.a_i()] } else { max_colors }; a1 = if src_channels == 4 { src0[src_cn.a_i() + src_channels] } else { max_colors }; a2 = if src_channels == 4 { src1[src_cn.a_i()] } else { max_colors }; a3 = if src_channels == 4 { src1[src_cn.a_i() + src_channels] } else { max_colors }; } else { r0 = vdupq_n_f32(0.); g0 = vdupq_n_f32(0.); b0 = vdupq_n_f32(0.); r1 = vdupq_n_f32(0.); g1 = vdupq_n_f32(0.); b1 = vdupq_n_f32(0.); r2 = vdupq_n_f32(0.); g2 = vdupq_n_f32(0.); b2 = vdupq_n_f32(0.); r3 = vdupq_n_f32(0.); g3 = vdupq_n_f32(0.); b3 = vdupq_n_f32(0.); a0 = max_colors; a1 = max_colors; a2 = max_colors; a3 = max_colors; } for (((src0, src1), dst0), dst1) in src_iter0 .zip(src_iter1) .zip(dst0.chunks_exact_mut(dst_channels * 2)) .zip(dst1.chunks_exact_mut(dst_channels * 2)) { let v0_0 = vmulq_f32(r0, m0); let v0_1 = vmulq_f32(r1, m0); let v0_2 = vmulq_f32(r2, m0); let v0_3 = vmulq_f32(r3, m0); let v1_0 = vfmaq_f32(v0_0, g0, m1); let v1_1 = vfmaq_f32(v0_1, g1, m1); let v1_2 = vfmaq_f32(v0_2, g2, m1); let v1_3 = vfmaq_f32(v0_3, g3, m1); let mut vr0 = vfmaq_f32(v1_0, b0, m2); let mut vr1 = vfmaq_f32(v1_1, b1, m2); let mut vr2 = vfmaq_f32(v1_2, b2, m2); let mut vr3 = vfmaq_f32(v1_3, b3, m2); vr0 = vfmaq_f32(rnd, vr0, v_scale); vr1 = vfmaq_f32(rnd, vr1, v_scale); vr2 = vfmaq_f32(rnd, vr2, v_scale); vr3 = vfmaq_f32(rnd, vr3, v_scale); vr0 = vminq_f32(vr0, v_scale); vr1 = vminq_f32(vr1, v_scale); vr2 = vminq_f32(vr2, v_scale); vr3 = vminq_f32(vr3, v_scale); let zx0 = vcvtq_u32_f32(vr0); let zx1 = vcvtq_u32_f32(vr1); let zx2 = vcvtq_u32_f32(vr2); let zx3 = vcvtq_u32_f32(vr3); vst1q_u32(temporary0.0.as_mut_ptr() as *mut _, zx0); vst1q_u32(temporary1.0.as_mut_ptr() as *mut _, zx1); vst1q_u32(temporary2.0.as_mut_ptr() as *mut _, zx2); vst1q_u32(temporary3.0.as_mut_ptr() as *mut _, zx3); let r0p = lut_lin.get_unchecked(src0[src_cn.r_i()]._as_usize()); let g0p = lut_lin.get_unchecked(src0[src_cn.g_i()]._as_usize()); let b0p = 
lut_lin.get_unchecked(src0[src_cn.b_i()]._as_usize()); let r1p = lut_lin.get_unchecked(src0[src_cn.r_i() + src_channels]._as_usize()); let g1p = lut_lin.get_unchecked(src0[src_cn.g_i() + src_channels]._as_usize()); let b1p = lut_lin.get_unchecked(src0[src_cn.b_i() + src_channels]._as_usize()); let r2p = lut_lin.get_unchecked(src1[src_cn.r_i()]._as_usize()); let g2p = lut_lin.get_unchecked(src1[src_cn.g_i()]._as_usize()); let b2p = lut_lin.get_unchecked(src1[src_cn.b_i()]._as_usize()); let r3p = lut_lin.get_unchecked(src1[src_cn.r_i() + src_channels]._as_usize()); let g3p = lut_lin.get_unchecked(src1[src_cn.g_i() + src_channels]._as_usize()); let b3p = lut_lin.get_unchecked(src1[src_cn.b_i() + src_channels]._as_usize()); r0 = vld1q_dup_f32(r0p); g0 = vld1q_dup_f32(g0p); b0 = vld1q_dup_f32(b0p); r1 = vld1q_dup_f32(r1p); g1 = vld1q_dup_f32(g1p); b1 = vld1q_dup_f32(b1p); r2 = vld1q_dup_f32(r2p); g2 = vld1q_dup_f32(g2p); b2 = vld1q_dup_f32(b2p); r3 = vld1q_dup_f32(r3p); g3 = vld1q_dup_f32(g3p); b3 = vld1q_dup_f32(b3p); dst0[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize]; dst0[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize]; dst0[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize]; if dst_channels == 4 { dst0[dst_cn.a_i()] = a0; } dst0[dst_cn.r_i() + dst_channels] = self.profile.gamma[temporary1.0[0] as usize]; dst0[dst_cn.g_i() + dst_channels] = self.profile.gamma[temporary1.0[2] as usize]; dst0[dst_cn.b_i() + dst_channels] = self.profile.gamma[temporary1.0[4] as usize]; if dst_channels == 4 { dst0[dst_cn.a_i() + dst_channels] = a1; } dst1[dst_cn.r_i()] = self.profile.gamma[temporary2.0[0] as usize]; dst1[dst_cn.g_i()] = self.profile.gamma[temporary2.0[2] as usize]; dst1[dst_cn.b_i()] = self.profile.gamma[temporary2.0[4] as usize]; if dst_channels == 4 { dst1[dst_cn.a_i()] = a2; } dst1[dst_cn.r_i() + dst_channels] = self.profile.gamma[temporary3.0[0] as usize]; dst1[dst_cn.g_i() + dst_channels] = self.profile.gamma[temporary3.0[2] as 
usize]; dst1[dst_cn.b_i() + dst_channels] = self.profile.gamma[temporary3.0[4] as usize]; if dst_channels == 4 { dst1[dst_cn.a_i() + dst_channels] = a3; } a0 = if src_channels == 4 { src0[src_cn.a_i()] } else { max_colors }; a1 = if src_channels == 4 { src0[src_cn.a_i() + src_channels] } else { max_colors }; a2 = if src_channels == 4 { src1[src_cn.a_i()] } else { max_colors }; a3 = if src_channels == 4 { src1[src_cn.a_i() + src_channels] } else { max_colors }; } if let (Some(dst0), Some(dst1)) = ( dst0.chunks_exact_mut(dst_channels * 2).last(), dst1.chunks_exact_mut(dst_channels * 2).last(), ) { let v0_0 = vmulq_f32(r0, m0); let v0_1 = vmulq_f32(r1, m0); let v0_2 = vmulq_f32(r2, m0); let v0_3 = vmulq_f32(r3, m0); let v1_0 = vfmaq_f32(v0_0, g0, m1); let v1_1 = vfmaq_f32(v0_1, g1, m1); let v1_2 = vfmaq_f32(v0_2, g2, m1); let v1_3 = vfmaq_f32(v0_3, g3, m1); let mut vr0 = vfmaq_f32(v1_0, b0, m2); let mut vr1 = vfmaq_f32(v1_1, b1, m2); let mut vr2 = vfmaq_f32(v1_2, b2, m2); let mut vr3 = vfmaq_f32(v1_3, b3, m2); vr0 = vfmaq_f32(rnd, vr0, v_scale); vr1 = vfmaq_f32(rnd, vr1, v_scale); vr2 = vfmaq_f32(rnd, vr2, v_scale); vr3 = vfmaq_f32(rnd, vr3, v_scale); vr0 = vminq_f32(vr0, v_scale); vr1 = vminq_f32(vr1, v_scale); vr2 = vminq_f32(vr2, v_scale); vr3 = vminq_f32(vr3, v_scale); let zx0 = vcvtq_u32_f32(vr0); let zx1 = vcvtq_u32_f32(vr1); let zx2 = vcvtq_u32_f32(vr2); let zx3 = vcvtq_u32_f32(vr3); vst1q_u32(temporary0.0.as_mut_ptr() as *mut _, zx0); vst1q_u32(temporary1.0.as_mut_ptr() as *mut _, zx1); vst1q_u32(temporary2.0.as_mut_ptr() as *mut _, zx2); vst1q_u32(temporary3.0.as_mut_ptr() as *mut _, zx3); dst0[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize]; dst0[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize]; dst0[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize]; if dst_channels == 4 { dst0[dst_cn.a_i()] = a0; } dst0[dst_cn.r_i() + dst_channels] = self.profile.gamma[temporary1.0[0] as usize]; dst0[dst_cn.g_i() + dst_channels] = 
self.profile.gamma[temporary1.0[2] as usize]; dst0[dst_cn.b_i() + dst_channels] = self.profile.gamma[temporary1.0[4] as usize]; if dst_channels == 4 { dst0[dst_cn.a_i() + dst_channels] = a1; } dst1[dst_cn.r_i()] = self.profile.gamma[temporary2.0[0] as usize]; dst1[dst_cn.g_i()] = self.profile.gamma[temporary2.0[2] as usize]; dst1[dst_cn.b_i()] = self.profile.gamma[temporary2.0[4] as usize]; if dst_channels == 4 { dst1[dst_cn.a_i()] = a2; } dst1[dst_cn.r_i() + dst_channels] = self.profile.gamma[temporary3.0[0] as usize]; dst1[dst_cn.g_i() + dst_channels] = self.profile.gamma[temporary3.0[2] as usize]; dst1[dst_cn.b_i() + dst_channels] = self.profile.gamma[temporary3.0[4] as usize]; if dst_channels == 4 { dst1[dst_cn.a_i() + dst_channels] = a3; } } } /* Remainder: whatever split_by_twos did not place in the paired chunks is converted one pixel at a time. */ for (src, dst) in src_remainder .chunks_exact(src_channels) .zip(dst_remainder.chunks_exact_mut(dst_channels)) { let rp = lut_lin.get_unchecked(src[src_cn.r_i()]._as_usize()); let gp = lut_lin.get_unchecked(src[src_cn.g_i()]._as_usize()); let bp = lut_lin.get_unchecked(src[src_cn.b_i()]._as_usize()); let r = vld1q_dup_f32(rp); let g = vld1q_dup_f32(gp); let b = vld1q_dup_f32(bp); let a = if src_channels == 4 { src[src_cn.a_i()] } else { max_colors }; let v0 = vmulq_f32(r, m0); let v1 = vfmaq_f32(v0, g, m1); let mut v = vfmaq_f32(v1, b, m2); v = vfmaq_f32(rnd, v, v_scale); v = vminq_f32(v, v_scale); let zx = vcvtq_u32_f32(v); vst1q_u32(temporary0.0.as_mut_ptr() as *mut _, zx); dst[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize]; dst[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize]; dst[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize]; if dst_channels == 4 { dst[dst_cn.a_i()] = a; } } } Ok(()) } } moxcms-0.7.7/src/conversions/neon/rgb_xyz_q1_30_opt.rs000064400000000000000000000436331046102023000210730ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. 
* // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ use crate::conversions::neon::rgb_xyz_q2_13::{split_by_twos, split_by_twos_mut}; use crate::conversions::rgbxyz_fixed::TransformMatrixShaperFpOptVec; use crate::transform::PointeeSizeExpressible; use crate::{CmsError, Layout, TransformExecutor}; use num_traits::AsPrimitive; use std::arch::aarch64::*; pub(crate) struct TransformShaperQ1_30NeonOpt { pub(crate) profile: TransformMatrixShaperFpOptVec, pub(crate) gamma_lut: usize, pub(crate) bit_depth: usize, } impl< T: Copy + PointeeSizeExpressible + 'static + Default, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, > TransformShaperQ1_30NeonOpt where u32: AsPrimitive, { #[target_feature(enable = "rdm")] unsafe fn transform_impl(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> { let src_cn = Layout::from(SRC_LAYOUT); let dst_cn = Layout::from(DST_LAYOUT); let src_channels = src_cn.channels(); let dst_channels = dst_cn.channels(); if src.len() / src_channels != dst.len() / dst_channels { return Err(CmsError::LaneSizeMismatch); } if src.len() % src_channels != 0 { return Err(CmsError::LaneMultipleOfChannels); } if dst.len() % dst_channels != 0 { return Err(CmsError::LaneMultipleOfChannels); } let t = self.profile.adaptation_matrix.transpose(); let max_colors: T = ((1 << self.bit_depth) - 1).as_(); let (src_chunks, src_remainder) = split_by_twos(src, src_channels); let (dst_chunks, dst_remainder) = split_by_twos_mut(dst, dst_channels); // safety precondition for linearization table if T::FINITE { assert!(self.profile.linear.len() >= (1 << self.bit_depth) - 1); } else { assert!(self.profile.linear.len() >= T::NOT_FINITE_LINEAR_TABLE_SIZE); } unsafe { let m0 = vld1q_s32([t.v[0][0], t.v[0][1], t.v[0][2], 0].as_ptr()); let m1 = vld1q_s32([t.v[1][0], t.v[1][1], t.v[1][2], 0].as_ptr()); let m2 = vld1q_s32([t.v[2][0], t.v[2][1], t.v[2][2], 0].as_ptr()); let lin_lut = &self.profile.linear; let v_max_value = vdup_n_u16((self.gamma_lut - 1) as u16); if !src_chunks.is_empty() { let (src0, src1) = 
src_chunks.split_at(src_chunks.len() / 2); let (dst0, dst1) = dst_chunks.split_at_mut(dst_chunks.len() / 2); let mut src_iter0 = src0.chunks_exact(src_channels * 2); let mut src_iter1 = src1.chunks_exact(src_channels * 2); let (mut r0, mut g0, mut b0, mut a0); let (mut r1, mut g1, mut b1, mut a1); let (mut r2, mut g2, mut b2, mut a2); let (mut r3, mut g3, mut b3, mut a3); if let (Some(src0), Some(src1)) = (src_iter0.next(), src_iter1.next()) { let r0p = lin_lut.get_unchecked(src0[src_cn.r_i()]._as_usize()); let g0p = lin_lut.get_unchecked(src0[src_cn.g_i()]._as_usize()); let b0p = lin_lut.get_unchecked(src0[src_cn.b_i()]._as_usize()); let r1p = lin_lut.get_unchecked(src0[src_cn.r_i() + src_channels]._as_usize()); let g1p = lin_lut.get_unchecked(src0[src_cn.g_i() + src_channels]._as_usize()); let b1p = lin_lut.get_unchecked(src0[src_cn.b_i() + src_channels]._as_usize()); let r2p = lin_lut.get_unchecked(src1[src_cn.r_i()]._as_usize()); let g2p = lin_lut.get_unchecked(src1[src_cn.g_i()]._as_usize()); let b2p = lin_lut.get_unchecked(src1[src_cn.b_i()]._as_usize()); let r3p = lin_lut.get_unchecked(src1[src_cn.r_i() + src_channels]._as_usize()); let g3p = lin_lut.get_unchecked(src1[src_cn.g_i() + src_channels]._as_usize()); let b3p = lin_lut.get_unchecked(src1[src_cn.b_i() + src_channels]._as_usize()); r0 = vld1q_dup_s32(r0p); g0 = vld1q_dup_s32(g0p); b0 = vld1q_dup_s32(b0p); r1 = vld1q_dup_s32(r1p); g1 = vld1q_dup_s32(g1p); b1 = vld1q_dup_s32(b1p); r2 = vld1q_dup_s32(r2p); g2 = vld1q_dup_s32(g2p); b2 = vld1q_dup_s32(b2p); r3 = vld1q_dup_s32(r3p); g3 = vld1q_dup_s32(g3p); b3 = vld1q_dup_s32(b3p); a0 = if src_channels == 4 { src0[src_cn.a_i()] } else { max_colors }; a1 = if src_channels == 4 { src0[src_cn.a_i() + src_channels] } else { max_colors }; a2 = if src_channels == 4 { src1[src_cn.a_i()] } else { max_colors }; a3 = if src_channels == 4 { src1[src_cn.a_i() + src_channels] } else { max_colors }; } else { r0 = vdupq_n_s32(0); g0 = vdupq_n_s32(0); b0 = vdupq_n_s32(0); 
r1 = vdupq_n_s32(0); g1 = vdupq_n_s32(0); b1 = vdupq_n_s32(0); r2 = vdupq_n_s32(0); g2 = vdupq_n_s32(0); b2 = vdupq_n_s32(0); r3 = vdupq_n_s32(0); g3 = vdupq_n_s32(0); b3 = vdupq_n_s32(0); a0 = max_colors; a1 = max_colors; a2 = max_colors; a3 = max_colors; } for (((src0, src1), dst0), dst1) in src_iter0 .zip(src_iter1) .zip(dst0.chunks_exact_mut(dst_channels * 2)) .zip(dst1.chunks_exact_mut(dst_channels * 2)) { let v0_0 = vqrdmulhq_s32(r0, m0); let v0_1 = vqrdmulhq_s32(r1, m0); let v0_2 = vqrdmulhq_s32(r2, m0); let v0_3 = vqrdmulhq_s32(r3, m0); let v1_0 = vqrdmlahq_s32(v0_0, g0, m1); let v1_1 = vqrdmlahq_s32(v0_1, g1, m1); let v1_2 = vqrdmlahq_s32(v0_2, g2, m1); let v1_3 = vqrdmlahq_s32(v0_3, g3, m1); let vr0 = vqrdmlahq_s32(v1_0, b0, m2); let vr1 = vqrdmlahq_s32(v1_1, b1, m2); let vr2 = vqrdmlahq_s32(v1_2, b2, m2); let vr3 = vqrdmlahq_s32(v1_3, b3, m2); let mut vr0 = vqmovun_s32(vr0); let mut vr1 = vqmovun_s32(vr1); let mut vr2 = vqmovun_s32(vr2); let mut vr3 = vqmovun_s32(vr3); vr0 = vmin_u16(vr0, v_max_value); vr1 = vmin_u16(vr1, v_max_value); vr2 = vmin_u16(vr2, v_max_value); vr3 = vmin_u16(vr3, v_max_value); let r0p = lin_lut.get_unchecked(src0[src_cn.r_i()]._as_usize()); let g0p = lin_lut.get_unchecked(src0[src_cn.g_i()]._as_usize()); let b0p = lin_lut.get_unchecked(src0[src_cn.b_i()]._as_usize()); let r1p = lin_lut.get_unchecked(src0[src_cn.r_i() + src_channels]._as_usize()); let g1p = lin_lut.get_unchecked(src0[src_cn.g_i() + src_channels]._as_usize()); let b1p = lin_lut.get_unchecked(src0[src_cn.b_i() + src_channels]._as_usize()); let r2p = lin_lut.get_unchecked(src1[src_cn.r_i()]._as_usize()); let g2p = lin_lut.get_unchecked(src1[src_cn.g_i()]._as_usize()); let b2p = lin_lut.get_unchecked(src1[src_cn.b_i()]._as_usize()); let r3p = lin_lut.get_unchecked(src1[src_cn.r_i() + src_channels]._as_usize()); let g3p = lin_lut.get_unchecked(src1[src_cn.g_i() + src_channels]._as_usize()); let b3p = lin_lut.get_unchecked(src1[src_cn.b_i() + 
src_channels]._as_usize()); r0 = vld1q_dup_s32(r0p); g0 = vld1q_dup_s32(g0p); b0 = vld1q_dup_s32(b0p); r1 = vld1q_dup_s32(r1p); g1 = vld1q_dup_s32(g1p); b1 = vld1q_dup_s32(b1p); r2 = vld1q_dup_s32(r2p); g2 = vld1q_dup_s32(g2p); b2 = vld1q_dup_s32(b2p); r3 = vld1q_dup_s32(r3p); g3 = vld1q_dup_s32(g3p); b3 = vld1q_dup_s32(b3p); dst0[dst_cn.r_i()] = self.profile.gamma[vget_lane_u16::<0>(vr0) as usize]; dst0[dst_cn.g_i()] = self.profile.gamma[vget_lane_u16::<1>(vr0) as usize]; dst0[dst_cn.b_i()] = self.profile.gamma[vget_lane_u16::<2>(vr0) as usize]; if dst_channels == 4 { dst0[dst_cn.a_i()] = a0; } dst0[dst_cn.r_i() + dst_channels] = self.profile.gamma[vget_lane_u16::<0>(vr1) as usize]; dst0[dst_cn.g_i() + dst_channels] = self.profile.gamma[vget_lane_u16::<1>(vr1) as usize]; dst0[dst_cn.b_i() + dst_channels] = self.profile.gamma[vget_lane_u16::<2>(vr0) as usize]; if dst_channels == 4 { dst0[dst_cn.a_i() + dst_channels] = a1; } dst1[dst_cn.r_i()] = self.profile.gamma[vget_lane_u16::<0>(vr2) as usize]; dst1[dst_cn.g_i()] = self.profile.gamma[vget_lane_u16::<1>(vr2) as usize]; dst1[dst_cn.b_i()] = self.profile.gamma[vget_lane_u16::<2>(vr2) as usize]; if dst_channels == 4 { dst1[dst_cn.a_i()] = a2; } dst1[dst_cn.r_i() + dst_channels] = self.profile.gamma[vget_lane_u16::<0>(vr3) as usize]; dst1[dst_cn.g_i() + dst_channels] = self.profile.gamma[vget_lane_u16::<1>(vr3) as usize]; dst1[dst_cn.b_i() + dst_channels] = self.profile.gamma[vget_lane_u16::<2>(vr3) as usize]; if dst_channels == 4 { dst1[dst_cn.a_i() + dst_channels] = a3; } a0 = if src_channels == 4 { src0[src_cn.a_i()] } else { max_colors }; a1 = if src_channels == 4 { src0[src_cn.a_i() + src_channels] } else { max_colors }; a2 = if src_channels == 4 { src1[src_cn.a_i()] } else { max_colors }; a3 = if src_channels == 4 { src1[src_cn.a_i() + src_channels] } else { max_colors }; } if let (Some(dst0), Some(dst1)) = ( dst0.chunks_exact_mut(dst_channels * 2).last(), dst1.chunks_exact_mut(dst_channels * 2).last(), ) { let 
v0_0 = vqrdmulhq_s32(r0, m0); let v0_1 = vqrdmulhq_s32(r1, m0); let v0_2 = vqrdmulhq_s32(r2, m0); let v0_3 = vqrdmulhq_s32(r3, m0); let v1_0 = vqrdmlahq_s32(v0_0, g0, m1); let v1_1 = vqrdmlahq_s32(v0_1, g1, m1); let v1_2 = vqrdmlahq_s32(v0_2, g2, m1); let v1_3 = vqrdmlahq_s32(v0_3, g3, m1); let vr0 = vqrdmlahq_s32(v1_0, b0, m2); let vr1 = vqrdmlahq_s32(v1_1, b1, m2); let vr2 = vqrdmlahq_s32(v1_2, b2, m2); let vr3 = vqrdmlahq_s32(v1_3, b3, m2); let mut vr0 = vqmovun_s32(vr0); let mut vr1 = vqmovun_s32(vr1); let mut vr2 = vqmovun_s32(vr2); let mut vr3 = vqmovun_s32(vr3); vr0 = vmin_u16(vr0, v_max_value); vr1 = vmin_u16(vr1, v_max_value); vr2 = vmin_u16(vr2, v_max_value); vr3 = vmin_u16(vr3, v_max_value); dst0[dst_cn.r_i()] = self.profile.gamma[vget_lane_u16::<0>(vr0) as usize]; dst0[dst_cn.g_i()] = self.profile.gamma[vget_lane_u16::<1>(vr0) as usize]; dst0[dst_cn.b_i()] = self.profile.gamma[vget_lane_u16::<2>(vr0) as usize]; if dst_channels == 4 { dst0[dst_cn.a_i()] = a0; } dst0[dst_cn.r_i() + dst_channels] = self.profile.gamma[vget_lane_u16::<0>(vr1) as usize]; dst0[dst_cn.g_i() + dst_channels] = self.profile.gamma[vget_lane_u16::<1>(vr1) as usize]; dst0[dst_cn.b_i() + dst_channels] = self.profile.gamma[vget_lane_u16::<2>(vr0) as usize]; if dst_channels == 4 { dst0[dst_cn.a_i() + dst_channels] = a1; } dst1[dst_cn.r_i()] = self.profile.gamma[vget_lane_u16::<0>(vr2) as usize]; dst1[dst_cn.g_i()] = self.profile.gamma[vget_lane_u16::<1>(vr2) as usize]; dst1[dst_cn.b_i()] = self.profile.gamma[vget_lane_u16::<2>(vr2) as usize]; if dst_channels == 4 { dst1[dst_cn.a_i()] = a2; } dst1[dst_cn.r_i() + dst_channels] = self.profile.gamma[vget_lane_u16::<0>(vr3) as usize]; dst1[dst_cn.g_i() + dst_channels] = self.profile.gamma[vget_lane_u16::<1>(vr3) as usize]; dst1[dst_cn.b_i() + dst_channels] = self.profile.gamma[vget_lane_u16::<2>(vr3) as usize]; if dst_channels == 4 { dst1[dst_cn.a_i() + dst_channels] = a3; } } } for (src, dst) in src_remainder .chunks_exact(src_channels) 
.zip(dst_remainder.chunks_exact_mut(dst_channels)) { let rp = lin_lut.get_unchecked(src[src_cn.r_i()]._as_usize()); let gp = lin_lut.get_unchecked(src[src_cn.g_i()]._as_usize()); let bp = lin_lut.get_unchecked(src[src_cn.b_i()]._as_usize()); let r = vld1q_dup_s32(rp); let g = vld1q_dup_s32(gp); let b = vld1q_dup_s32(bp); let a = if src_channels == 4 { src[src_cn.a_i()] } else { max_colors }; let v0 = vqrdmulhq_s32(r, m0); let v1 = vqrdmlahq_s32(v0, g, m1); let v = vqrdmlahq_s32(v1, b, m2); let mut vr0 = vqmovun_s32(v); vr0 = vmin_u16(vr0, v_max_value); dst[dst_cn.r_i()] = self.profile.gamma[vget_lane_u16::<0>(vr0) as usize]; dst[dst_cn.g_i()] = self.profile.gamma[vget_lane_u16::<1>(vr0) as usize]; dst[dst_cn.b_i()] = self.profile.gamma[vget_lane_u16::<2>(vr0) as usize]; if dst_channels == 4 { dst[dst_cn.a_i()] = a; } } } Ok(()) } } impl< T: Copy + PointeeSizeExpressible + 'static + Default, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, > TransformExecutor for TransformShaperQ1_30NeonOpt where u32: AsPrimitive, { fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> { unsafe { self.transform_impl(src, dst) } } } moxcms-0.7.7/src/conversions/neon/rgb_xyz_q2_13.rs000064400000000000000000000447101046102023000202100ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. 
Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ use crate::conversions::rgbxyz_fixed::TransformMatrixShaperFp; use crate::transform::PointeeSizeExpressible; use crate::{CmsError, Layout, TransformExecutor}; use num_traits::AsPrimitive; use std::arch::aarch64::*; #[allow(dead_code)] #[inline] pub(crate) fn split_by_twos(data: &[T], channels: usize) -> (&[T], &[T]) { let len = data.len() / (channels * 4); let split_point = len * 4; data.split_at(split_point * channels) } #[allow(dead_code)] #[inline] pub(crate) fn split_by_twos_mut(data: &mut [T], channels: usize) -> (&mut [T], &mut [T]) { let len = data.len() / (channels * 4); let split_point = len * 4; data.split_at_mut(split_point * channels) } pub(crate) struct TransformShaperQ2_13Neon< T: Copy, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, const PRECISION: i32, > { pub(crate) profile: TransformMatrixShaperFp, pub(crate) bit_depth: usize, pub(crate) gamma_lut: usize, } impl< T: Copy + PointeeSizeExpressible + 'static + Default, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, const PRECISION: i32, > TransformExecutor for TransformShaperQ2_13Neon where u32: AsPrimitive, { fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> { let src_cn = Layout::from(SRC_LAYOUT); let dst_cn = Layout::from(DST_LAYOUT); let src_channels = src_cn.channels(); let dst_channels = dst_cn.channels(); if src.len() / src_channels != dst.len() / dst_channels { return Err(CmsError::LaneSizeMismatch); } if src.len() % src_channels != 0 { return Err(CmsError::LaneMultipleOfChannels); } if dst.len() % dst_channels != 0 { return Err(CmsError::LaneMultipleOfChannels); } let t = self.profile.adaptation_matrix.transpose(); let max_colors: T = ((1 << self.bit_depth) - 1).as_(); // safety precondition for linearization table if T::FINITE { let cap = (1 << self.bit_depth) - 1; assert!(self.profile.r_linear.len() >= cap); assert!(self.profile.g_linear.len() >= cap); assert!(self.profile.b_linear.len() >= cap); } else { assert!(self.profile.r_linear.len() >= 
T::NOT_FINITE_LINEAR_TABLE_SIZE); assert!(self.profile.g_linear.len() >= T::NOT_FINITE_LINEAR_TABLE_SIZE); assert!(self.profile.b_linear.len() >= T::NOT_FINITE_LINEAR_TABLE_SIZE); } let r_lin = &self.profile.r_linear; let g_lin = &self.profile.g_linear; let b_lin = &self.profile.b_linear; let (src_chunks, src_remainder) = split_by_twos(src, src_channels); let (dst_chunks, dst_remainder) = split_by_twos_mut(dst, dst_channels); unsafe { let m0 = vld1_s16([t.v[0][0], t.v[0][1], t.v[0][2], 0].as_ptr()); let m1 = vld1_s16([t.v[1][0], t.v[1][1], t.v[1][2], 0].as_ptr()); let m2 = vld1_s16([t.v[2][0], t.v[2][1], t.v[2][2], 0].as_ptr()); let v_max_value = vdup_n_u16((self.gamma_lut - 1) as u16); let rnd = vdupq_n_s32(1 << (PRECISION - 1)); if !src_chunks.is_empty() { let (src0, src1) = src_chunks.split_at(src_chunks.len() / 2); let (dst0, dst1) = dst_chunks.split_at_mut(dst_chunks.len() / 2); let mut src_iter0 = src0.chunks_exact(src_channels * 2); let mut src_iter1 = src1.chunks_exact(src_channels * 2); let (mut r0, mut g0, mut b0, mut a0); let (mut r1, mut g1, mut b1, mut a1); let (mut r2, mut g2, mut b2, mut a2); let (mut r3, mut g3, mut b3, mut a3); if let (Some(src0), Some(src1)) = (src_iter0.next(), src_iter1.next()) { let r0p = r_lin.get_unchecked(src0[src_cn.r_i()]._as_usize()); let g0p = g_lin.get_unchecked(src0[src_cn.g_i()]._as_usize()); let b0p = b_lin.get_unchecked(src0[src_cn.b_i()]._as_usize()); let r1p = r_lin.get_unchecked(src0[src_cn.r_i() + src_channels]._as_usize()); let g1p = g_lin.get_unchecked(src0[src_cn.g_i() + src_channels]._as_usize()); let b1p = b_lin.get_unchecked(src0[src_cn.b_i() + src_channels]._as_usize()); let r2p = r_lin.get_unchecked(src1[src_cn.r_i()]._as_usize()); let g2p = g_lin.get_unchecked(src1[src_cn.g_i()]._as_usize()); let b2p = b_lin.get_unchecked(src1[src_cn.b_i()]._as_usize()); let r3p = r_lin.get_unchecked(src1[src_cn.r_i() + src_channels]._as_usize()); let g3p = g_lin.get_unchecked(src1[src_cn.g_i() + 
src_channels]._as_usize()); let b3p = b_lin.get_unchecked(src1[src_cn.b_i() + src_channels]._as_usize()); r0 = vld1_dup_s16(r0p); g0 = vld1_dup_s16(g0p); b0 = vld1_dup_s16(b0p); r1 = vld1_dup_s16(r1p); g1 = vld1_dup_s16(g1p); b1 = vld1_dup_s16(b1p); r2 = vld1_dup_s16(r2p); g2 = vld1_dup_s16(g2p); b2 = vld1_dup_s16(b2p); r3 = vld1_dup_s16(r3p); g3 = vld1_dup_s16(g3p); b3 = vld1_dup_s16(b3p); a0 = if src_channels == 4 { src0[src_cn.a_i()] } else { max_colors }; a1 = if src_channels == 4 { src0[src_cn.a_i() + src_channels] } else { max_colors }; a2 = if src_channels == 4 { src1[src_cn.a_i()] } else { max_colors }; a3 = if src_channels == 4 { src1[src_cn.a_i() + src_channels] } else { max_colors }; } else { r0 = vdup_n_s16(0); g0 = vdup_n_s16(0); b0 = vdup_n_s16(0); r1 = vdup_n_s16(0); g1 = vdup_n_s16(0); b1 = vdup_n_s16(0); r2 = vdup_n_s16(0); g2 = vdup_n_s16(0); b2 = vdup_n_s16(0); r3 = vdup_n_s16(0); g3 = vdup_n_s16(0); b3 = vdup_n_s16(0); a0 = max_colors; a1 = max_colors; a2 = max_colors; a3 = max_colors; } for (((src0, src1), dst0), dst1) in src_iter0 .zip(src_iter1) .zip(dst0.chunks_exact_mut(dst_channels * 2)) .zip(dst1.chunks_exact_mut(dst_channels * 2)) { let v0_0 = vmlal_s16(rnd, r0, m0); let v0_1 = vmlal_s16(rnd, r1, m0); let v0_2 = vmlal_s16(rnd, r2, m0); let v0_3 = vmlal_s16(rnd, r3, m0); let v1_0 = vmlal_s16(v0_0, g0, m1); let v1_1 = vmlal_s16(v0_1, g1, m1); let v1_2 = vmlal_s16(v0_2, g2, m1); let v1_3 = vmlal_s16(v0_3, g3, m1); let vr0 = vmlal_s16(v1_0, b0, m2); let vr1 = vmlal_s16(v1_1, b1, m2); let vr2 = vmlal_s16(v1_2, b2, m2); let vr3 = vmlal_s16(v1_3, b3, m2); let mut vr0 = vqshrun_n_s32::(vr0); let mut vr1 = vqshrun_n_s32::(vr1); let mut vr2 = vqshrun_n_s32::(vr2); let mut vr3 = vqshrun_n_s32::(vr3); vr0 = vmin_u16(vr0, v_max_value); vr1 = vmin_u16(vr1, v_max_value); vr2 = vmin_u16(vr2, v_max_value); vr3 = vmin_u16(vr3, v_max_value); let r0p = r_lin.get_unchecked(src0[src_cn.r_i()]._as_usize()); let g0p = 
g_lin.get_unchecked(src0[src_cn.g_i()]._as_usize()); let b0p = b_lin.get_unchecked(src0[src_cn.b_i()]._as_usize()); let r1p = r_lin.get_unchecked(src0[src_cn.r_i() + src_channels]._as_usize()); let g1p = g_lin.get_unchecked(src0[src_cn.g_i() + src_channels]._as_usize()); let b1p = b_lin.get_unchecked(src0[src_cn.b_i() + src_channels]._as_usize()); let r2p = r_lin.get_unchecked(src1[src_cn.r_i()]._as_usize()); let g2p = g_lin.get_unchecked(src1[src_cn.g_i()]._as_usize()); let b2p = b_lin.get_unchecked(src1[src_cn.b_i()]._as_usize()); let r3p = r_lin.get_unchecked(src1[src_cn.r_i() + src_channels]._as_usize()); let g3p = g_lin.get_unchecked(src1[src_cn.g_i() + src_channels]._as_usize()); let b3p = b_lin.get_unchecked(src1[src_cn.b_i() + src_channels]._as_usize()); r0 = vld1_dup_s16(r0p); g0 = vld1_dup_s16(g0p); b0 = vld1_dup_s16(b0p); r1 = vld1_dup_s16(r1p); g1 = vld1_dup_s16(g1p); b1 = vld1_dup_s16(b1p); r2 = vld1_dup_s16(r2p); g2 = vld1_dup_s16(g2p); b2 = vld1_dup_s16(b2p); r3 = vld1_dup_s16(r3p); g3 = vld1_dup_s16(g3p); b3 = vld1_dup_s16(b3p); dst0[dst_cn.r_i()] = self.profile.r_gamma[vget_lane_u16::<0>(vr0) as usize]; dst0[dst_cn.g_i()] = self.profile.g_gamma[vget_lane_u16::<1>(vr0) as usize]; dst0[dst_cn.b_i()] = self.profile.b_gamma[vget_lane_u16::<2>(vr0) as usize]; if dst_channels == 4 { dst0[dst_cn.a_i()] = a0; } dst0[dst_cn.r_i() + dst_channels] = self.profile.r_gamma[vget_lane_u16::<0>(vr1) as usize]; dst0[dst_cn.g_i() + dst_channels] = self.profile.g_gamma[vget_lane_u16::<1>(vr1) as usize]; dst0[dst_cn.b_i() + dst_channels] = self.profile.b_gamma[vget_lane_u16::<2>(vr0) as usize]; if dst_channels == 4 { dst0[dst_cn.a_i() + dst_channels] = a1; } dst1[dst_cn.r_i()] = self.profile.r_gamma[vget_lane_u16::<0>(vr2) as usize]; dst1[dst_cn.g_i()] = self.profile.g_gamma[vget_lane_u16::<1>(vr2) as usize]; dst1[dst_cn.b_i()] = self.profile.b_gamma[vget_lane_u16::<2>(vr2) as usize]; if dst_channels == 4 { dst1[dst_cn.a_i()] = a2; } dst1[dst_cn.r_i() + dst_channels] = 
self.profile.r_gamma[vget_lane_u16::<0>(vr3) as usize]; dst1[dst_cn.g_i() + dst_channels] = self.profile.g_gamma[vget_lane_u16::<1>(vr3) as usize]; dst1[dst_cn.b_i() + dst_channels] = self.profile.b_gamma[vget_lane_u16::<2>(vr3) as usize]; if dst_channels == 4 { dst1[dst_cn.a_i() + dst_channels] = a3; } a0 = if src_channels == 4 { src0[src_cn.a_i()] } else { max_colors }; a1 = if src_channels == 4 { src0[src_cn.a_i() + src_channels] } else { max_colors }; a2 = if src_channels == 4 { src1[src_cn.a_i()] } else { max_colors }; a3 = if src_channels == 4 { src1[src_cn.a_i() + src_channels] } else { max_colors }; } if let (Some(dst0), Some(dst1)) = ( dst0.chunks_exact_mut(dst_channels * 2).last(), dst1.chunks_exact_mut(dst_channels * 2).last(), ) { let v0_0 = vmlal_s16(rnd, r0, m0); let v0_1 = vmlal_s16(rnd, r1, m0); let v0_2 = vmlal_s16(rnd, r2, m0); let v0_3 = vmlal_s16(rnd, r3, m0); let v1_0 = vmlal_s16(v0_0, g0, m1); let v1_1 = vmlal_s16(v0_1, g1, m1); let v1_2 = vmlal_s16(v0_2, g2, m1); let v1_3 = vmlal_s16(v0_3, g3, m1); let vr0 = vmlal_s16(v1_0, b0, m2); let vr1 = vmlal_s16(v1_1, b1, m2); let vr2 = vmlal_s16(v1_2, b2, m2); let vr3 = vmlal_s16(v1_3, b3, m2); let mut vr0 = vqshrun_n_s32::(vr0); let mut vr1 = vqshrun_n_s32::(vr1); let mut vr2 = vqshrun_n_s32::(vr2); let mut vr3 = vqshrun_n_s32::(vr3); vr0 = vmin_u16(vr0, v_max_value); vr1 = vmin_u16(vr1, v_max_value); vr2 = vmin_u16(vr2, v_max_value); vr3 = vmin_u16(vr3, v_max_value); dst0[dst_cn.r_i()] = self.profile.r_gamma[vget_lane_u16::<0>(vr0) as usize]; dst0[dst_cn.g_i()] = self.profile.g_gamma[vget_lane_u16::<1>(vr0) as usize]; dst0[dst_cn.b_i()] = self.profile.b_gamma[vget_lane_u16::<2>(vr0) as usize]; if dst_channels == 4 { dst0[dst_cn.a_i()] = a0; } dst0[dst_cn.r_i() + dst_channels] = self.profile.r_gamma[vget_lane_u16::<0>(vr1) as usize]; dst0[dst_cn.g_i() + dst_channels] = self.profile.g_gamma[vget_lane_u16::<1>(vr1) as usize]; dst0[dst_cn.b_i() + dst_channels] = 
self.profile.b_gamma[vget_lane_u16::<2>(vr0) as usize]; if dst_channels == 4 { dst0[dst_cn.a_i() + dst_channels] = a1; } dst1[dst_cn.r_i()] = self.profile.r_gamma[vget_lane_u16::<0>(vr2) as usize]; dst1[dst_cn.g_i()] = self.profile.g_gamma[vget_lane_u16::<1>(vr2) as usize]; dst1[dst_cn.b_i()] = self.profile.b_gamma[vget_lane_u16::<2>(vr2) as usize]; if dst_channels == 4 { dst1[dst_cn.a_i()] = a2; } dst1[dst_cn.r_i() + dst_channels] = self.profile.r_gamma[vget_lane_u16::<0>(vr3) as usize]; dst1[dst_cn.g_i() + dst_channels] = self.profile.g_gamma[vget_lane_u16::<1>(vr3) as usize]; dst1[dst_cn.b_i() + dst_channels] = self.profile.b_gamma[vget_lane_u16::<2>(vr3) as usize]; if dst_channels == 4 { dst1[dst_cn.a_i() + dst_channels] = a3; } } } for (src, dst) in src_remainder .chunks_exact(src_channels) .zip(dst_remainder.chunks_exact_mut(dst_channels)) { let rp = r_lin.get_unchecked(src[src_cn.r_i()]._as_usize()); let gp = g_lin.get_unchecked(src[src_cn.g_i()]._as_usize()); let bp = b_lin.get_unchecked(src[src_cn.b_i()]._as_usize()); let r = vld1_dup_s16(rp); let g = vld1_dup_s16(gp); let b = vld1_dup_s16(bp); let a = if src_channels == 4 { src[src_cn.a_i()] } else { max_colors }; let v0 = vmlal_s16(rnd, r, m0); let v1 = vmlal_s16(v0, g, m1); let v = vmlal_s16(v1, b, m2); let mut vr0 = vqshrun_n_s32::(v); vr0 = vmin_u16(vr0, v_max_value); dst[dst_cn.r_i()] = self.profile.r_gamma[vget_lane_u16::<0>(vr0) as usize]; dst[dst_cn.g_i()] = self.profile.g_gamma[vget_lane_u16::<1>(vr0) as usize]; dst[dst_cn.b_i()] = self.profile.b_gamma[vget_lane_u16::<2>(vr0) as usize]; if dst_channels == 4 { dst[dst_cn.a_i()] = a; } } } Ok(()) } } moxcms-0.7.7/src/conversions/neon/rgb_xyz_q2_13_opt.rs000064400000000000000000000433221046102023000210700ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. 
* // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ use crate::conversions::neon::rgb_xyz_q2_13::{split_by_twos, split_by_twos_mut}; use crate::conversions::rgbxyz_fixed::TransformMatrixShaperFpOptVec; use crate::transform::PointeeSizeExpressible; use crate::{CmsError, Layout, TransformExecutor}; use num_traits::AsPrimitive; use std::arch::aarch64::*; pub(crate) struct TransformShaperQ2_13NeonOpt< T: Copy, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, const PRECISION: i32, > { pub(crate) profile: TransformMatrixShaperFpOptVec, pub(crate) bit_depth: usize, pub(crate) gamma_lut: usize, } impl< T: Copy + PointeeSizeExpressible + 'static + Default, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, const PRECISION: i32, > TransformExecutor for TransformShaperQ2_13NeonOpt where u32: AsPrimitive, { fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> { let src_cn = Layout::from(SRC_LAYOUT); let dst_cn = Layout::from(DST_LAYOUT); let src_channels = src_cn.channels(); let dst_channels = dst_cn.channels(); if src.len() / src_channels != dst.len() / dst_channels { return Err(CmsError::LaneSizeMismatch); } if src.len() % src_channels != 0 { return Err(CmsError::LaneMultipleOfChannels); } if dst.len() % dst_channels != 0 { return Err(CmsError::LaneMultipleOfChannels); } let t = self.profile.adaptation_matrix.transpose(); let max_colors: T = ((1 << self.bit_depth) - 1).as_(); // safety precondition for linearization table if T::FINITE { let cap = (1 << self.bit_depth) - 1; assert!(self.profile.linear.len() >= cap); } else { assert!(self.profile.linear.len() >= T::NOT_FINITE_LINEAR_TABLE_SIZE); } let lut_lin = &self.profile.linear; let (src_chunks, src_remainder) = split_by_twos(src, src_channels); let (dst_chunks, dst_remainder) = split_by_twos_mut(dst, dst_channels); unsafe { let m0 = vld1_s16([t.v[0][0], t.v[0][1], t.v[0][2], 0].as_ptr()); let m1 = vld1_s16([t.v[1][0], t.v[1][1], t.v[1][2], 0].as_ptr()); let m2 = vld1_s16([t.v[2][0], t.v[2][1], t.v[2][2], 0].as_ptr()); let v_max_value = vdup_n_u16((self.gamma_lut - 
1) as u16); let rnd = vdupq_n_s32(1 << (PRECISION - 1)); if !src_chunks.is_empty() { let (src0, src1) = src_chunks.split_at(src_chunks.len() / 2); let (dst0, dst1) = dst_chunks.split_at_mut(dst_chunks.len() / 2); let mut src_iter0 = src0.chunks_exact(src_channels * 2); let mut src_iter1 = src1.chunks_exact(src_channels * 2); let (mut r0, mut g0, mut b0, mut a0); let (mut r1, mut g1, mut b1, mut a1); let (mut r2, mut g2, mut b2, mut a2); let (mut r3, mut g3, mut b3, mut a3); if let (Some(src0), Some(src1)) = (src_iter0.next(), src_iter1.next()) { let r0p = lut_lin.get_unchecked(src0[src_cn.r_i()]._as_usize()); let g0p = lut_lin.get_unchecked(src0[src_cn.g_i()]._as_usize()); let b0p = lut_lin.get_unchecked(src0[src_cn.b_i()]._as_usize()); let r1p = lut_lin.get_unchecked(src0[src_cn.r_i() + src_channels]._as_usize()); let g1p = lut_lin.get_unchecked(src0[src_cn.g_i() + src_channels]._as_usize()); let b1p = lut_lin.get_unchecked(src0[src_cn.b_i() + src_channels]._as_usize()); let r2p = lut_lin.get_unchecked(src1[src_cn.r_i()]._as_usize()); let g2p = lut_lin.get_unchecked(src1[src_cn.g_i()]._as_usize()); let b2p = lut_lin.get_unchecked(src1[src_cn.b_i()]._as_usize()); let r3p = lut_lin.get_unchecked(src1[src_cn.r_i() + src_channels]._as_usize()); let g3p = lut_lin.get_unchecked(src1[src_cn.g_i() + src_channels]._as_usize()); let b3p = lut_lin.get_unchecked(src1[src_cn.b_i() + src_channels]._as_usize()); r0 = vld1_dup_s16(r0p); g0 = vld1_dup_s16(g0p); b0 = vld1_dup_s16(b0p); r1 = vld1_dup_s16(r1p); g1 = vld1_dup_s16(g1p); b1 = vld1_dup_s16(b1p); r2 = vld1_dup_s16(r2p); g2 = vld1_dup_s16(g2p); b2 = vld1_dup_s16(b2p); r3 = vld1_dup_s16(r3p); g3 = vld1_dup_s16(g3p); b3 = vld1_dup_s16(b3p); a0 = if src_channels == 4 { src0[src_cn.a_i()] } else { max_colors }; a1 = if src_channels == 4 { src0[src_cn.a_i() + src_channels] } else { max_colors }; a2 = if src_channels == 4 { src1[src_cn.a_i()] } else { max_colors }; a3 = if src_channels == 4 { src1[src_cn.a_i() + src_channels] } 
else { max_colors }; } else { r0 = vdup_n_s16(0); g0 = vdup_n_s16(0); b0 = vdup_n_s16(0); r1 = vdup_n_s16(0); g1 = vdup_n_s16(0); b1 = vdup_n_s16(0); r2 = vdup_n_s16(0); g2 = vdup_n_s16(0); b2 = vdup_n_s16(0); r3 = vdup_n_s16(0); g3 = vdup_n_s16(0); b3 = vdup_n_s16(0); a0 = max_colors; a1 = max_colors; a2 = max_colors; a3 = max_colors; } for (((src0, src1), dst0), dst1) in src_iter0 .zip(src_iter1) .zip(dst0.chunks_exact_mut(dst_channels * 2)) .zip(dst1.chunks_exact_mut(dst_channels * 2)) { let v0_0 = vmlal_s16(rnd, r0, m0); let v0_1 = vmlal_s16(rnd, r1, m0); let v0_2 = vmlal_s16(rnd, r2, m0); let v0_3 = vmlal_s16(rnd, r3, m0); let v1_0 = vmlal_s16(v0_0, g0, m1); let v1_1 = vmlal_s16(v0_1, g1, m1); let v1_2 = vmlal_s16(v0_2, g2, m1); let v1_3 = vmlal_s16(v0_3, g3, m1); let vr0 = vmlal_s16(v1_0, b0, m2); let vr1 = vmlal_s16(v1_1, b1, m2); let vr2 = vmlal_s16(v1_2, b2, m2); let vr3 = vmlal_s16(v1_3, b3, m2); let mut vr0 = vqshrun_n_s32::(vr0); let mut vr1 = vqshrun_n_s32::(vr1); let mut vr2 = vqshrun_n_s32::(vr2); let mut vr3 = vqshrun_n_s32::(vr3); vr0 = vmin_u16(vr0, v_max_value); vr1 = vmin_u16(vr1, v_max_value); vr2 = vmin_u16(vr2, v_max_value); vr3 = vmin_u16(vr3, v_max_value); let r0p = lut_lin.get_unchecked(src0[src_cn.r_i()]._as_usize()); let g0p = lut_lin.get_unchecked(src0[src_cn.g_i()]._as_usize()); let b0p = lut_lin.get_unchecked(src0[src_cn.b_i()]._as_usize()); let r1p = lut_lin.get_unchecked(src0[src_cn.r_i() + src_channels]._as_usize()); let g1p = lut_lin.get_unchecked(src0[src_cn.g_i() + src_channels]._as_usize()); let b1p = lut_lin.get_unchecked(src0[src_cn.b_i() + src_channels]._as_usize()); let r2p = lut_lin.get_unchecked(src1[src_cn.r_i()]._as_usize()); let g2p = lut_lin.get_unchecked(src1[src_cn.g_i()]._as_usize()); let b2p = lut_lin.get_unchecked(src1[src_cn.b_i()]._as_usize()); let r3p = lut_lin.get_unchecked(src1[src_cn.r_i() + src_channels]._as_usize()); let g3p = lut_lin.get_unchecked(src1[src_cn.g_i() + src_channels]._as_usize()); let b3p = 
lut_lin.get_unchecked(src1[src_cn.b_i() + src_channels]._as_usize()); r0 = vld1_dup_s16(r0p); g0 = vld1_dup_s16(g0p); b0 = vld1_dup_s16(b0p); r1 = vld1_dup_s16(r1p); g1 = vld1_dup_s16(g1p); b1 = vld1_dup_s16(b1p); r2 = vld1_dup_s16(r2p); g2 = vld1_dup_s16(g2p); b2 = vld1_dup_s16(b2p); r3 = vld1_dup_s16(r3p); g3 = vld1_dup_s16(g3p); b3 = vld1_dup_s16(b3p); dst0[dst_cn.r_i()] = self.profile.gamma[vget_lane_u16::<0>(vr0) as usize]; dst0[dst_cn.g_i()] = self.profile.gamma[vget_lane_u16::<1>(vr0) as usize]; dst0[dst_cn.b_i()] = self.profile.gamma[vget_lane_u16::<2>(vr0) as usize]; if dst_channels == 4 { dst0[dst_cn.a_i()] = a0; } dst0[dst_cn.r_i() + dst_channels] = self.profile.gamma[vget_lane_u16::<0>(vr1) as usize]; dst0[dst_cn.g_i() + dst_channels] = self.profile.gamma[vget_lane_u16::<1>(vr1) as usize]; dst0[dst_cn.b_i() + dst_channels] = self.profile.gamma[vget_lane_u16::<2>(vr0) as usize]; if dst_channels == 4 { dst0[dst_cn.a_i() + dst_channels] = a1; } dst1[dst_cn.r_i()] = self.profile.gamma[vget_lane_u16::<0>(vr2) as usize]; dst1[dst_cn.g_i()] = self.profile.gamma[vget_lane_u16::<1>(vr2) as usize]; dst1[dst_cn.b_i()] = self.profile.gamma[vget_lane_u16::<2>(vr2) as usize]; if dst_channels == 4 { dst1[dst_cn.a_i()] = a2; } dst1[dst_cn.r_i() + dst_channels] = self.profile.gamma[vget_lane_u16::<0>(vr3) as usize]; dst1[dst_cn.g_i() + dst_channels] = self.profile.gamma[vget_lane_u16::<1>(vr3) as usize]; dst1[dst_cn.b_i() + dst_channels] = self.profile.gamma[vget_lane_u16::<2>(vr3) as usize]; if dst_channels == 4 { dst1[dst_cn.a_i() + dst_channels] = a3; } a0 = if src_channels == 4 { src0[src_cn.a_i()] } else { max_colors }; a1 = if src_channels == 4 { src0[src_cn.a_i() + src_channels] } else { max_colors }; a2 = if src_channels == 4 { src1[src_cn.a_i()] } else { max_colors }; a3 = if src_channels == 4 { src1[src_cn.a_i() + src_channels] } else { max_colors }; } if let (Some(dst0), Some(dst1)) = ( dst0.chunks_exact_mut(dst_channels * 2).last(), 
dst1.chunks_exact_mut(dst_channels * 2).last(), ) { let v0_0 = vmlal_s16(rnd, r0, m0); let v0_1 = vmlal_s16(rnd, r1, m0); let v0_2 = vmlal_s16(rnd, r2, m0); let v0_3 = vmlal_s16(rnd, r3, m0); let v1_0 = vmlal_s16(v0_0, g0, m1); let v1_1 = vmlal_s16(v0_1, g1, m1); let v1_2 = vmlal_s16(v0_2, g2, m1); let v1_3 = vmlal_s16(v0_3, g3, m1); let vr0 = vmlal_s16(v1_0, b0, m2); let vr1 = vmlal_s16(v1_1, b1, m2); let vr2 = vmlal_s16(v1_2, b2, m2); let vr3 = vmlal_s16(v1_3, b3, m2); let mut vr0 = vqshrun_n_s32::(vr0); let mut vr1 = vqshrun_n_s32::(vr1); let mut vr2 = vqshrun_n_s32::(vr2); let mut vr3 = vqshrun_n_s32::(vr3); vr0 = vmin_u16(vr0, v_max_value); vr1 = vmin_u16(vr1, v_max_value); vr2 = vmin_u16(vr2, v_max_value); vr3 = vmin_u16(vr3, v_max_value); dst0[dst_cn.r_i()] = self.profile.gamma[vget_lane_u16::<0>(vr0) as usize]; dst0[dst_cn.g_i()] = self.profile.gamma[vget_lane_u16::<1>(vr0) as usize]; dst0[dst_cn.b_i()] = self.profile.gamma[vget_lane_u16::<2>(vr0) as usize]; if dst_channels == 4 { dst0[dst_cn.a_i()] = a0; } dst0[dst_cn.r_i() + dst_channels] = self.profile.gamma[vget_lane_u16::<0>(vr1) as usize]; dst0[dst_cn.g_i() + dst_channels] = self.profile.gamma[vget_lane_u16::<1>(vr1) as usize]; dst0[dst_cn.b_i() + dst_channels] = self.profile.gamma[vget_lane_u16::<2>(vr0) as usize]; if dst_channels == 4 { dst0[dst_cn.a_i() + dst_channels] = a1; } dst1[dst_cn.r_i()] = self.profile.gamma[vget_lane_u16::<0>(vr2) as usize]; dst1[dst_cn.g_i()] = self.profile.gamma[vget_lane_u16::<1>(vr2) as usize]; dst1[dst_cn.b_i()] = self.profile.gamma[vget_lane_u16::<2>(vr2) as usize]; if dst_channels == 4 { dst1[dst_cn.a_i()] = a2; } dst1[dst_cn.r_i() + dst_channels] = self.profile.gamma[vget_lane_u16::<0>(vr3) as usize]; dst1[dst_cn.g_i() + dst_channels] = self.profile.gamma[vget_lane_u16::<1>(vr3) as usize]; dst1[dst_cn.b_i() + dst_channels] = self.profile.gamma[vget_lane_u16::<2>(vr3) as usize]; if dst_channels == 4 { dst1[dst_cn.a_i() + dst_channels] = a3; } } } for (src, dst) in 
src_remainder .chunks_exact(src_channels) .zip(dst_remainder.chunks_exact_mut(dst_channels)) { let rp = lut_lin.get_unchecked(src[src_cn.r_i()]._as_usize()); let gp = lut_lin.get_unchecked(src[src_cn.g_i()]._as_usize()); let bp = lut_lin.get_unchecked(src[src_cn.b_i()]._as_usize()); let r = vld1_dup_s16(rp); let g = vld1_dup_s16(gp); let b = vld1_dup_s16(bp); let a = if src_channels == 4 { src[src_cn.a_i()] } else { max_colors }; let v0 = vmlal_s16(rnd, r, m0); let v1 = vmlal_s16(v0, g, m1); let v = vmlal_s16(v1, b, m2); let mut vr0 = vqshrun_n_s32::(v); vr0 = vmin_u16(vr0, v_max_value); dst[dst_cn.r_i()] = self.profile.gamma[vget_lane_u16::<0>(vr0) as usize]; dst[dst_cn.g_i()] = self.profile.gamma[vget_lane_u16::<1>(vr0) as usize]; dst[dst_cn.b_i()] = self.profile.gamma[vget_lane_u16::<2>(vr0) as usize]; if dst_channels == 4 { dst[dst_cn.a_i()] = a; } } } Ok(()) } } moxcms-0.7.7/src/conversions/neon/t_lut3_to_3.rs000064400000000000000000000301601046102023000177470ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. 
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
use crate::conversions::LutBarycentricReduction;
use crate::conversions::interpolator::BarycentricWeight;
use crate::conversions::lut_transforms::Lut3x3Factory;
use crate::conversions::neon::interpolator::*;
use crate::conversions::neon::interpolator_q0_15::NeonAlignedI16x4;
use crate::conversions::neon::rgb_xyz::NeonAlignedF32;
use crate::conversions::neon::t_lut3_to_3_q0_15::TransformLut3x3NeonQ0_15;
use crate::transform::PointeeSizeExpressible;
use crate::{
    BarycentricWeightScale, CmsError, DataColorSpace, InterpolationMethod, Layout,
    TransformExecutor, TransformOptions,
};
use num_traits::AsPrimitive;
use std::arch::aarch64::*;
use std::marker::PhantomData;

// NOTE(review): several generic argument lists (`Vec<..>`, `Box<..>`, turbofish
// arguments) appear to have been lost from this copy of the file — restore them
// from the upstream source before building.

/// NEON executor that maps 3-channel pixels through a 3D LUT using `f32` math.
///
/// `SRC_LAYOUT` / `DST_LAYOUT` are `Layout` discriminants; `BIT_DEPTH` fixes the
/// integer output range `0..=(1 << BIT_DEPTH) - 1`.
struct TransformLut3x3Neon<
    T,
    U,
    const SRC_LAYOUT: u8,
    const DST_LAYOUT: u8,
    const GRID_SIZE: usize,
    const BIT_DEPTH: usize,
    const BINS: usize,
    const BARYCENTRIC_BINS: usize,
> {
    /// LUT nodes; the factory below stores one `NeonAlignedF32` per RGB triple.
    lut: Vec,
    _phantom: PhantomData,
    _phantom1: PhantomData,
    /// Interpolation used when the colour space does not force trilinear.
    interpolation_method: InterpolationMethod,
    /// Weights table handed to the interpolator on every pixel.
    weights: Box<[BarycentricWeight; BINS]>,
    /// Together with `is_linear`, decides whether trilinear interpolation is forced.
    color_space: DataColorSpace,
    is_linear: bool,
}

impl<
    T: Copy + AsPrimitive + Default + PointeeSizeExpressible,
    U: AsPrimitive,
    const SRC_LAYOUT: u8,
    const DST_LAYOUT: u8,
    const GRID_SIZE: usize,
    const BIT_DEPTH: usize,
    const BINS: usize,
    const BARYCENTRIC_BINS: usize,
> TransformLut3x3Neon
where
    f32: AsPrimitive,
    u32: AsPrimitive,
    (): LutBarycentricReduction,
{
    /// Converts every pixel of `src` into `dst` with the given `interpolator`.
    ///
    /// Alpha is copied from the source when it has four channels, otherwise it
    /// is set to `(1 << BIT_DEPTH) - 1`; it is written only when the
    /// destination has four channels. When `T::FINITE` the interpolated value
    /// is scaled to the bit depth, clamped from above and rounded to the
    /// nearest integer; otherwise the float lanes are stored as they are.
    #[inline(never)]
    fn transform_chunk(
        &self,
        src: &[T],
        dst: &mut [T],
        interpolator: Box,
    ) {
        unsafe {
            let src_cn = Layout::from(SRC_LAYOUT);
            let src_channels = src_cn.channels();

            let dst_cn = Layout::from(DST_LAYOUT);
            let dst_channels = dst_cn.channels();

            let value_scale = vdupq_n_f32(((1 << BIT_DEPTH) - 1) as f32);
            let max_value = ((1u32 << BIT_DEPTH) - 1).as_();

            for (src, dst) in src
                .chunks_exact(src_channels)
                .zip(dst.chunks_exact_mut(dst_channels))
            {
                // Reduce each channel to its index into the weights table.
                let x = <() as LutBarycentricReduction>::reduce::(
                    src[src_cn.r_i()],
                );
                let y = <() as LutBarycentricReduction>::reduce::(
                    src[src_cn.g_i()],
                );
                let z = <() as LutBarycentricReduction>::reduce::(
                    src[src_cn.b_i()],
                );

                let a = if src_channels == 4 {
                    src[src_cn.a_i()]
                } else {
                    max_value
                };

                let v = interpolator.inter3_neon(
                    &self.lut,
                    x.as_(),
                    y.as_(),
                    z.as_(),
                    self.weights.as_slice(),
                );
                if T::FINITE {
                    // r = v * scale + 0.5, clamped to the largest code value.
                    let mut r = vfmaq_f32(vdupq_n_f32(0.5f32), v.v, value_scale);
                    r = vminq_f32(r, value_scale);
                    // The 0.5 bias above already implements round-to-nearest,
                    // so convert with truncation. `vcvtaq_u32_f32` rounds to
                    // nearest (ties away) on its own and would round a second
                    // time, e.g. turning an exact 0.0 into 1.
                    let jvx = vcvtq_u32_f32(r);

                    dst[dst_cn.r_i()] = vgetq_lane_u32::<0>(jvx).as_();
                    dst[dst_cn.g_i()] = vgetq_lane_u32::<1>(jvx).as_();
                    dst[dst_cn.b_i()] = vgetq_lane_u32::<2>(jvx).as_();
                } else {
                    dst[dst_cn.r_i()] = vgetq_lane_f32::<0>(v.v).as_();
                    dst[dst_cn.g_i()] = vgetq_lane_f32::<1>(v.v).as_();
                    dst[dst_cn.b_i()] = vgetq_lane_f32::<2>(v.v).as_();
                }
                if dst_channels == 4 {
                    dst[dst_cn.a_i()] = a;
                }
            }
        }
    }
}

impl<
    T: Copy + AsPrimitive + Default + PointeeSizeExpressible,
    U: AsPrimitive,
    const SRC_LAYOUT: u8,
    const DST_LAYOUT: u8,
    const GRID_SIZE: usize,
    const BIT_DEPTH: usize,
    const BINS: usize,
    const BARYCENTRIC_BINS: usize,
> TransformExecutor
    for TransformLut3x3Neon<
        T,
        U,
        SRC_LAYOUT,
        DST_LAYOUT,
        GRID_SIZE,
        BIT_DEPTH,
        BINS,
        BARYCENTRIC_BINS,
    >
where
    f32: AsPrimitive,
    u32: AsPrimitive,
    (): LutBarycentricReduction,
{
    /// Validates the buffers and runs the LUT transform.
    ///
    /// # Errors
    ///
    /// * `CmsError::LaneMultipleOfChannels` — `src` or `dst` length is not a
    ///   multiple of its channel count.
    /// * `CmsError::LaneSizeMismatch` — the two buffers hold a different
    ///   number of pixels.
    fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
        let src_cn = Layout::from(SRC_LAYOUT);
        let src_channels = src_cn.channels();

        let dst_cn = Layout::from(DST_LAYOUT);
        let dst_channels = dst_cn.channels();
        if src.len() % src_channels != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }
        if dst.len() % dst_channels != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }
        let src_chunks = src.len() / src_channels;
        let dst_chunks = dst.len() / dst_channels;
        if src_chunks != dst_chunks {
            return Err(CmsError::LaneSizeMismatch);
        }

        // Lab, XYZ and linear RGB always use trilinear interpolation; the
        // configured method applies to everything else.
        if self.color_space == DataColorSpace::Lab
            || (self.is_linear && self.color_space == DataColorSpace::Rgb)
            || self.color_space == DataColorSpace::Xyz
        {
            self.transform_chunk(src, dst, Box::new(TrilinearNeon:: {}));
        } else {
            match self.interpolation_method {
                #[cfg(feature = "options")]
                InterpolationMethod::Tetrahedral => {
                    self.transform_chunk(src, dst, Box::new(TetrahedralNeon:: {}));
                }
                #[cfg(feature = "options")]
                InterpolationMethod::Pyramid => {
                    self.transform_chunk(src, dst, Box::new(PyramidalNeon:: {}));
                }
                #[cfg(feature = "options")]
                InterpolationMethod::Prism => {
                    self.transform_chunk(src, dst, Box::new(PrismaticNeon:: {}));
                }
                InterpolationMethod::Linear => {
                    self.transform_chunk(src, dst, Box::new(TrilinearNeon:: {}));
                }
            }
        }

        Ok(())
    }
}

/// Factory producing the NEON 3x3 LUT executors defined in this module.
pub(crate) struct NeonLut3x3Factory {}

impl Lut3x3Factory for NeonLut3x3Factory {
    /// Builds a boxed executor for `lut` (consumed as consecutive triples).
    ///
    /// The fixed-point `TransformLut3x3NeonQ0_15` is chosen when
    /// `options.prefer_fixed_point` is set, `BIT_DEPTH < 16` and the CPU
    /// reports the `rdm` feature; otherwise the `f32` `TransformLut3x3Neon`
    /// is used. `options.barycentric_weight_scale` selects the 256-entry or
    /// the 65536-entry weights table.
    fn make_transform_3x3<
        T: Copy + AsPrimitive + Default + PointeeSizeExpressible + 'static + Send + Sync,
        const SRC_LAYOUT: u8,
        const DST_LAYOUT: u8,
        const GRID_SIZE: usize,
        const BIT_DEPTH: usize,
    >(
        lut: Vec,
        options: TransformOptions,
        color_space: DataColorSpace,
        is_linear: bool,
    ) -> Box + Send + Sync>
    where
        f32: AsPrimitive,
        u32: AsPrimitive,
        (): LutBarycentricReduction,
        (): LutBarycentricReduction,
    {
        if options.prefer_fixed_point
            && BIT_DEPTH < 16
            && std::arch::is_aarch64_feature_detected!("rdm")
        {
            // Quantisation scale: the full code range for integer pixels,
            // 14 bits of precision for floating-point pixels.
            let q: f32 = if T::FINITE {
                ((1i32 << BIT_DEPTH as i32) - 1) as f32
            } else {
                ((1i32 << 14i32) - 1) as f32
            };
            let lut = lut
                .chunks_exact(3)
                .map(|x| {
                    NeonAlignedI16x4([
                        (x[0] * q).round() as i16,
                        (x[1] * q).round() as i16,
                        (x[2] * q).round() as i16,
                        0,
                    ])
                })
                .collect::>();
            return match options.barycentric_weight_scale {
                BarycentricWeightScale::Low => Box::new(TransformLut3x3NeonQ0_15::<
                    T,
                    u8,
                    SRC_LAYOUT,
                    DST_LAYOUT,
                    GRID_SIZE,
                    BIT_DEPTH,
                    256,
                    256,
                > {
                    lut,
                    _phantom: PhantomData,
                    _phantom1: PhantomData,
                    interpolation_method: options.interpolation_method,
                    weights: BarycentricWeight::::create_ranged_256::(),
                    color_space,
                    is_linear,
                }),
                #[cfg(feature = "options")]
                BarycentricWeightScale::High => Box::new(TransformLut3x3NeonQ0_15::<
                    T,
                    u16,
                    SRC_LAYOUT,
                    DST_LAYOUT,
                    GRID_SIZE,
                    BIT_DEPTH,
                    65536,
                    65536,
                > {
                    lut,
                    _phantom: PhantomData,
                    _phantom1: PhantomData,
                    interpolation_method: options.interpolation_method,
                    weights: BarycentricWeight::::create_binned::(),
                    color_space,
                    is_linear,
                }),
            };
        }
        // Floating-point path: pad every RGB triple to four lanes.
        let lut = lut
            .chunks_exact(3)
            .map(|x| NeonAlignedF32([x[0], x[1], x[2], 0f32]))
            .collect::>();
        match options.barycentric_weight_scale {
            BarycentricWeightScale::Low => Box::new(TransformLut3x3Neon::<
                T,
                u8,
                SRC_LAYOUT,
                DST_LAYOUT,
                GRID_SIZE,
                BIT_DEPTH,
                256,
                256,
            > {
                lut,
                _phantom: PhantomData,
                _phantom1: PhantomData,
                interpolation_method: options.interpolation_method,
                weights: BarycentricWeight::::create_ranged_256::(),
                color_space,
                is_linear,
            }),
            #[cfg(feature = "options")]
            BarycentricWeightScale::High => Box::new(TransformLut3x3Neon::<
                T,
                u16,
                SRC_LAYOUT,
                DST_LAYOUT,
                GRID_SIZE,
                BIT_DEPTH,
                65536,
                65536,
            > {
                lut,
                _phantom: PhantomData,
                _phantom1: PhantomData,
                interpolation_method: options.interpolation_method,
                weights: BarycentricWeight::::create_binned::(),
                color_space,
                is_linear,
            }),
        }
    }
}
moxcms-0.7.7/src/conversions/neon/t_lut3_to_3_q0_15.rs000064400000000000000000000212221046102023000206530ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. 
* // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/
use crate::conversions::LutBarycentricReduction;
use crate::conversions::interpolator::BarycentricWeight;
use crate::conversions::neon::interpolator_q0_15::*;
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, DataColorSpace, InterpolationMethod, Layout, TransformExecutor};
use num_traits::AsPrimitive;
use std::arch::aarch64::*;
use std::marker::PhantomData;

// NOTE(review): several generic argument lists (`Vec<..>`, `Box<..>`, turbofish
// arguments) appear to have been lost from this copy of the file — restore them
// from the upstream source before building.

/// NEON executor that maps 3-channel pixels through a 3D LUT whose nodes are
/// stored as 16-bit integers (fixed-point path).
///
/// `SRC_LAYOUT` / `DST_LAYOUT` are `Layout` discriminants; `BIT_DEPTH` fixes the
/// integer output range `0..=(1 << BIT_DEPTH) - 1`.
pub(crate) struct TransformLut3x3NeonQ0_15<
    T,
    U,
    const SRC_LAYOUT: u8,
    const DST_LAYOUT: u8,
    const GRID_SIZE: usize,
    const BIT_DEPTH: usize,
    const BINS: usize,
    const BARYCENTRIC_BINS: usize,
> {
    /// LUT nodes handed to the interpolator.
    pub(crate) lut: Vec,
    pub(crate) _phantom: PhantomData,
    pub(crate) _phantom1: PhantomData,
    /// Interpolation used when the colour space does not force trilinear.
    pub(crate) interpolation_method: InterpolationMethod,
    /// Weights table handed to the interpolator on every pixel.
    pub(crate) weights: Box<[BarycentricWeight; BINS]>,
    /// Together with `is_linear`, decides whether trilinear interpolation is forced.
    pub(crate) color_space: DataColorSpace,
    pub(crate) is_linear: bool,
}

impl<
    T: Copy + AsPrimitive + Default + PointeeSizeExpressible,
    U: AsPrimitive,
    const SRC_LAYOUT: u8,
    const DST_LAYOUT: u8,
    const GRID_SIZE: usize,
    const BIT_DEPTH: usize,
    const BINS: usize,
    const BARYCENTRIC_BINS: usize,
>
    TransformLut3x3NeonQ0_15<
        T,
        U,
        SRC_LAYOUT,
        DST_LAYOUT,
        GRID_SIZE,
        BIT_DEPTH,
        BINS,
        BARYCENTRIC_BINS,
    >
where
    f32: AsPrimitive,
    u32: AsPrimitive,
    (): LutBarycentricReduction,
{
    /// Converts every pixel of `src` into `dst` with the given `interpolator`.
    ///
    /// Alpha is copied from the source when it has four channels, otherwise it
    /// is set to `(1 << BIT_DEPTH) - 1`; it is written only when the
    /// destination has four channels. When `T::FINITE` the interpolated lanes
    /// are clamped to `0..=(1 << BIT_DEPTH) - 1` and stored as integers;
    /// otherwise they are widened to `f32` and multiplied by
    /// `1 / ((1 << 14) - 1)`.
    ///
    /// # Safety
    ///
    /// The function is compiled with the `rdm` target feature enabled, so the
    /// caller must make sure the running CPU supports it.
    #[allow(unused_unsafe)]
    #[inline(never)]
    #[target_feature(enable = "rdm")]
    unsafe fn transform_chunk(
        &self,
        src: &[T],
        dst: &mut [T],
        interpolator: Box,
    ) {
        unsafe {
            let src_cn = Layout::from(SRC_LAYOUT);
            let src_channels = src_cn.channels();

            let dst_cn = Layout::from(DST_LAYOUT);
            let dst_channels = dst_cn.channels();

            // Scale used to turn the integer lanes back into floats.
            let f_value_scale = vdupq_n_f32(1. / ((1 << 14i32) - 1) as f32);
            let max_value = ((1u32 << BIT_DEPTH) - 1).as_();
            // Upper clamp for the integer lanes.
            let v_max_scale = if T::FINITE {
                vdup_n_s16(((1i32 << BIT_DEPTH) - 1) as i16)
            } else {
                vdup_n_s16(((1i32 << 14i32) - 1) as i16)
            };

            for (src, dst) in src
                .chunks_exact(src_channels)
                .zip(dst.chunks_exact_mut(dst_channels))
            {
                // Reduce each channel to its index into the weights table.
                let x = <() as LutBarycentricReduction>::reduce::(
                    src[src_cn.r_i()],
                );
                let y = <() as LutBarycentricReduction>::reduce::(
                    src[src_cn.g_i()],
                );
                let z = <() as LutBarycentricReduction>::reduce::(
                    src[src_cn.b_i()],
                );

                let a = if src_channels == 4 {
                    src[src_cn.a_i()]
                } else {
                    max_value
                };

                let v = interpolator.inter3_neon(
                    &self.lut,
                    x.as_(),
                    y.as_(),
                    z.as_(),
                    self.weights.as_slice(),
                );
                if T::FINITE {
                    // Clamp to [0, max code value] before storing.
                    let mut o = vmax_s16(v.v, vdup_n_s16(0));
                    o = vmin_s16(o, v_max_scale);
                    dst[dst_cn.r_i()] = (vget_lane_s16::<0>(o) as u32).as_();
                    dst[dst_cn.g_i()] = (vget_lane_s16::<1>(o) as u32).as_();
                    dst[dst_cn.b_i()] = (vget_lane_s16::<2>(o) as u32).as_();
                } else {
                    // Widen to i32, convert to f32 and rescale; no clamping here.
                    let o = vcvtq_f32_s32(vmovl_s16(v.v));
                    let r = vmulq_f32(o, f_value_scale);
                    dst[dst_cn.r_i()] = vgetq_lane_f32::<0>(r).as_();
                    dst[dst_cn.g_i()] = vgetq_lane_f32::<1>(r).as_();
                    dst[dst_cn.b_i()] = vgetq_lane_f32::<2>(r).as_();
                }
                if dst_channels == 4 {
                    dst[dst_cn.a_i()] = a;
                }
            }
        }
    }
}

impl<
    T: Copy + AsPrimitive + Default + PointeeSizeExpressible,
    U: AsPrimitive,
    const SRC_LAYOUT: u8,
    const DST_LAYOUT: u8,
    const GRID_SIZE: usize,
    const BIT_DEPTH: usize,
    const BINS: usize,
    const BARYCENTRIC_BINS: usize,
> TransformExecutor
    for TransformLut3x3NeonQ0_15<
        T,
        U,
        SRC_LAYOUT,
        DST_LAYOUT,
        GRID_SIZE,
        BIT_DEPTH,
        BINS,
        BARYCENTRIC_BINS,
    >
where
    f32: AsPrimitive,
    u32: AsPrimitive,
    (): LutBarycentricReduction,
{
    /// Validates the buffers and runs the LUT transform.
    ///
    /// # Errors
    ///
    /// * `CmsError::LaneMultipleOfChannels` — `src` or `dst` length is not a
    ///   multiple of its channel count.
    /// * `CmsError::LaneSizeMismatch` — the two buffers hold a different
    ///   number of pixels.
    fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
        let src_cn = Layout::from(SRC_LAYOUT);
        let src_channels = src_cn.channels();

        let dst_cn = Layout::from(DST_LAYOUT);
        let dst_channels = dst_cn.channels();
        if src.len() % src_channels != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }
        if dst.len() % dst_channels != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }
        let src_chunks = src.len() / src_channels;
        let dst_chunks = dst.len() / dst_channels;
        if src_chunks != dst_chunks {
            return Err(CmsError::LaneSizeMismatch);
        }

        // NOTE(review): `transform_chunk` requires the `rdm` CPU feature. The
        // NEON factory in t_lut3_to_3.rs only builds this type after detecting
        // it; presumably that is the only constructor — confirm, since the
        // fields are `pub(crate)`.
        unsafe {
            // Lab, XYZ and linear RGB always use trilinear interpolation; the
            // configured method applies to everything else.
            if self.color_space == DataColorSpace::Lab
                || (self.is_linear && self.color_space == DataColorSpace::Rgb)
                || self.color_space == DataColorSpace::Xyz
            {
                self.transform_chunk(src, dst, Box::new(TrilinearNeonQ0_15:: {}));
            } else {
                match self.interpolation_method {
                    #[cfg(feature = "options")]
                    InterpolationMethod::Tetrahedral => {
                        self.transform_chunk(
                            src,
                            dst,
                            Box::new(TetrahedralNeonQ0_15:: {}),
                        );
                    }
                    #[cfg(feature = "options")]
                    InterpolationMethod::Pyramid => {
                        self.transform_chunk(
                            src,
                            dst,
                            Box::new(PyramidalNeonQ0_15:: {}),
                        );
                    }
                    #[cfg(feature = "options")]
                    InterpolationMethod::Prism => {
                        self.transform_chunk(
                            src,
                            dst,
                            Box::new(PrismaticNeonQ0_15:: {}),
                        );
                    }
                    InterpolationMethod::Linear => {
                        self.transform_chunk(
                            src,
                            dst,
                            Box::new(TrilinearNeonQ0_15:: {}),
                        );
                    }
                }
            }
        }

        Ok(())
    }
}
moxcms-0.7.7/src/conversions/prelude_lut_xyz_rgb.rs000064400000000000000000000274401046102023000207510ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 4/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. 
* // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ use crate::conversions::lut3x4::create_lut3_samples; use crate::err::try_vec; use crate::mlaf::mlaf; use crate::trc::ToneCurveEvaluator; use crate::{ CmsError, ColorProfile, GammaLutInterpolate, InPlaceStage, Matrix3f, PointeeSizeExpressible, RenderingIntent, Rgb, TransformOptions, filmlike_clip, }; use num_traits::AsPrimitive; use std::marker::PhantomData; pub(crate) struct XyzToRgbStage { pub(crate) r_gamma: Box<[T; 65536]>, pub(crate) g_gamma: Box<[T; 65536]>, pub(crate) b_gamma: Box<[T; 65536]>, pub(crate) matrices: Vec, pub(crate) intent: RenderingIntent, pub(crate) bit_depth: usize, pub(crate) gamma_lut: usize, } impl> InPlaceStage for XyzToRgbStage { fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> { assert!(self.bit_depth > 0); if !self.matrices.is_empty() { let m = self.matrices[0]; for dst in dst.chunks_exact_mut(3) { let x = dst[0]; let y = dst[1]; let z = dst[2]; dst[0] = mlaf(mlaf(x * m.v[0][0], y, m.v[0][1]), z, m.v[0][2]); dst[1] = mlaf(mlaf(x * m.v[1][0], y, m.v[1][1]), z, m.v[1][2]); dst[2] = mlaf(mlaf(x * m.v[2][0], y, m.v[2][1]), z, m.v[2][2]); } } for m in self.matrices.iter().skip(1) { for dst in dst.chunks_exact_mut(3) { let x = 
dst[0]; let y = dst[1]; let z = dst[2]; dst[0] = mlaf(mlaf(x * m.v[0][0], y, m.v[0][1]), z, m.v[0][2]); dst[1] = mlaf(mlaf(x * m.v[1][0], y, m.v[1][1]), z, m.v[1][2]); dst[2] = mlaf(mlaf(x * m.v[2][0], y, m.v[2][1]), z, m.v[2][2]); } } let max_colors = (1 << self.bit_depth) - 1; let color_scale = 1f32 / max_colors as f32; let lut_cap = (self.gamma_lut - 1) as f32; if self.intent != RenderingIntent::AbsoluteColorimetric { for dst in dst.chunks_exact_mut(3) { let mut rgb = Rgb::new(dst[0], dst[1], dst[2]); if rgb.is_out_of_gamut() { rgb = filmlike_clip(rgb); } let r = mlaf(0.5f32, rgb.r, lut_cap).min(lut_cap).max(0f32) as u16; let g = mlaf(0.5f32, rgb.g, lut_cap).min(lut_cap).max(0f32) as u16; let b = mlaf(0.5f32, rgb.b, lut_cap).min(lut_cap).max(0f32) as u16; dst[0] = self.r_gamma[r as usize].as_() * color_scale; dst[1] = self.g_gamma[g as usize].as_() * color_scale; dst[2] = self.b_gamma[b as usize].as_() * color_scale; } } else { for dst in dst.chunks_exact_mut(3) { let rgb = Rgb::new(dst[0], dst[1], dst[2]); let r = mlaf(0.5f32, rgb.r, lut_cap).min(lut_cap).max(0f32) as u16; let g = mlaf(0.5f32, rgb.g, lut_cap).min(lut_cap).max(0f32) as u16; let b = mlaf(0.5f32, rgb.b, lut_cap).min(lut_cap).max(0f32) as u16; dst[0] = self.r_gamma[r as usize].as_() * color_scale; dst[1] = self.g_gamma[g as usize].as_() * color_scale; dst[2] = self.b_gamma[b as usize].as_() * color_scale; } } Ok(()) } } pub(crate) struct XyzToRgbStageExtended { pub(crate) gamma_evaluator: Box, pub(crate) matrices: Vec, pub(crate) phantom_data: PhantomData, } impl> InPlaceStage for XyzToRgbStageExtended { fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> { if !self.matrices.is_empty() { let m = self.matrices[0]; for dst in dst.chunks_exact_mut(3) { let x = dst[0]; let y = dst[1]; let z = dst[2]; dst[0] = mlaf(mlaf(x * m.v[0][0], y, m.v[0][1]), z, m.v[0][2]); dst[1] = mlaf(mlaf(x * m.v[1][0], y, m.v[1][1]), z, m.v[1][2]); dst[2] = mlaf(mlaf(x * m.v[2][0], y, m.v[2][1]), z, m.v[2][2]); } } 
for m in self.matrices.iter().skip(1) { for dst in dst.chunks_exact_mut(3) { let x = dst[0]; let y = dst[1]; let z = dst[2]; dst[0] = mlaf(mlaf(x * m.v[0][0], y, m.v[0][1]), z, m.v[0][2]); dst[1] = mlaf(mlaf(x * m.v[1][0], y, m.v[1][1]), z, m.v[1][2]); dst[2] = mlaf(mlaf(x * m.v[2][0], y, m.v[2][1]), z, m.v[2][2]); } } for dst in dst.chunks_exact_mut(3) { let mut rgb = Rgb::new(dst[0], dst[1], dst[2]); rgb = self.gamma_evaluator.evaluate_tristimulus(rgb); dst[0] = rgb.r.as_(); dst[1] = rgb.g.as_(); dst[2] = rgb.b.as_(); } Ok(()) } } struct RgbLinearizationStage { r_lin: Box<[f32; LINEAR_CAP]>, g_lin: Box<[f32; LINEAR_CAP]>, b_lin: Box<[f32; LINEAR_CAP]>, _phantom: PhantomData, bit_depth: usize, } impl< T: Clone + AsPrimitive + PointeeSizeExpressible, const LINEAR_CAP: usize, const SAMPLES: usize, > RgbLinearizationStage { fn transform(&self, src: &[T], dst: &mut [f32]) -> Result<(), CmsError> { if src.len() % 3 != 0 { return Err(CmsError::LaneMultipleOfChannels); } if dst.len() % 3 != 0 { return Err(CmsError::LaneMultipleOfChannels); } let scale = if T::FINITE { ((1 << self.bit_depth) - 1) as f32 / (SAMPLES as f32 - 1f32) } else { (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32 / (SAMPLES as f32 - 1f32) }; let capped_value = if T::FINITE { (1 << self.bit_depth) - 1 } else { T::NOT_FINITE_LINEAR_TABLE_SIZE - 1 }; for (src, dst) in src.chunks_exact(3).zip(dst.chunks_exact_mut(3)) { let j_r = src[0].as_() as f32 * scale; let j_g = src[1].as_() as f32 * scale; let j_b = src[2].as_() as f32 * scale; dst[0] = self.r_lin[(j_r.round().max(0.0).min(capped_value as f32) as u16) as usize]; dst[1] = self.g_lin[(j_g.round().max(0.0).min(capped_value as f32) as u16) as usize]; dst[2] = self.b_lin[(j_b.round().max(0.0).min(capped_value as f32) as u16) as usize]; } Ok(()) } } pub(crate) fn create_rgb_lin_lut< T: Copy + Default + AsPrimitive + Send + Sync + AsPrimitive + PointeeSizeExpressible, const BIT_DEPTH: usize, const LINEAR_CAP: usize, const GRID_SIZE: usize, >( source: 
&ColorProfile, opts: TransformOptions, ) -> Result, CmsError> where u32: AsPrimitive, f32: AsPrimitive, { let lut_origins = create_lut3_samples::(); let lin_r = source.build_r_linearize_table::(opts.allow_use_cicp_transfer)?; let lin_g = source.build_g_linearize_table::(opts.allow_use_cicp_transfer)?; let lin_b = source.build_b_linearize_table::(opts.allow_use_cicp_transfer)?; let lin_stage = RgbLinearizationStage:: { r_lin: lin_r, g_lin: lin_g, b_lin: lin_b, _phantom: PhantomData, bit_depth: BIT_DEPTH, }; let mut lut = try_vec![0f32; lut_origins.len()]; lin_stage.transform(&lut_origins, &mut lut)?; let xyz_to_rgb = source.rgb_to_xyz_matrix(); let matrices = vec![ xyz_to_rgb.to_f32(), Matrix3f { v: [ [32768.0 / 65535.0, 0.0, 0.0], [0.0, 32768.0 / 65535.0, 0.0], [0.0, 0.0, 32768.0 / 65535.0], ], }, ]; let matrix_stage = crate::conversions::lut_transforms::MatrixStage { matrices }; matrix_stage.transform(&mut lut)?; Ok(lut) } pub(crate) fn prepare_inverse_lut_rgb_xyz< T: Copy + Default + AsPrimitive + Send + Sync + AsPrimitive + PointeeSizeExpressible + GammaLutInterpolate, const BIT_DEPTH: usize, const GAMMA_LUT: usize, >( dest: &ColorProfile, lut: &mut [f32], options: TransformOptions, ) -> Result<(), CmsError> where f32: AsPrimitive, u32: AsPrimitive, { if !T::FINITE { if let Some(extended_gamma) = dest.try_extended_gamma_evaluator() { let xyz_to_rgb = dest.rgb_to_xyz_matrix().inverse(); let mut matrices = vec![Matrix3f { v: [ [65535.0 / 32768.0, 0.0, 0.0], [0.0, 65535.0 / 32768.0, 0.0], [0.0, 0.0, 65535.0 / 32768.0], ], }]; matrices.push(xyz_to_rgb.to_f32()); let xyz_to_rgb_stage = XyzToRgbStageExtended:: { gamma_evaluator: extended_gamma, matrices, phantom_data: PhantomData, }; xyz_to_rgb_stage.transform(lut)?; return Ok(()); } } let gamma_map_r = dest.build_gamma_table::( &dest.red_trc, options.allow_use_cicp_transfer, )?; let gamma_map_g = dest.build_gamma_table::( &dest.green_trc, options.allow_use_cicp_transfer, )?; let gamma_map_b = 
dest.build_gamma_table::( &dest.blue_trc, options.allow_use_cicp_transfer, )?; let xyz_to_rgb = dest.rgb_to_xyz_matrix().inverse(); let mut matrices = vec![Matrix3f { v: [ [65535.0 / 32768.0, 0.0, 0.0], [0.0, 65535.0 / 32768.0, 0.0], [0.0, 0.0, 65535.0 / 32768.0], ], }]; matrices.push(xyz_to_rgb.to_f32()); let xyz_to_rgb_stage = XyzToRgbStage:: { r_gamma: gamma_map_r, g_gamma: gamma_map_g, b_gamma: gamma_map_b, matrices, intent: options.rendering_intent, gamma_lut: GAMMA_LUT, bit_depth: BIT_DEPTH, }; xyz_to_rgb_stage.transform(lut)?; Ok(()) } moxcms-0.7.7/src/conversions/rgb2gray.rs000064400000000000000000000146141046102023000163770ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
use crate::mlaf::mlaf;
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, Layout, TransformExecutor, Vector3f};
use num_traits::AsPrimitive;

// NOTE(review): several generic argument lists (on the structs, `Box<..>` and
// the `impl` target) appear to have been lost from this copy of the file —
// restore them from the upstream source before building.

/// Lookup tables used by the RGB -> gray conversion: one linearization table
/// per RGB channel and one gamma table for the gray output.
#[derive(Clone)]
pub(crate) struct ToneReproductionRgbToGray {
    pub(crate) r_linear: Box<[f32; BUCKET]>,
    pub(crate) g_linear: Box<[f32; BUCKET]>,
    pub(crate) b_linear: Box<[f32; BUCKET]>,
    pub(crate) gray_gamma: Box<[T; 65536]>,
}

/// Executor converting RGB(A) pixels to Gray / GrayAlpha pixels.
#[derive(Clone)]
struct TransformRgbToGrayExecutor<
    T,
    const SRC_LAYOUT: u8,
    const DST_LAYOUT: u8,
    const BUCKET: usize,
> {
    trc_box: ToneReproductionRgbToGray,
    /// Per-channel weights of the linear R, G and B contributions.
    weights: Vector3f,
    /// Bit depth used to derive the alpha written when the source has none.
    bit_depth: usize,
    /// Number of `gray_gamma` entries that are addressed.
    gamma_lut: usize,
}

/// Builds the boxed RGB(A) -> Gray(Alpha) executor for the given layouts.
///
/// # Panics
///
/// Hits `unreachable!()` unless `src_layout` is `Rgb` or `Rgba` and
/// `dst_layout` is `Gray` or `GrayAlpha`.
pub(crate) fn make_rgb_to_gray<
    T: Copy + Default + PointeeSizeExpressible + Send + Sync + 'static,
    const BUCKET: usize,
>(
    src_layout: Layout,
    dst_layout: Layout,
    trc: ToneReproductionRgbToGray,
    weights: Vector3f,
    gamma_lut: usize,
    bit_depth: usize,
) -> Box + Send + Sync>
where
    u32: AsPrimitive,
{
    match src_layout {
        Layout::Rgb => match dst_layout {
            Layout::Rgb => unreachable!(),
            Layout::Rgba => unreachable!(),
            Layout::Gray => Box::new(TransformRgbToGrayExecutor::<
                T,
                { Layout::Rgb as u8 },
                { Layout::Gray as u8 },
                BUCKET,
            > {
                trc_box: trc,
                weights,
                bit_depth,
                gamma_lut,
            }),
            Layout::GrayAlpha => Box::new(TransformRgbToGrayExecutor::<
                T,
                { Layout::Rgb as u8 },
                { Layout::GrayAlpha as u8 },
                BUCKET,
            > {
                trc_box: trc,
                weights,
                bit_depth,
                gamma_lut,
            }),
            _ => unreachable!(),
        },
        Layout::Rgba => match dst_layout {
            Layout::Rgb => unreachable!(),
            Layout::Rgba => unreachable!(),
            Layout::Gray => Box::new(TransformRgbToGrayExecutor::<
                T,
                { Layout::Rgba as u8 },
                { Layout::Gray as u8 },
                BUCKET,
            > {
                trc_box: trc,
                weights,
                bit_depth,
                gamma_lut,
            }),
            Layout::GrayAlpha => Box::new(TransformRgbToGrayExecutor::<
                T,
                { Layout::Rgba as u8 },
                { Layout::GrayAlpha as u8 },
                BUCKET,
            > {
                trc_box: trc,
                weights,
                bit_depth,
                gamma_lut,
            }),
            _ => unreachable!(),
        },
        Layout::Gray => unreachable!(),
        Layout::GrayAlpha => unreachable!(),
        _ => unreachable!(),
    }
}

impl<
    T: Copy + Default + PointeeSizeExpressible + 'static,
    const SRC_LAYOUT: u8,
    const DST_LAYOUT: u8,
    const BUCKET: usize,
> TransformExecutor for TransformRgbToGrayExecutor
where
    u32: AsPrimitive,
{
    /// Converts every RGB(A) pixel of `src` into a Gray(Alpha) pixel of `dst`.
    ///
    /// Each channel is linearized through its table, the weighted sum is
    /// clamped to `0.0..=1.0`, scaled by `gamma_lut - 1`, rounded by adding
    /// 0.5 and truncating, and looked up in `gray_gamma`. Alpha comes from the
    /// source when it has four channels, otherwise it is
    /// `(1 << bit_depth) - 1`; it is written only for two-channel output.
    ///
    /// # Errors
    ///
    /// * `CmsError::LaneSizeMismatch` — the buffers hold a different number
    ///   of pixels (checked first, using truncating division).
    /// * `CmsError::LaneMultipleOfChannels` — `src` or `dst` length is not a
    ///   multiple of its channel count.
    fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
        let src_cn = Layout::from(SRC_LAYOUT);
        let dst_cn = Layout::from(DST_LAYOUT);
        let src_channels = src_cn.channels();
        let dst_channels = dst_cn.channels();

        if src.len() / src_channels != dst.len() / dst_channels {
            return Err(CmsError::LaneSizeMismatch);
        }
        if src.len() % src_channels != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }
        if dst.len() % dst_channels != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }

        let scale_value = (self.gamma_lut - 1) as f32;
        let max_value = ((1u32 << self.bit_depth) - 1).as_();

        for (src, dst) in src
            .chunks_exact(src_channels)
            .zip(dst.chunks_exact_mut(dst_channels))
        {
            let r = self.trc_box.r_linear[src[src_cn.r_i()]._as_usize()];
            let g = self.trc_box.g_linear[src[src_cn.g_i()]._as_usize()];
            let b = self.trc_box.b_linear[src[src_cn.b_i()]._as_usize()];
            let a = if src_channels == 4 {
                src[src_cn.a_i()]
            } else {
                max_value
            };
            // 0.5 + clamp(w0*r + w1*g + w2*b, 0, 1) * scale_value
            let grey = mlaf(
                0.5,
                mlaf(
                    mlaf(self.weights.v[0] * r, self.weights.v[1], g),
                    self.weights.v[2],
                    b,
                )
                .min(1.)
                .max(0.),
                scale_value,
            );
            dst[0] = self.trc_box.gray_gamma[(grey as u16) as usize];
            if dst_channels == 2 {
                dst[1] = a;
            }
        }

        Ok(())
    }
}
moxcms-0.7.7/src/conversions/rgb2gray_extended.rs000064400000000000000000000147241046102023000202610ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/
use crate::mlaf::mlaf;
use crate::transform::PointeeSizeExpressible;
use crate::trc::ToneCurveEvaluator;
use crate::{CmsError, Layout, Rgb, TransformExecutor, Vector3f};
use num_traits::AsPrimitive;
use std::marker::PhantomData;

/// RGB(A) → Gray(A) executor that evaluates tone curves per pixel instead of
/// reading lookup tables.
///
/// NOTE(review): the `+ Send + Sync` bounds on the evaluator boxes are
/// reconstructed (required for the factory's `Send + Sync` return type unless
/// the trait already implies them) — confirm against `crate::trc`.
struct TransformRgbToGrayExtendedExecutor<T, const SRC_LAYOUT: u8, const DST_LAYOUT: u8> {
    linear_eval: Box<dyn ToneCurveEvaluator + Send + Sync>,
    gamma_eval: Box<dyn ToneCurveEvaluator + Send + Sync>,
    weights: Vector3f,
    _phantom: PhantomData<T>,
    bit_depth: usize,
}

/// Builds the boxed curve-evaluating RGB(A) → Gray(A) executor for the
/// requested layouts.
///
/// * `linear_eval` - applied to the source triple via `evaluate_tristimulus`.
/// * `gamma_eval` - applied to the clamped gray value via `evaluate_value`.
/// * `weights` - per-channel weights applied to the linearised R, G and B.
/// * `bit_depth` - used to derive the opaque alpha value `(1 << bit_depth) - 1`
///   when the source has no alpha channel.
///
/// # Panics
///
/// Panics via `unreachable!()` for any pair other than `Rgb`/`Rgba` source and
/// `Gray`/`GrayAlpha` destination.
pub(crate) fn make_rgb_to_gray_extended<
    T: Copy + Default + PointeeSizeExpressible + Send + Sync + 'static + AsPrimitive<f32>,
>(
    src_layout: Layout,
    dst_layout: Layout,
    linear_eval: Box<dyn ToneCurveEvaluator + Send + Sync>,
    gamma_eval: Box<dyn ToneCurveEvaluator + Send + Sync>,
    weights: Vector3f,
    bit_depth: usize,
) -> Box<dyn TransformExecutor<T> + Send + Sync>
where
    u32: AsPrimitive<T>,
    f32: AsPrimitive<T>,
{
    // One arm per supported pair; the layouts must be const generic arguments,
    // so each arm names its own instantiation.
    match (src_layout, dst_layout) {
        (Layout::Rgb, Layout::Gray) => Box::new(TransformRgbToGrayExtendedExecutor::<
            T,
            { Layout::Rgb as u8 },
            { Layout::Gray as u8 },
        > {
            linear_eval,
            gamma_eval,
            weights,
            _phantom: PhantomData,
            bit_depth,
        }),
        (Layout::Rgb, Layout::GrayAlpha) => Box::new(TransformRgbToGrayExtendedExecutor::<
            T,
            { Layout::Rgb as u8 },
            { Layout::GrayAlpha as u8 },
        > {
            linear_eval,
            gamma_eval,
            weights,
            _phantom: PhantomData,
            bit_depth,
        }),
        (Layout::Rgba, Layout::Gray) => Box::new(TransformRgbToGrayExtendedExecutor::<
            T,
            { Layout::Rgba as u8 },
            { Layout::Gray as u8 },
        > {
            linear_eval,
            gamma_eval,
            weights,
            _phantom: PhantomData,
            bit_depth,
        }),
        (Layout::Rgba, Layout::GrayAlpha) => Box::new(TransformRgbToGrayExtendedExecutor::<
            T,
            { Layout::Rgba as u8 },
            { Layout::GrayAlpha as u8 },
        > {
            linear_eval,
            gamma_eval,
            weights,
            _phantom: PhantomData,
            bit_depth,
        }),
        _ => unreachable!(),
    }
}

impl<
    T: Copy + Default + PointeeSizeExpressible + 'static + AsPrimitive<f32>,
    const SRC_LAYOUT: u8,
    const DST_LAYOUT: u8,
> TransformExecutor<T> for TransformRgbToGrayExtendedExecutor<T, SRC_LAYOUT, DST_LAYOUT>
where
    u32: AsPrimitive<T>,
    f32: AsPrimitive<T>,
{
    /// Converts interleaved RGB(A) pixels in `src` to Gray(A) pixels in `dst`.
    ///
    /// # Errors
    ///
    /// * `CmsError::LaneSizeMismatch` - `src` and `dst` hold a different number
    ///   of whole pixels.
    /// * `CmsError::LaneMultipleOfChannels` - either slice length is not a
    ///   multiple of its channel count.
    fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
        let src_cn = Layout::from(SRC_LAYOUT);
        let dst_cn = Layout::from(DST_LAYOUT);
        let src_channels = src_cn.channels();
        let dst_channels = dst_cn.channels();

        if src.len() / src_channels != dst.len() / dst_channels {
            return Err(CmsError::LaneSizeMismatch);
        }
        if src.len() % src_channels != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }
        if dst.len() % dst_channels != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }

        // Alpha written to the destination when the source carries none.
        let max_value: T = ((1u32 << self.bit_depth) - 1).as_();

        for (src, dst) in src
            .chunks_exact(src_channels)
            .zip(dst.chunks_exact_mut(dst_channels))
        {
            let in_tristimulus = Rgb::<f32>::new(
                src[src_cn.r_i()].as_(),
                src[src_cn.g_i()].as_(),
                src[src_cn.b_i()].as_(),
            );
            let lin_tristimulus = self.linear_eval.evaluate_tristimulus(in_tristimulus);
            let a = if src_channels == 4 {
                src[src_cn.a_i()]
            } else {
                max_value
            };

            // Weighted sum of the linearised channels, clamped to [0, 1].
            // NOTE(review): `mlaf(acc, x, y)` is presumably `acc + x * y` — confirm in crate::mlaf.
            let grey = mlaf(
                mlaf(
                    self.weights.v[0] * lin_tristimulus.r,
                    self.weights.v[1],
                    lin_tristimulus.g,
                ),
                self.weights.v[2],
                lin_tristimulus.b,
            )
            .min(1.)
            .max(0.);

            let gamma_value = self.gamma_eval.evaluate_value(grey);
            dst[0] = gamma_value.as_();
            if dst_channels == 2 {
                dst[1] = a;
            }
        }

        Ok(())
    }
}
moxcms-0.7.7/src/conversions/rgb_xyz_factory.rs000064400000000000000000000420221046102023000200650ustar 00000000000000/*
 * // Copyright (c) Radzivon Bartoshyk 4/2025. All rights reserved.
 * //
 * // Redistribution and use in source and binary forms, with or without modification,
 * // are permitted provided that the following conditions are met:
 * //
 * // 1. Redistributions of source code must retain the above copyright notice, this
 * // list of conditions and the following disclaimer.
 * //
 * // 2. Redistributions in binary form must reproduce the above copyright notice,
 * // this list of conditions and the following disclaimer in the documentation
 * // and/or other materials provided with the distribution.
 * //
 * // 3.
Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/
// NOTE(review): generic parameter/argument lists of the form `<Ident…>` appear
// to have been dropped from this text (see the unbalanced `>` in the return
// types and the empty `::(` turbofish calls). The tokens below are kept exactly
// as found; restore them from the upstream file before compiling.
use crate::conversions::TransformMatrixShaper;
use crate::conversions::rgbxyz::{
    TransformMatrixShaperOptimized, make_rgb_xyz_rgb_transform, make_rgb_xyz_rgb_transform_opt,
};
use crate::conversions::rgbxyz_fixed::{make_rgb_xyz_q2_13, make_rgb_xyz_q2_13_opt};
use crate::{CmsError, Layout, TransformExecutor, TransformOptions};
use num_traits::AsPrimitive;

// Fractional-bit count handed to the fixed-point executors as a const argument.
const FIXED_POINT_SCALE: i32 = 13; // Q2.13;

/// Per-sample-type factory for matrix-shaper transforms built from a
/// `TransformMatrixShaper` profile.
pub(crate) trait RgbXyzFactory + Default> {
    /// Builds an executor for the given layouts, or returns a `CmsError`.
    fn make_transform(
        src_layout: Layout,
        dst_layout: Layout,
        profile: TransformMatrixShaper,
        transform_options: TransformOptions,
    ) -> Result + Send + Sync>, CmsError>;
}

/// Per-sample-type factory for matrix-shaper transforms built from a
/// `TransformMatrixShaperOptimized` profile.
pub(crate) trait RgbXyzFactoryOpt + Default> {
    /// Builds an executor for the given layouts, or returns a `CmsError`.
    fn make_optimized_transform<
        const LINEAR_CAP: usize,
        const GAMMA_LUT: usize,
        const BIT_DEPTH: usize,
    >(
        src_layout: Layout,
        dst_layout: Layout,
        profile: TransformMatrixShaperOptimized,
        transform_options: TransformOptions,
    ) -> Result + Send + Sync>, CmsError>;
}

impl RgbXyzFactory for u16 {
    /// Fixed-point executors are considered only when `BIT_DEPTH < 16` and
    /// `prefer_fixed_point` is set; probe order is AVX2, SSE4.1, then NEON.
    /// Otherwise falls through to `make_rgb_xyz_rgb_transform`.
    fn make_transform(
        src_layout: Layout,
        dst_layout: Layout,
        profile: TransformMatrixShaper,
        transform_options: TransformOptions,
    ) -> Result + Send + Sync>, CmsError> {
        if BIT_DEPTH < 16 && transform_options.prefer_fixed_point {
            #[cfg(all(target_arch = "x86_64", feature = "avx"))]
            {
                use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q2_13_transform_avx2;
                if std::arch::is_x86_feature_detected!("avx2") {
                    return make_rgb_xyz_q2_13_transform_avx2::(
                        src_layout,
                        dst_layout,
                        profile,
                        GAMMA_LUT,
                        BIT_DEPTH,
                    );
                }
            }
            #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
            {
                use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q2_13_transform_sse_41;
                if std::arch::is_x86_feature_detected!("sse4.1") {
                    return make_rgb_xyz_q2_13_transform_sse_41::(
                        src_layout,
                        dst_layout,
                        profile,
                        GAMMA_LUT,
                        BIT_DEPTH,
                    );
                }
            }
            // On NEON builds this block returns unconditionally (no runtime probe).
            #[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
            {
                return make_rgb_xyz_q2_13::(
                    src_layout,
                    dst_layout,
                    profile,
                    GAMMA_LUT,
                    BIT_DEPTH,
                );
            }
        }
        make_rgb_xyz_rgb_transform::(
            src_layout,
            dst_layout,
            profile,
            GAMMA_LUT,
            BIT_DEPTH,
        )
    }
}

impl RgbXyzFactory for f32 {
    /// Same probe order as the `u16` factory, but gated only on
    /// `prefer_fixed_point` (no bit-depth condition).
    fn make_transform(
        src_layout: Layout,
        dst_layout: Layout,
        profile: TransformMatrixShaper,
        transform_options: TransformOptions,
    ) -> Result + Send + Sync>, CmsError> {
        if transform_options.prefer_fixed_point {
            #[cfg(all(target_arch = "x86_64", feature = "avx"))]
            {
                use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q2_13_transform_avx2;
                if std::arch::is_x86_feature_detected!("avx2") {
                    return make_rgb_xyz_q2_13_transform_avx2::(
                        src_layout,
                        dst_layout,
                        profile,
                        GAMMA_LUT,
                        BIT_DEPTH,
                    );
                }
            }
            #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
            {
                use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q2_13_transform_sse_41;
                if std::arch::is_x86_feature_detected!("sse4.1") {
                    return make_rgb_xyz_q2_13_transform_sse_41::(
                        src_layout,
                        dst_layout,
                        profile,
                        GAMMA_LUT,
                        BIT_DEPTH,
                    );
                }
            }
            #[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
            {
                return make_rgb_xyz_q2_13::(
                    src_layout,
                    dst_layout,
                    profile,
                    GAMMA_LUT,
                    BIT_DEPTH,
                );
            }
        }
        make_rgb_xyz_rgb_transform::(
            src_layout,
            dst_layout,
            profile,
            GAMMA_LUT,
            BIT_DEPTH,
        )
    }
}

impl RgbXyzFactory for f64 {
    /// Ignores the transform options and always delegates to
    /// `make_rgb_xyz_rgb_transform`.
    fn make_transform(
        src_layout: Layout,
        dst_layout: Layout,
        profile: TransformMatrixShaper,
        _: TransformOptions,
    ) -> Result + Send + Sync>, CmsError> {
        make_rgb_xyz_rgb_transform::(
            src_layout,
            dst_layout,
            profile,
            GAMMA_LUT,
            BIT_DEPTH,
        )
    }
}

impl RgbXyzFactory for u8 {
    /// With `prefer_fixed_point` set: AVX2, then SSE4.1, then the portable
    /// `make_rgb_xyz_q2_13`. Otherwise `make_rgb_xyz_rgb_transform`.
    /// Every call passes a literal bit depth of `8`.
    fn make_transform(
        src_layout: Layout,
        dst_layout: Layout,
        profile: TransformMatrixShaper,
        transform_options: TransformOptions,
    ) -> Result + Send + Sync>, CmsError> {
        if transform_options.prefer_fixed_point {
            #[cfg(all(target_arch = "x86_64", feature = "avx"))]
            {
                use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q2_13_transform_avx2;
                if std::arch::is_x86_feature_detected!("avx2") {
                    return make_rgb_xyz_q2_13_transform_avx2::(
                        src_layout,
                        dst_layout,
                        profile,
                        GAMMA_LUT,
                        8,
                    );
                }
            }
            #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
            {
                use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q2_13_transform_sse_41;
                if std::arch::is_x86_feature_detected!("sse4.1") {
                    return make_rgb_xyz_q2_13_transform_sse_41::(
                        src_layout,
                        dst_layout,
                        profile,
                        GAMMA_LUT,
                        8,
                    );
                }
            }
            make_rgb_xyz_q2_13::(
                src_layout,
                dst_layout,
                profile,
                GAMMA_LUT,
                8,
            )
        } else {
            make_rgb_xyz_rgb_transform::(
                src_layout,
                dst_layout,
                profile,
                GAMMA_LUT,
                8,
            )
        }
    }
}

// Optimized factories
impl RgbXyzFactoryOpt for u16 {
    /// Probe order: the NEON Q1.30 path (needs `BIT_DEPTH >= 12` and runtime
    /// `rdm`), then for `BIT_DEPTH < 16` the Q2.13 paths (AVX2, SSE4.1, NEON),
    /// both only when `prefer_fixed_point` is set. Falls through to
    /// `make_rgb_xyz_rgb_transform_opt`.
    fn make_optimized_transform<
        const LINEAR_CAP: usize,
        const GAMMA_LUT: usize,
        const BIT_DEPTH: usize,
    >(
        src_layout: Layout,
        dst_layout: Layout,
        profile: TransformMatrixShaperOptimized,
        transform_options: TransformOptions,
    ) -> Result + Send + Sync>, CmsError> {
        if BIT_DEPTH >= 12 && transform_options.prefer_fixed_point {
            #[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
            {
                if std::arch::is_aarch64_feature_detected!("rdm") {
                    use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q1_30_opt;
                    return make_rgb_xyz_q1_30_opt::(
                        src_layout,
                        dst_layout,
                        profile,
                        GAMMA_LUT,
                        BIT_DEPTH,
                    );
                }
            }
        }
        if BIT_DEPTH < 16 && transform_options.prefer_fixed_point {
            #[cfg(all(target_arch = "x86_64", feature = "avx"))]
            {
                use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q2_13_transform_avx2_opt;
                if std::arch::is_x86_feature_detected!("avx2") {
                    return make_rgb_xyz_q2_13_transform_avx2_opt::<
                        u16,
                        LINEAR_CAP,
                        FIXED_POINT_SCALE,
                    >(
                        src_layout, dst_layout, profile, GAMMA_LUT, BIT_DEPTH
                    );
                }
            }
            #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
            {
                use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q2_13_transform_sse_41_opt;
                if std::arch::is_x86_feature_detected!("sse4.1") {
                    return make_rgb_xyz_q2_13_transform_sse_41_opt::<
                        u16,
                        LINEAR_CAP,
                        FIXED_POINT_SCALE,
                    >(
                        src_layout, dst_layout, profile, GAMMA_LUT, BIT_DEPTH
                    );
                }
            }
            #[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
            {
                return make_rgb_xyz_q2_13_opt::(
                    src_layout,
                    dst_layout,
                    profile,
                    GAMMA_LUT,
                    BIT_DEPTH,
                );
            }
        }
        make_rgb_xyz_rgb_transform_opt::(
            src_layout,
            dst_layout,
            profile,
            GAMMA_LUT,
            BIT_DEPTH,
        )
    }
}

impl RgbXyzFactoryOpt for f32 {
    /// With `prefer_fixed_point` set: AVX2, then SSE4.1; on NEON builds picks
    /// Q1.30 when `rdm` is detected at runtime and Q2.13 otherwise. Falls
    /// through to `make_rgb_xyz_rgb_transform_opt`.
    fn make_optimized_transform<
        const LINEAR_CAP: usize,
        const GAMMA_LUT: usize,
        const BIT_DEPTH: usize,
    >(
        src_layout: Layout,
        dst_layout: Layout,
        profile: TransformMatrixShaperOptimized,
        transform_options: TransformOptions,
    ) -> Result + Send + Sync>, CmsError> {
        if transform_options.prefer_fixed_point {
            #[cfg(all(target_arch = "x86_64", feature = "avx"))]
            {
                use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q2_13_transform_avx2_opt;
                if std::arch::is_x86_feature_detected!("avx2") {
                    return make_rgb_xyz_q2_13_transform_avx2_opt::<
                        f32,
                        LINEAR_CAP,
                        FIXED_POINT_SCALE,
                    >(
                        src_layout, dst_layout, profile, GAMMA_LUT, BIT_DEPTH
                    );
                }
            }
            #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
            {
                use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q2_13_transform_sse_41_opt;
                if std::arch::is_x86_feature_detected!("sse4.1") {
                    return make_rgb_xyz_q2_13_transform_sse_41_opt::<
                        f32,
                        LINEAR_CAP,
                        FIXED_POINT_SCALE,
                    >(
                        src_layout, dst_layout, profile, GAMMA_LUT, BIT_DEPTH
                    );
                }
            }
            #[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
            {
                return if std::arch::is_aarch64_feature_detected!("rdm") {
                    use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q1_30_opt;
                    make_rgb_xyz_q1_30_opt::(
                        src_layout,
                        dst_layout,
                        profile,
                        GAMMA_LUT,
                        BIT_DEPTH,
                    )
                } else {
                    make_rgb_xyz_q2_13_opt::(
                        src_layout,
                        dst_layout,
                        profile,
                        GAMMA_LUT,
                        BIT_DEPTH,
                    )
                };
            }
        }
        make_rgb_xyz_rgb_transform_opt::(
            src_layout,
            dst_layout,
            profile,
            GAMMA_LUT,
            BIT_DEPTH,
        )
    }
}

impl RgbXyzFactoryOpt for f64 {
    /// Only the NEON Q1.30 path (runtime `rdm`, `prefer_fixed_point` set) is
    /// tried before falling through to `make_rgb_xyz_rgb_transform_opt`.
    fn make_optimized_transform<
        const LINEAR_CAP: usize,
        const GAMMA_LUT: usize,
        const BIT_DEPTH: usize,
    >(
        src_layout: Layout,
        dst_layout: Layout,
        profile: TransformMatrixShaperOptimized,
        transform_options: TransformOptions,
    ) -> Result + Send + Sync>, CmsError> {
        if transform_options.prefer_fixed_point {
            #[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
            {
                if std::arch::is_aarch64_feature_detected!("rdm") {
                    use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q1_30_opt;
                    return make_rgb_xyz_q1_30_opt::(
                        src_layout,
                        dst_layout,
                        profile,
                        GAMMA_LUT,
                        BIT_DEPTH,
                    );
                }
            }
        }
        make_rgb_xyz_rgb_transform_opt::(
            src_layout,
            dst_layout,
            profile,
            GAMMA_LUT,
            BIT_DEPTH,
        )
    }
}

impl RgbXyzFactoryOpt for u8 {
    /// With `prefer_fixed_point` set: AVX-512 (`avx512bw` + `avx512vl`), then
    /// AVX2, then SSE4.1, then the portable `make_rgb_xyz_q2_13_opt`.
    /// Otherwise `make_rgb_xyz_rgb_transform_opt`. Every call passes a literal
    /// bit depth of `8`; the `BIT_DEPTH` const parameter is not read here.
    fn make_optimized_transform<
        const LINEAR_CAP: usize,
        const GAMMA_LUT: usize,
        const BIT_DEPTH: usize,
    >(
        src_layout: Layout,
        dst_layout: Layout,
        profile: TransformMatrixShaperOptimized,
        transform_options: TransformOptions,
    ) -> Result + Send + Sync>, CmsError> {
        if transform_options.prefer_fixed_point {
            #[cfg(all(target_arch = "x86_64", feature = "avx512"))]
            {
                use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q2_13_transform_avx512_opt;
                if std::arch::is_x86_feature_detected!("avx512bw")
                    && std::arch::is_x86_feature_detected!("avx512vl")
                {
                    return make_rgb_xyz_q2_13_transform_avx512_opt::<
                        u8,
                        LINEAR_CAP,
                        FIXED_POINT_SCALE,
                    >(src_layout, dst_layout, profile, GAMMA_LUT, 8);
                }
            }
            #[cfg(all(target_arch = "x86_64", feature = "avx"))]
            {
                use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q2_13_transform_avx2_opt;
                if std::arch::is_x86_feature_detected!("avx2") {
                    return make_rgb_xyz_q2_13_transform_avx2_opt::<
                        u8,
                        LINEAR_CAP,
                        FIXED_POINT_SCALE,
                    >(src_layout, dst_layout, profile, GAMMA_LUT, 8);
                }
            }
            #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
            {
                use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q2_13_transform_sse_41_opt;
                if std::arch::is_x86_feature_detected!("sse4.1") {
                    return make_rgb_xyz_q2_13_transform_sse_41_opt::<
                        u8,
                        LINEAR_CAP,
                        FIXED_POINT_SCALE,
                    >(src_layout, dst_layout, profile, GAMMA_LUT, 8);
                }
            }
            make_rgb_xyz_q2_13_opt::(
                src_layout,
                dst_layout,
                profile,
                GAMMA_LUT,
                8,
            )
        } else {
            make_rgb_xyz_rgb_transform_opt::(
                src_layout,
                dst_layout,
                profile,
                GAMMA_LUT,
                8,
            )
        }
    }
}
moxcms-0.7.7/src/conversions/rgbxyz.rs000064400000000000000000000753021046102023000162060ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ use crate::{CmsError, Layout, Matrix3, Matrix3f, TransformExecutor}; use num_traits::AsPrimitive; pub(crate) struct TransformMatrixShaper { pub(crate) r_linear: Box<[f32; BUCKET]>, pub(crate) g_linear: Box<[f32; BUCKET]>, pub(crate) b_linear: Box<[f32; BUCKET]>, pub(crate) r_gamma: Box<[T; 65536]>, pub(crate) g_gamma: Box<[T; 65536]>, pub(crate) b_gamma: Box<[T; 65536]>, pub(crate) adaptation_matrix: Matrix3f, } impl TransformMatrixShaper { #[inline(never)] #[allow(dead_code)] fn convert_to_v(self) -> TransformMatrixShaperV { TransformMatrixShaperV { r_linear: self.r_linear.iter().copied().collect(), g_linear: self.g_linear.iter().copied().collect(), b_linear: self.b_linear.iter().copied().collect(), r_gamma: self.r_gamma, g_gamma: self.g_gamma, b_gamma: self.b_gamma, adaptation_matrix: self.adaptation_matrix, } } } #[allow(dead_code)] pub(crate) struct TransformMatrixShaperV { pub(crate) r_linear: Vec, pub(crate) g_linear: Vec, pub(crate) b_linear: Vec, pub(crate) r_gamma: Box<[T; 65536]>, pub(crate) g_gamma: Box<[T; 65536]>, pub(crate) b_gamma: Box<[T; 65536]>, pub(crate) adaptation_matrix: Matrix3f, } /// Low memory footprint optimized routine for matrix shaper profiles with the same /// Gamma and linear curves. pub(crate) struct TransformMatrixShaperOptimized { pub(crate) linear: Box<[f32; BUCKET]>, pub(crate) gamma: Box<[T; 65536]>, pub(crate) adaptation_matrix: Matrix3f, } #[allow(dead_code)] impl TransformMatrixShaperOptimized { fn convert_to_v(self) -> TransformMatrixShaperOptimizedV { TransformMatrixShaperOptimizedV { linear: self.linear.iter().copied().collect::>(), gamma: self.gamma, adaptation_matrix: self.adaptation_matrix, } } } /// Low memory footprint optimized routine for matrix shaper profiles with the same /// Gamma and linear curves. 
#[allow(dead_code)] pub(crate) struct TransformMatrixShaperOptimizedV { pub(crate) linear: Vec, pub(crate) gamma: Box<[T; 65536]>, pub(crate) adaptation_matrix: Matrix3f, } impl TransformMatrixShaper { #[inline(never)] #[allow(dead_code)] pub(crate) fn to_q2_13_n< R: Copy + 'static + Default, const PRECISION: i32, const LINEAR_CAP: usize, >( &self, gamma_lut: usize, bit_depth: usize, ) -> TransformMatrixShaperFixedPoint where f32: AsPrimitive, { let linear_scale = if T::FINITE { let lut_scale = (gamma_lut - 1) as f32 / ((1 << bit_depth) - 1) as f32; ((1 << bit_depth) - 1) as f32 * lut_scale } else { let lut_scale = (gamma_lut - 1) as f32 / (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32; (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32 * lut_scale }; let mut new_box_r = Box::new([R::default(); BUCKET]); let mut new_box_g = Box::new([R::default(); BUCKET]); let mut new_box_b = Box::new([R::default(); BUCKET]); for (dst, &src) in new_box_r.iter_mut().zip(self.r_linear.iter()) { *dst = (src * linear_scale).round().as_(); } for (dst, &src) in new_box_g.iter_mut().zip(self.g_linear.iter()) { *dst = (src * linear_scale).round().as_(); } for (dst, &src) in new_box_b.iter_mut().zip(self.b_linear.iter()) { *dst = (src * linear_scale).round().as_(); } let scale: f32 = (1i32 << PRECISION) as f32; let source_matrix = self.adaptation_matrix; let mut dst_matrix = Matrix3:: { v: [[0i16; 3]; 3] }; for i in 0..3 { for j in 0..3 { dst_matrix.v[i][j] = (source_matrix.v[i][j] * scale) as i16; } } TransformMatrixShaperFixedPoint { r_linear: new_box_r, g_linear: new_box_g, b_linear: new_box_b, r_gamma: self.r_gamma.clone(), g_gamma: self.g_gamma.clone(), b_gamma: self.b_gamma.clone(), adaptation_matrix: dst_matrix, } } #[inline(never)] #[allow(dead_code)] pub(crate) fn to_q2_13_i( &self, gamma_lut: usize, bit_depth: usize, ) -> TransformMatrixShaperFp where f32: AsPrimitive, { let linear_scale = if T::FINITE { let lut_scale = (gamma_lut - 1) as f32 / ((1 << bit_depth) - 1) as f32; ((1 << 
bit_depth) - 1) as f32 * lut_scale } else { let lut_scale = (gamma_lut - 1) as f32 / (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32; (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32 * lut_scale }; let new_box_r = self .r_linear .iter() .map(|&x| (x * linear_scale).round().as_()) .collect::>(); let new_box_g = self .g_linear .iter() .map(|&x| (x * linear_scale).round().as_()) .collect::>(); let new_box_b = self .b_linear .iter() .map(|&x| (x * linear_scale).round().as_()) .collect::>(); let scale: f32 = (1i32 << PRECISION) as f32; let source_matrix = self.adaptation_matrix; let mut dst_matrix = Matrix3:: { v: [[0i16; 3]; 3] }; for i in 0..3 { for j in 0..3 { dst_matrix.v[i][j] = (source_matrix.v[i][j] * scale) as i16; } } TransformMatrixShaperFp { r_linear: new_box_r, g_linear: new_box_g, b_linear: new_box_b, r_gamma: self.r_gamma.clone(), g_gamma: self.g_gamma.clone(), b_gamma: self.b_gamma.clone(), adaptation_matrix: dst_matrix, } } } impl TransformMatrixShaperOptimized { #[allow(dead_code)] pub(crate) fn to_q2_13_n< R: Copy + 'static + Default, const PRECISION: i32, const LINEAR_CAP: usize, >( &self, gamma_lut: usize, bit_depth: usize, ) -> TransformMatrixShaperFixedPointOpt where f32: AsPrimitive, { let linear_scale = if T::FINITE { let lut_scale = (gamma_lut - 1) as f32 / ((1 << bit_depth) - 1) as f32; ((1 << bit_depth) - 1) as f32 * lut_scale } else { let lut_scale = (gamma_lut - 1) as f32 / (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32; (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32 * lut_scale }; let mut new_box_linear = Box::new([R::default(); BUCKET]); for (dst, src) in new_box_linear.iter_mut().zip(self.linear.iter()) { *dst = (*src * linear_scale).round().as_(); } let scale: f32 = (1i32 << PRECISION) as f32; let source_matrix = self.adaptation_matrix; let mut dst_matrix = Matrix3:: { v: [[i16::default(); 3]; 3], }; for i in 0..3 { for j in 0..3 { dst_matrix.v[i][j] = (source_matrix.v[i][j] * scale) as i16; } } TransformMatrixShaperFixedPointOpt { linear: 
new_box_linear, gamma: self.gamma.clone(), adaptation_matrix: dst_matrix, } } #[allow(dead_code)] pub(crate) fn to_q2_13_i( &self, gamma_lut: usize, bit_depth: usize, ) -> TransformMatrixShaperFpOptVec where f32: AsPrimitive, { let linear_scale = if T::FINITE { let lut_scale = (gamma_lut - 1) as f32 / ((1 << bit_depth) - 1) as f32; ((1 << bit_depth) - 1) as f32 * lut_scale } else { let lut_scale = (gamma_lut - 1) as f32 / (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32; (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32 * lut_scale }; let new_box_linear = self .linear .iter() .map(|&x| (x * linear_scale).round().as_()) .collect::>(); let scale: f32 = (1i32 << PRECISION) as f32; let source_matrix = self.adaptation_matrix; let mut dst_matrix = Matrix3:: { v: [[i16::default(); 3]; 3], }; for i in 0..3 { for j in 0..3 { dst_matrix.v[i][j] = (source_matrix.v[i][j] * scale) as i16; } } TransformMatrixShaperFpOptVec { linear: new_box_linear, gamma: self.gamma.clone(), adaptation_matrix: dst_matrix, } } #[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))] pub(crate) fn to_q1_30_n( &self, gamma_lut: usize, bit_depth: usize, ) -> TransformMatrixShaperFpOptVec where f32: AsPrimitive, f64: AsPrimitive, { // It is important to scale 1 bit more to compensate vqrdmlah Q0.31, because we're going to use Q1.30 let table_size = if T::FINITE { (1 << bit_depth) - 1 } else { T::NOT_FINITE_LINEAR_TABLE_SIZE - 1 }; let ext_bp = if T::FINITE { bit_depth as u32 + 1 } else { let bp = (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1).count_ones(); bp + 1 }; let linear_scale = { let lut_scale = (gamma_lut - 1) as f64 / table_size as f64; ((1u32 << ext_bp) - 1) as f64 * lut_scale }; let new_box_linear = self .linear .iter() .map(|&v| (v as f64 * linear_scale).round().as_()) .collect::>(); let scale: f64 = (1i64 << PRECISION) as f64; let source_matrix = self.adaptation_matrix; let mut dst_matrix = Matrix3:: { v: [[i32::default(); 3]; 3], }; for i in 0..3 { for j in 0..3 { 
dst_matrix.v[i][j] = (source_matrix.v[i][j] as f64 * scale) as i32; } } TransformMatrixShaperFpOptVec { linear: new_box_linear, gamma: self.gamma.clone(), adaptation_matrix: dst_matrix, } } } #[allow(unused)] struct TransformMatrixShaperScalar< T: Clone, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, const LINEAR_CAP: usize, > { pub(crate) profile: TransformMatrixShaper, pub(crate) gamma_lut: usize, pub(crate) bit_depth: usize, } #[allow(unused)] struct TransformMatrixShaperOptScalar< T: Clone, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, const LINEAR_CAP: usize, > { pub(crate) profile: TransformMatrixShaperOptimized, pub(crate) gamma_lut: usize, pub(crate) bit_depth: usize, } #[cfg(any( any(target_arch = "x86", target_arch = "x86_64"), all(target_arch = "aarch64", target_feature = "neon") ))] #[allow(unused)] macro_rules! create_rgb_xyz_dependant_executor { ($dep_name: ident, $dependant: ident, $shaper: ident) => { pub(crate) fn $dep_name< T: Clone + Send + Sync + Default + PointeeSizeExpressible + Copy + 'static, const LINEAR_CAP: usize, >( src_layout: Layout, dst_layout: Layout, profile: $shaper, gamma_lut: usize, bit_depth: usize, ) -> Result + Send + Sync>, CmsError> where u32: AsPrimitive, { if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgba) { return Ok(Box::new($dependant::< T, { Layout::Rgba as u8 }, { Layout::Rgba as u8 }, LINEAR_CAP, > { profile, bit_depth, gamma_lut, })); } else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgba) { return Ok(Box::new($dependant::< T, { Layout::Rgb as u8 }, { Layout::Rgba as u8 }, LINEAR_CAP, > { profile, bit_depth, gamma_lut, })); } else if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgb) { return Ok(Box::new($dependant::< T, { Layout::Rgba as u8 }, { Layout::Rgb as u8 }, LINEAR_CAP, > { profile, bit_depth, gamma_lut, })); } else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgb) { return Ok(Box::new($dependant::< T, { Layout::Rgb as u8 }, { Layout::Rgb as u8 }, LINEAR_CAP, > { 
profile, bit_depth, gamma_lut, })); } Err(CmsError::UnsupportedProfileConnection) } }; } #[cfg(any( any(target_arch = "x86", target_arch = "x86_64"), all(target_arch = "aarch64", target_feature = "neon") ))] #[allow(unused)] macro_rules! create_rgb_xyz_dependant_executor_to_v { ($dep_name: ident, $dependant: ident, $shaper: ident) => { pub(crate) fn $dep_name< T: Clone + Send + Sync + Default + PointeeSizeExpressible + Copy + 'static, const LINEAR_CAP: usize, >( src_layout: Layout, dst_layout: Layout, profile: $shaper, gamma_lut: usize, bit_depth: usize, ) -> Result + Send + Sync>, CmsError> where u32: AsPrimitive, { let profile = profile.convert_to_v(); if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgba) { return Ok(Box::new($dependant::< T, { Layout::Rgba as u8 }, { Layout::Rgba as u8 }, > { profile, bit_depth, gamma_lut, })); } else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgba) { return Ok(Box::new($dependant::< T, { Layout::Rgb as u8 }, { Layout::Rgba as u8 }, > { profile, bit_depth, gamma_lut, })); } else if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgb) { return Ok(Box::new($dependant::< T, { Layout::Rgba as u8 }, { Layout::Rgb as u8 }, > { profile, bit_depth, gamma_lut, })); } else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgb) { return Ok(Box::new($dependant::< T, { Layout::Rgb as u8 }, { Layout::Rgb as u8 }, > { profile, bit_depth, gamma_lut, })); } Err(CmsError::UnsupportedProfileConnection) } }; } #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))] use crate::conversions::sse::{TransformShaperRgbOptSse, TransformShaperRgbSse}; #[cfg(all(target_arch = "x86_64", feature = "avx"))] use crate::conversions::avx::{TransformShaperRgbAvx, TransformShaperRgbOptAvx}; #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))] create_rgb_xyz_dependant_executor!( make_rgb_xyz_rgb_transform_sse_41, TransformShaperRgbSse, TransformMatrixShaper ); 
// NOTE(review): this chunk appears to have lost its line breaks and every
// `<...>` generic argument list that starts with an identifier (for example the
// return types below read `Result + Send + Sync>, CmsError>`). Tokens are kept
// exactly as found; restore the generics from the upstream crate before building.

// SSE4.1 builder for the single-curve ("optimized") shaper. The generating macro
// is defined earlier in this file, outside the visible chunk.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
create_rgb_xyz_dependant_executor_to_v!(
    make_rgb_xyz_rgb_transform_sse_41_opt,
    TransformShaperRgbOptSse,
    TransformMatrixShaperOptimized
);

// AVX2 builder for the per-channel shaper.
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
create_rgb_xyz_dependant_executor!(
    make_rgb_xyz_rgb_transform_avx2,
    TransformShaperRgbAvx,
    TransformMatrixShaper
);

// AVX2 builder for the single-curve shaper.
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
create_rgb_xyz_dependant_executor_to_v!(
    make_rgb_xyz_rgb_transform_avx2_opt,
    TransformShaperRgbOptAvx,
    TransformMatrixShaperOptimized
);

#[cfg(all(target_arch = "x86_64", feature = "avx512"))]
use crate::conversions::avx512::TransformShaperRgbOptAvx512;

// AVX-512 builder for the single-curve shaper.
#[cfg(all(target_arch = "x86_64", feature = "avx512"))]
create_rgb_xyz_dependant_executor!(
    make_rgb_xyz_rgb_transform_avx512_opt,
    TransformShaperRgbOptAvx512,
    TransformMatrixShaperOptimized
);

/// Builds a boxed executor for an RGB/RGBA -> RGB/RGBA matrix-shaper transform
/// (per-channel curves). Compiled on every target except aarch64 with NEON,
/// where a macro-generated function of the same name is used instead.
///
/// Selection order:
/// 1. `avx` feature on x86_64 and AVX2 + FMA detected at runtime -> AVX2 builder;
/// 2. `sse` feature on x86/x86_64 and SSE4.1 detected at runtime -> SSE builder;
/// 3. otherwise the scalar executor, monomorphized on the layout pair.
///
/// # Errors
/// Returns `CmsError::UnsupportedProfileConnection` when the scalar path is
/// reached with a layout pair other than the four Rgb/Rgba combinations.
#[cfg(not(all(target_arch = "aarch64", target_feature = "neon", feature = "neon")))]
pub(crate) fn make_rgb_xyz_rgb_transform<
    T: Clone + Send + Sync + PointeeSizeExpressible + 'static + Copy + Default,
    const LINEAR_CAP: usize,
>(
    src_layout: Layout,
    dst_layout: Layout,
    profile: TransformMatrixShaper,
    gamma_lut: usize,
    bit_depth: usize,
) -> Result + Send + Sync>, CmsError>
where
    u32: AsPrimitive,
{
    // Widest SIMD path first; detection happens at runtime, not compile time.
    #[cfg(all(feature = "avx", target_arch = "x86_64"))]
    if std::arch::is_x86_feature_detected!("avx2") && std::arch::is_x86_feature_detected!("fma") {
        return make_rgb_xyz_rgb_transform_avx2::(
            src_layout, dst_layout, profile, gamma_lut, bit_depth,
        );
    }
    #[cfg(all(feature = "sse", any(target_arch = "x86", target_arch = "x86_64")))]
    if std::arch::is_x86_feature_detected!("sse4.1") {
        return make_rgb_xyz_rgb_transform_sse_41::(
            src_layout, dst_layout, profile, gamma_lut, bit_depth,
        );
    }
    // Scalar fallback: the runtime layout pair picks the const-generic instance.
    if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgba) {
        return Ok(Box::new(TransformMatrixShaperScalar::<
            T,
            { Layout::Rgba as u8 },
            { Layout::Rgba as u8 },
            LINEAR_CAP,
        > {
            profile,
            gamma_lut,
            bit_depth,
        }));
    } else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgba) {
        return Ok(Box::new(TransformMatrixShaperScalar::<
            T,
            { Layout::Rgb as u8 },
            { Layout::Rgba as u8 },
            LINEAR_CAP,
        > {
            profile,
            gamma_lut,
            bit_depth,
        }));
    } else if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgb) {
        return Ok(Box::new(TransformMatrixShaperScalar::<
            T,
            { Layout::Rgba as u8 },
            { Layout::Rgb as u8 },
            LINEAR_CAP,
        > {
            profile,
            gamma_lut,
            bit_depth,
        }));
    } else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgb) {
        return Ok(Box::new(TransformMatrixShaperScalar::<
            T,
            { Layout::Rgb as u8 },
            { Layout::Rgb as u8 },
            LINEAR_CAP,
        > {
            profile,
            gamma_lut,
            bit_depth,
        }));
    }
    Err(CmsError::UnsupportedProfileConnection)
}

/// Builds a boxed executor for the single-curve ("optimized") matrix shaper.
/// Compiled on every target except aarch64 with NEON, where a macro-generated
/// function of the same name is used instead.
///
/// Selection order:
/// 1. `avx512` feature on x86_64 and AVX-512BW + AVX-512VL + FMA detected at runtime;
/// 2. `avx` feature on x86_64 and AVX2 + FMA detected at runtime;
/// 3. `sse` feature on x86/x86_64 and SSE4.1 detected at runtime;
/// 4. otherwise the scalar executor, monomorphized on the layout pair.
///
/// # Errors
/// Returns `CmsError::UnsupportedProfileConnection` when the scalar path is
/// reached with a layout pair other than the four Rgb/Rgba combinations.
#[cfg(not(all(target_arch = "aarch64", target_feature = "neon", feature = "neon")))]
pub(crate) fn make_rgb_xyz_rgb_transform_opt<
    T: Clone + Send + Sync + PointeeSizeExpressible + 'static + Copy + Default,
    const LINEAR_CAP: usize,
>(
    src_layout: Layout,
    dst_layout: Layout,
    profile: TransformMatrixShaperOptimized,
    gamma_lut: usize,
    bit_depth: usize,
) -> Result + Send + Sync>, CmsError>
where
    u32: AsPrimitive,
{
    #[cfg(all(feature = "avx512", target_arch = "x86_64"))]
    if std::arch::is_x86_feature_detected!("avx512bw")
        && std::arch::is_x86_feature_detected!("avx512vl")
        && std::arch::is_x86_feature_detected!("fma")
    {
        return make_rgb_xyz_rgb_transform_avx512_opt::(
            src_layout, dst_layout, profile, gamma_lut, bit_depth,
        );
    }
    #[cfg(all(feature = "avx", target_arch = "x86_64"))]
    if std::arch::is_x86_feature_detected!("avx2") && std::arch::is_x86_feature_detected!("fma") {
        return make_rgb_xyz_rgb_transform_avx2_opt::(
            src_layout, dst_layout, profile, gamma_lut, bit_depth,
        );
    }
    #[cfg(all(feature = "sse", any(target_arch = "x86", target_arch = "x86_64")))]
    if std::arch::is_x86_feature_detected!("sse4.1") {
        return make_rgb_xyz_rgb_transform_sse_41_opt::(
            src_layout, dst_layout, profile, gamma_lut, bit_depth,
        );
    }
    // Scalar fallback: the runtime layout pair picks the const-generic instance.
    if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgba) {
        return Ok(Box::new(TransformMatrixShaperOptScalar::<
            T,
            { Layout::Rgba as u8 },
            { Layout::Rgba as u8 },
            LINEAR_CAP,
        > {
            profile,
            gamma_lut,
            bit_depth,
        }));
    } else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgba) {
        return Ok(Box::new(TransformMatrixShaperOptScalar::<
            T,
            { Layout::Rgb as u8 },
            { Layout::Rgba as u8 },
            LINEAR_CAP,
        > {
            profile,
            gamma_lut,
            bit_depth,
        }));
    } else if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgb) {
        return Ok(Box::new(TransformMatrixShaperOptScalar::<
            T,
            { Layout::Rgba as u8 },
            { Layout::Rgb as u8 },
            LINEAR_CAP,
        > {
            profile,
            gamma_lut,
            bit_depth,
        }));
    } else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgb) {
        return Ok(Box::new(TransformMatrixShaperOptScalar::<
            T,
            { Layout::Rgb as u8 },
            { Layout::Rgb as u8 },
            LINEAR_CAP,
        > {
            profile,
            gamma_lut,
            bit_depth,
        }));
    }
    Err(CmsError::UnsupportedProfileConnection)
}

#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
use crate::conversions::neon::{TransformShaperRgbNeon, TransformShaperRgbOptNeon};
use crate::conversions::rgbxyz_fixed::TransformMatrixShaperFpOptVec;
use crate::conversions::rgbxyz_fixed::{
    TransformMatrixShaperFixedPoint, TransformMatrixShaperFixedPointOpt, TransformMatrixShaperFp,
};
use crate::transform::PointeeSizeExpressible;

// On aarch64 with NEON the two builders above are replaced by these
// macro-generated functions of the same names.
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
create_rgb_xyz_dependant_executor_to_v!(
    make_rgb_xyz_rgb_transform,
    TransformShaperRgbNeon,
    TransformMatrixShaper
);

#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
create_rgb_xyz_dependant_executor_to_v!(
    make_rgb_xyz_rgb_transform_opt,
    TransformShaperRgbOptNeon,
    TransformMatrixShaperOptimized
);

/// Scalar matrix-shaper executor with separate linearization and gamma tables
/// for each of R, G and B.
#[allow(unused)]
impl<
    T: Clone + PointeeSizeExpressible + Copy + Default + 'static,
    const SRC_LAYOUT: u8,
    const DST_LAYOUT: u8,
    const LINEAR_CAP: usize,
> TransformExecutor for TransformMatrixShaperScalar
where
    u32: AsPrimitive,
{
    /// Converts `src` pixels into `dst`.
    ///
    /// # Errors
    /// * `CmsError::LaneSizeMismatch` - `src` and `dst` hold different pixel counts.
    /// * `CmsError::LaneMultipleOfChannels` - a slice length is not a multiple of
    ///   its channel count.
    fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
        // presumably mlaf(acc, a, b) evaluates acc + a * b; defined in crate::mlaf, not shown here
        use crate::mlaf::mlaf;
        let src_cn = Layout::from(SRC_LAYOUT);
        let dst_cn = Layout::from(DST_LAYOUT);
        let src_channels = src_cn.channels();
        let dst_channels = dst_cn.channels();

        if src.len() / src_channels != dst.len() / dst_channels {
            return Err(CmsError::LaneSizeMismatch);
        }
        if src.len() % src_channels != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }
        if dst.len() % dst_channels != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }

        let transform = self.profile.adaptation_matrix;
        // Largest index used into the gamma tables.
        let scale = (self.gamma_lut - 1) as f32;
        // Alpha written when the source has no alpha channel.
        let max_colors: T = ((1 << self.bit_depth) - 1).as_();

        for (src, dst) in src
            .chunks_exact(src_channels)
            .zip(dst.chunks_exact_mut(dst_channels))
        {
            // Linearize each channel through its own lookup table.
            let r = self.profile.r_linear[src[src_cn.r_i()]._as_usize()];
            let g = self.profile.g_linear[src[src_cn.g_i()]._as_usize()];
            let b = self.profile.b_linear[src[src_cn.b_i()]._as_usize()];
            let a = if src_channels == 4 {
                src[src_cn.a_i()]
            } else {
                max_colors
            };

            // Matrix row times (r, g, b), clamped to [0, 1], then scaled to a
            // table index; the 0.5 makes the later integer cast round.
            let new_r = mlaf(
                0.5f32,
                mlaf(
                    mlaf(r * transform.v[0][0], g, transform.v[0][1]),
                    b,
                    transform.v[0][2],
                )
                .max(0f32)
                .min(1f32),
                scale,
            );

            let new_g = mlaf(
                0.5f32,
                mlaf(
                    mlaf(r * transform.v[1][0], g, transform.v[1][1]),
                    b,
                    transform.v[1][2],
                )
                .max(0f32)
                .min(1f32),
                scale,
            );

            let new_b = mlaf(
                0.5f32,
                mlaf(
                    mlaf(r * transform.v[2][0], g, transform.v[2][1]),
                    b,
                    transform.v[2][2],
                )
                .max(0f32)
                .min(1f32),
                scale,
            );

            dst[dst_cn.r_i()] = self.profile.r_gamma[(new_r as u16) as usize];
            dst[dst_cn.g_i()] = self.profile.g_gamma[(new_g as u16) as usize];
            dst[dst_cn.b_i()] = self.profile.b_gamma[(new_b as u16) as usize];
            if dst_channels == 4 {
                dst[dst_cn.a_i()] = a;
            }
        }

        Ok(())
    }
}

/// Scalar matrix-shaper executor for profiles where one linearization table and
/// one gamma table serve all three channels.
#[allow(unused)]
impl<
    T: Clone + PointeeSizeExpressible + Copy + Default + 'static,
    const SRC_LAYOUT: u8,
    const DST_LAYOUT: u8,
    const LINEAR_CAP: usize,
> TransformExecutor for TransformMatrixShaperOptScalar
where
    u32: AsPrimitive,
{
    /// Converts `src` pixels into `dst`.
    ///
    /// # Errors
    /// * `CmsError::LaneSizeMismatch` - `src` and `dst` hold different pixel counts.
    /// * `CmsError::LaneMultipleOfChannels` - a slice length is not a multiple of
    ///   its channel count.
    fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
        // presumably mlaf(acc, a, b) evaluates acc + a * b; defined in crate::mlaf, not shown here
        use crate::mlaf::mlaf;
        let src_cn = Layout::from(SRC_LAYOUT);
        let dst_cn = Layout::from(DST_LAYOUT);
        let src_channels = src_cn.channels();
        let dst_channels = dst_cn.channels();

        if src.len() / src_channels != dst.len() / dst_channels {
            return Err(CmsError::LaneSizeMismatch);
        }
        if src.len() % src_channels != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }
        if dst.len() % dst_channels != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }

        let transform = self.profile.adaptation_matrix;
        // Largest index used into the gamma table.
        let scale = (self.gamma_lut - 1) as f32;
        // Alpha written when the source has no alpha channel.
        let max_colors: T = ((1 << self.bit_depth) - 1).as_();

        for (src, dst) in src
            .chunks_exact(src_channels)
            .zip(dst.chunks_exact_mut(dst_channels))
        {
            // All three channels share the single `linear` table.
            let r = self.profile.linear[src[src_cn.r_i()]._as_usize()];
            let g = self.profile.linear[src[src_cn.g_i()]._as_usize()];
            let b = self.profile.linear[src[src_cn.b_i()]._as_usize()];
            let a = if src_channels == 4 {
                src[src_cn.a_i()]
            } else {
                max_colors
            };

            // Matrix row times (r, g, b), clamped to [0, 1], then scaled to a
            // table index; the 0.5 makes the later integer cast round.
            let new_r = mlaf(
                0.5f32,
                mlaf(
                    mlaf(r * transform.v[0][0], g, transform.v[0][1]),
                    b,
                    transform.v[0][2],
                )
                .max(0f32)
                .min(1f32),
                scale,
            );

            let new_g = mlaf(
                0.5f32,
                mlaf(
                    mlaf(r * transform.v[1][0], g, transform.v[1][1]),
                    b,
                    transform.v[1][2],
                )
                .max(0f32)
                .min(1f32),
                scale,
            );

            let new_b = mlaf(
                0.5f32,
                mlaf(
                    mlaf(r * transform.v[2][0], g, transform.v[2][1]),
                    b,
                    transform.v[2][2],
                )
                .max(0f32)
                .min(1f32),
                scale,
            );

            dst[dst_cn.r_i()] = self.profile.gamma[(new_r as u16) as usize];
            dst[dst_cn.g_i()] = self.profile.gamma[(new_g as u16) as usize];
            dst[dst_cn.b_i()] = self.profile.gamma[(new_b as u16) as usize];
            if dst_channels == 4 {
                dst[dst_cn.a_i()] = a;
            }
        }

        Ok(())
    }
}
moxcms-0.7.7/src/conversions/rgbxyz_fixed.rs000064400000000000000000000474661046102023000173770ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1.
Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/
use crate::Layout;
use crate::conversions::TransformMatrixShaper;
use crate::matrix::Matrix3;
use crate::{CmsError, TransformExecutor};
use num_traits::AsPrimitive;

// NOTE(review): the generic parameter lists of the structs below (and the
// element types of `Vec` / `Matrix3`) appear to have been stripped from this
// chunk; tokens are kept exactly as found. Restore them from upstream.

/// Fixed point conversion Q2.13
///
/// Per-channel shaper with fixed-size linearization tables.
pub(crate) struct TransformMatrixShaperFixedPoint {
    // Linearization tables, one per channel, `LINEAR_CAP` entries each.
    pub(crate) r_linear: Box<[R; LINEAR_CAP]>,
    pub(crate) g_linear: Box<[R; LINEAR_CAP]>,
    pub(crate) b_linear: Box<[R; LINEAR_CAP]>,
    // Output (gamma) tables, one per channel, 65536 entries each.
    pub(crate) r_gamma: Box<[T; 65536]>,
    pub(crate) g_gamma: Box<[T; 65536]>,
    pub(crate) b_gamma: Box<[T; 65536]>,
    // 3x3 matrix applied between linearization and gamma lookup.
    pub(crate) adaptation_matrix: Matrix3,
}

/// Fixed point conversion Q2.13
///
/// Same layout as the struct above, but the linearization tables are `Vec`s
/// rather than fixed-size boxed arrays.
#[allow(dead_code)]
pub(crate) struct TransformMatrixShaperFp {
    pub(crate) r_linear: Vec,
    pub(crate) g_linear: Vec,
    pub(crate) b_linear: Vec,
    pub(crate) r_gamma: Box<[T; 65536]>,
    pub(crate) g_gamma: Box<[T; 65536]>,
    pub(crate) b_gamma: Box<[T; 65536]>,
    pub(crate) adaptation_matrix: Matrix3,
}

/// Fixed point conversion Q2.13
///
/// Optimized routine for *all same curves* matrix shaper.
pub(crate) struct TransformMatrixShaperFixedPointOpt {
    // Single linearization table shared by R, G and B.
    pub(crate) linear: Box<[R; LINEAR_CAP]>,
    // Single output (gamma) table shared by R, G and B.
    pub(crate) gamma: Box<[T; 65536]>,
    pub(crate) adaptation_matrix: Matrix3,
}

/// Fixed point conversion Q2.13
///
/// Optimized routine for *all same curves* matrix shaper.
#[allow(dead_code)]
pub(crate) struct TransformMatrixShaperFpOptVec {
    // NOTE(review): the element type of `Vec` / `Matrix3` and this struct's
    // generic parameter list appear to have been stripped from this chunk.
    // Single linearization table shared by R, G and B.
    pub(crate) linear: Vec,
    // Single output (gamma) table shared by R, G and B.
    pub(crate) gamma: Box<[T; 65536]>,
    pub(crate) adaptation_matrix: Matrix3,
}

/// Scalar fixed-point executor state for the per-channel shaper.
/// `PRECISION` is the number of fractional bits shifted out after the matrix
/// multiply.
#[allow(unused)]
struct TransformMatrixShaperQ2_13<
    T: Copy,
    const SRC_LAYOUT: u8,
    const DST_LAYOUT: u8,
    const LINEAR_CAP: usize,
    const PRECISION: i32,
> {
    pub(crate) profile: TransformMatrixShaperFixedPoint,
    pub(crate) bit_depth: usize,
    pub(crate) gamma_lut: usize,
}

/// Scalar fixed-point executor state for the single-curve shaper.
#[allow(unused)]
struct TransformMatrixShaperQ2_13Optimized<
    T: Copy,
    const SRC_LAYOUT: u8,
    const DST_LAYOUT: u8,
    const LINEAR_CAP: usize,
    const PRECISION: i32,
> {
    pub(crate) profile: TransformMatrixShaperFixedPointOpt,
    pub(crate) bit_depth: usize,
    pub(crate) gamma_lut: usize,
}

#[allow(unused)]
impl<
    T: Clone + PointeeSizeExpressible + Copy + Default + 'static,
    const SRC_LAYOUT: u8,
    const DST_LAYOUT: u8,
    const LINEAR_CAP: usize,
    const PRECISION: i32,
> TransformExecutor for TransformMatrixShaperQ2_13
where
    u32: AsPrimitive,
{
    /// Converts `src` pixels into `dst` using integer arithmetic.
    ///
    /// # Errors
    /// * `CmsError::LaneSizeMismatch` - `src` and `dst` hold different pixel counts.
    /// * `CmsError::LaneMultipleOfChannels` - a slice length is not a multiple of
    ///   its channel count.
    fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
        let src_cn = Layout::from(SRC_LAYOUT);
        let dst_cn = Layout::from(DST_LAYOUT);
        let src_channels = src_cn.channels();
        let dst_channels = dst_cn.channels();

        if src.len() / src_channels != dst.len() / dst_channels {
            return Err(CmsError::LaneSizeMismatch);
        }
        if src.len() % src_channels != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }
        if dst.len() % dst_channels != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }

        let transform = self.profile.adaptation_matrix;
        // Alpha written when the source has no alpha channel.
        let max_colors: T = ((1 << self.bit_depth as u32) - 1u32).as_();
        // Half of one unit at PRECISION fractional bits: rounds the shift below.
        let rnd: i32 = (1i32 << (PRECISION - 1));

        // Largest valid index into the gamma tables.
        let v_gamma_max = self.gamma_lut as i32 - 1;

        for (src, dst) in src
            .chunks_exact(src_channels)
            .zip(dst.chunks_exact_mut(dst_channels))
        {
            let r = self.profile.r_linear[src[src_cn.r_i()]._as_usize()];
            let g = self.profile.g_linear[src[src_cn.g_i()]._as_usize()];
            let b = self.profile.b_linear[src[src_cn.b_i()]._as_usize()];
            let a = if src_channels == 4 {
                src[src_cn.a_i()]
            } else {
                max_colors
            };

            // Matrix row times (r, g, b) in i32, plus the rounding term.
            let new_r = r as i32 * transform.v[0][0] as i32
                + g as i32 * transform.v[0][1] as i32
                + b as i32 * transform.v[0][2] as i32
                + rnd;

            // Drop the fractional bits and clamp to the table range.
            let r_q2_13 = (new_r >> PRECISION).min(v_gamma_max).max(0) as u16;

            let new_g = r as i32 * transform.v[1][0] as i32
                + g as i32 * transform.v[1][1] as i32
                + b as i32 * transform.v[1][2] as i32
                + rnd;

            let g_q2_13 = (new_g >> PRECISION).min(v_gamma_max).max(0) as u16;

            let new_b = r as i32 * transform.v[2][0] as i32
                + g as i32 * transform.v[2][1] as i32
                + b as i32 * transform.v[2][2] as i32
                + rnd;

            let b_q2_13 = (new_b >> PRECISION).min(v_gamma_max).max(0) as u16;

            dst[dst_cn.r_i()] = self.profile.r_gamma[r_q2_13 as usize];
            dst[dst_cn.g_i()] = self.profile.g_gamma[g_q2_13 as usize];
            dst[dst_cn.b_i()] = self.profile.b_gamma[b_q2_13 as usize];
            if dst_channels == 4 {
                dst[dst_cn.a_i()] = a;
            }
        }
        Ok(())
    }
}

#[allow(unused)]
impl<
    T: Clone + PointeeSizeExpressible + Copy + Default + 'static,
    const SRC_LAYOUT: u8,
    const DST_LAYOUT: u8,
    const LINEAR_CAP: usize,
    const PRECISION: i32,
> TransformExecutor for TransformMatrixShaperQ2_13Optimized
where
    u32: AsPrimitive,
{
    /// Converts `src` pixels into `dst` using integer arithmetic, with one
    /// linearization table and one gamma table shared by all three channels.
    ///
    /// # Errors
    /// * `CmsError::LaneSizeMismatch` - `src` and `dst` hold different pixel counts.
    /// * `CmsError::LaneMultipleOfChannels` - a slice length is not a multiple of
    ///   its channel count.
    fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
        let src_cn = Layout::from(SRC_LAYOUT);
        let dst_cn = Layout::from(DST_LAYOUT);
        let src_channels = src_cn.channels();
        let dst_channels = dst_cn.channels();

        if src.len() / src_channels != dst.len() / dst_channels {
            return Err(CmsError::LaneSizeMismatch);
        }
        if src.len() % src_channels != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }
        if dst.len() % dst_channels != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }

        let transform = self.profile.adaptation_matrix;
        // Alpha written when the source has no alpha channel.
        let max_colors: T = ((1 << self.bit_depth as u32) - 1u32).as_();
        // Half of one unit at PRECISION fractional bits: rounds the shift below.
        let rnd: i32 = (1i32 << (PRECISION - 1));

        // Largest valid index into the gamma table.
        let v_gamma_max = self.gamma_lut as i32 - 1;

        for (src, dst) in src
            .chunks_exact(src_channels)
            .zip(dst.chunks_exact_mut(dst_channels))
        {
            let r = self.profile.linear[src[src_cn.r_i()]._as_usize()];
            let g = self.profile.linear[src[src_cn.g_i()]._as_usize()];
            let b = self.profile.linear[src[src_cn.b_i()]._as_usize()];
            let a = if src_channels == 4 {
                src[src_cn.a_i()]
            } else {
                max_colors
            };

            // Matrix row times (r, g, b) in i32, plus the rounding term.
            let new_r = r as i32 * transform.v[0][0] as i32
                + g as i32 * transform.v[0][1] as i32
                + b as i32 * transform.v[0][2] as i32
                + rnd;

            // Drop the fractional bits and clamp to the table range.
            let r_q2_13 = (new_r >> PRECISION).min(v_gamma_max).max(0) as u16;

            let new_g = r as i32 * transform.v[1][0] as i32
                + g as i32 * transform.v[1][1] as i32
                + b as i32 * transform.v[1][2] as i32
                + rnd;

            let g_q2_13 = (new_g >> PRECISION).min(v_gamma_max).max(0) as u16;

            let new_b = r as i32 * transform.v[2][0] as i32
                + g as i32 * transform.v[2][1] as i32
                + b as i32 * transform.v[2][2] as i32
                + rnd;

            let b_q2_13 = (new_b >> PRECISION).min(v_gamma_max).max(0) as u16;

            dst[dst_cn.r_i()] = self.profile.gamma[r_q2_13 as usize];
            dst[dst_cn.g_i()] = self.profile.gamma[g_q2_13 as usize];
            dst[dst_cn.b_i()] = self.profile.gamma[b_q2_13 as usize];
            if dst_channels == 4 {
                dst[dst_cn.a_i()] = a;
            }
        }
        Ok(())
    }
}

// Macro that generates a fixed-point builder function for a fixed-capacity
// (`LINEAR_CAP`) executor type.
#[allow(unused_macros)]
macro_rules!
create_rgb_xyz_dependant_q2_13_executor {
    // $dep_name   - name of the generated builder function
    // $dependant  - executor struct to instantiate
    // $resolution - integer type passed to `to_q2_13_n`
    // $shaper     - floating-point shaper type taken as input
    //
    // NOTE(review): generic argument lists in the signature (e.g. the return
    // type) appear to have been stripped from this chunk; tokens kept as found.
    ($dep_name: ident, $dependant: ident, $resolution: ident, $shaper: ident) => {
        pub(crate) fn $dep_name<
            T: Clone + Send + Sync + AsPrimitive + Default + PointeeSizeExpressible,
            const LINEAR_CAP: usize,
            const PRECISION: i32,
        >(
            src_layout: Layout,
            dst_layout: Layout,
            profile: $shaper,
            gamma_lut: usize,
            bit_depth: usize,
        ) -> Result + Send + Sync>, CmsError>
        where
            u32: AsPrimitive,
        {
            // Convert the shaper to its fixed-point form once, up front.
            let q2_13_profile =
                profile.to_q2_13_n::<$resolution, PRECISION, LINEAR_CAP>(gamma_lut, bit_depth);
            // The runtime layout pair picks the const-generic instance.
            if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgba) {
                return Ok(Box::new($dependant::<
                    T,
                    { Layout::Rgba as u8 },
                    { Layout::Rgba as u8 },
                    LINEAR_CAP,
                    PRECISION,
                > {
                    profile: q2_13_profile,
                    bit_depth,
                    gamma_lut,
                }));
            } else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgba) {
                return Ok(Box::new($dependant::<
                    T,
                    { Layout::Rgb as u8 },
                    { Layout::Rgba as u8 },
                    LINEAR_CAP,
                    PRECISION,
                > {
                    profile: q2_13_profile,
                    bit_depth,
                    gamma_lut,
                }));
            } else if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgb) {
                return Ok(Box::new($dependant::<
                    T,
                    { Layout::Rgba as u8 },
                    { Layout::Rgb as u8 },
                    LINEAR_CAP,
                    PRECISION,
                > {
                    profile: q2_13_profile,
                    bit_depth,
                    gamma_lut,
                }));
            } else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgb) {
                return Ok(Box::new($dependant::<
                    T,
                    { Layout::Rgb as u8 },
                    { Layout::Rgb as u8 },
                    LINEAR_CAP,
                    PRECISION,
                > {
                    profile: q2_13_profile,
                    bit_depth,
                    gamma_lut,
                }));
            }
            // Any other layout pair is not handled by this builder.
            Err(CmsError::UnsupportedProfileConnection)
        }
    };
}

// Macro that generates a fixed-point builder function for an executor type
// without a `LINEAR_CAP` parameter.
#[allow(unused_macros)]
macro_rules!
create_rgb_xyz_dependant_q2_13_executor_fp {
    // $dep_name   - name of the generated builder function
    // $dependant  - executor struct to instantiate
    // $resolution - integer type passed to `to_q2_13_i`
    // $shaper     - floating-point shaper type taken as input
    //
    // NOTE(review): generic argument lists in the signature (e.g. the return
    // type) appear to have been stripped from this chunk; tokens kept as found.
    ($dep_name: ident, $dependant: ident, $resolution: ident, $shaper: ident) => {
        pub(crate) fn $dep_name<
            T: Clone + Send + Sync + AsPrimitive + Default + PointeeSizeExpressible,
            const LINEAR_CAP: usize,
            const PRECISION: i32,
        >(
            src_layout: Layout,
            dst_layout: Layout,
            profile: $shaper,
            gamma_lut: usize,
            bit_depth: usize,
        ) -> Result + Send + Sync>, CmsError>
        where
            u32: AsPrimitive,
        {
            // Convert the shaper to its fixed-point form once, up front.
            let q2_13_profile = profile.to_q2_13_i::<$resolution, PRECISION>(gamma_lut, bit_depth);
            // The runtime layout pair picks the const-generic instance; unlike
            // the sibling macro, the executor takes no LINEAR_CAP argument.
            if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgba) {
                return Ok(Box::new($dependant::<
                    T,
                    { Layout::Rgba as u8 },
                    { Layout::Rgba as u8 },
                    PRECISION,
                > {
                    profile: q2_13_profile,
                    bit_depth,
                    gamma_lut,
                }));
            } else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgba) {
                return Ok(Box::new($dependant::<
                    T,
                    { Layout::Rgb as u8 },
                    { Layout::Rgba as u8 },
                    PRECISION,
                > {
                    profile: q2_13_profile,
                    bit_depth,
                    gamma_lut,
                }));
            } else if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgb) {
                return Ok(Box::new($dependant::<
                    T,
                    { Layout::Rgba as u8 },
                    { Layout::Rgb as u8 },
                    PRECISION,
                > {
                    profile: q2_13_profile,
                    bit_depth,
                    gamma_lut,
                }));
            } else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgb) {
                return Ok(Box::new($dependant::<
                    T,
                    { Layout::Rgb as u8 },
                    { Layout::Rgb as u8 },
                    PRECISION,
                > {
                    profile: q2_13_profile,
                    bit_depth,
                    gamma_lut,
                }));
            }
            // Any other layout pair is not handled by this builder.
            Err(CmsError::UnsupportedProfileConnection)
        }
    };
}

// NOTE(review): this cfg omits `target_feature = "neon"`, while the only
// invocation of the macro visible in this file requires it. On aarch64 with the
// `neon` cargo feature but without the target feature the macro would be
// defined and unused - confirm whether that is intended.
#[cfg(all(target_arch = "aarch64", feature = "neon"))]
macro_rules!
create_rgb_xyz_dependant_q1_30_executor {
    // $dep_name   - name of the generated builder function
    // $dependant  - executor struct to instantiate
    // $resolution - integer type passed to `to_q1_30_n`
    // $shaper     - floating-point shaper type taken as input
    //
    // NOTE(review): generic argument lists in the signature (e.g. the return
    // type) appear to have been stripped from this chunk; tokens kept as found.
    ($dep_name: ident, $dependant: ident, $resolution: ident, $shaper: ident) => {
        pub(crate) fn $dep_name<
            T: Clone + Send + Sync + AsPrimitive + Default + PointeeSizeExpressible,
            const LINEAR_CAP: usize,
            const PRECISION: i32,
        >(
            src_layout: Layout,
            dst_layout: Layout,
            profile: $shaper,
            gamma_lut: usize,
            bit_depth: usize,
        ) -> Result + Send + Sync>, CmsError>
        where
            u32: AsPrimitive,
        {
            // Convert the shaper to its fixed-point form once, up front.
            let q1_30_profile = profile.to_q1_30_n::<$resolution, PRECISION>(gamma_lut, bit_depth);
            // The runtime layout pair picks the const-generic instance.
            if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgba) {
                return Ok(Box::new($dependant::<
                    T,
                    { Layout::Rgba as u8 },
                    { Layout::Rgba as u8 },
                > {
                    profile: q1_30_profile,
                    gamma_lut,
                    bit_depth,
                }));
            } else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgba) {
                return Ok(Box::new($dependant::<
                    T,
                    { Layout::Rgb as u8 },
                    { Layout::Rgba as u8 },
                > {
                    profile: q1_30_profile,
                    gamma_lut,
                    bit_depth,
                }));
            } else if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgb) {
                return Ok(Box::new($dependant::<
                    T,
                    { Layout::Rgba as u8 },
                    { Layout::Rgb as u8 },
                > {
                    profile: q1_30_profile,
                    gamma_lut,
                    bit_depth,
                }));
            } else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgb) {
                return Ok(Box::new($dependant::<
                    T,
                    { Layout::Rgb as u8 },
                    { Layout::Rgb as u8 },
                > {
                    profile: q1_30_profile,
                    gamma_lut,
                    bit_depth,
                }));
            }
            // Any other layout pair is not handled by this builder.
            Err(CmsError::UnsupportedProfileConnection)
        }
    };
}

#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
use crate::conversions::neon::{
    TransformShaperQ1_30NeonOpt, TransformShaperQ2_13Neon, TransformShaperQ2_13NeonOpt,
};

// aarch64 + NEON: `make_rgb_xyz_q2_13*` and `make_rgb_xyz_q1_30_opt` are
// generated for the NEON executors.
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
create_rgb_xyz_dependant_q2_13_executor_fp!(
    make_rgb_xyz_q2_13,
    TransformShaperQ2_13Neon,
    i16,
    TransformMatrixShaper
);

#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
create_rgb_xyz_dependant_q2_13_executor_fp!(
    make_rgb_xyz_q2_13_opt,
    TransformShaperQ2_13NeonOpt,
    i16,
    TransformMatrixShaperOptimized
);

#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
create_rgb_xyz_dependant_q1_30_executor!(
    make_rgb_xyz_q1_30_opt,
    TransformShaperQ1_30NeonOpt,
    i32,
    TransformMatrixShaperOptimized
);

// Every other configuration: the same two builder names are generated for the
// scalar fixed-point executors defined in this file.
#[cfg(not(all(target_arch = "aarch64", target_feature = "neon", feature = "neon")))]
create_rgb_xyz_dependant_q2_13_executor!(
    make_rgb_xyz_q2_13,
    TransformMatrixShaperQ2_13,
    i16,
    TransformMatrixShaper
);

#[cfg(not(all(target_arch = "aarch64", target_feature = "neon", feature = "neon")))]
create_rgb_xyz_dependant_q2_13_executor!(
    make_rgb_xyz_q2_13_opt,
    TransformMatrixShaperQ2_13Optimized,
    i16,
    TransformMatrixShaperOptimized
);

#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
use crate::conversions::sse::{TransformShaperQ2_13OptSse, TransformShaperQ2_13Sse};

// x86 SIMD builders; these use i32 as the table element type.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
create_rgb_xyz_dependant_q2_13_executor_fp!(
    make_rgb_xyz_q2_13_transform_sse_41,
    TransformShaperQ2_13Sse,
    i32,
    TransformMatrixShaper
);

#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
create_rgb_xyz_dependant_q2_13_executor_fp!(
    make_rgb_xyz_q2_13_transform_sse_41_opt,
    TransformShaperQ2_13OptSse,
    i32,
    TransformMatrixShaperOptimized
);

#[cfg(all(target_arch = "x86_64", feature = "avx"))]
use crate::conversions::avx::{TransformShaperRgbQ2_13Avx, TransformShaperRgbQ2_13OptAvx};
use crate::conversions::rgbxyz::TransformMatrixShaperOptimized;
use crate::transform::PointeeSizeExpressible;

#[cfg(all(target_arch = "x86_64", feature = "avx"))]
create_rgb_xyz_dependant_q2_13_executor_fp!(
    make_rgb_xyz_q2_13_transform_avx2,
    TransformShaperRgbQ2_13Avx,
    i32,
    TransformMatrixShaper
);

#[cfg(all(target_arch = "x86_64", feature = "avx"))]
create_rgb_xyz_dependant_q2_13_executor_fp!(
    make_rgb_xyz_q2_13_transform_avx2_opt,
    TransformShaperRgbQ2_13OptAvx,
    i32,
    TransformMatrixShaperOptimized
);

#[cfg(all(target_arch = "x86_64", feature = "avx512"))]
use crate::conversions::avx512::TransformShaperRgbQ2_13OptAvx512;

// The AVX-512 executor goes through the fixed-capacity (`LINEAR_CAP`) macro.
#[cfg(all(target_arch = "x86_64", feature = "avx512"))]
create_rgb_xyz_dependant_q2_13_executor!(
    make_rgb_xyz_q2_13_transform_avx512_opt,
    TransformShaperRgbQ2_13OptAvx512,
    i32,
    TransformMatrixShaperOptimized
);
moxcms-0.7.7/src/conversions/rgbxyz_float.rs000064400000000000000000000260001046102023000173620ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
use crate::trc::ToneCurveEvaluator;
use crate::{CmsError, Layout, Matrix3f, PointeeSizeExpressible, Rgb, TransformExecutor};
use num_traits::AsPrimitive;
use std::marker::PhantomData;

// NOTE(review): generic parameter and argument lists (struct headers, `Box`,
// `PhantomData`, `AsPrimitive`, return types) appear to have been stripped from
// this chunk; tokens are kept exactly as found. Restore them from upstream.

/// Shaper data for a transform that linearizes through lookup tables and
/// produces output through a tone-curve evaluator.
pub(crate) struct TransformShaperRgbFloat {
    // Linearization tables, one per channel, `BUCKET` entries each.
    pub(crate) r_linear: Box<[f32; BUCKET]>,
    pub(crate) g_linear: Box<[f32; BUCKET]>,
    pub(crate) b_linear: Box<[f32; BUCKET]>,
    // Applied to the matrix result to produce output values.
    pub(crate) gamma_evaluator: Box,
    pub(crate) adaptation_matrix: Matrix3f,
    pub(crate) phantom_data: PhantomData,
}

/// Shaper data for a transform that uses tone-curve evaluators on both the
/// input and the output side, with no lookup tables.
pub(crate) struct TransformShaperFloatInOut {
    pub(crate) linear_evaluator: Box,
    pub(crate) gamma_evaluator: Box,
    pub(crate) adaptation_matrix: Matrix3f,
    pub(crate) phantom_data: PhantomData,
}

/// Executor state: table-based linearization, evaluator-based output.
struct TransformShaperFloatScalar<
    T: Clone,
    const SRC_LAYOUT: u8,
    const DST_LAYOUT: u8,
    const LINEAR_CAP: usize,
> {
    pub(crate) profile: TransformShaperRgbFloat,
    pub(crate) bit_depth: usize,
}

/// Executor state: evaluator-based on both sides.
struct TransformShaperRgbFloatInOut {
    pub(crate) profile: TransformShaperFloatInOut,
    pub(crate) bit_depth: usize,
}

/// Builds a boxed executor around `TransformShaperFloatScalar` for the given
/// layout pair.
///
/// # Errors
/// Returns `CmsError::UnsupportedProfileConnection` for any layout pair other
/// than the four Rgb/Rgba combinations.
pub(crate) fn make_rgb_xyz_rgb_transform_float<
    T: Clone + Send + Sync + PointeeSizeExpressible + 'static + Copy + Default,
    const LINEAR_CAP: usize,
>(
    src_layout: Layout,
    dst_layout: Layout,
    profile: TransformShaperRgbFloat,
    bit_depth: usize,
) -> Result + Send + Sync>, CmsError>
where
    u32: AsPrimitive,
    f32: AsPrimitive,
{
    // The runtime layout pair picks the const-generic instance.
    if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgba) {
        return Ok(Box::new(TransformShaperFloatScalar::<
            T,
            { Layout::Rgba as u8 },
            { Layout::Rgba as u8 },
            LINEAR_CAP,
        > {
            profile,
            bit_depth,
        }));
    } else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgba) {
        return Ok(Box::new(TransformShaperFloatScalar::<
            T,
            { Layout::Rgb as u8 },
            { Layout::Rgba as u8 },
            LINEAR_CAP,
        > {
            profile,
            bit_depth,
        }));
    } else if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgb) {
        return Ok(Box::new(TransformShaperFloatScalar::<
            T,
            { Layout::Rgba as u8 },
            { Layout::Rgb as u8 },
            LINEAR_CAP,
        > {
            profile,
            bit_depth,
        }));
    } else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgb) {
        return Ok(Box::new(TransformShaperFloatScalar::<
            T,
            { Layout::Rgb as u8 },
            { Layout::Rgb as u8 },
            LINEAR_CAP,
        > {
            profile,
            bit_depth,
        }));
    }
    Err(CmsError::UnsupportedProfileConnection)
}

/// Builds a boxed executor around `TransformShaperRgbFloatInOut` for the given
/// layout pair.
///
/// # Errors
/// Returns `CmsError::UnsupportedProfileConnection` for any layout pair other
/// than the four Rgb/Rgba combinations.
pub(crate) fn make_rgb_xyz_rgb_transform_float_in_out<
    T: Clone + Send + Sync + PointeeSizeExpressible + 'static + Copy + Default + AsPrimitive,
>(
    src_layout: Layout,
    dst_layout: Layout,
    profile: TransformShaperFloatInOut,
    bit_depth: usize,
) -> Result + Send + Sync>, CmsError>
where
    u32: AsPrimitive,
    f32: AsPrimitive,
{
    // The runtime layout pair picks the const-generic instance.
    if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgba) {
        return Ok(Box::new(TransformShaperRgbFloatInOut::<
            T,
            { Layout::Rgba as u8 },
            { Layout::Rgba as u8 },
        > {
            profile,
            bit_depth,
        }));
    } else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgba) {
        return Ok(Box::new(TransformShaperRgbFloatInOut::<
            T,
            { Layout::Rgb as u8 },
            { Layout::Rgba as u8 },
        > {
            profile,
            bit_depth,
        }));
    } else if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgb) {
        return Ok(Box::new(TransformShaperRgbFloatInOut::<
            T,
            { Layout::Rgba as u8 },
            { Layout::Rgb as u8 },
        > {
            profile,
            bit_depth,
        }));
    } else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgb) {
        return Ok(Box::new(TransformShaperRgbFloatInOut::<
            T,
            { Layout::Rgb as u8 },
            { Layout::Rgb as u8 },
        > {
            profile,
            bit_depth,
        }));
    }
    Err(CmsError::UnsupportedProfileConnection)
}

impl<
    T: Clone + PointeeSizeExpressible + Copy + Default + 'static,
    const SRC_LAYOUT: u8,
    const DST_LAYOUT: u8,
    const LINEAR_CAP: usize,
> TransformExecutor for TransformShaperFloatScalar
where
    u32: AsPrimitive,
    f32: AsPrimitive,
{
    /// Converts `src` pixels into `dst`: table linearization, 3x3 matrix, then
    /// the gamma evaluator. The matrix result is not clamped in this function.
    ///
    /// # Errors
    /// * `CmsError::LaneSizeMismatch` - `src` and `dst` hold different pixel counts.
    /// * `CmsError::LaneMultipleOfChannels` - a slice length is not a multiple of
    ///   its channel count.
    fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
        // presumably mlaf(acc, a, b) evaluates acc + a * b; defined in crate::mlaf, not shown here
        use crate::mlaf::mlaf;
        let src_cn = Layout::from(SRC_LAYOUT);
        let dst_cn = Layout::from(DST_LAYOUT);
        let src_channels = src_cn.channels();
        let dst_channels = dst_cn.channels();

        if src.len() / src_channels != dst.len() / dst_channels {
            return Err(CmsError::LaneSizeMismatch);
        }
        if src.len() % src_channels != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }
        if dst.len() % dst_channels != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }

        let transform = self.profile.adaptation_matrix;
        // Alpha written when the source has no alpha channel.
        let max_colors: T = ((1 << self.bit_depth) - 1).as_();

        for (src, dst) in src
            .chunks_exact(src_channels)
            .zip(dst.chunks_exact_mut(dst_channels))
        {
            let r = self.profile.r_linear[src[src_cn.r_i()]._as_usize()];
            let g = self.profile.g_linear[src[src_cn.g_i()]._as_usize()];
            let b = self.profile.b_linear[src[src_cn.b_i()]._as_usize()];
            let a = if src_channels == 4 {
                src[src_cn.a_i()]
            } else {
                max_colors
            };

            // Matrix row times (r, g, b).
            let new_r = mlaf(
                mlaf(r * transform.v[0][0], g, transform.v[0][1]),
                b,
                transform.v[0][2],
            );

            let new_g = mlaf(
                mlaf(r * transform.v[1][0], g, transform.v[1][1]),
                b,
                transform.v[1][2],
            );

            let new_b = mlaf(
                mlaf(r * transform.v[2][0], g, transform.v[2][1]),
                b,
                transform.v[2][2],
            );

            let mut rgb = Rgb::new(new_r, new_g, new_b);
            rgb = self.profile.gamma_evaluator.evaluate_tristimulus(rgb);

            dst[dst_cn.r_i()] = rgb.r.as_();
            dst[dst_cn.g_i()] = rgb.g.as_();
            dst[dst_cn.b_i()] = rgb.b.as_();
            if dst_channels == 4 {
                dst[dst_cn.a_i()] = a;
            }
        }

        Ok(())
    }
}

impl<
    T: Clone + PointeeSizeExpressible + Copy + Default + 'static + AsPrimitive,
    const SRC_LAYOUT: u8,
    const DST_LAYOUT: u8,
> TransformExecutor for TransformShaperRgbFloatInOut
where
    u32: AsPrimitive,
    f32: AsPrimitive,
{
    /// Converts `src` pixels into `dst`: linear evaluator, 3x3 matrix, then the
    /// gamma evaluator. No lookup tables are used.
    ///
    /// # Errors
    /// * `CmsError::LaneSizeMismatch` - `src` and `dst` hold different pixel counts.
    /// * `CmsError::LaneMultipleOfChannels` - a slice length is not a multiple of
    ///   its channel count.
    fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
        // presumably mlaf(acc, a, b) evaluates acc + a * b; defined in crate::mlaf, not shown here
        use crate::mlaf::mlaf;
        let src_cn = Layout::from(SRC_LAYOUT);
        let dst_cn = Layout::from(DST_LAYOUT);
        let src_channels = src_cn.channels();
        let dst_channels = dst_cn.channels();

        if src.len() / src_channels != dst.len() / dst_channels {
            return Err(CmsError::LaneSizeMismatch);
        }
        if src.len() % src_channels != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }
        if dst.len() % dst_channels != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }

        let transform = self.profile.adaptation_matrix;
        // Alpha written when the source has no alpha channel.
        // NOTE(review): if T is a float sample type, confirm that
        // (1 << bit_depth) - 1 is the intended opaque value.
        let max_colors: T = ((1 << self.bit_depth) - 1).as_();

        for (src, dst) in src
            .chunks_exact(src_channels)
            .zip(dst.chunks_exact_mut(dst_channels))
        {
            // Linearize the whole pixel through the evaluator.
            let mut src_rgb = Rgb::new(
                src[src_cn.r_i()].as_(),
                src[src_cn.g_i()].as_(),
                src[src_cn.b_i()].as_(),
            );
            src_rgb = self.profile.linear_evaluator.evaluate_tristimulus(src_rgb);
            let r = src_rgb.r;
            let g = src_rgb.g;
            let b = src_rgb.b;
            let a = if src_channels == 4 {
                src[src_cn.a_i()]
            } else {
                max_colors
            };

            // Matrix row times (r, g, b).
            let new_r = mlaf(
                mlaf(r * transform.v[0][0], g, transform.v[0][1]),
                b,
                transform.v[0][2],
            );

            let new_g = mlaf(
                mlaf(r * transform.v[1][0], g, transform.v[1][1]),
                b,
                transform.v[1][2],
            );

            let new_b = mlaf(
                mlaf(r * transform.v[2][0], g, transform.v[2][1]),
                b,
                transform.v[2][2],
            );

            let mut rgb = Rgb::new(new_r, new_g, new_b);
            rgb = self.profile.gamma_evaluator.evaluate_tristimulus(rgb);

            dst[dst_cn.r_i()] = rgb.r.as_();
            dst[dst_cn.g_i()] = rgb.g.as_();
            dst[dst_cn.b_i()] = rgb.b.as_();
            if dst_channels == 4 {
                dst[dst_cn.a_i()] = a;
            }
        }

        Ok(())
    }
}
moxcms-0.7.7/src/conversions/sse/interpolator.rs000064400000000000000000000332771046102023000202020ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2.
Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/
use crate::conversions::interpolator::BarycentricWeight;
use crate::math::FusedMultiplyAdd;
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use std::ops::{Add, Mul, Sub};

/// One LUT node: four `f32` lanes, 16-byte aligned so it can be read with an
/// aligned SSE load.
#[repr(align(16), C)]
pub(crate) struct SseAlignedF32(pub(crate) [f32; 4]);

#[cfg(feature = "options")]
pub(crate) struct TetrahedralSse<const GRID_SIZE: usize> {}

#[cfg(feature = "options")]
pub(crate) struct PyramidalSse<const GRID_SIZE: usize> {}

#[cfg(feature = "options")]
pub(crate) struct PrismaticSse<const GRID_SIZE: usize> {}

pub(crate) struct TrilinearSse<const GRID_SIZE: usize> {}

/// Reads the LUT node at grid coordinates `(x, y, z)`.
trait Fetcher<T> {
    fn fetch(&self, x: i32, y: i32, z: i32) -> T;
}

/// Thin wrapper over `__m128` so the interpolators can use `+`, `-`, `*`
/// and `mla`.
#[derive(Copy, Clone)]
#[repr(transparent)]
pub(crate) struct SseVector {
    pub(crate) v: __m128,
}

impl From<f32> for SseVector {
    /// Broadcasts `v` to all four lanes.
    #[inline(always)]
    fn from(v: f32) -> Self {
        SseVector {
            v: unsafe { _mm_set1_ps(v) },
        }
    }
}

impl Sub<SseVector> for SseVector {
    type Output = Self;
    #[inline(always)]
    fn sub(self, rhs: SseVector) -> Self::Output {
        SseVector {
            v: unsafe { _mm_sub_ps(self.v, rhs.v) },
        }
    }
}

impl Add<SseVector> for SseVector {
    type Output = Self;
    #[inline(always)]
    fn add(self, rhs: SseVector) -> Self::Output {
        SseVector {
            v: unsafe { _mm_add_ps(self.v, rhs.v) },
        }
    }
}

impl Mul<SseVector> for SseVector {
    type Output = Self;
    #[inline(always)]
    fn mul(self, rhs: SseVector) -> Self::Output {
        SseVector {
            v: unsafe { _mm_mul_ps(self.v, rhs.v) },
        }
    }
}

impl FusedMultiplyAdd<SseVector> for SseVector {
    /// Computes `self + b * c` as a separate multiply and add (no FMA
    /// instruction is used here).
    #[inline(always)]
    fn mla(&self, b: SseVector, c: SseVector) -> SseVector {
        SseVector {
            v: unsafe { _mm_add_ps(self.v, _mm_mul_ps(b.v, c.v)) },
        }
    }
}

/// [`Fetcher`] over a cube of nodes, addressed as
/// `x * GRID_SIZE^2 + y * GRID_SIZE + z`.
struct TetrahedralSseFetchVector<'a, const GRID_SIZE: usize> {
    cube: &'a [SseAlignedF32],
}

impl<const GRID_SIZE: usize> Fetcher<SseVector> for TetrahedralSseFetchVector<'_, GRID_SIZE> {
    #[inline(always)]
    fn fetch(&self, x: i32, y: i32, z: i32) -> SseVector {
        let offset = (x as u32 * (GRID_SIZE as u32 * GRID_SIZE as u32)
            + y as u32 * GRID_SIZE as u32
            + z as u32) as usize;
        // NOTE(review): unchecked access — relies on callers passing grid
        // coordinates below GRID_SIZE and a `cube` that holds at least
        // `offset + 1` nodes; nothing here verifies that.
        let jx = unsafe { self.cube.get_unchecked(offset..) };
        SseVector {
            // Aligned load is valid because `SseAlignedF32` is `align(16)`.
            v: unsafe { _mm_load_ps(jx.as_ptr() as *const _) },
        }
    }
}

/// 3D interpolation of one LUT sample; `in_r`/`in_g`/`in_b` index into `lut`,
/// which supplies the lower node (`x`), the upper node (`x_n`) and the
/// fractional weight (`w`) for each axis.
pub(crate) trait SseMdInterpolation {
    fn inter3_sse(
        &self,
        table: &[SseAlignedF32],
        in_r: usize,
        in_g: usize,
        in_b: usize,
        lut: &[BarycentricWeight<f32>],
    ) -> SseVector;
}

#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> TetrahedralSse<GRID_SIZE> {
    /// Tetrahedral interpolation: the ordering of the three weights selects
    /// which of six tetrahedra is walked.
    ///
    /// # Safety
    /// Requires SSE4.1. `in_r`, `in_g` and `in_b` must be in bounds for `lut`
    /// (they are read unchecked).
    #[target_feature(enable = "sse4.1")]
    unsafe fn interpolate(
        &self,
        in_r: usize,
        in_g: usize,
        in_b: usize,
        lut: &[BarycentricWeight<f32>],
        r: impl Fetcher<SseVector>,
    ) -> SseVector {
        let lut_r = unsafe { *lut.get_unchecked(in_r) };
        let lut_g = unsafe { *lut.get_unchecked(in_g) };
        let lut_b = unsafe { *lut.get_unchecked(in_b) };

        let x: i32 = lut_r.x;
        let y: i32 = lut_g.x;
        let z: i32 = lut_b.x;

        let x_n: i32 = lut_r.x_n;
        let y_n: i32 = lut_g.x_n;
        let z_n: i32 = lut_b.x_n;

        let rx = lut_r.w;
        let ry = lut_g.w;
        let rz = lut_b.w;

        let c0 = r.fetch(x, y, z);

        let c2;
        let c1;
        let c3;
        if rx >= ry {
            if ry >= rz {
                //rx >= ry && ry >= rz
                c1 = r.fetch(x_n, y, z) - c0;
                c2 = r.fetch(x_n, y_n, z) - r.fetch(x_n, y, z);
                c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
            } else if rx >= rz {
                //rx >= rz && rz >= ry
                c1 = r.fetch(x_n, y, z) - c0;
                c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
                c3 = r.fetch(x_n, y, z_n) - r.fetch(x_n, y, z);
            } else {
                //rz > rx && rx >= ry
                c1 = r.fetch(x_n, y, z_n) - r.fetch(x, y, z_n);
                c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
                c3 = r.fetch(x, y, z_n) - c0;
            }
        } else if rx >= rz {
            //ry > rx && rx >= rz
            c1 = r.fetch(x_n, y_n, z) - r.fetch(x, y_n, z);
            c2 = r.fetch(x, y_n, z) - c0;
            c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
        } else if ry >= rz {
            //ry >= rz && rz > rx
            c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
            c2 = r.fetch(x, y_n, z) - c0;
            c3 = r.fetch(x, y_n, z_n) - r.fetch(x, y_n, z);
        } else {
            //rz > ry && ry > rx
            c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
            c2 = r.fetch(x, y_n, z_n) - r.fetch(x, y, z_n);
            c3 = r.fetch(x, y, z_n) - c0;
        }
        let s0 = c0.mla(c1, SseVector::from(rx));
        let s1 = s0.mla(c2, SseVector::from(ry));
        s1.mla(c3, SseVector::from(rz))
    }
}

/// Implements [`SseMdInterpolation`] for an interpolator type by forwarding
/// to its inherent `interpolate` with a cube fetcher over `table`.
macro_rules! define_inter_sse {
    ($interpolator: ident) => {
        impl<const GRID_SIZE: usize> SseMdInterpolation for $interpolator<GRID_SIZE> {
            fn inter3_sse(
                &self,
                table: &[SseAlignedF32],
                in_r: usize,
                in_g: usize,
                in_b: usize,
                lut: &[BarycentricWeight<f32>],
            ) -> SseVector {
                unsafe {
                    self.interpolate(
                        in_r,
                        in_g,
                        in_b,
                        lut,
                        TetrahedralSseFetchVector::<GRID_SIZE> { cube: table },
                    )
                }
            }
        }
    };
}

#[cfg(feature = "options")]
define_inter_sse!(TetrahedralSse);
#[cfg(feature = "options")]
define_inter_sse!(PyramidalSse);
#[cfg(feature = "options")]
define_inter_sse!(PrismaticSse);
define_inter_sse!(TrilinearSse);

#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> PyramidalSse<GRID_SIZE> {
    /// Pyramidal interpolation: the smallest of the three weights selects
    /// one of three pyramids.
    ///
    /// # Safety
    /// Requires SSE4.1. `in_r`, `in_g` and `in_b` must be in bounds for `lut`
    /// (they are read unchecked).
    #[target_feature(enable = "sse4.1")]
    unsafe fn interpolate(
        &self,
        in_r: usize,
        in_g: usize,
        in_b: usize,
        lut: &[BarycentricWeight<f32>],
        r: impl Fetcher<SseVector>,
    ) -> SseVector {
        let lut_r = unsafe { *lut.get_unchecked(in_r) };
        let lut_g = unsafe { *lut.get_unchecked(in_g) };
        let lut_b = unsafe { *lut.get_unchecked(in_b) };

        let x: i32 = lut_r.x;
        let y: i32 = lut_g.x;
        let z: i32 = lut_b.x;

        let x_n: i32 = lut_r.x_n;
        let y_n: i32 = lut_g.x_n;
        let z_n: i32 = lut_b.x_n;

        let dr = lut_r.w;
        let dg = lut_g.w;
        let db = lut_b.w;

        let c0 = r.fetch(x, y, z);

        if dr > db && dg > db {
            let x0 = r.fetch(x_n, y_n, z_n);
            let x1 = r.fetch(x_n, y_n, z);
            let x2 = r.fetch(x_n, y, z);
            let x3 = r.fetch(x, y_n, z);

            let c1 = x0 - x1;
            let c2 = x2 - c0;
            let c3 = x3 - c0;
            let c4 = c0 - x3 - x2 + x1;

            let s0 = c0.mla(c1, SseVector::from(db));
            let s1 = s0.mla(c2, SseVector::from(dr));
            let s2 = s1.mla(c3, SseVector::from(dg));
            s2.mla(c4, SseVector::from(dr * dg))
        } else if db > dr && dg > dr {
            let x0 = r.fetch(x, y, z_n);
            let x1 = r.fetch(x_n, y_n, z_n);
            let x2 = r.fetch(x, y_n, z_n);
            let x3 = r.fetch(x, y_n, z);

            let c1 = x0 - c0;
            let c2 = x1 - x2;
            let c3 = x3 - c0;
            let c4 = c0 - x3 - x0 + x2;

            let s0 = c0.mla(c1, SseVector::from(db));
            let s1 = s0.mla(c2, SseVector::from(dr));
            let s2 = s1.mla(c3, SseVector::from(dg));
            s2.mla(c4, SseVector::from(dg * db))
        } else {
            let x0 = r.fetch(x, y, z_n);
            let x1 = r.fetch(x_n, y, z);
            let x2 = r.fetch(x_n, y, z_n);
            let x3 = r.fetch(x_n, y_n, z_n);

            let c1 = x0 - c0;
            let c2 = x1 - c0;
            let c3 = x3 - x2;
            let c4 = c0 - x1 - x0 + x2;

            let s0 = c0.mla(c1, SseVector::from(db));
            let s1 = s0.mla(c2, SseVector::from(dr));
            let s2 = s1.mla(c3, SseVector::from(dg));
            s2.mla(c4, SseVector::from(db * dr))
        }
    }
}

#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> PrismaticSse<GRID_SIZE> {
    /// Prismatic interpolation: comparing the blue and red weights selects
    /// one of two prisms.
    ///
    /// # Safety
    /// Requires SSE4.1. `in_r`, `in_g` and `in_b` must be in bounds for `lut`
    /// (they are read unchecked).
    #[target_feature(enable = "sse4.1")]
    unsafe fn interpolate(
        &self,
        in_r: usize,
        in_g: usize,
        in_b: usize,
        lut: &[BarycentricWeight<f32>],
        r: impl Fetcher<SseVector>,
    ) -> SseVector {
        let lut_r = unsafe { *lut.get_unchecked(in_r) };
        let lut_g = unsafe { *lut.get_unchecked(in_g) };
        let lut_b = unsafe { *lut.get_unchecked(in_b) };

        let x: i32 = lut_r.x;
        let y: i32 = lut_g.x;
        let z: i32 = lut_b.x;

        let x_n: i32 = lut_r.x_n;
        let y_n: i32 = lut_g.x_n;
        let z_n: i32 = lut_b.x_n;

        let dr = lut_r.w;
        let dg = lut_g.w;
        let db = lut_b.w;

        let c0 = r.fetch(x, y, z);

        if db > dr {
            let x0 = r.fetch(x, y, z_n);
            let x1 = r.fetch(x_n, y, z_n);
            let x2 = r.fetch(x, y_n, z);
            let x3 = r.fetch(x, y_n, z_n);
            let x4 = r.fetch(x_n, y_n, z_n);

            let c1 = x0 - c0;
            let c2 = x1 - x0;
            let c3 = x2 - c0;
            let c4 = c0 - x2 - x0 + x3;
            let c5 = x0 - x3 - x1 + x4;

            let s0 = c0.mla(c1, SseVector::from(db));
            let s1 = s0.mla(c2, SseVector::from(dr));
            let s2 = s1.mla(c3, SseVector::from(dg));
            let s3 = s2.mla(c4, SseVector::from(dg * db));
            s3.mla(c5, SseVector::from(dr * dg))
        } else {
            let x0 = r.fetch(x_n, y, z);
            let x1 = r.fetch(x_n, y, z_n);
            let x2 = r.fetch(x, y_n, z);
            let x3 = r.fetch(x_n, y_n, z);
            let x4 = r.fetch(x_n, y_n, z_n);

            let c1 = x1 - x0;
            let c2 = x0 - c0;
            let c3 = x2 - c0;
            let c4 = x0 - x3 - x1 + x4;
            let c5 = c0 - x2 - x0 + x3;

            let s0 = c0.mla(c1, SseVector::from(db));
            let s1 = s0.mla(c2, SseVector::from(dr));
            let s2 = s1.mla(c3, SseVector::from(dg));
            let s3 = s2.mla(c4, SseVector::from(dg * db));
            s3.mla(c5, SseVector::from(dr * dg))
        }
    }
}

impl<const GRID_SIZE: usize> TrilinearSse<GRID_SIZE> {
    /// Trilinear interpolation over the eight nodes of the enclosing cell:
    /// blend along x, then y, then z.
    ///
    /// # Safety
    /// Requires SSE4.1. `in_r`, `in_g` and `in_b` must be in bounds for `lut`
    /// (they are read unchecked).
    #[target_feature(enable = "sse4.1")]
    unsafe fn interpolate(
        &self,
        in_r: usize,
        in_g: usize,
        in_b: usize,
        lut: &[BarycentricWeight<f32>],
        r: impl Fetcher<SseVector>,
    ) -> SseVector {
        let lut_r = unsafe { *lut.get_unchecked(in_r) };
        let lut_g = unsafe { *lut.get_unchecked(in_g) };
        let lut_b = unsafe { *lut.get_unchecked(in_b) };

        let x: i32 = lut_r.x;
        let y: i32 = lut_g.x;
        let z: i32 = lut_b.x;

        let x_n: i32 = lut_r.x_n;
        let y_n: i32 = lut_g.x_n;
        let z_n: i32 = lut_b.x_n;

        let dr = lut_r.w;
        let dg = lut_g.w;
        let db = lut_b.w;

        let w0 = SseVector::from(dr);
        let w1 = SseVector::from(dg);
        let w2 = SseVector::from(db);

        let c000 = r.fetch(x, y, z);
        let c100 = r.fetch(x_n, y, z);
        let c010 = r.fetch(x, y_n, z);
        let c110 = r.fetch(x_n, y_n, z);
        let c001 = r.fetch(x, y, z_n);
        let c101 = r.fetch(x_n, y, z_n);
        let c011 = r.fetch(x, y_n, z_n);
        let c111 = r.fetch(x_n, y_n, z_n);

        let dx = SseVector::from(1.0 - dr);

        let c00 = (c000 * dx).mla(c100, w0);
        let c10 = (c010 * dx).mla(c110, w0);
        let c01 = (c001 * dx).mla(c101, w0);
        let c11 = (c011 * dx).mla(c111, w0);

        let dy = SseVector::from(1.0 - dg);

        let c0 = (c00 * dy).mla(c10, w1);
        let c1 = (c01 * dy).mla(c11, w1);

        let dz = SseVector::from(1.0 - db);

        (c0 * dz).mla(c1, w2)
    }
}
moxcms-0.7.7/src/conversions/sse/interpolator_q0_15.rs000064400000000000000000000340251046102023000210770ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3.
Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/
use crate::conversions::interpolator::BarycentricWeight;
use crate::math::FusedMultiplyAdd;
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use std::ops::{Add, Mul, Sub};

/// One fixed-point LUT node: four `i16` lanes, 8-byte aligned.
#[repr(align(8), C)]
pub(crate) struct SseAlignedI16x4(pub(crate) [i16; 4]);

#[cfg(feature = "options")]
pub(crate) struct TetrahedralSseQ0_15<const GRID_SIZE: usize> {}

#[cfg(feature = "options")]
pub(crate) struct PyramidalSseQ0_15<const GRID_SIZE: usize> {}

#[cfg(feature = "options")]
pub(crate) struct PrismaticSseQ0_15<const GRID_SIZE: usize> {}

pub(crate) struct TrilinearSseQ0_15<const GRID_SIZE: usize> {}

/// Reads the LUT node at grid coordinates `(x, y, z)`.
trait Fetcher<T> {
    fn fetch(&self, x: i32, y: i32, z: i32) -> T;
}

/// Thin wrapper over `__m128i` holding `i16` lanes; multiplication uses
/// `_mm_mulhrs_epi16`.
#[derive(Copy, Clone)]
#[repr(transparent)]
pub(crate) struct SseVector {
    pub(crate) v: __m128i,
}

impl From<i16> for SseVector {
    /// Broadcasts `v` to every 16-bit lane.
    #[inline(always)]
    fn from(v: i16) -> Self {
        SseVector {
            v: unsafe { _mm_set1_epi16(v) },
        }
    }
}

impl Sub<SseVector> for SseVector {
    type Output = Self;
    #[inline(always)]
    fn sub(self, rhs: SseVector) -> Self::Output {
        SseVector {
            v: unsafe { _mm_sub_epi16(self.v, rhs.v) },
        }
    }
}

impl Add<SseVector> for SseVector {
    type Output = Self;
    #[inline(always)]
    fn add(self, rhs: SseVector) -> Self::Output {
        SseVector {
            v: unsafe { _mm_add_epi16(self.v, rhs.v) },
        }
    }
}

impl Mul<SseVector> for SseVector {
    type Output = Self;
    #[inline(always)]
    fn mul(self, rhs: SseVector) -> Self::Output {
        SseVector {
            v: unsafe { _mm_mulhrs_epi16(self.v, rhs.v) },
        }
    }
}

impl FusedMultiplyAdd<SseVector> for SseVector {
    /// Computes `self + mulhrs(b, c)` on 16-bit lanes.
    #[inline(always)]
    fn mla(&self, b: SseVector, c: SseVector) -> SseVector {
        SseVector {
            v: unsafe { _mm_add_epi16(self.v, _mm_mulhrs_epi16(b.v, c.v)) },
        }
    }
}

/// [`Fetcher`] over a cube of fixed-point nodes, addressed as
/// `x * GRID_SIZE^2 + y * GRID_SIZE + z`.
struct TetrahedralSseQ0_15FetchVector<'a, const GRID_SIZE: usize> {
    cube: &'a [SseAlignedI16x4],
}

impl<const GRID_SIZE: usize> Fetcher<SseVector> for TetrahedralSseQ0_15FetchVector<'_, GRID_SIZE> {
    #[inline(always)]
    fn fetch(&self, x: i32, y: i32, z: i32) -> SseVector {
        let offset = (x as u32 * (GRID_SIZE as u32 * GRID_SIZE as u32)
            + y as u32 * GRID_SIZE as u32
            + z as u32) as usize;
        // NOTE(review): unchecked access — relies on callers passing grid
        // coordinates below GRID_SIZE and a `cube` that holds at least
        // `offset + 1` nodes; nothing here verifies that.
        let jx = unsafe { self.cube.get_unchecked(offset..) };
        SseVector {
            // Loads the 64 bits of one node (four i16 values).
            v: unsafe { _mm_loadu_si64(jx.as_ptr() as *const _) },
        }
    }
}

/// 3D fixed-point interpolation of one LUT sample; `in_r`/`in_g`/`in_b` index
/// into `lut`, which supplies the lower node (`x`), the upper node (`x_n`)
/// and the weight (`w`) for each axis.
pub(crate) trait SseMdInterpolationQ0_15 {
    fn inter3_sse(
        &self,
        cube: &[SseAlignedI16x4],
        in_r: usize,
        in_g: usize,
        in_b: usize,
        lut: &[BarycentricWeight<i16>],
    ) -> SseVector;
}

#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> TetrahedralSseQ0_15<GRID_SIZE> {
    /// Tetrahedral interpolation: the ordering of the three weights selects
    /// which of six tetrahedra is walked.
    ///
    /// # Safety
    /// Requires SSE4.1. `in_r`, `in_g` and `in_b` must be in bounds for `lut`
    /// (they are read unchecked).
    #[target_feature(enable = "sse4.1")]
    unsafe fn interpolate(
        &self,
        in_r: usize,
        in_g: usize,
        in_b: usize,
        lut: &[BarycentricWeight<i16>],
        r: impl Fetcher<SseVector>,
    ) -> SseVector {
        let lut_r = unsafe { *lut.get_unchecked(in_r) };
        let lut_g = unsafe { *lut.get_unchecked(in_g) };
        let lut_b = unsafe { *lut.get_unchecked(in_b) };

        let x: i32 = lut_r.x;
        let y: i32 = lut_g.x;
        let z: i32 = lut_b.x;

        let x_n: i32 = lut_r.x_n;
        let y_n: i32 = lut_g.x_n;
        let z_n: i32 = lut_b.x_n;

        let rx = lut_r.w;
        let ry = lut_g.w;
        let rz = lut_b.w;

        let c0 = r.fetch(x, y, z);

        let c2;
        let c1;
        let c3;
        if rx >= ry {
            if ry >= rz {
                //rx >= ry && ry >= rz
                c1 = r.fetch(x_n, y, z) - c0;
                c2 = r.fetch(x_n, y_n, z) - r.fetch(x_n, y, z);
                c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
            } else if rx >= rz {
                //rx >= rz && rz >= ry
                c1 = r.fetch(x_n, y, z) - c0;
                c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
                c3 = r.fetch(x_n, y, z_n) - r.fetch(x_n, y, z);
            } else {
                //rz > rx && rx >= ry
                c1 = r.fetch(x_n, y, z_n) - r.fetch(x, y, z_n);
                c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
                c3 = r.fetch(x, y, z_n) - c0;
            }
        } else if rx >= rz {
            //ry > rx && rx >= rz
            c1 = r.fetch(x_n, y_n, z) - r.fetch(x, y_n, z);
            c2 = r.fetch(x, y_n, z) - c0;
            c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
        } else if ry >= rz {
            //ry >= rz && rz > rx
            c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
            c2 = r.fetch(x, y_n, z) - c0;
            c3 = r.fetch(x, y_n, z_n) - r.fetch(x, y_n, z);
        } else {
            //rz > ry && ry > rx
            c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
            c2 = r.fetch(x, y_n, z_n) - r.fetch(x, y, z_n);
            c3 = r.fetch(x, y, z_n) - c0;
        }
        let s0 = c0.mla(c1, SseVector::from(rx));
        let s1 = s0.mla(c2, SseVector::from(ry));
        s1.mla(c3, SseVector::from(rz))
    }
}

/// Implements [`SseMdInterpolationQ0_15`] for an interpolator type by
/// forwarding to its inherent `interpolate` with a cube fetcher over `table`.
macro_rules! define_inter_sse {
    ($interpolator: ident) => {
        impl<const GRID_SIZE: usize> SseMdInterpolationQ0_15 for $interpolator<GRID_SIZE> {
            fn inter3_sse(
                &self,
                table: &[SseAlignedI16x4],
                in_r: usize,
                in_g: usize,
                in_b: usize,
                lut: &[BarycentricWeight<i16>],
            ) -> SseVector {
                unsafe {
                    self.interpolate(
                        in_r,
                        in_g,
                        in_b,
                        lut,
                        TetrahedralSseQ0_15FetchVector::<GRID_SIZE> { cube: table },
                    )
                }
            }
        }
    };
}

#[cfg(feature = "options")]
define_inter_sse!(TetrahedralSseQ0_15);
#[cfg(feature = "options")]
define_inter_sse!(PyramidalSseQ0_15);
#[cfg(feature = "options")]
define_inter_sse!(PrismaticSseQ0_15);
define_inter_sse!(TrilinearSseQ0_15);

#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> PyramidalSseQ0_15<GRID_SIZE> {
    /// Pyramidal interpolation: the smallest of the three weights selects
    /// one of three pyramids. Weight products are formed with the vector
    /// `Mul` (i.e. `_mm_mulhrs_epi16`).
    ///
    /// # Safety
    /// Requires SSE4.1. `in_r`, `in_g` and `in_b` must be in bounds for `lut`
    /// (they are read unchecked).
    #[target_feature(enable = "sse4.1")]
    unsafe fn interpolate(
        &self,
        in_r: usize,
        in_g: usize,
        in_b: usize,
        lut: &[BarycentricWeight<i16>],
        r: impl Fetcher<SseVector>,
    ) -> SseVector {
        let lut_r = unsafe { *lut.get_unchecked(in_r) };
        let lut_g = unsafe { *lut.get_unchecked(in_g) };
        let lut_b = unsafe { *lut.get_unchecked(in_b) };

        let x: i32 = lut_r.x;
        let y: i32 = lut_g.x;
        let z: i32 = lut_b.x;

        let x_n: i32 = lut_r.x_n;
        let y_n: i32 = lut_g.x_n;
        let z_n: i32 = lut_b.x_n;

        let dr = lut_r.w;
        let dg = lut_g.w;
        let db = lut_b.w;

        let c0 = r.fetch(x, y, z);

        if dr > db && dg > db {
            let x0 = r.fetch(x_n, y_n, z_n);
            let x1 = r.fetch(x_n, y_n, z);
            let x2 = r.fetch(x_n, y, z);
            let x3 = r.fetch(x, y_n, z);

            let c1 = x0 - x1;
            let c2 = x2 - c0;
            let c3 = x3 - c0;
            let c4 = c0 - x3 - x2 + x1;

            let s0 = c0.mla(c1, SseVector::from(db));
            let s1 = s0.mla(c2, SseVector::from(dr));
            let s2 = s1.mla(c3, SseVector::from(dg));
            s2.mla(c4, SseVector::from(dr) * SseVector::from(dg))
        } else if db > dr && dg > dr {
            let x0 = r.fetch(x, y, z_n);
            let x1 = r.fetch(x_n, y_n, z_n);
            let x2 = r.fetch(x, y_n, z_n);
            let x3 = r.fetch(x, y_n, z);

            let c1 = x0 - c0;
            let c2 = x1 - x2;
            let c3 = x3 - c0;
            let c4 = c0 - x3 - x0 + x2;

            let s0 = c0.mla(c1, SseVector::from(db));
            let s1 = s0.mla(c2, SseVector::from(dr));
            let s2 = s1.mla(c3, SseVector::from(dg));
            s2.mla(c4, SseVector::from(dg) * SseVector::from(db))
        } else {
            let x0 = r.fetch(x, y, z_n);
            let x1 = r.fetch(x_n, y, z);
            let x2 = r.fetch(x_n, y, z_n);
            let x3 = r.fetch(x_n, y_n, z_n);

            let c1 = x0 - c0;
            let c2 = x1 - c0;
            let c3 = x3 - x2;
            let c4 = c0 - x1 - x0 + x2;

            let s0 = c0.mla(c1, SseVector::from(db));
            let s1 = s0.mla(c2, SseVector::from(dr));
            let s2 = s1.mla(c3, SseVector::from(dg));
            s2.mla(c4, SseVector::from(db) * SseVector::from(dr))
        }
    }
}

#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> PrismaticSseQ0_15<GRID_SIZE> {
    /// Prismatic interpolation: comparing the blue and red weights selects
    /// one of two prisms.
    ///
    /// # Safety
    /// Requires SSE4.1. `in_r`, `in_g` and `in_b` must be in bounds for `lut`
    /// (they are read unchecked).
    #[target_feature(enable = "sse4.1")]
    unsafe fn interpolate(
        &self,
        in_r: usize,
        in_g: usize,
        in_b: usize,
        lut: &[BarycentricWeight<i16>],
        r: impl Fetcher<SseVector>,
    ) -> SseVector {
        let lut_r = unsafe { *lut.get_unchecked(in_r) };
        let lut_g = unsafe { *lut.get_unchecked(in_g) };
        let lut_b = unsafe { *lut.get_unchecked(in_b) };

        let x: i32 = lut_r.x;
        let y: i32 = lut_g.x;
        let z: i32 = lut_b.x;

        let x_n: i32 = lut_r.x_n;
        let y_n: i32 = lut_g.x_n;
        let z_n: i32 = lut_b.x_n;

        let dr = lut_r.w;
        let dg = lut_g.w;
        let db = lut_b.w;

        let c0 = r.fetch(x, y, z);

        if db > dr {
            let x0 = r.fetch(x, y, z_n);
            let x1 = r.fetch(x_n, y, z_n);
            let x2 = r.fetch(x, y_n, z);
            let x3 = r.fetch(x, y_n, z_n);
            let x4 = r.fetch(x_n, y_n, z_n);

            let c1 = x0 - c0;
            let c2 = x1 - x0;
            let c3 = x2 - c0;
            let c4 = c0 - x2 - x0 + x3;
            let c5 = x0 - x3 - x1 + x4;

            let s0 = c0.mla(c1, SseVector::from(db));
            let s1 = s0.mla(c2, SseVector::from(dr));
            let s2 = s1.mla(c3, SseVector::from(dg));
            let s3 = s2.mla(c4, SseVector::from(dg) * SseVector::from(db));
            s3.mla(c5, SseVector::from(dr) * SseVector::from(dg))
        } else {
            let x0 = r.fetch(x_n, y, z);
            let x1 = r.fetch(x_n, y, z_n);
            let x2 = r.fetch(x, y_n, z);
            let x3 = r.fetch(x_n, y_n, z);
            let x4 = r.fetch(x_n, y_n, z_n);

            let c1 = x1 - x0;
            let c2 = x0 - c0;
            let c3 = x2 - c0;
            let c4 = x0 - x3 - x1 + x4;
            let c5 = c0 - x2 - x0 + x3;

            let s0 = c0.mla(c1, SseVector::from(db));
            let s1 = s0.mla(c2, SseVector::from(dr));
            let s2 = s1.mla(c3, SseVector::from(dg));
            let s3 = s2.mla(c4, SseVector::from(dg) * SseVector::from(db));
            s3.mla(c5, SseVector::from(dr) * SseVector::from(dg))
        }
    }
}

impl<const GRID_SIZE: usize> TrilinearSseQ0_15<GRID_SIZE> {
    /// Trilinear interpolation over the eight nodes of the enclosing cell.
    /// The complementary weights are formed as `Q_MAX - w`, with
    /// `Q_MAX = 2^15 - 1`.
    ///
    /// # Safety
    /// Requires SSE4.1. `in_r`, `in_g` and `in_b` must be in bounds for `lut`
    /// (they are read unchecked).
    #[target_feature(enable = "sse4.1")]
    unsafe fn interpolate(
        &self,
        in_r: usize,
        in_g: usize,
        in_b: usize,
        lut: &[BarycentricWeight<i16>],
        r: impl Fetcher<SseVector>,
    ) -> SseVector {
        let lut_r = unsafe { *lut.get_unchecked(in_r) };
        let lut_g = unsafe { *lut.get_unchecked(in_g) };
        let lut_b = unsafe { *lut.get_unchecked(in_b) };

        let x: i32 = lut_r.x;
        let y: i32 = lut_g.x;
        let z: i32 = lut_b.x;

        let x_n: i32 = lut_r.x_n;
        let y_n: i32 = lut_g.x_n;
        let z_n: i32 = lut_b.x_n;

        let dr = lut_r.w;
        let dg = lut_g.w;
        let db = lut_b.w;

        const Q_MAX: i16 = ((1i32 << 15i32) - 1) as i16;

        let q_max = SseVector::from(Q_MAX);
        let w0 = SseVector::from(dr);
        let w1 = SseVector::from(dg);
        let w2 = SseVector::from(db);
        let dx = q_max - SseVector::from(dr);
        let dy = q_max - SseVector::from(dg);
        let dz = q_max - SseVector::from(db);

        let c000 = r.fetch(x, y, z);
        let c100 = r.fetch(x_n, y, z);
        let c010 = r.fetch(x, y_n, z);
        let c110 = r.fetch(x_n, y_n, z);
        let c001 = r.fetch(x, y, z_n);
        let c101 = r.fetch(x_n, y, z_n);
        let c011 = r.fetch(x, y_n, z_n);
        let c111 = r.fetch(x_n, y_n, z_n);

        let c00 = (c000 * dx).mla(c100, w0);
        let c10 = (c010 * dx).mla(c110, w0);
        let c01 = (c001 * dx).mla(c101, w0);
        let c11 = (c011 * dx).mla(c111, w0);

        let c0 = (c00 * dy).mla(c10, w1);
        let c1 = (c01 * dy).mla(c11, w1);

        (c0 * dz).mla(c1, w2)
    }
}
moxcms-0.7.7/src/conversions/sse/lut4_to_3.rs000064400000000000000000000311211046102023000172560ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2.
Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/
use crate::conversions::LutBarycentricReduction;
use crate::conversions::interpolator::BarycentricWeight;
use crate::conversions::lut_transforms::Lut4x3Factory;
use crate::conversions::sse::interpolator::*;
use crate::conversions::sse::interpolator_q0_15::SseAlignedI16x4;
use crate::conversions::sse::lut4_to_3_q0_15::TransformLut4To3SseQ0_15;
use crate::transform::PointeeSizeExpressible;
use crate::{
    BarycentricWeightScale, CmsError, DataColorSpace, InterpolationMethod, Layout,
    TransformExecutor, TransformOptions,
};
use num_traits::AsPrimitive;
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use std::marker::PhantomData;

/// SSE executor for a 4-input / 3-output LUT stored as `f32` nodes.
///
/// `U` is the integer type the source samples are reduced to before they
/// index `weights`.
struct TransformLut4To3Sse<
    T,
    U,
    const LAYOUT: u8,
    const GRID_SIZE: usize,
    const BIT_DEPTH: usize,
    const BINS: usize,
    const BARYCENTRIC_BINS: usize,
> {
    lut: Vec<SseAlignedF32>,
    _phantom: PhantomData<T>,
    _phantom1: PhantomData<U>,
    interpolation_method: InterpolationMethod,
    weights: Box<[BarycentricWeight<f32>; BINS]>,
    color_space: DataColorSpace,
    is_linear: bool,
}

impl<
    T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
    U: AsPrimitive<usize>,
    const LAYOUT: u8,
    const GRID_SIZE: usize,
    const BIT_DEPTH: usize,
    const BINS: usize,
    const BARYCENTRIC_BINS: usize,
> TransformLut4To3Sse<T, U, LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
    f32: AsPrimitive<T>,
    u32: AsPrimitive<T>,
    (): LutBarycentricReduction<T, U>,
{
    /// Converts every 4-sample source pixel into one destination pixel.
    ///
    /// The first three samples are interpolated in 3D inside two cubes chosen
    /// by the fourth sample; the two results are then blended linearly by the
    /// fourth sample's weight.
    ///
    /// # Safety
    /// Requires SSE4.1 at runtime.
    ///
    /// NOTE(review): the `Send + Sync` bound on the boxed interpolator is
    /// reconstructed; confirm against the trait's other users.
    #[allow(unused_unsafe)]
    #[target_feature(enable = "sse4.1")]
    unsafe fn transform_chunk(
        &self,
        src: &[T],
        dst: &mut [T],
        interpolator: Box<dyn SseMdInterpolation + Send + Sync>,
    ) {
        let cn = Layout::from(LAYOUT);
        let channels = cn.channels();
        let grid_size = GRID_SIZE as i32;
        let grid_size3 = grid_size * grid_size * grid_size;

        let value_scale = unsafe { _mm_set1_ps(((1 << BIT_DEPTH) - 1) as f32) };
        let max_value = ((1 << BIT_DEPTH) - 1u32).as_();

        for (src, dst) in src.chunks_exact(4).zip(dst.chunks_exact_mut(channels)) {
            let c = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
                src[0],
            );
            let m = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
                src[1],
            );
            let y = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
                src[2],
            );
            let k = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
                src[3],
            );

            let k_weights = self.weights[k.as_()];

            let w: i32 = k_weights.x;
            let w_n: i32 = k_weights.x_n;
            let t: f32 = k_weights.w;

            // Each value of the fourth axis owns one GRID_SIZE^3 cube of nodes.
            let table1 = &self.lut[(w * grid_size3) as usize..];
            let table2 = &self.lut[(w_n * grid_size3) as usize..];

            let a0 = interpolator
                .inter3_sse(table1, c.as_(), m.as_(), y.as_(), self.weights.as_slice())
                .v;
            let b0 = interpolator
                .inter3_sse(table2, c.as_(), m.as_(), y.as_(), self.weights.as_slice())
                .v;

            if T::FINITE {
                unsafe {
                    let t0 = _mm_set1_ps(t);
                    let ones = _mm_set1_ps(1f32);
                    let hp = _mm_mul_ps(a0, _mm_sub_ps(ones, t0));
                    let mut v = _mm_add_ps(_mm_mul_ps(b0, t0), hp);
                    // Clamp to [0, 2^BIT_DEPTH - 1] before converting to integers.
                    v = _mm_max_ps(v, _mm_setzero_ps());
                    v = _mm_mul_ps(v, value_scale);
                    v = _mm_min_ps(v, value_scale);
                    let jvz = _mm_cvtps_epi32(v);

                    let x = _mm_extract_epi32::<0>(jvz);
                    let y = _mm_extract_epi32::<1>(jvz);
                    let z = _mm_extract_epi32::<2>(jvz);

                    dst[cn.r_i()] = (x as u32).as_();
                    dst[cn.g_i()] = (y as u32).as_();
                    dst[cn.b_i()] = (z as u32).as_();
                }
            } else {
                unsafe {
                    let t0 = _mm_set1_ps(t);
                    let ones = _mm_set1_ps(1f32);
                    let hp = _mm_mul_ps(a0, _mm_sub_ps(ones, t0));
                    let v = _mm_add_ps(_mm_mul_ps(b0, t0), hp);

                    dst[cn.r_i()] = f32::from_bits(_mm_extract_ps::<0>(v) as u32).as_();
                    dst[cn.g_i()] = f32::from_bits(_mm_extract_ps::<1>(v) as u32).as_();
                    dst[cn.b_i()] = f32::from_bits(_mm_extract_ps::<2>(v) as u32).as_();
                }
            }
            if channels == 4 {
                dst[cn.a_i()] = max_value;
            }
        }
    }
}

impl<
    T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
    U: AsPrimitive<usize>,
    const LAYOUT: u8,
    const GRID_SIZE: usize,
    const BIT_DEPTH: usize,
    const BINS: usize,
    const BARYCENTRIC_BINS: usize,
> TransformExecutor<T>
    for TransformLut4To3Sse<T, U, LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
    f32: AsPrimitive<T>,
    u32: AsPrimitive<T>,
    (): LutBarycentricReduction<T, U>,
{
    /// Validates the slice lengths and runs [`Self::transform_chunk`] with
    /// the interpolator selected by colour space and options.
    ///
    /// Lab, XYZ and linear RGB always use trilinear interpolation.
    ///
    /// # Errors
    /// * [`CmsError::LaneMultipleOfChannels`] when `src` is not a multiple of
    ///   4 or `dst` is not a multiple of the layout's channel count.
    /// * [`CmsError::LaneSizeMismatch`] when the pixel counts differ.
    fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
        let cn = Layout::from(LAYOUT);
        let channels = cn.channels();
        if src.len() % 4 != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }
        if dst.len() % channels != 0 {
            return Err(CmsError::LaneMultipleOfChannels);
        }
        let src_chunks = src.len() / 4;
        let dst_chunks = dst.len() / channels;
        if src_chunks != dst_chunks {
            return Err(CmsError::LaneSizeMismatch);
        }

        unsafe {
            if self.color_space == DataColorSpace::Lab
                || (self.is_linear && self.color_space == DataColorSpace::Rgb)
                || self.color_space == DataColorSpace::Xyz
            {
                self.transform_chunk(src, dst, Box::new(TrilinearSse::<GRID_SIZE> {}));
            } else {
                match self.interpolation_method {
                    #[cfg(feature = "options")]
                    InterpolationMethod::Tetrahedral => {
                        self.transform_chunk(src, dst, Box::new(TetrahedralSse::<GRID_SIZE> {}));
                    }
                    #[cfg(feature = "options")]
                    InterpolationMethod::Pyramid => {
                        self.transform_chunk(src, dst, Box::new(PyramidalSse::<GRID_SIZE> {}));
                    }
                    #[cfg(feature = "options")]
                    InterpolationMethod::Prism => {
                        self.transform_chunk(src, dst, Box::new(PrismaticSse::<GRID_SIZE> {}));
                    }
                    InterpolationMethod::Linear => {
                        self.transform_chunk(src, dst, Box::new(TrilinearSse::<GRID_SIZE> {}));
                    }
                }
            }
        }

        Ok(())
    }
}

pub(crate) struct SseLut4x3Factory {}

impl Lut4x3Factory for SseLut4x3Factory {
    /// Builds the SSE 4-to-3 LUT executor.
    ///
    /// When `options.prefer_fixed_point` is set and `BIT_DEPTH < 16`, the LUT
    /// is quantised to `i16` nodes and the Q0.15 executor is returned;
    /// otherwise the `f32` executor is used. `lut` is read as packed triples.
    fn make_transform_4x3<
        T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible + 'static + Send + Sync,
        const LAYOUT: u8,
        const GRID_SIZE: usize,
        const BIT_DEPTH: usize,
    >(
        lut: Vec<f32>,
        options: TransformOptions,
        color_space: DataColorSpace,
        is_linear: bool,
    ) -> Box<dyn TransformExecutor<T> + Sync + Send>
    where
        f32: AsPrimitive<T>,
        u32: AsPrimitive<T>,
        (): LutBarycentricReduction<T, u8>,
        (): LutBarycentricReduction<T, u16>,
    {
        if options.prefer_fixed_point && BIT_DEPTH < 16 {
            // Quantisation scale: the full integer range for finite types,
            // 2^14 - 1 otherwise.
            let q: f32 = if T::FINITE {
                ((1i32 << BIT_DEPTH as i32) - 1) as f32
            } else {
                ((1i32 << 14i32) - 1) as f32
            };
            let lut = lut
                .chunks_exact(3)
                .map(|x| {
                    SseAlignedI16x4([
                        (x[0] * q).round() as i16,
                        (x[1] * q).round() as i16,
                        (x[2] * q).round() as i16,
                        0,
                    ])
                })
                .collect::<Vec<_>>();
            return match options.barycentric_weight_scale {
                BarycentricWeightScale::Low => Box::new(TransformLut4To3SseQ0_15::<
                    T,
                    u8,
                    LAYOUT,
                    GRID_SIZE,
                    BIT_DEPTH,
                    256,
                    256,
                > {
                    lut,
                    interpolation_method: options.interpolation_method,
                    weights: BarycentricWeight::<i16>::create_ranged_256::<GRID_SIZE>(),
                    _phantom: PhantomData,
                    _phantom1: PhantomData,
                    color_space,
                    is_linear,
                }),
                #[cfg(feature = "options")]
                BarycentricWeightScale::High => Box::new(TransformLut4To3SseQ0_15::<
                    T,
                    u16,
                    LAYOUT,
                    GRID_SIZE,
                    BIT_DEPTH,
                    65536,
                    65536,
                > {
                    lut,
                    interpolation_method: options.interpolation_method,
                    weights: BarycentricWeight::<i16>::create_binned::<GRID_SIZE, 65536>(),
                    _phantom: PhantomData,
                    _phantom1: PhantomData,
                    color_space,
                    is_linear,
                }),
            };
        }
        // Pad each RGB triple to four lanes so a node is one aligned SSE load.
        let lut = lut
            .chunks_exact(3)
            .map(|x| SseAlignedF32([x[0], x[1], x[2], 0f32]))
            .collect::<Vec<_>>();
        match options.barycentric_weight_scale {
            BarycentricWeightScale::Low => {
                Box::new(
                    TransformLut4To3Sse::<T, u8, LAYOUT, GRID_SIZE, BIT_DEPTH, 256, 256> {
                        lut,
                        _phantom: PhantomData,
                        _phantom1: PhantomData,
                        interpolation_method: options.interpolation_method,
                        weights: BarycentricWeight::<f32>::create_ranged_256::<GRID_SIZE>(),
                        color_space,
                        is_linear,
                    },
                )
            }
            #[cfg(feature = "options")]
            BarycentricWeightScale::High => {
                Box::new(
                    TransformLut4To3Sse::<T, u16, LAYOUT, GRID_SIZE, BIT_DEPTH, 65536, 65536> {
                        lut,
                        _phantom: PhantomData,
                        _phantom1: PhantomData,
                        interpolation_method: options.interpolation_method,
                        weights: BarycentricWeight::<f32>::create_binned::<GRID_SIZE, 65536>(),
                        color_space,
                        is_linear,
                    },
                )
            }
        }
    }
}
moxcms-0.7.7/src/conversions/sse/lut4_to_3_q0_15.rs000064400000000000000000000212711046102023000201700ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3.
Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ use crate::conversions::LutBarycentricReduction; use crate::conversions::interpolator::BarycentricWeight; use crate::conversions::sse::interpolator_q0_15::*; use crate::transform::PointeeSizeExpressible; use crate::{CmsError, DataColorSpace, InterpolationMethod, Layout, TransformExecutor}; use num_traits::AsPrimitive; #[cfg(target_arch = "x86")] use std::arch::x86::*; #[cfg(target_arch = "x86_64")] use std::arch::x86_64::*; use std::marker::PhantomData; pub(crate) struct TransformLut4To3SseQ0_15< T, U, const LAYOUT: u8, const GRID_SIZE: usize, const BIT_DEPTH: usize, const BINS: usize, const BARYCENTRIC_BINS: usize, > { pub(crate) lut: Vec, pub(crate) _phantom: PhantomData, pub(crate) _phantom1: PhantomData, pub(crate) interpolation_method: InterpolationMethod, pub(crate) weights: Box<[BarycentricWeight; BINS]>, pub(crate) color_space: DataColorSpace, pub(crate) is_linear: bool, } impl< T: Copy + AsPrimitive + Default + PointeeSizeExpressible, U: AsPrimitive, const LAYOUT: u8, const GRID_SIZE: usize, const BIT_DEPTH: usize, const BINS: usize, const BARYCENTRIC_BINS: usize, > TransformLut4To3SseQ0_15 where f32: AsPrimitive, u32: AsPrimitive, (): LutBarycentricReduction, { #[allow(unused_unsafe)] #[target_feature(enable = "sse4.1")] unsafe fn transform_chunk( &self, src: &[T], dst: &mut [T], interpolator: Box, ) { unsafe { let cn = Layout::from(LAYOUT); let channels = cn.channels(); let grid_size = GRID_SIZE as i32; let grid_size3 = grid_size * grid_size * grid_size; let f_value_scale = _mm_set1_ps(1. 
/ ((1 << 14i32) - 1) as f32); let max_value = ((1u32 << BIT_DEPTH) - 1).as_(); let v_max_scale = if T::FINITE { _mm_set1_epi16(((1i32 << BIT_DEPTH) - 1) as i16) } else { _mm_set1_epi16(((1i32 << 14i32) - 1) as i16) }; for (src, dst) in src.chunks_exact(4).zip(dst.chunks_exact_mut(channels)) { let c = <() as LutBarycentricReduction>::reduce::( src[0], ); let m = <() as LutBarycentricReduction>::reduce::( src[1], ); let y = <() as LutBarycentricReduction>::reduce::( src[2], ); let k = <() as LutBarycentricReduction>::reduce::( src[3], ); let k_weights = self.weights[k.as_()]; let w: i32 = k_weights.x; let w_n: i32 = k_weights.x_n; const Q: i16 = ((1i32 << 15) - 1) as i16; let t: i16 = k_weights.w; let t_n: i16 = Q - t; let table1 = &self.lut[(w * grid_size3) as usize..]; let table2 = &self.lut[(w_n * grid_size3) as usize..]; let a0 = interpolator .inter3_sse(table1, c.as_(), m.as_(), y.as_(), self.weights.as_slice()) .v; let b0 = interpolator .inter3_sse(table2, c.as_(), m.as_(), y.as_(), self.weights.as_slice()) .v; let hp = _mm_mulhrs_epi16(_mm_set1_epi16(t_n), a0); let v = _mm_add_epi16(hp, _mm_mulhrs_epi16(b0, _mm_set1_epi16(t))); if T::FINITE { let mut o = _mm_max_epi16(v, _mm_setzero_si128()); o = _mm_min_epi16(o, v_max_scale); let x = _mm_extract_epi16::<0>(o); let y = _mm_extract_epi16::<1>(o); let z = _mm_extract_epi16::<2>(o); dst[cn.r_i()] = (x as u32).as_(); dst[cn.g_i()] = (y as u32).as_(); dst[cn.b_i()] = (z as u32).as_(); } else { let mut r = _mm_cvtepi32_ps(_mm_cvtepi16_epi32(v)); r = _mm_mul_ps(r, f_value_scale); dst[cn.r_i()] = f32::from_bits(_mm_extract_ps::<0>(r) as u32).as_(); dst[cn.g_i()] = f32::from_bits(_mm_extract_ps::<1>(r) as u32).as_(); dst[cn.b_i()] = f32::from_bits(_mm_extract_ps::<2>(r) as u32).as_(); } if channels == 4 { dst[cn.a_i()] = max_value; } } } } } impl< T: Copy + AsPrimitive + Default + PointeeSizeExpressible, U: AsPrimitive, const LAYOUT: u8, const GRID_SIZE: usize, const BIT_DEPTH: usize, const BINS: usize, const 
BARYCENTRIC_BINS: usize, > TransformExecutor for TransformLut4To3SseQ0_15 where f32: AsPrimitive, u32: AsPrimitive, (): LutBarycentricReduction, { fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> { let cn = Layout::from(LAYOUT); let channels = cn.channels(); if src.len() % 4 != 0 { return Err(CmsError::LaneMultipleOfChannels); } if dst.len() % channels != 0 { return Err(CmsError::LaneMultipleOfChannels); } let src_chunks = src.len() / 4; let dst_chunks = dst.len() / channels; if src_chunks != dst_chunks { return Err(CmsError::LaneSizeMismatch); } unsafe { if self.color_space == DataColorSpace::Lab || (self.is_linear && self.color_space == DataColorSpace::Rgb) || self.color_space == DataColorSpace::Xyz { self.transform_chunk(src, dst, Box::new(TrilinearSseQ0_15:: {})); } else { match self.interpolation_method { #[cfg(feature = "options")] InterpolationMethod::Tetrahedral => { self.transform_chunk( src, dst, Box::new(TetrahedralSseQ0_15:: {}), ); } #[cfg(feature = "options")] InterpolationMethod::Pyramid => { self.transform_chunk(src, dst, Box::new(PyramidalSseQ0_15:: {})); } #[cfg(feature = "options")] InterpolationMethod::Prism => { self.transform_chunk(src, dst, Box::new(PrismaticSseQ0_15:: {})); } InterpolationMethod::Linear => { self.transform_chunk(src, dst, Box::new(TrilinearSseQ0_15:: {})); } } } } Ok(()) } } moxcms-0.7.7/src/conversions/sse/mod.rs000064400000000000000000000041471046102023000162310ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. 
Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ mod interpolator; mod interpolator_q0_15; mod lut4_to_3; mod lut4_to_3_q0_15; mod rgb_xyz; mod rgb_xyz_opt; mod rgb_xyz_q2_13; mod rgb_xyz_q2_13_opt; mod t_lut3_to_3; mod t_lut3_to_3_q0_15; pub(crate) use lut4_to_3::SseLut4x3Factory; pub(crate) use rgb_xyz::TransformShaperRgbSse; pub(crate) use rgb_xyz_opt::TransformShaperRgbOptSse; pub(crate) use rgb_xyz_q2_13::TransformShaperQ2_13Sse; pub(crate) use rgb_xyz_q2_13_opt::TransformShaperQ2_13OptSse; pub(crate) use t_lut3_to_3::SseLut3x3Factory; moxcms-0.7.7/src/conversions/sse/rgb_xyz.rs000064400000000000000000000135711046102023000171370ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. 
* // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ use crate::conversions::TransformMatrixShaper; use crate::transform::PointeeSizeExpressible; use crate::{CmsError, Layout, TransformExecutor}; use num_traits::AsPrimitive; #[cfg(target_arch = "x86")] use std::arch::x86::*; #[cfg(target_arch = "x86_64")] use std::arch::x86_64::*; #[repr(align(16), C)] pub(crate) struct SseAlignedU16(pub(crate) [u16; 8]); pub(crate) struct TransformShaperRgbSse< T: Clone + Copy + 'static + PointeeSizeExpressible + Default, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, // deleting linear cap is in effective here const LINEAR_CAP: usize, > { // removing linear cap here is not worth it, at least in previous attempts pub(crate) profile: TransformMatrixShaper, pub(crate) bit_depth: usize, pub(crate) gamma_lut: usize, } impl< T: Clone + Copy + 'static + PointeeSizeExpressible + Default, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, const LINEAR_CAP: usize, > TransformShaperRgbSse where u32: AsPrimitive, { #[target_feature(enable = "sse4.1")] unsafe fn transform_impl(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> { let src_cn = Layout::from(SRC_LAYOUT); let dst_cn = Layout::from(DST_LAYOUT); let src_channels = src_cn.channels(); let dst_channels = dst_cn.channels(); let mut temporary = SseAlignedU16([0; 8]); if src.len() / src_channels != dst.len() / dst_channels { return Err(CmsError::LaneSizeMismatch); } if src.len() % src_channels != 0 { return Err(CmsError::LaneMultipleOfChannels); } if dst.len() % dst_channels != 0 { return Err(CmsError::LaneMultipleOfChannels); } let t = self.profile.adaptation_matrix.transpose(); let scale = (self.gamma_lut - 1) as f32; let max_colors: T = ((1 << self.bit_depth) - 1).as_(); unsafe { let m0 = _mm_setr_ps(t.v[0][0], t.v[0][1], t.v[0][2], 0f32); let m1 = _mm_setr_ps(t.v[1][0], t.v[1][1], t.v[1][2], 0f32); let m2 = _mm_setr_ps(t.v[2][0], t.v[2][1], t.v[2][2], 0f32); let zeros = _mm_setzero_ps(); let v_scale = _mm_set1_ps(scale); for (src, dst) in src .chunks_exact(src_channels) 
.zip(dst.chunks_exact_mut(dst_channels)) { let rp = &self.profile.r_linear[src[src_cn.r_i()]._as_usize()]; let gp = &self.profile.g_linear[src[src_cn.g_i()]._as_usize()]; let bp = &self.profile.b_linear[src[src_cn.b_i()]._as_usize()]; let mut r = _mm_load_ss(rp); let mut g = _mm_load_ss(gp); let mut b = _mm_load_ss(bp); let a = if src_channels == 4 { src[src_cn.a_i()] } else { max_colors }; r = _mm_shuffle_ps::<0>(r, r); g = _mm_shuffle_ps::<0>(g, g); b = _mm_shuffle_ps::<0>(b, b); let v0 = _mm_mul_ps(r, m0); let v1 = _mm_mul_ps(g, m1); let v2 = _mm_mul_ps(b, m2); let mut v = _mm_add_ps(_mm_add_ps(v0, v1), v2); v = _mm_max_ps(v, zeros); v = _mm_mul_ps(v, v_scale); v = _mm_min_ps(v, v_scale); let zx = _mm_cvtps_epi32(v); _mm_store_si128(temporary.0.as_mut_ptr() as *mut _, zx); dst[dst_cn.r_i()] = self.profile.r_gamma[temporary.0[0] as usize]; dst[dst_cn.g_i()] = self.profile.g_gamma[temporary.0[2] as usize]; dst[dst_cn.b_i()] = self.profile.b_gamma[temporary.0[4] as usize]; if dst_channels == 4 { dst[dst_cn.a_i()] = a; } } } Ok(()) } } impl< T: Clone + Copy + 'static + PointeeSizeExpressible + Default, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, const LINEAR_CAP: usize, > TransformExecutor for TransformShaperRgbSse where u32: AsPrimitive, { fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> { unsafe { self.transform_impl(src, dst) } } } moxcms-0.7.7/src/conversions/sse/rgb_xyz_opt.rs000064400000000000000000000137251046102023000200220ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. 
Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ use crate::conversions::rgbxyz::TransformMatrixShaperOptimizedV; use crate::conversions::sse::rgb_xyz::SseAlignedU16; use crate::transform::PointeeSizeExpressible; use crate::{CmsError, Layout, TransformExecutor}; use num_traits::AsPrimitive; #[cfg(target_arch = "x86")] use std::arch::x86::*; #[cfg(target_arch = "x86_64")] use std::arch::x86_64::*; pub(crate) struct TransformShaperRgbOptSse< T: Clone + Copy + 'static + PointeeSizeExpressible + Default, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, > { pub(crate) profile: TransformMatrixShaperOptimizedV, pub(crate) bit_depth: usize, pub(crate) gamma_lut: usize, } impl< T: Clone + Copy + 'static + PointeeSizeExpressible + Default, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, > TransformShaperRgbOptSse where u32: AsPrimitive, { #[target_feature(enable = "sse4.1")] unsafe fn transform_impl(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> { let src_cn = Layout::from(SRC_LAYOUT); let dst_cn = Layout::from(DST_LAYOUT); let src_channels = src_cn.channels(); let dst_channels = dst_cn.channels(); let mut temporary = SseAlignedU16([0; 8]); if src.len() / src_channels != dst.len() / dst_channels { return Err(CmsError::LaneSizeMismatch); } if src.len() % src_channels != 0 { return Err(CmsError::LaneMultipleOfChannels); } if dst.len() % dst_channels != 0 { return Err(CmsError::LaneMultipleOfChannels); } let t = self.profile.adaptation_matrix.transpose(); let scale = (self.gamma_lut - 1) as f32; let max_colors: T = ((1 << self.bit_depth) - 1).as_(); // safety precondition for linearization table if T::FINITE { let cap = (1 << self.bit_depth) - 1; assert!(self.profile.linear.len() >= cap); } else { assert!(self.profile.linear.len() >= T::NOT_FINITE_LINEAR_TABLE_SIZE); } let lut_lin = &self.profile.linear; unsafe { let m0 = _mm_setr_ps(t.v[0][0], t.v[0][1], t.v[0][2], 0f32); let m1 = _mm_setr_ps(t.v[1][0], t.v[1][1], t.v[1][2], 0f32); let m2 = _mm_setr_ps(t.v[2][0], t.v[2][1], t.v[2][2], 0f32); let zeros = _mm_setzero_ps(); 
let v_scale = _mm_set1_ps(scale); for (src, dst) in src .chunks_exact(src_channels) .zip(dst.chunks_exact_mut(dst_channels)) { let rp = lut_lin.get_unchecked(src[src_cn.r_i()]._as_usize()); let gp = lut_lin.get_unchecked(src[src_cn.g_i()]._as_usize()); let bp = lut_lin.get_unchecked(src[src_cn.b_i()]._as_usize()); let mut r = _mm_load_ss(rp); let mut g = _mm_load_ss(gp); let mut b = _mm_load_ss(bp); let a = if src_channels == 4 { src[src_cn.a_i()] } else { max_colors }; r = _mm_shuffle_ps::<0>(r, r); g = _mm_shuffle_ps::<0>(g, g); b = _mm_shuffle_ps::<0>(b, b); let v0 = _mm_mul_ps(r, m0); let v1 = _mm_mul_ps(g, m1); let v2 = _mm_mul_ps(b, m2); let mut v = _mm_add_ps(_mm_add_ps(v0, v1), v2); v = _mm_max_ps(v, zeros); v = _mm_mul_ps(v, v_scale); v = _mm_min_ps(v, v_scale); let zx = _mm_cvtps_epi32(v); _mm_store_si128(temporary.0.as_mut_ptr() as *mut _, zx); dst[dst_cn.r_i()] = self.profile.gamma[temporary.0[0] as usize]; dst[dst_cn.g_i()] = self.profile.gamma[temporary.0[2] as usize]; dst[dst_cn.b_i()] = self.profile.gamma[temporary.0[4] as usize]; if dst_channels == 4 { dst[dst_cn.a_i()] = a; } } } Ok(()) } } impl< T: Clone + Copy + 'static + PointeeSizeExpressible + Default, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, > TransformExecutor for TransformShaperRgbOptSse where u32: AsPrimitive, { fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> { unsafe { self.transform_impl(src, dst) } } } moxcms-0.7.7/src/conversions/sse/rgb_xyz_q2_13.rs000064400000000000000000000153661046102023000200500ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. 
Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ use crate::conversions::rgbxyz_fixed::TransformMatrixShaperFp; use crate::conversions::sse::rgb_xyz::SseAlignedU16; use crate::transform::PointeeSizeExpressible; use crate::{CmsError, Layout, TransformExecutor}; use num_traits::AsPrimitive; #[cfg(target_arch = "x86")] use std::arch::x86::*; #[cfg(target_arch = "x86_64")] use std::arch::x86_64::*; pub(crate) struct TransformShaperQ2_13Sse< T: Copy, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, const PRECISION: i32, > { pub(crate) profile: TransformMatrixShaperFp, pub(crate) bit_depth: usize, pub(crate) gamma_lut: usize, } #[inline(always)] pub(crate) unsafe fn _xmm_load_epi32(f: &i32) -> __m128i { let float_ref: &f32 = unsafe { &*(f as *const i32 as *const f32) }; unsafe { _mm_castps_si128(_mm_load_ss(float_ref)) } } impl< T: Copy + PointeeSizeExpressible + 'static, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, const PRECISION: i32, > TransformShaperQ2_13Sse where u32: AsPrimitive, { #[target_feature(enable = "sse4.1")] unsafe fn transform_impl(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> { let src_cn = Layout::from(SRC_LAYOUT); let dst_cn = Layout::from(DST_LAYOUT); let src_channels = src_cn.channels(); let dst_channels = dst_cn.channels(); let mut temporary = SseAlignedU16([0; 8]); if src.len() / src_channels != dst.len() / dst_channels { return Err(CmsError::LaneSizeMismatch); } if src.len() % src_channels != 0 { return Err(CmsError::LaneMultipleOfChannels); } if dst.len() % dst_channels != 0 { return Err(CmsError::LaneMultipleOfChannels); } let t = self.profile.adaptation_matrix.transpose(); let max_colors = ((1 << self.bit_depth) - 1).as_(); // safety precondition for linearization table if T::FINITE { let cap = (1 << self.bit_depth) - 1; assert!(self.profile.r_linear.len() >= cap); assert!(self.profile.g_linear.len() >= cap); assert!(self.profile.b_linear.len() >= cap); } else { assert!(self.profile.r_linear.len() >= T::NOT_FINITE_LINEAR_TABLE_SIZE); assert!(self.profile.g_linear.len() >= 
T::NOT_FINITE_LINEAR_TABLE_SIZE); assert!(self.profile.b_linear.len() >= T::NOT_FINITE_LINEAR_TABLE_SIZE); } let r_lin = &self.profile.r_linear; let g_lin = &self.profile.g_linear; let b_lin = &self.profile.b_linear; unsafe { let m0 = _mm_setr_epi16( t.v[0][0], t.v[1][0], t.v[0][1], t.v[1][1], t.v[0][2], t.v[1][2], 0, 0, ); let m2 = _mm_setr_epi16(t.v[2][0], 1, t.v[2][1], 1, t.v[2][2], 1, 0, 0); let rnd_val = ((1i32 << (PRECISION - 1)) as i16).to_ne_bytes(); let rnd = _mm_set1_epi32(i32::from_ne_bytes([0, 0, rnd_val[0], rnd_val[1]])); let v_max_value = _mm_set1_epi32(self.gamma_lut as i32 - 1); for (src, dst) in src .chunks_exact(src_channels) .zip(dst.chunks_exact_mut(dst_channels)) { let rp = r_lin.get_unchecked(src[src_cn.r_i()]._as_usize()); let gp = g_lin.get_unchecked(src[src_cn.g_i()]._as_usize()); let bp = b_lin.get_unchecked(src[src_cn.b_i()]._as_usize()); let mut r = _xmm_load_epi32(rp); let mut g = _xmm_load_epi32(gp); let mut b = _xmm_load_epi32(bp); let a = if src_channels == 4 { src[src_cn.a_i()] } else { max_colors }; r = _mm_shuffle_epi32::<0>(r); g = _mm_shuffle_epi32::<0>(g); b = _mm_shuffle_epi32::<0>(b); g = _mm_slli_epi32::<16>(g); let zrg0 = _mm_or_si128(r, g); let zbz0 = _mm_or_si128(b, rnd); let v0 = _mm_madd_epi16(zrg0, m0); let v1 = _mm_madd_epi16(zbz0, m2); let mut v = _mm_add_epi32(v0, v1); v = _mm_srai_epi32::(v); v = _mm_max_epi32(v, _mm_setzero_si128()); v = _mm_min_epi32(v, v_max_value); _mm_store_si128(temporary.0.as_mut_ptr() as *mut _, v); dst[dst_cn.r_i()] = self.profile.r_gamma[temporary.0[0] as usize]; dst[dst_cn.g_i()] = self.profile.g_gamma[temporary.0[2] as usize]; dst[dst_cn.b_i()] = self.profile.b_gamma[temporary.0[4] as usize]; if dst_channels == 4 { dst[dst_cn.a_i()] = a; } } } Ok(()) } } impl< T: Copy + PointeeSizeExpressible + 'static + Default, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, const PRECISION: i32, > TransformExecutor for TransformShaperQ2_13Sse where u32: AsPrimitive, { fn transform(&self, src: &[T], dst: 
&mut [T]) -> Result<(), CmsError> { unsafe { self.transform_impl(src, dst) } } } moxcms-0.7.7/src/conversions/sse/rgb_xyz_q2_13_opt.rs000064400000000000000000000144451046102023000207270ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ use crate::conversions::rgbxyz_fixed::TransformMatrixShaperFpOptVec; use crate::conversions::sse::rgb_xyz::SseAlignedU16; use crate::conversions::sse::rgb_xyz_q2_13::_xmm_load_epi32; use crate::transform::PointeeSizeExpressible; use crate::{CmsError, Layout, TransformExecutor}; use num_traits::AsPrimitive; #[cfg(target_arch = "x86")] use std::arch::x86::*; #[cfg(target_arch = "x86_64")] use std::arch::x86_64::*; pub(crate) struct TransformShaperQ2_13OptSse< T: Copy, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, const PRECISION: i32, > { pub(crate) profile: TransformMatrixShaperFpOptVec, pub(crate) bit_depth: usize, pub(crate) gamma_lut: usize, } impl< T: Copy + PointeeSizeExpressible + 'static, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, const PRECISION: i32, > TransformShaperQ2_13OptSse where u32: AsPrimitive, { #[target_feature(enable = "sse4.1")] unsafe fn transform_impl(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> { let src_cn = Layout::from(SRC_LAYOUT); let dst_cn = Layout::from(DST_LAYOUT); let src_channels = src_cn.channels(); let dst_channels = dst_cn.channels(); let mut temporary = SseAlignedU16([0; 8]); if src.len() / src_channels != dst.len() / dst_channels { return Err(CmsError::LaneSizeMismatch); } if src.len() % src_channels != 0 { return Err(CmsError::LaneMultipleOfChannels); } if dst.len() % dst_channels != 0 { return Err(CmsError::LaneMultipleOfChannels); } let t = self.profile.adaptation_matrix.transpose(); let max_colors = ((1 << self.bit_depth) - 1).as_(); unsafe { let m0 = _mm_setr_epi16( t.v[0][0], t.v[1][0], t.v[0][1], t.v[1][1], t.v[0][2], t.v[1][2], 0, 0, ); let m2 = _mm_setr_epi16(t.v[2][0], 1, t.v[2][1], 1, t.v[2][2], 1, 0, 0); let rnd_val = ((1i32 << (PRECISION - 1)) as i16).to_ne_bytes(); let rnd = _mm_set1_epi32(i32::from_ne_bytes([0, 0, rnd_val[0], rnd_val[1]])); let v_max_value = _mm_set1_epi32(self.gamma_lut as i32 - 1); // safety precondition for linearization table if T::FINITE { let cap = (1 << self.bit_depth) - 1; 
assert!(self.profile.linear.len() >= cap); } else { assert!(self.profile.linear.len() >= T::NOT_FINITE_LINEAR_TABLE_SIZE); } let lut_lin = &self.profile.linear; for (src, dst) in src .chunks_exact(src_channels) .zip(dst.chunks_exact_mut(dst_channels)) { let rp = lut_lin.get_unchecked(src[src_cn.r_i()]._as_usize()); let gp = lut_lin.get_unchecked(src[src_cn.g_i()]._as_usize()); let bp = lut_lin.get_unchecked(src[src_cn.b_i()]._as_usize()); let mut r = _xmm_load_epi32(rp); let mut g = _xmm_load_epi32(gp); let mut b = _xmm_load_epi32(bp); let a = if src_channels == 4 { src[src_cn.a_i()] } else { max_colors }; r = _mm_shuffle_epi32::<0>(r); g = _mm_shuffle_epi32::<0>(g); b = _mm_shuffle_epi32::<0>(b); g = _mm_slli_epi32::<16>(g); let zrg0 = _mm_or_si128(r, g); let zbz0 = _mm_or_si128(b, rnd); let v0 = _mm_madd_epi16(zrg0, m0); let v1 = _mm_madd_epi16(zbz0, m2); let mut v = _mm_add_epi32(v0, v1); v = _mm_srai_epi32::(v); v = _mm_max_epi32(v, _mm_setzero_si128()); v = _mm_min_epi32(v, v_max_value); _mm_store_si128(temporary.0.as_mut_ptr() as *mut _, v); dst[dst_cn.r_i()] = self.profile.gamma[temporary.0[0] as usize]; dst[dst_cn.g_i()] = self.profile.gamma[temporary.0[2] as usize]; dst[dst_cn.b_i()] = self.profile.gamma[temporary.0[4] as usize]; if dst_channels == 4 { dst[dst_cn.a_i()] = a; } } } Ok(()) } } impl< T: Copy + PointeeSizeExpressible + 'static + Default, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, const PRECISION: i32, > TransformExecutor for TransformShaperQ2_13OptSse where u32: AsPrimitive, { fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> { unsafe { self.transform_impl(src, dst) } } } moxcms-0.7.7/src/conversions/sse/t_lut3_to_3.rs000064400000000000000000000305551046102023000176120ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. 
* // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ use crate::conversions::LutBarycentricReduction; use crate::conversions::interpolator::BarycentricWeight; use crate::conversions::lut_transforms::Lut3x3Factory; use crate::conversions::sse::interpolator::*; use crate::conversions::sse::interpolator_q0_15::SseAlignedI16x4; use crate::conversions::sse::t_lut3_to_3_q0_15::TransformLut3x3SseQ0_15; use crate::transform::PointeeSizeExpressible; use crate::{ BarycentricWeightScale, CmsError, DataColorSpace, InterpolationMethod, Layout, TransformExecutor, TransformOptions, }; use num_traits::AsPrimitive; #[cfg(target_arch = "x86")] use std::arch::x86::*; #[cfg(target_arch = "x86_64")] use std::arch::x86_64::*; use std::marker::PhantomData; struct TransformLut3x3Sse< T, U, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, const GRID_SIZE: usize, const BIT_DEPTH: usize, const BINS: usize, const BARYCENTRIC_BINS: usize, > { lut: Vec, _phantom: PhantomData, _phantom2: PhantomData, interpolation_method: InterpolationMethod, weights: Box<[BarycentricWeight; BINS]>, color_space: DataColorSpace, is_linear: bool, } impl< T: Copy + AsPrimitive + Default + PointeeSizeExpressible, U: AsPrimitive, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, const GRID_SIZE: usize, const BIT_DEPTH: usize, const BINS: usize, const BARYCENTRIC_BINS: usize, > TransformLut3x3Sse where f32: AsPrimitive, u32: AsPrimitive, (): LutBarycentricReduction, { #[allow(unused_unsafe)] #[target_feature(enable = "sse4.1")] unsafe fn transform_chunk( &self, src: &[T], dst: &mut [T], interpolator: Box, ) { let src_cn = Layout::from(SRC_LAYOUT); let src_channels = src_cn.channels(); let dst_cn = Layout::from(DST_LAYOUT); let dst_channels = dst_cn.channels(); let value_scale = unsafe { _mm_set1_ps(((1 << BIT_DEPTH) - 1) as f32) }; let max_value = ((1u32 << BIT_DEPTH) - 1).as_(); for (src, dst) in src .chunks_exact(src_channels) .zip(dst.chunks_exact_mut(dst_channels)) { let x = <() as LutBarycentricReduction>::reduce::( src[src_cn.r_i()], ); let y = <() as 
LutBarycentricReduction>::reduce::( src[src_cn.g_i()], ); let z = <() as LutBarycentricReduction>::reduce::( src[src_cn.b_i()], ); let a = if src_channels == 4 { src[src_cn.a_i()] } else { max_value }; let v = interpolator.inter3_sse( &self.lut, x.as_(), y.as_(), z.as_(), self.weights.as_slice(), ); if T::FINITE { unsafe { let mut r = _mm_mul_ps(v.v, value_scale); r = _mm_max_ps(r, _mm_setzero_ps()); r = _mm_min_ps(r, value_scale); let jvz = _mm_cvtps_epi32(r); let x = _mm_extract_epi32::<0>(jvz); let y = _mm_extract_epi32::<1>(jvz); let z = _mm_extract_epi32::<2>(jvz); dst[dst_cn.r_i()] = (x as u32).as_(); dst[dst_cn.g_i()] = (y as u32).as_(); dst[dst_cn.b_i()] = (z as u32).as_(); } } else { unsafe { dst[dst_cn.r_i()] = f32::from_bits(_mm_extract_ps::<0>(v.v) as u32).as_(); dst[dst_cn.g_i()] = f32::from_bits(_mm_extract_ps::<1>(v.v) as u32).as_(); dst[dst_cn.b_i()] = f32::from_bits(_mm_extract_ps::<2>(v.v) as u32).as_(); } } if dst_channels == 4 { dst[dst_cn.a_i()] = a; } } } } impl< T: Copy + AsPrimitive + Default + PointeeSizeExpressible, U: AsPrimitive, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, const GRID_SIZE: usize, const BIT_DEPTH: usize, const BINS: usize, const BARYCENTRIC_BINS: usize, > TransformExecutor for TransformLut3x3Sse< T, U, SRC_LAYOUT, DST_LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS, > where f32: AsPrimitive, u32: AsPrimitive, (): LutBarycentricReduction, { fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> { let src_cn = Layout::from(SRC_LAYOUT); let src_channels = src_cn.channels(); let dst_cn = Layout::from(DST_LAYOUT); let dst_channels = dst_cn.channels(); if src.len() % src_channels != 0 { return Err(CmsError::LaneMultipleOfChannels); } if dst.len() % dst_channels != 0 { return Err(CmsError::LaneMultipleOfChannels); } let src_chunks = src.len() / src_channels; let dst_chunks = dst.len() / dst_channels; if src_chunks != dst_chunks { return Err(CmsError::LaneSizeMismatch); } unsafe { if self.color_space == 
DataColorSpace::Lab || (self.is_linear && self.color_space == DataColorSpace::Rgb) || self.color_space == DataColorSpace::Xyz { self.transform_chunk(src, dst, Box::new(TrilinearSse:: {})); } else { match self.interpolation_method { #[cfg(feature = "options")] InterpolationMethod::Tetrahedral => { self.transform_chunk(src, dst, Box::new(TetrahedralSse:: {})); } #[cfg(feature = "options")] InterpolationMethod::Pyramid => { self.transform_chunk(src, dst, Box::new(PyramidalSse:: {})); } #[cfg(feature = "options")] InterpolationMethod::Prism => { self.transform_chunk(src, dst, Box::new(PrismaticSse:: {})); } InterpolationMethod::Linear => { self.transform_chunk(src, dst, Box::new(TrilinearSse:: {})); } } } } Ok(()) } } pub(crate) struct SseLut3x3Factory {} impl Lut3x3Factory for SseLut3x3Factory { fn make_transform_3x3< T: Copy + AsPrimitive + Default + PointeeSizeExpressible + 'static + Send + Sync, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, const GRID_SIZE: usize, const BIT_DEPTH: usize, >( lut: Vec, options: TransformOptions, color_space: DataColorSpace, is_linear: bool, ) -> Box + Sync + Send> where f32: AsPrimitive, u32: AsPrimitive, (): LutBarycentricReduction, (): LutBarycentricReduction, { if options.prefer_fixed_point && BIT_DEPTH < 16 { let q: f32 = if T::FINITE { ((1i32 << BIT_DEPTH as i32) - 1) as f32 } else { ((1i32 << 14i32) - 1) as f32 }; let lut = lut .chunks_exact(3) .map(|x| { SseAlignedI16x4([ (x[0] * q).round() as i16, (x[1] * q).round() as i16, (x[2] * q).round() as i16, 0, ]) }) .collect::>(); return match options.barycentric_weight_scale { BarycentricWeightScale::Low => Box::new(TransformLut3x3SseQ0_15::< T, u8, SRC_LAYOUT, DST_LAYOUT, GRID_SIZE, BIT_DEPTH, 256, 256, > { lut, _phantom: PhantomData, _phantom2: PhantomData, interpolation_method: options.interpolation_method, weights: BarycentricWeight::::create_ranged_256::(), color_space, is_linear, }), #[cfg(feature = "options")] BarycentricWeightScale::High => Box::new(TransformLut3x3SseQ0_15::< 
T, u16, SRC_LAYOUT, DST_LAYOUT, GRID_SIZE, BIT_DEPTH, 65536, 65536, > { lut, _phantom: PhantomData, _phantom2: PhantomData, interpolation_method: options.interpolation_method, weights: BarycentricWeight::::create_binned::(), color_space, is_linear, }), }; } let lut = lut .chunks_exact(3) .map(|x| SseAlignedF32([x[0], x[1], x[2], 0f32])) .collect::>(); match options.barycentric_weight_scale { BarycentricWeightScale::Low => Box::new(TransformLut3x3Sse::< T, u8, SRC_LAYOUT, DST_LAYOUT, GRID_SIZE, BIT_DEPTH, 256, 256, > { lut, _phantom: PhantomData, _phantom2: PhantomData, interpolation_method: options.interpolation_method, weights: BarycentricWeight::::create_ranged_256::(), color_space, is_linear, }), #[cfg(feature = "options")] BarycentricWeightScale::High => Box::new(TransformLut3x3Sse::< T, u16, SRC_LAYOUT, DST_LAYOUT, GRID_SIZE, BIT_DEPTH, 65536, 65536, > { lut, _phantom: PhantomData, _phantom2: PhantomData, interpolation_method: options.interpolation_method, weights: BarycentricWeight::::create_binned::(), color_space, is_linear, }), } } } moxcms-0.7.7/src/conversions/sse/t_lut3_to_3_q0_15.rs000064400000000000000000000211371046102023000205130ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. 
* // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ use crate::conversions::LutBarycentricReduction; use crate::conversions::interpolator::BarycentricWeight; use crate::conversions::sse::interpolator_q0_15::*; use crate::transform::PointeeSizeExpressible; use crate::{CmsError, DataColorSpace, InterpolationMethod, Layout, TransformExecutor}; use num_traits::AsPrimitive; #[cfg(target_arch = "x86")] use std::arch::x86::*; #[cfg(target_arch = "x86_64")] use std::arch::x86_64::*; use std::marker::PhantomData; pub(crate) struct TransformLut3x3SseQ0_15< T, U, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, const GRID_SIZE: usize, const BIT_DEPTH: usize, const BINS: usize, const BARYCENTRIC_BINS: usize, > { pub(crate) lut: Vec, pub(crate) _phantom: PhantomData, pub(crate) _phantom2: PhantomData, pub(crate) interpolation_method: InterpolationMethod, pub(crate) weights: Box<[BarycentricWeight; BINS]>, pub(crate) color_space: DataColorSpace, pub(crate) is_linear: bool, } impl< T: Copy + AsPrimitive + Default + PointeeSizeExpressible, U: AsPrimitive, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, const GRID_SIZE: usize, const BIT_DEPTH: usize, const BINS: usize, const BARYCENTRIC_BINS: usize, > TransformLut3x3SseQ0_15< T, U, SRC_LAYOUT, 
DST_LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS, > where f32: AsPrimitive, u32: AsPrimitive, (): LutBarycentricReduction, { #[allow(unused_unsafe)] #[target_feature(enable = "sse4.1")] #[inline(never)] unsafe fn transform_chunk( &self, src: &[T], dst: &mut [T], interpolator: Box, ) { unsafe { let src_cn = Layout::from(SRC_LAYOUT); let src_channels = src_cn.channels(); let dst_cn = Layout::from(DST_LAYOUT); let dst_channels = dst_cn.channels(); let f_value_scale = _mm_set1_ps(1. / ((1 << 14i32) - 1) as f32); let max_value = ((1u32 << BIT_DEPTH) - 1).as_(); let v_max_scale = if T::FINITE { _mm_set1_epi16(((1i32 << BIT_DEPTH) - 1) as i16) } else { _mm_set1_epi16(((1i32 << 14i32) - 1) as i16) }; for (src, dst) in src .chunks_exact(src_channels) .zip(dst.chunks_exact_mut(dst_channels)) { let x = <() as LutBarycentricReduction>::reduce::( src[src_cn.r_i()], ); let y = <() as LutBarycentricReduction>::reduce::( src[src_cn.g_i()], ); let z = <() as LutBarycentricReduction>::reduce::( src[src_cn.b_i()], ); let a = if src_channels == 4 { src[src_cn.a_i()] } else { max_value }; let v = interpolator.inter3_sse( &self.lut, x.as_(), y.as_(), z.as_(), self.weights.as_slice(), ); if T::FINITE { let mut o = _mm_max_epi16(v.v, _mm_setzero_si128()); o = _mm_min_epi16(o, v_max_scale); let x = _mm_extract_epi16::<0>(o); let y = _mm_extract_epi16::<1>(o); let z = _mm_extract_epi16::<2>(o); dst[dst_cn.r_i()] = (x as u32).as_(); dst[dst_cn.g_i()] = (y as u32).as_(); dst[dst_cn.b_i()] = (z as u32).as_(); } else { let mut r = _mm_cvtepi32_ps(_mm_cvtepi16_epi32(v.v)); r = _mm_mul_ps(r, f_value_scale); dst[dst_cn.r_i()] = f32::from_bits(_mm_extract_ps::<0>(r) as u32).as_(); dst[dst_cn.g_i()] = f32::from_bits(_mm_extract_ps::<1>(r) as u32).as_(); dst[dst_cn.b_i()] = f32::from_bits(_mm_extract_ps::<2>(r) as u32).as_(); } if dst_channels == 4 { dst[dst_cn.a_i()] = a; } } } } } impl< T: Copy + AsPrimitive + Default + PointeeSizeExpressible, U: AsPrimitive, const SRC_LAYOUT: u8, const 
DST_LAYOUT: u8, const GRID_SIZE: usize, const BIT_DEPTH: usize, const BINS: usize, const BARYCENTRIC_BINS: usize, > TransformExecutor for TransformLut3x3SseQ0_15< T, U, SRC_LAYOUT, DST_LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS, > where f32: AsPrimitive, u32: AsPrimitive, (): LutBarycentricReduction, { fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> { let src_cn = Layout::from(SRC_LAYOUT); let src_channels = src_cn.channels(); let dst_cn = Layout::from(DST_LAYOUT); let dst_channels = dst_cn.channels(); if src.len() % src_channels != 0 { return Err(CmsError::LaneMultipleOfChannels); } if dst.len() % dst_channels != 0 { return Err(CmsError::LaneMultipleOfChannels); } let src_chunks = src.len() / src_channels; let dst_chunks = dst.len() / dst_channels; if src_chunks != dst_chunks { return Err(CmsError::LaneSizeMismatch); } unsafe { if self.color_space == DataColorSpace::Lab || (self.is_linear && self.color_space == DataColorSpace::Rgb) || self.color_space == DataColorSpace::Xyz { self.transform_chunk(src, dst, Box::new(TrilinearSseQ0_15:: {})); } else { match self.interpolation_method { #[cfg(feature = "options")] InterpolationMethod::Tetrahedral => { self.transform_chunk( src, dst, Box::new(TetrahedralSseQ0_15:: {}), ); } #[cfg(feature = "options")] InterpolationMethod::Pyramid => { self.transform_chunk(src, dst, Box::new(PyramidalSseQ0_15:: {})); } #[cfg(feature = "options")] InterpolationMethod::Prism => { self.transform_chunk(src, dst, Box::new(PrismaticSseQ0_15:: {})); } InterpolationMethod::Linear => { self.transform_chunk(src, dst, Box::new(TrilinearSseQ0_15:: {})); } } } } Ok(()) } } moxcms-0.7.7/src/conversions/transform_lut3_to_3.rs000064400000000000000000000232211046102023000205600ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. 
* // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #![allow(dead_code)] use crate::conversions::LutBarycentricReduction; use crate::conversions::interpolator::{BarycentricWeight, MultidimensionalInterpolation}; use crate::conversions::lut_transforms::Lut3x3Factory; use crate::transform::PointeeSizeExpressible; use crate::{ BarycentricWeightScale, CmsError, DataColorSpace, InterpolationMethod, Layout, TransformExecutor, TransformOptions, }; use num_traits::AsPrimitive; use std::marker::PhantomData; pub(crate) struct TransformLut3x3< T, U, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, const GRID_SIZE: usize, const BIT_DEPTH: usize, const BINS: usize, const BARYCENTRIC_BINS: usize, > { pub(crate) lut: Vec, pub(crate) _phantom: PhantomData, pub(crate) _phantom1: PhantomData, pub(crate) interpolation_method: InterpolationMethod, pub(crate) weights: Box<[BarycentricWeight; BINS]>, pub(crate) color_space: DataColorSpace, pub(crate) is_linear: bool, } impl< T: Copy + AsPrimitive + Default + PointeeSizeExpressible, U: AsPrimitive, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, const GRID_SIZE: usize, const BIT_DEPTH: usize, const BINS: usize, const BARYCENTRIC_BINS: usize, > TransformLut3x3 where f32: AsPrimitive, u32: AsPrimitive, (): LutBarycentricReduction, { #[inline(never)] fn transform_chunk( &self, src: &[T], dst: &mut [T], interpolator: Box, ) { let src_cn = Layout::from(SRC_LAYOUT); let src_channels = src_cn.channels(); let dst_cn = Layout::from(DST_LAYOUT); let dst_channels = dst_cn.channels(); let value_scale = ((1 << BIT_DEPTH) - 1) as f32; let max_value = ((1u32 << BIT_DEPTH) - 1).as_(); for (src, dst) in src .chunks_exact(src_channels) .zip(dst.chunks_exact_mut(dst_channels)) { let x = <() as LutBarycentricReduction>::reduce::( src[src_cn.r_i()], ); let y = <() as LutBarycentricReduction>::reduce::( src[src_cn.g_i()], ); let z = <() as LutBarycentricReduction>::reduce::( src[src_cn.b_i()], ); let a = if src_channels == 4 { src[src_cn.a_i()] } else { max_value }; let v = interpolator.inter3( &self.lut, 
&self.weights[x.as_()], &self.weights[y.as_()], &self.weights[z.as_()], ); if T::FINITE { let r = v * value_scale + 0.5; dst[dst_cn.r_i()] = r.v[0].min(value_scale).max(0.).as_(); dst[dst_cn.g_i()] = r.v[1].min(value_scale).max(0.).as_(); dst[dst_cn.b_i()] = r.v[2].min(value_scale).max(0.).as_(); if dst_channels == 4 { dst[dst_cn.a_i()] = a; } } else { dst[dst_cn.r_i()] = v.v[0].as_(); dst[dst_cn.g_i()] = v.v[1].as_(); dst[dst_cn.b_i()] = v.v[2].as_(); if dst_channels == 4 { dst[dst_cn.a_i()] = a; } } } } } impl< T: Copy + AsPrimitive + Default + PointeeSizeExpressible, U: AsPrimitive, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, const GRID_SIZE: usize, const BIT_DEPTH: usize, const BINS: usize, const BARYCENTRIC_BINS: usize, > TransformExecutor for TransformLut3x3 where f32: AsPrimitive, u32: AsPrimitive, (): LutBarycentricReduction, { fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> { let src_cn = Layout::from(SRC_LAYOUT); let src_channels = src_cn.channels(); let dst_cn = Layout::from(DST_LAYOUT); let dst_channels = dst_cn.channels(); if src.len() % src_channels != 0 { return Err(CmsError::LaneMultipleOfChannels); } if dst.len() % dst_channels != 0 { return Err(CmsError::LaneMultipleOfChannels); } let src_chunks = src.len() / src_channels; let dst_chunks = dst.len() / dst_channels; if src_chunks != dst_chunks { return Err(CmsError::LaneSizeMismatch); } if self.color_space == DataColorSpace::Lab || (self.is_linear && self.color_space == DataColorSpace::Rgb) || self.color_space == DataColorSpace::Xyz { use crate::conversions::interpolator::Trilinear; self.transform_chunk(src, dst, Box::new(Trilinear:: {})); } else { match self.interpolation_method { #[cfg(feature = "options")] InterpolationMethod::Tetrahedral => { use crate::conversions::interpolator::Tetrahedral; self.transform_chunk(src, dst, Box::new(Tetrahedral:: {})); } #[cfg(feature = "options")] InterpolationMethod::Pyramid => { use crate::conversions::interpolator::Pyramidal; 
self.transform_chunk(src, dst, Box::new(Pyramidal:: {})); } #[cfg(feature = "options")] InterpolationMethod::Prism => { use crate::conversions::interpolator::Prismatic; self.transform_chunk(src, dst, Box::new(Prismatic:: {})); } InterpolationMethod::Linear => { use crate::conversions::interpolator::Trilinear; self.transform_chunk(src, dst, Box::new(Trilinear:: {})); } } } Ok(()) } } pub(crate) struct DefaultLut3x3Factory {} impl Lut3x3Factory for DefaultLut3x3Factory { fn make_transform_3x3< T: Copy + AsPrimitive + Default + PointeeSizeExpressible + 'static + Send + Sync, const SRC_LAYOUT: u8, const DST_LAYOUT: u8, const GRID_SIZE: usize, const BIT_DEPTH: usize, >( lut: Vec, options: TransformOptions, color_space: DataColorSpace, is_linear: bool, ) -> Box + Send + Sync> where f32: AsPrimitive, u32: AsPrimitive, (): LutBarycentricReduction, (): LutBarycentricReduction, { match options.barycentric_weight_scale { BarycentricWeightScale::Low => Box::new(TransformLut3x3::< T, u8, SRC_LAYOUT, DST_LAYOUT, GRID_SIZE, BIT_DEPTH, 256, 256, > { lut, _phantom: PhantomData, _phantom1: PhantomData, interpolation_method: options.interpolation_method, weights: BarycentricWeight::::create_ranged_256::(), color_space, is_linear, }), #[cfg(feature = "options")] BarycentricWeightScale::High => Box::new(TransformLut3x3::< T, u16, SRC_LAYOUT, DST_LAYOUT, GRID_SIZE, BIT_DEPTH, 65536, 65536, > { lut, _phantom: PhantomData, _phantom1: PhantomData, interpolation_method: options.interpolation_method, weights: BarycentricWeight::::create_binned::(), color_space, is_linear, }), } } } moxcms-0.7.7/src/conversions/transform_lut3_to_4.rs000064400000000000000000000236751046102023000205760ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. 
Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ use crate::conversions::LutBarycentricReduction; use crate::conversions::interpolator::{BarycentricWeight, MultidimensionalInterpolation}; use crate::transform::PointeeSizeExpressible; use crate::{ BarycentricWeightScale, CmsError, DataColorSpace, InterpolationMethod, Layout, TransformExecutor, TransformOptions, }; use num_traits::AsPrimitive; use std::marker::PhantomData; pub(crate) struct TransformLut3x4< T, U: AsPrimitive, const LAYOUT: u8, const GRID_SIZE: usize, const BIT_DEPTH: usize, const BINS: usize, const BARYCENTRIC_BINS: usize, > { pub(crate) lut: Vec, pub(crate) _phantom: PhantomData, pub(crate) _phantom1: PhantomData, pub(crate) interpolation_method: InterpolationMethod, pub(crate) weights: Box<[BarycentricWeight; BINS]>, pub(crate) color_space: DataColorSpace, pub(crate) is_linear: bool, } impl< T: Copy + AsPrimitive + Default + PointeeSizeExpressible, U: AsPrimitive, const LAYOUT: u8, const GRID_SIZE: usize, const BIT_DEPTH: usize, const BINS: usize, const BARYCENTRIC_BINS: usize, > TransformLut3x4 where f32: AsPrimitive, u32: AsPrimitive, (): LutBarycentricReduction, { #[inline(never)] fn transform_chunk( &self, src: &[T], dst: &mut [T], interpolator: Box, ) { let cn = Layout::from(LAYOUT); let channels = cn.channels(); let value_scale = ((1 << BIT_DEPTH) - 1) as f32; for (src, dst) in src.chunks_exact(channels).zip(dst.chunks_exact_mut(4)) { let x = <() as LutBarycentricReduction>::reduce::( src[cn.r_i()], ); let y = <() as LutBarycentricReduction>::reduce::( src[cn.g_i()], ); let z = <() as LutBarycentricReduction>::reduce::( src[cn.b_i()], ); let v = interpolator.inter4( &self.lut, &self.weights[x.as_()], &self.weights[y.as_()], &self.weights[z.as_()], ); if T::FINITE { let r = v * value_scale + 0.5; dst[0] = r.v[0].min(value_scale).max(0.).as_(); dst[1] = r.v[1].min(value_scale).max(0.).as_(); dst[2] = r.v[2].min(value_scale).max(0.).as_(); dst[3] = r.v[3].min(value_scale).max(0.).as_(); } else { dst[0] = v.v[0].as_(); dst[1] = v.v[1].as_(); 
dst[2] = v.v[2].as_(); dst[3] = v.v[3].as_(); } } } } impl< T: Copy + AsPrimitive + Default + PointeeSizeExpressible, U: AsPrimitive, const LAYOUT: u8, const GRID_SIZE: usize, const BIT_DEPTH: usize, const BINS: usize, const BARYCENTRIC_BINS: usize, > TransformExecutor for TransformLut3x4 where f32: AsPrimitive, u32: AsPrimitive, (): LutBarycentricReduction, { fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> { let cn = Layout::from(LAYOUT); let channels = cn.channels(); if src.len() % channels != 0 { return Err(CmsError::LaneMultipleOfChannels); } if dst.len() % 4 != 0 { return Err(CmsError::LaneMultipleOfChannels); } let src_chunks = src.len() / channels; let dst_chunks = dst.len() / 4; if src_chunks != dst_chunks { return Err(CmsError::LaneSizeMismatch); } if self.color_space == DataColorSpace::Lab || (self.is_linear && self.color_space == DataColorSpace::Rgb) || self.color_space == DataColorSpace::Xyz { use crate::conversions::interpolator::Trilinear; self.transform_chunk(src, dst, Box::new(Trilinear:: {})); } else { match self.interpolation_method { #[cfg(feature = "options")] InterpolationMethod::Tetrahedral => { use crate::conversions::interpolator::Tetrahedral; self.transform_chunk(src, dst, Box::new(Tetrahedral:: {})); } #[cfg(feature = "options")] InterpolationMethod::Pyramid => { use crate::conversions::interpolator::Pyramidal; self.transform_chunk(src, dst, Box::new(Pyramidal:: {})); } #[cfg(feature = "options")] InterpolationMethod::Prism => { use crate::conversions::interpolator::Prismatic; self.transform_chunk(src, dst, Box::new(Prismatic:: {})); } InterpolationMethod::Linear => { use crate::conversions::interpolator::Trilinear; self.transform_chunk(src, dst, Box::new(Trilinear:: {})); } } } Ok(()) } } pub(crate) fn make_transform_3x4< T: Copy + AsPrimitive + Default + PointeeSizeExpressible + 'static + Send + Sync, const GRID_SIZE: usize, const BIT_DEPTH: usize, >( layout: Layout, lut: Vec, options: TransformOptions, color_space: 
DataColorSpace, is_linear: bool, ) -> Box + Sync + Send> where f32: AsPrimitive, u32: AsPrimitive, (): LutBarycentricReduction, (): LutBarycentricReduction, { match layout { Layout::Rgb => match options.barycentric_weight_scale { BarycentricWeightScale::Low => Box::new(TransformLut3x4::< T, u8, { Layout::Rgb as u8 }, GRID_SIZE, BIT_DEPTH, 256, 256, > { lut, _phantom: PhantomData, _phantom1: PhantomData, interpolation_method: options.interpolation_method, weights: BarycentricWeight::::create_ranged_256::(), color_space, is_linear, }), #[cfg(feature = "options")] BarycentricWeightScale::High => Box::new(TransformLut3x4::< T, u16, { Layout::Rgb as u8 }, GRID_SIZE, BIT_DEPTH, 65536, 65536, > { lut, _phantom: PhantomData, _phantom1: PhantomData, interpolation_method: options.interpolation_method, weights: BarycentricWeight::::create_binned::(), color_space, is_linear, }), }, Layout::Rgba => match options.barycentric_weight_scale { BarycentricWeightScale::Low => Box::new(TransformLut3x4::< T, u8, { Layout::Rgba as u8 }, GRID_SIZE, BIT_DEPTH, 256, 256, > { lut, _phantom: PhantomData, _phantom1: PhantomData, interpolation_method: options.interpolation_method, weights: BarycentricWeight::::create_ranged_256::(), color_space, is_linear, }), #[cfg(feature = "options")] BarycentricWeightScale::High => Box::new(TransformLut3x4::< T, u16, { Layout::Rgba as u8 }, GRID_SIZE, BIT_DEPTH, 65536, 65536, > { lut, _phantom: PhantomData, _phantom1: PhantomData, interpolation_method: options.interpolation_method, weights: BarycentricWeight::::create_binned::(), color_space, is_linear, }), }, _ => unimplemented!(), } } moxcms-0.7.7/src/conversions/transform_lut4_to_3.rs000064400000000000000000000306131046102023000205640ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. 
Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ use crate::conversions::interpolator::*; use crate::conversions::lut_transforms::Lut4x3Factory; use crate::math::{FusedMultiplyAdd, FusedMultiplyNegAdd, m_clamp}; use crate::{ BarycentricWeightScale, CmsError, DataColorSpace, InterpolationMethod, Layout, PointeeSizeExpressible, TransformExecutor, TransformOptions, Vector3f, }; use num_traits::AsPrimitive; use std::marker::PhantomData; pub(crate) trait Vector3fCmykLerp { fn interpolate(a: Vector3f, b: Vector3f, t: f32, scale: f32) -> Vector3f; } #[allow(unused)] #[derive(Copy, Clone, Default)] struct DefaultVector3fLerp; impl Vector3fCmykLerp for DefaultVector3fLerp { #[inline(always)] fn interpolate(a: Vector3f, b: Vector3f, t: f32, scale: f32) -> Vector3f { let t = Vector3f::from(t); let inter = a.neg_mla(a, t).mla(b, t); let mut new_vec = Vector3f::from(0.5).mla(inter, Vector3f::from(scale)); new_vec.v[0] = m_clamp(new_vec.v[0], 0.0, scale); new_vec.v[1] = m_clamp(new_vec.v[1], 0.0, scale); new_vec.v[2] = m_clamp(new_vec.v[2], 0.0, scale); new_vec } } #[allow(unused)] #[derive(Copy, Clone, Default)] pub(crate) struct NonFiniteVector3fLerp; impl Vector3fCmykLerp for NonFiniteVector3fLerp { #[inline(always)] fn interpolate(a: Vector3f, b: Vector3f, t: f32, _: f32) -> Vector3f { let t = Vector3f::from(t); a.neg_mla(a, t).mla(b, t) } } #[allow(unused)] #[derive(Copy, Clone, Default)] pub(crate) struct NonFiniteVector3fLerpUnbound; impl Vector3fCmykLerp for NonFiniteVector3fLerpUnbound { #[inline(always)] fn interpolate(a: Vector3f, b: Vector3f, t: f32, _: f32) -> Vector3f { let t = Vector3f::from(t); a.neg_mla(a, t).mla(b, t) } } #[allow(unused)] struct TransformLut4To3< T, U, const LAYOUT: u8, const GRID_SIZE: usize, const BIT_DEPTH: usize, const BINS: usize, const BARYCENTRIC_BINS: usize, > { lut: Vec, _phantom: PhantomData, _phantom1: PhantomData, interpolation_method: InterpolationMethod, weights: Box<[BarycentricWeight; BINS]>, color_space: DataColorSpace, is_linear: bool, } #[allow(unused)] impl< T: Copy + 
AsPrimitive + Default, U: AsPrimitive, const LAYOUT: u8, const GRID_SIZE: usize, const BIT_DEPTH: usize, const BINS: usize, const BARYCENTRIC_BINS: usize, > TransformLut4To3 where f32: AsPrimitive, u32: AsPrimitive, (): LutBarycentricReduction, { #[inline(never)] fn transform_chunk( &self, src: &[T], dst: &mut [T], interpolator: Box, ) { let cn = Layout::from(LAYOUT); let channels = cn.channels(); let grid_size = GRID_SIZE as i32; let grid_size3 = grid_size * grid_size * grid_size; let value_scale = ((1 << BIT_DEPTH) - 1) as f32; let max_value = ((1 << BIT_DEPTH) - 1u32).as_(); for (src, dst) in src.chunks_exact(4).zip(dst.chunks_exact_mut(channels)) { let c = <() as LutBarycentricReduction>::reduce::( src[0], ); let m = <() as LutBarycentricReduction>::reduce::( src[1], ); let y = <() as LutBarycentricReduction>::reduce::( src[2], ); let k = <() as LutBarycentricReduction>::reduce::( src[3], ); let k_weights = self.weights[k.as_()]; let w: i32 = k_weights.x; let w_n: i32 = k_weights.x_n; let t: f32 = k_weights.w; let table1 = &self.lut[(w * grid_size3 * 3) as usize..]; let table2 = &self.lut[(w_n * grid_size3 * 3) as usize..]; let r1 = interpolator.inter3( table1, &self.weights[c.as_()], &self.weights[m.as_()], &self.weights[y.as_()], ); let r2 = interpolator.inter3( table2, &self.weights[c.as_()], &self.weights[m.as_()], &self.weights[y.as_()], ); let r = Interpolation::interpolate(r1, r2, t, value_scale); dst[cn.r_i()] = r.v[0].as_(); dst[cn.g_i()] = r.v[1].as_(); dst[cn.b_i()] = r.v[2].as_(); if channels == 4 { dst[cn.a_i()] = max_value; } } } } #[allow(unused)] impl< T: Copy + AsPrimitive + Default + PointeeSizeExpressible, U: AsPrimitive, const LAYOUT: u8, const GRID_SIZE: usize, const BIT_DEPTH: usize, const BINS: usize, const BARYCENTRIC_BINS: usize, > TransformExecutor for TransformLut4To3 where f32: AsPrimitive, u32: AsPrimitive, (): LutBarycentricReduction, { fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> { let cn = 
Layout::from(LAYOUT); let channels = cn.channels(); if src.len() % 4 != 0 { return Err(CmsError::LaneMultipleOfChannels); } if dst.len() % channels != 0 { return Err(CmsError::LaneMultipleOfChannels); } let src_chunks = src.len() / 4; let dst_chunks = dst.len() / channels; if src_chunks != dst_chunks { return Err(CmsError::LaneSizeMismatch); } if self.color_space == DataColorSpace::Lab || (self.is_linear && self.color_space == DataColorSpace::Rgb) || self.color_space == DataColorSpace::Xyz { if T::FINITE { self.transform_chunk::( src, dst, Box::new(Trilinear:: {}), ); } else { self.transform_chunk::( src, dst, Box::new(Trilinear:: {}), ); } } else { match self.interpolation_method { #[cfg(feature = "options")] InterpolationMethod::Tetrahedral => { if T::FINITE { self.transform_chunk::( src, dst, Box::new(Tetrahedral:: {}), ); } else { self.transform_chunk::( src, dst, Box::new(Tetrahedral:: {}), ); } } #[cfg(feature = "options")] InterpolationMethod::Pyramid => { if T::FINITE { self.transform_chunk::( src, dst, Box::new(Pyramidal:: {}), ); } else { self.transform_chunk::( src, dst, Box::new(Pyramidal:: {}), ); } } #[cfg(feature = "options")] InterpolationMethod::Prism => { if T::FINITE { self.transform_chunk::( src, dst, Box::new(Prismatic:: {}), ); } else { self.transform_chunk::( src, dst, Box::new(Prismatic:: {}), ); } } InterpolationMethod::Linear => { if T::FINITE { self.transform_chunk::( src, dst, Box::new(Trilinear:: {}), ); } else { self.transform_chunk::( src, dst, Box::new(Trilinear:: {}), ); } } } } Ok(()) } } #[allow(dead_code)] pub(crate) struct DefaultLut4x3Factory {} #[allow(dead_code)] impl Lut4x3Factory for DefaultLut4x3Factory { fn make_transform_4x3< T: Copy + AsPrimitive + Default + PointeeSizeExpressible + 'static + Send + Sync, const LAYOUT: u8, const GRID_SIZE: usize, const BIT_DEPTH: usize, >( lut: Vec, options: TransformOptions, color_space: DataColorSpace, is_linear: bool, ) -> Box + Sync + Send> where f32: AsPrimitive, u32: AsPrimitive, 
(): LutBarycentricReduction, (): LutBarycentricReduction, { match options.barycentric_weight_scale { BarycentricWeightScale::Low => { Box::new( TransformLut4To3:: { lut, _phantom: PhantomData, _phantom1: PhantomData, interpolation_method: options.interpolation_method, weights: BarycentricWeight::::create_ranged_256::(), color_space, is_linear, }, ) } #[cfg(feature = "options")] BarycentricWeightScale::High => { Box::new( TransformLut4To3:: { lut, _phantom: PhantomData, _phantom1: PhantomData, interpolation_method: options.interpolation_method, weights: BarycentricWeight::::create_binned::(), color_space, is_linear, }, ) } } } } moxcms-0.7.7/src/conversions/xyz_lab.rs000064400000000000000000000047571046102023000163370ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ use crate::{CmsError, InPlaceStage, Lab, Xyz}; #[derive(Default)] pub(crate) struct StageLabToXyz {} impl InPlaceStage for StageLabToXyz { fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> { for dst in dst.chunks_exact_mut(3) { let lab = Lab::new(dst[0], dst[1], dst[2]); let xyz = lab.to_pcs_xyz(); dst[0] = xyz.x; dst[1] = xyz.y; dst[2] = xyz.z; } Ok(()) } } #[derive(Default)] pub(crate) struct StageXyzToLab {} impl InPlaceStage for StageXyzToLab { fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> { for dst in dst.chunks_exact_mut(3) { let xyz = Xyz::new(dst[0], dst[1], dst[2]); let lab = Lab::from_pcs_xyz(xyz); dst[0] = lab.l; dst[1] = lab.a; dst[2] = lab.b; } Ok(()) } } moxcms-0.7.7/src/dat.rs000064400000000000000000000120531046102023000130530ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. 
Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ use crate::CmsError; use crate::writer::write_u16_be; use std::time::{SystemTime, UNIX_EPOCH}; #[repr(C)] #[derive(Debug, Clone, Copy, Ord, PartialOrd, Eq, PartialEq, Default)] pub struct ColorDateTime { pub year: u16, pub month: u16, pub day_of_the_month: u16, pub hours: u16, pub minutes: u16, pub seconds: u16, } fn is_leap(year: i32) -> bool { (year % 4 == 0 && year % 100 != 0) || (year % 400 == 0) } fn days_in_month(year: i32, month: i32) -> i32 { match month { 1 => 31, 2 => { if is_leap(year) { 29 } else { 28 } } 3 => 31, 4 => 30, 5 => 31, 6 => 30, 7 => 31, 8 => 31, 9 => 30, 10 => 31, 11 => 30, 12 => 31, _ => unreachable!("Unknown month"), } } impl ColorDateTime { /// Parses slice for date time pub fn new_from_slice(slice: &[u8]) -> Result { if slice.len() != 12 { return Err(CmsError::InvalidProfile); } let year = u16::from_be_bytes([slice[0], slice[1]]); let month = u16::from_be_bytes([slice[2], slice[3]]); let day_of_the_month = u16::from_be_bytes([slice[4], slice[5]]); let hours = u16::from_be_bytes([slice[6], slice[7]]); let minutes = u16::from_be_bytes([slice[8], slice[9]]); let seconds = u16::from_be_bytes([slice[10], slice[11]]); Ok(ColorDateTime { year, month, day_of_the_month, hours, minutes, seconds, }) } /// Creates a new `ColorDateTime` from the current system time (UTC) pub fn now() -> Self { let now = match SystemTime::now().duration_since(UNIX_EPOCH) { Ok(v) => v, Err(_) => return Self::default(), }; let mut days = (now.as_secs() / 86_400) as i64; let secs_of_day = (now.as_secs() % 86_400) as i64; let mut year = 1970; loop { let year_days = if is_leap(year) { 366 } else { 365 }; if days >= year_days { days -= year_days; year += 1; } else { break; } } let mut month = 1; loop { let mdays = days_in_month(year, month); if days >= mdays as i64 { days -= mdays as i64; month += 1; } else { break; } } let day = days + 1; // days from zero based to 1 base let hour = secs_of_day / 3600; let min = (secs_of_day % 3600) / 60; let sec = secs_of_day % 60; 
Self { year: year as u16, month: month as u16, day_of_the_month: day as u16, hours: hour as u16, minutes: min as u16, seconds: sec as u16, } } #[inline] pub(crate) fn encode(&self, into: &mut Vec) { let year = self.year; let month = self.month; let day_of_the_month = self.day_of_the_month; let hours = self.hours; let minutes = self.minutes; let seconds = self.seconds; write_u16_be(into, year); write_u16_be(into, month); write_u16_be(into, day_of_the_month); write_u16_be(into, hours); write_u16_be(into, minutes); write_u16_be(into, seconds); } } moxcms-0.7.7/src/defaults.rs000064400000000000000000000503311046102023000141130ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ use crate::chad::BRADFORD_D; use crate::cicp::create_rec709_parametric; use crate::trc::{ToneReprCurve, curve_from_gamma}; use crate::{ CicpColorPrimaries, CicpProfile, ColorPrimaries, ColorProfile, DataColorSpace, LocalizableString, Matrix3d, MatrixCoefficients, ProfileClass, ProfileText, RenderingIntent, TransferCharacteristics, XyY, }; use pxfm::{copysignk, exp, floor, pow}; /// From lcms: `cmsWhitePointFromTemp` /// tempK must be >= 4000. and <= 25000. /// Invalid values of tempK will return /// (x,y,Y) = (-1.0, -1.0, -1.0) /// similar to argyll: `icx_DTEMP2XYZ()` const fn white_point_from_temperature(temp_k: i32) -> XyY { let mut white_point = XyY { x: 0., y: 0., yb: 0., }; // No optimization provided. 
let temp_k = temp_k as f64; // Square let temp_k2 = temp_k * temp_k; // Cube let temp_k3 = temp_k2 * temp_k; // For correlated color temperature (T) between 4000K and 7000K: let x = if temp_k > 4000.0 && temp_k <= 7000.0 { -4.6070 * (1E9 / temp_k3) + 2.9678 * (1E6 / temp_k2) + 0.09911 * (1E3 / temp_k) + 0.244063 } else if temp_k > 7000.0 && temp_k <= 25000.0 { -2.0064 * (1E9 / temp_k3) + 1.9018 * (1E6 / temp_k2) + 0.24748 * (1E3 / temp_k) + 0.237040 } else { // or for correlated color temperature (T) between 7000K and 25000K: // Invalid tempK white_point.x = -1.0; white_point.y = -1.0; white_point.yb = -1.0; debug_assert!(false, "invalid temp"); return white_point; }; // Obtain y(x) let y = -3.000 * (x * x) + 2.870 * x - 0.275; // wave factors (not used, but here for futures extensions) // let M1 = (-1.3515 - 1.7703*x + 5.9114 *y)/(0.0241 + 0.2562*x - 0.7341*y); // let M2 = (0.0300 - 31.4424*x + 30.0717*y)/(0.0241 + 0.2562*x - 0.7341*y); // Fill white_point struct white_point.x = x; white_point.y = y; white_point.yb = 1.0; white_point } pub const WHITE_POINT_D50: XyY = white_point_from_temperature(5003); pub const WHITE_POINT_D60: XyY = white_point_from_temperature(6000); pub const WHITE_POINT_D65: XyY = white_point_from_temperature(6504); pub const WHITE_POINT_DCI_P3: XyY = white_point_from_temperature(6300); // https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.2100-2-201807-I!!PDF-F.pdf // Perceptual Quantization / SMPTE standard ST.2084 #[inline] const fn pq_curve(x: f64) -> f64 { const M1: f64 = 2610.0 / 16384.0; const M2: f64 = (2523.0 / 4096.0) * 128.0; const C1: f64 = 3424.0 / 4096.0; const C2: f64 = (2413.0 / 4096.0) * 32.0; const C3: f64 = (2392.0 / 4096.0) * 32.0; if x == 0.0 { return 0.0; } let sign = x; let x = x.abs(); let xpo = pow(x, 1.0 / M2); let num = (xpo - C1).max(0.0); let den = C2 - C3 * xpo; let res = pow(num / den, 1.0 / M1); copysignk(res, sign) } pub(crate) const fn build_trc_table_pq() -> [u16; 4096] { let mut table = [0u16; 4096]; 
const NUM_ENTRIES: usize = 4096; let mut i = 0usize; while i < NUM_ENTRIES { let x: f64 = i as f64 / (NUM_ENTRIES - 1) as f64; let y: f64 = pq_curve(x); let mut output: f64; output = y * 65535.0 + 0.5; if output > 65535.0 { output = 65535.0 } if output < 0.0 { output = 0.0 } table[i] = floor(output) as u16; i += 1; } table } pub(crate) const fn build_trc_table_hlg() -> [u16; 4096] { let mut table = [0u16; 4096]; const NUM_ENTRIES: usize = 4096; let mut i = 0usize; while i < NUM_ENTRIES { let x: f64 = i as f64 / (NUM_ENTRIES - 1) as f64; let y: f64 = hlg_curve(x); let mut output: f64; output = y * 65535.0 + 0.5; if output > 65535.0 { output = 65535.0 } if output < 0.0 { output = 0.0 } table[i] = floor(output) as u16; i += 1; } table } // https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.2100-2-201807-I!!PDF-F.pdf // Hybrid Log-Gamma const fn hlg_curve(x: f64) -> f64 { const BETA: f64 = 0.04; const RA: f64 = 5.591816309728916; // 1.0 / A where A = 0.17883277 const B: f64 = 0.28466892; // 1.0 - 4.0 * A const C: f64 = 0.5599107295; // 0,5 –aln(4a) let e = (x * (1.0 - BETA) + BETA).max(0.0); if e == 0.0 { return 0.0; } let sign = e.abs(); let res = if e <= 0.5 { e * e / 3.0 } else { (exp((e - C) * RA) + B) / 12.0 }; copysignk(res, sign) } /// Perceptual Quantizer Lookup table pub const PQ_LUT_TABLE: [u16; 4096] = build_trc_table_pq(); /// Hybrid Log Gamma Lookup table pub const HLG_LUT_TABLE: [u16; 4096] = build_trc_table_hlg(); impl ColorProfile { const SRGB_COLORANTS: Matrix3d = ColorProfile::colorants_matrix(WHITE_POINT_D65, ColorPrimaries::BT_709); const DISPLAY_P3_COLORANTS: Matrix3d = ColorProfile::colorants_matrix(WHITE_POINT_D65, ColorPrimaries::SMPTE_432); const ADOBE_RGB_COLORANTS: Matrix3d = ColorProfile::colorants_matrix(WHITE_POINT_D65, ColorPrimaries::ADOBE_RGB); const DCI_P3_COLORANTS: Matrix3d = ColorProfile::colorants_matrix(WHITE_POINT_DCI_P3, ColorPrimaries::DCI_P3); const PRO_PHOTO_RGB_COLORANTS: Matrix3d = 
ColorProfile::colorants_matrix(WHITE_POINT_D50, ColorPrimaries::PRO_PHOTO_RGB); const BT2020_COLORANTS: Matrix3d = ColorProfile::colorants_matrix(WHITE_POINT_D65, ColorPrimaries::BT_2020); const ACES_2065_1_COLORANTS: Matrix3d = ColorProfile::colorants_matrix(WHITE_POINT_D60, ColorPrimaries::ACES_2065_1); const ACES_CG_COLORANTS: Matrix3d = ColorProfile::colorants_matrix(WHITE_POINT_D60, ColorPrimaries::ACES_CG); #[inline] fn basic_rgb_profile() -> ColorProfile { ColorProfile { profile_class: ProfileClass::DisplayDevice, rendering_intent: RenderingIntent::Perceptual, color_space: DataColorSpace::Rgb, pcs: DataColorSpace::Xyz, chromatic_adaptation: Some(BRADFORD_D), white_point: WHITE_POINT_D50.to_xyzd(), ..Default::default() } } /// Creates new profile from CICP pub fn new_from_cicp(cicp_color_primaries: CicpProfile) -> ColorProfile { let mut basic = ColorProfile::basic_rgb_profile(); basic.update_rgb_colorimetry_from_cicp(cicp_color_primaries); basic } /// Creates new sRGB profile pub fn new_srgb() -> ColorProfile { let mut profile = ColorProfile::basic_rgb_profile(); profile.update_colorants(ColorProfile::SRGB_COLORANTS); let curve = ToneReprCurve::Parametric(vec![2.4, 1. / 1.055, 0.055 / 1.055, 1. 
/ 12.92, 0.04045]); profile.red_trc = Some(curve.clone()); profile.blue_trc = Some(curve.clone()); profile.green_trc = Some(curve); profile.media_white_point = Some(WHITE_POINT_D65.to_xyzd()); profile.cicp = Some(CicpProfile { color_primaries: CicpColorPrimaries::Bt709, transfer_characteristics: TransferCharacteristics::Srgb, matrix_coefficients: MatrixCoefficients::Bt709, full_range: false, }); profile.description = Some(ProfileText::Localizable(vec![LocalizableString::new( "en".to_string(), "US".to_string(), "sRGB IEC61966-2.1".to_string(), )])); profile.copyright = Some(ProfileText::Localizable(vec![LocalizableString::new( "en".to_string(), "US".to_string(), "Public Domain".to_string(), )])); profile } /// Creates new Adobe RGB profile pub fn new_adobe_rgb() -> ColorProfile { let mut profile = ColorProfile::basic_rgb_profile(); profile.update_colorants(ColorProfile::ADOBE_RGB_COLORANTS); let curve = curve_from_gamma(2.19921875f32); profile.red_trc = Some(curve.clone()); profile.blue_trc = Some(curve.clone()); profile.green_trc = Some(curve); profile.media_white_point = Some(WHITE_POINT_D65.to_xyzd()); profile.white_point = WHITE_POINT_D50.to_xyzd(); profile.description = Some(ProfileText::Localizable(vec![LocalizableString::new( "en".to_string(), "US".to_string(), "Adobe RGB 1998".to_string(), )])); profile.copyright = Some(ProfileText::Localizable(vec![LocalizableString::new( "en".to_string(), "US".to_string(), "Public Domain".to_string(), )])); profile } /// Creates new Display P3 profile pub fn new_display_p3() -> ColorProfile { let mut profile = ColorProfile::basic_rgb_profile(); profile.update_colorants(ColorProfile::DISPLAY_P3_COLORANTS); let curve = ToneReprCurve::Parametric(vec![2.4, 1. / 1.055, 0.055 / 1.055, 1. 
/ 12.92, 0.04045]); profile.red_trc = Some(curve.clone()); profile.blue_trc = Some(curve.clone()); profile.green_trc = Some(curve); profile.media_white_point = Some(WHITE_POINT_D65.to_xyzd()); profile.cicp = Some(CicpProfile { color_primaries: CicpColorPrimaries::Smpte431, transfer_characteristics: TransferCharacteristics::Srgb, matrix_coefficients: MatrixCoefficients::Bt709, full_range: false, }); profile.description = Some(ProfileText::Localizable(vec![LocalizableString::new( "en".to_string(), "US".to_string(), "Display P3".to_string(), )])); profile.copyright = Some(ProfileText::Localizable(vec![LocalizableString::new( "en".to_string(), "US".to_string(), "Public Domain".to_string(), )])); profile } /// Creates new Display P3 PQ profile pub fn new_display_p3_pq() -> ColorProfile { let mut profile = ColorProfile::basic_rgb_profile(); profile.update_colorants(ColorProfile::DISPLAY_P3_COLORANTS); let curve = ToneReprCurve::Lut(PQ_LUT_TABLE.to_vec()); profile.red_trc = Some(curve.clone()); profile.blue_trc = Some(curve.clone()); profile.green_trc = Some(curve); profile.media_white_point = Some(WHITE_POINT_D65.to_xyzd()); profile.cicp = Some(CicpProfile { color_primaries: CicpColorPrimaries::Smpte431, transfer_characteristics: TransferCharacteristics::Smpte2084, matrix_coefficients: MatrixCoefficients::Bt709, full_range: false, }); profile.description = Some(ProfileText::Localizable(vec![LocalizableString::new( "en".to_string(), "US".to_string(), "Display P3 PQ".to_string(), )])); profile.copyright = Some(ProfileText::Localizable(vec![LocalizableString::new( "en".to_string(), "US".to_string(), "Public Domain".to_string(), )])); profile } /// Creates new DCI P3 profile pub fn new_dci_p3() -> ColorProfile { let mut profile = ColorProfile::basic_rgb_profile(); profile.update_colorants(ColorProfile::DCI_P3_COLORANTS); let curve = curve_from_gamma(2.6f32); profile.red_trc = Some(curve.clone()); profile.blue_trc = Some(curve.clone()); profile.green_trc = Some(curve); 
profile.media_white_point = Some(WHITE_POINT_DCI_P3.to_xyzd()); profile.cicp = Some(CicpProfile { color_primaries: CicpColorPrimaries::Smpte432, transfer_characteristics: TransferCharacteristics::Srgb, matrix_coefficients: MatrixCoefficients::Bt709, full_range: false, }); profile.description = Some(ProfileText::Localizable(vec![LocalizableString::new( "en".to_string(), "US".to_string(), "DCI P3".to_string(), )])); profile.copyright = Some(ProfileText::Localizable(vec![LocalizableString::new( "en".to_string(), "US".to_string(), "Public Domain".to_string(), )])); profile } /// Creates new ProPhoto RGB profile pub fn new_pro_photo_rgb() -> ColorProfile { let mut profile = ColorProfile::basic_rgb_profile(); profile.update_colorants(ColorProfile::PRO_PHOTO_RGB_COLORANTS); let curve = curve_from_gamma(1.8f32); profile.red_trc = Some(curve.clone()); profile.blue_trc = Some(curve.clone()); profile.green_trc = Some(curve); profile.media_white_point = Some(WHITE_POINT_D50.to_xyzd()); profile.description = Some(ProfileText::Localizable(vec![LocalizableString::new( "en".to_string(), "US".to_string(), "ProPhoto RGB".to_string(), )])); profile.copyright = Some(ProfileText::Localizable(vec![LocalizableString::new( "en".to_string(), "US".to_string(), "Public Domain".to_string(), )])); profile } /// Creates new Bt.2020 profile pub fn new_bt2020() -> ColorProfile { let mut profile = ColorProfile::basic_rgb_profile(); profile.update_colorants(ColorProfile::BT2020_COLORANTS); let curve = ToneReprCurve::Parametric(create_rec709_parametric().to_vec()); profile.red_trc = Some(curve.clone()); profile.blue_trc = Some(curve.clone()); profile.green_trc = Some(curve); profile.media_white_point = Some(WHITE_POINT_D65.to_xyzd()); profile.description = Some(ProfileText::Localizable(vec![LocalizableString::new( "en".to_string(), "US".to_string(), "Rec.2020".to_string(), )])); profile.copyright = Some(ProfileText::Localizable(vec![LocalizableString::new( "en".to_string(), "US".to_string(), "Public 
Domain".to_string(), )])); profile } /// Creates new Bt.2020 PQ profile pub fn new_bt2020_pq() -> ColorProfile { let mut profile = ColorProfile::basic_rgb_profile(); profile.update_colorants(ColorProfile::BT2020_COLORANTS); let curve = ToneReprCurve::Lut(PQ_LUT_TABLE.to_vec()); profile.red_trc = Some(curve.clone()); profile.blue_trc = Some(curve.clone()); profile.green_trc = Some(curve); profile.media_white_point = Some(WHITE_POINT_D65.to_xyzd()); profile.cicp = Some(CicpProfile { color_primaries: CicpColorPrimaries::Bt2020, transfer_characteristics: TransferCharacteristics::Smpte2084, matrix_coefficients: MatrixCoefficients::Bt709, full_range: false, }); profile.description = Some(ProfileText::Localizable(vec![LocalizableString::new( "en".to_string(), "US".to_string(), "Rec.2020 PQ".to_string(), )])); profile.copyright = Some(ProfileText::Localizable(vec![LocalizableString::new( "en".to_string(), "US".to_string(), "Public Domain".to_string(), )])); profile } /// Creates new Bt.2020 HLG profile pub fn new_bt2020_hlg() -> ColorProfile { let mut profile = ColorProfile::basic_rgb_profile(); profile.update_colorants(ColorProfile::BT2020_COLORANTS); let curve = ToneReprCurve::Lut(HLG_LUT_TABLE.to_vec()); profile.red_trc = Some(curve.clone()); profile.blue_trc = Some(curve.clone()); profile.green_trc = Some(curve); profile.media_white_point = Some(WHITE_POINT_D65.to_xyzd()); profile.cicp = Some(CicpProfile { color_primaries: CicpColorPrimaries::Bt2020, transfer_characteristics: TransferCharacteristics::Hlg, matrix_coefficients: MatrixCoefficients::Bt709, full_range: false, }); profile.description = Some(ProfileText::Localizable(vec![LocalizableString::new( "en".to_string(), "US".to_string(), "Rec.2020 HLG".to_string(), )])); profile.copyright = Some(ProfileText::Localizable(vec![LocalizableString::new( "en".to_string(), "US".to_string(), "Public Domain".to_string(), )])); profile } /// Creates new Monochrome profile pub fn new_gray_with_gamma(gamma: f32) -> ColorProfile 
{ ColorProfile { gray_trc: Some(curve_from_gamma(gamma)), profile_class: ProfileClass::DisplayDevice, rendering_intent: RenderingIntent::Perceptual, color_space: DataColorSpace::Gray, media_white_point: Some(WHITE_POINT_D65.to_xyzd()), white_point: WHITE_POINT_D50.to_xyzd(), chromatic_adaptation: Some(BRADFORD_D), copyright: Some(ProfileText::Localizable(vec![LocalizableString::new( "en".to_string(), "US".to_string(), "Public Domain".to_string(), )])), ..Default::default() } } /// Creates new ACES 2065-1/AP0 profile pub fn new_aces_aces_2065_1_linear() -> ColorProfile { let mut profile = ColorProfile::basic_rgb_profile(); profile.update_colorants(ColorProfile::ACES_2065_1_COLORANTS); let curve = ToneReprCurve::Lut(vec![]); profile.red_trc = Some(curve.clone()); profile.blue_trc = Some(curve.clone()); profile.green_trc = Some(curve); profile.media_white_point = Some(WHITE_POINT_D60.to_xyzd()); profile.description = Some(ProfileText::Localizable(vec![LocalizableString::new( "en".to_string(), "US".to_string(), "ACES 2065-1".to_string(), )])); profile.copyright = Some(ProfileText::Localizable(vec![LocalizableString::new( "en".to_string(), "US".to_string(), "Public Domain".to_string(), )])); profile } /// Creates new ACEScg profile pub fn new_aces_cg_linear() -> ColorProfile { let mut profile = ColorProfile::basic_rgb_profile(); profile.update_colorants(ColorProfile::ACES_CG_COLORANTS); let curve = ToneReprCurve::Lut(vec![]); profile.red_trc = Some(curve.clone()); profile.blue_trc = Some(curve.clone()); profile.green_trc = Some(curve); profile.media_white_point = Some(WHITE_POINT_D60.to_xyzd()); profile.description = Some(ProfileText::Localizable(vec![LocalizableString::new( "en".to_string(), "US".to_string(), "ACEScg/AP1".to_string(), )])); profile.copyright = Some(ProfileText::Localizable(vec![LocalizableString::new( "en".to_string(), "US".to_string(), "Public Domain".to_string(), )])); profile } } 
moxcms-0.7.7/src/dt_ucs.rs000064400000000000000000000245201046102023000135660ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ use crate::Xyz; use crate::mlaf::mlaf; use pxfm::{f_atan2f, f_powf, f_sincosf}; /// Darktable UCS JCH ( Darktable Uniform Color Space ) #[derive(Copy, Clone, PartialOrd, PartialEq, Debug)] pub struct DtUchJch { pub j: f32, pub c: f32, pub h: f32, } /// Darktable UCS HSB ( Darktable Uniform Color Space ) #[derive(Copy, Clone, PartialOrd, PartialEq, Debug)] pub struct DtUchHsb { pub h: f32, pub s: f32, pub b: f32, } /// Darktable HCB ( Darktable Uniform Color Space ) #[derive(Copy, Clone, PartialOrd, PartialEq, Debug)] pub struct DtUchHcb { pub h: f32, pub c: f32, pub b: f32, } const DT_UCS_L_STAR_RANGE: f32 = 2.098883786377; #[inline] fn y_to_dt_ucs_l_star(y: f32) -> f32 { let y_hat = f_powf(y, 0.631651345306265); DT_UCS_L_STAR_RANGE * y_hat / (y_hat + 1.12426773749357) } #[inline] fn dt_ucs_l_star_to_y(x: f32) -> f32 { f_powf( 1.12426773749357 * x / (DT_UCS_L_STAR_RANGE - x), 1.5831518565279648, ) } const L_WHITE: f32 = 0.98805060; #[inline] fn dt_ucs_luv_to_ucs_jch( l_star: f32, l_white: f32, u_star_prime: f32, v_star_prime: f32, ) -> DtUchJch { let m2: f32 = mlaf(u_star_prime * u_star_prime, v_star_prime, v_star_prime); // square of colorfulness M // should be JCH[0] = powf(L_star / L_white), cz) but we treat only the case where cz = 1 let j = l_star / l_white; let c = 15.932993652962535 * f_powf(l_star, 0.6523997524738018) * f_powf(m2, 0.6007557017508491) / l_white; let h = f_atan2f(v_star_prime, u_star_prime); DtUchJch::new(j, c, h) } #[inline] fn dt_ucs_xy_to_uv(x: f32, y: f32) -> (f32, f32) { const X_C: [f32; 3] = [-0.783941002840055, 0.745273540913283, 0.318707282433486]; const Y_C: [f32; 3] = [0.277512987809202, -0.205375866083878, 2.16743692732158]; const BIAS: [f32; 3] = [0.153836578598858, -0.165478376301988, 0.291320554395942]; let mut u_c = mlaf(mlaf(BIAS[0], Y_C[0], y), X_C[0], x); let mut v_c = mlaf(mlaf(BIAS[1], Y_C[1], y), X_C[1], x); let d_c = mlaf(mlaf(BIAS[2], Y_C[2], y), X_C[2], x); let div = if d_c >= 0.0 { d_c.max(f32::MIN) } else { 
d_c.min(-f32::MIN) }; u_c /= div; v_c /= div; const STAR_C: [f32; 2] = [1.39656225667, 1.4513954287]; const STAR_HF_C: [f32; 2] = [1.49217352929, 1.52488637914]; let u_star = STAR_C[0] * u_c / (u_c.abs() + STAR_HF_C[0]); let v_star = STAR_C[1] * v_c / (v_c.abs() + STAR_HF_C[1]); // The following is equivalent to a 2D matrix product let u_star_prime = mlaf(-1.124983854323892 * u_star, -0.980483721769325, v_star); let v_star_prime = mlaf(1.86323315098672 * u_star, 1.971853092390862, v_star); (u_star_prime, v_star_prime) } impl DtUchJch { #[inline] pub fn new(j: f32, c: f32, h: f32) -> DtUchJch { DtUchJch { j, c, h } } #[inline] pub fn from_xyz(xyz: Xyz) -> DtUchJch { DtUchJch::from_xyy(xyz.to_xyy()) } #[inline] pub fn to_xyz(&self) -> Xyz { let xyy = self.to_xyy(); Xyz::from_xyy(xyy) } #[inline] pub fn from_xyy(xyy: [f32; 3]) -> DtUchJch { let l_star = y_to_dt_ucs_l_star(xyy[2]); // let l_white = y_to_dt_ucs_l_star(1.); let (u_star_prime, v_star_prime) = dt_ucs_xy_to_uv(xyy[0], xyy[1]); dt_ucs_luv_to_ucs_jch(l_star, L_WHITE, u_star_prime, v_star_prime) } #[inline] pub fn to_xyy(&self) -> [f32; 3] { // let l_white: f32 = y_to_dt_ucs_l_star(1.0); let l_star = (self.j * L_WHITE).max(0.0).min(2.09885); let m = if l_star != 0. { f_powf( self.c * L_WHITE / (15.932993652962535 * f_powf(l_star, 0.6523997524738018)), 0.8322850678616855, ) } else { 0. 
}; let sin_cos_h = f_sincosf(self.h); let u_star_prime = m * sin_cos_h.1; let v_star_prime = m * sin_cos_h.0; // The following is equivalent to a 2D matrix product let u_star = mlaf( -5.037522385190711 * u_star_prime, -2.504856328185843, v_star_prime, ); let v_star = mlaf( 4.760029407436461 * u_star_prime, 2.874012963239247, v_star_prime, ); const F: [f32; 2] = [1.39656225667, 1.4513954287]; const HF: [f32; 2] = [1.49217352929, 1.52488637914]; let u_c = -HF[0] * u_star / (u_star.abs() - F[0]); let v_c = -HF[1] * v_star / (v_star.abs() - F[1]); const U_C: [f32; 3] = [0.167171472114775, -0.150959086409163, 0.940254742367256]; const V_C: [f32; 3] = [0.141299802443708, -0.155185060382272, 1.000000000000000]; const BIAS: [f32; 3] = [ -0.00801531300850582, -0.00843312433578007, -0.0256325967652889, ]; let mut x = mlaf(mlaf(BIAS[0], V_C[0], v_c), U_C[0], u_c); let mut y = mlaf(mlaf(BIAS[1], V_C[1], v_c), U_C[1], u_c); let d = mlaf(mlaf(BIAS[2], V_C[2], v_c), U_C[2], u_c); let div = if d >= 0.0 { d.max(f32::MIN) } else { d.min(-f32::MIN) }; x /= div; y /= div; let yb = dt_ucs_l_star_to_y(l_star); [x, y, yb] } } impl DtUchHsb { #[inline] pub fn new(h: f32, s: f32, b: f32) -> DtUchHsb { DtUchHsb { h, s, b } } #[inline] pub fn from_jch(jch: DtUchJch) -> DtUchHsb { let b = jch.j * (f_powf(jch.c, 1.33654221029386) + 1.); let s = if b > 0. { jch.c / b } else { 0. 
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `actual` lies within `tolerance` of `expected`.
    #[track_caller]
    fn assert_close(expected: f32, actual: f32, tolerance: f32) {
        assert!(
            (expected - actual).abs() < tolerance,
            "Expected {}, got {}",
            expected,
            actual
        );
    }

    /// xyY -> JCH -> xyY must reproduce the input.
    #[test]
    fn test_darktable_ucs_jch() {
        let xyy = [0.4, 0.2, 0.5];
        let ucs = DtUchJch::from_xyy(xyy);
        let xyy_rev = ucs.to_xyy();
        for (expected, actual) in xyy.iter().zip(xyy_rev.iter()) {
            assert_close(*expected, *actual, 1e-5);
        }
    }

    /// JCH -> HSB -> JCH must reproduce the input.
    #[test]
    fn test_darktable_hsb() {
        let jch = DtUchJch::new(0.3, 0.6, 0.4);
        let hsb = DtUchHsb::from_jch(jch);
        let r_jch = hsb.to_jch();
        assert_close(jch.j, r_jch.j, 1e-5);
        assert_close(jch.c, r_jch.c, 1e-5);
        assert_close(jch.h, r_jch.h, 1e-5);
    }

    /// JCH -> HCB -> JCH must reproduce the input.
    #[test]
    fn test_darktable_hcb() {
        let jch = DtUchJch::new(0.3, 0.6, 0.4);
        let hcb = DtUchHcb::from_jch(jch);
        let r_jch = hcb.to_jch();
        assert_close(jch.j, r_jch.j, 1e-5);
        assert_close(jch.c, r_jch.c, 1e-5);
        assert_close(jch.h, r_jch.h, 1e-5);
    }

    /// XYZ -> JCH -> XYZ must reproduce the input.
    #[test]
    fn test_darktable_ucs_jch_from_xyz() {
        let xyz = Xyz::new(0.4, 0.2, 0.5);
        let ucs = DtUchJch::from_xyz(xyz);
        let xyz_rev = ucs.to_xyz();
        // Compare against the round-tripped value, not the input with itself.
        // The XYZ path goes through xyY in both directions, so the tolerance is
        // a little wider than for the direct xyY round trip; tighten it if the
        // conversion proves more accurate.
        assert_close(xyz.x, xyz_rev.x, 1e-4);
        assert_close(xyz.y, xyz_rev.y, 1e-4);
        assert_close(xyz.z, xyz_rev.z, 1e-4);
    }
}
*/ use crate::RenderingIntent; use std::error::Error; use std::fmt::Display; #[derive(Debug, Copy, Clone, PartialOrd, PartialEq)] pub struct MalformedSize { pub size: usize, pub expected: usize, } #[derive(Debug, Clone, PartialOrd, PartialEq)] pub enum CmsError { LaneSizeMismatch, LaneMultipleOfChannels, InvalidProfile, InvalidTrcCurve, InvalidCicp, CurveLutIsTooLarge, ParametricCurveZeroDivision, InvalidRenderingIntent, DivisionByZero, UnsupportedColorPrimaries(u8), UnsupportedTrc(u8), InvalidLayout, UnsupportedProfileConnection, BuildTransferFunction, UnsupportedChannelConfiguration, UnknownTag(u32), UnknownTagTypeDefinition(u32), UnsupportedLutRenderingIntent(RenderingIntent), InvalidAtoBLut, OverflowingError, LUTTablesInvalidKind, MalformedClut(MalformedSize), MalformedCurveLutTable(MalformedSize), InvalidInksCountForProfile, MalformedTrcCurve(String), OutOfMemory(usize), } impl Display for CmsError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { CmsError::LaneSizeMismatch => f.write_str("Lanes length must match"), CmsError::LaneMultipleOfChannels => { f.write_str("Lane length must not be multiple of channel count") } CmsError::InvalidProfile => f.write_str("Invalid ICC profile"), CmsError::InvalidCicp => { f.write_str("Invalid Code Independent point (CICP) in ICC profile") } CmsError::InvalidTrcCurve => f.write_str("Invalid TRC curve"), CmsError::CurveLutIsTooLarge => f.write_str("Curve Lut is too large"), CmsError::ParametricCurveZeroDivision => { f.write_str("Parametric Curve definition causes division by zero") } CmsError::InvalidRenderingIntent => f.write_str("Invalid rendering intent"), CmsError::DivisionByZero => f.write_str("Division by zero"), CmsError::UnsupportedColorPrimaries(value) => { f.write_fmt(format_args!("Unsupported color primaries, {value}")) } CmsError::UnsupportedTrc(value) => f.write_fmt(format_args!("Unsupported TRC {value}")), CmsError::InvalidLayout => f.write_str("Invalid layout"), 
CmsError::UnsupportedProfileConnection => f.write_str("Unsupported profile connection"), CmsError::BuildTransferFunction => f.write_str("Can't reconstruct transfer function"), CmsError::UnsupportedChannelConfiguration => { f.write_str("Can't reconstruct channel configuration") } CmsError::UnknownTag(t) => f.write_fmt(format_args!("Unknown tag: {t}")), CmsError::UnknownTagTypeDefinition(t) => { f.write_fmt(format_args!("Unknown tag type definition: {t}")) } CmsError::UnsupportedLutRenderingIntent(intent) => f.write_fmt(format_args!( "Can't find LUT for rendering intent: {intent:?}" )), CmsError::InvalidAtoBLut => f.write_str("Invalid A to B Lut"), CmsError::OverflowingError => { f.write_str("Overflowing was happen, that is not allowed") } CmsError::LUTTablesInvalidKind => f.write_str("All LUT curves must have same kind"), CmsError::MalformedClut(size) => { f.write_fmt(format_args!("Invalid CLUT size: {size:?}")) } CmsError::MalformedCurveLutTable(size) => { f.write_fmt(format_args!("Malformed curve LUT size: {size:?}")) } CmsError::InvalidInksCountForProfile => { f.write_str("Invalid inks count for profile was provided") } CmsError::MalformedTrcCurve(str) => f.write_str(str), CmsError::OutOfMemory(capacity) => f.write_fmt(format_args!( "There is no enough memory to allocate {capacity} bytes" )), } } } impl Error for CmsError {} macro_rules! try_vec { () => { Vec::new() }; ($elem:expr; $n:expr) => {{ let mut v = Vec::new(); v.try_reserve_exact($n) .map_err(|_| crate::err::CmsError::OutOfMemory($n))?; v.resize($n, $elem); v }}; } pub(crate) use try_vec; moxcms-0.7.7/src/gamma.rs000064400000000000000000001022711046102023000133670ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. 
Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ use crate::mlaf::{fmla, mlaf}; use crate::transform::PointeeSizeExpressible; use crate::{Rgb, TransferCharacteristics}; use num_traits::AsPrimitive; use pxfm::{ dirty_powf, f_exp, f_exp10, f_exp10f, f_expf, f_log, f_log10, f_log10f, f_logf, f_pow, f_powf, }; #[inline] /// Linear transfer function for sRGB fn srgb_to_linear(gamma: f64) -> f64 { if gamma < 0f64 { 0f64 } else if gamma < 12.92f64 * 0.0030412825601275209f64 { gamma * (1f64 / 12.92f64) } else if gamma < 1.0f64 { f_pow( (gamma + 0.0550107189475866f64) / 1.0550107189475866f64, 2.4f64, ) } else { 1.0f64 } } #[inline] /// Linear transfer function for sRGB fn srgb_to_linearf_extended(gamma: f32) -> f32 { if gamma < 12.92 * 0.0030412825601275209 { gamma * (1. / 12.92f32) } else { dirty_powf((gamma + 0.0550107189475866) / 1.0550107189475866, 2.4) } } #[inline] /// Gamma transfer function for sRGB fn srgb_from_linear(linear: f64) -> f64 { if linear < 0.0f64 { 0.0f64 } else if linear < 0.0030412825601275209f64 { linear * 12.92f64 } else if linear < 1.0f64 { fmla( 1.0550107189475866f64, f_pow(linear, 1.0f64 / 2.4f64), -0.0550107189475866f64, ) } else { 1.0f64 } } #[inline] /// Gamma transfer function for sRGB pub(crate) fn srgb_from_linear_extended(linear: f32) -> f32 { if linear < 0.0030412825601275209f32 { linear * 12.92f32 } else { fmla( 1.0550107189475866f32, dirty_powf(linear, 1.0f32 / 2.4f32), -0.0550107189475866f32, ) } } #[inline] /// Linear transfer function for Rec.709 fn rec709_to_linear(gamma: f64) -> f64 { if gamma < 0.0f64 { 0.0f64 } else if gamma < 4.5f64 * 0.018053968510807f64 { gamma * (1f64 / 4.5f64) } else if gamma < 1.0f64 { f_pow( (gamma + 0.09929682680944f64) / 1.09929682680944f64, 1.0f64 / 0.45f64, ) } else { 1.0f64 } } #[inline] /// Linear transfer function for Rec.709 fn rec709_to_linearf_extended(gamma: f32) -> f32 { if gamma < 4.5 * 0.018053968510807 { gamma * (1. 
/ 4.5) } else { f_powf((gamma + 0.09929682680944) / 1.09929682680944, 1.0 / 0.45) } } #[inline] /// Gamma transfer function for Rec.709 fn rec709_from_linear(linear: f64) -> f64 { if linear < 0.0f64 { 0.0f64 } else if linear < 0.018053968510807f64 { linear * 4.5f64 } else if linear < 1.0f64 { fmla( 1.09929682680944f64, f_pow(linear, 0.45f64), -0.09929682680944f64, ) } else { 1.0f64 } } #[inline] /// Gamma transfer function for Rec.709 fn rec709_from_linearf_extended(linear: f32) -> f32 { if linear < 0.018053968510807 { linear * 4.5 } else { fmla( 1.09929682680944, dirty_powf(linear, 0.45), -0.09929682680944, ) } } #[inline] /// Linear transfer function for Smpte 428 pub(crate) fn smpte428_to_linear(gamma: f64) -> f64 { const SCALE: f64 = 1. / 0.91655527974030934f64; f_pow(gamma.max(0.).min(1f64), 2.6f64) * SCALE } #[inline] /// Linear transfer function for Smpte 428 pub(crate) fn smpte428_to_linearf_extended(gamma: f32) -> f32 { const SCALE: f32 = 1. / 0.91655527974030934; dirty_powf(gamma.max(0.), 2.6) * SCALE } #[inline] /// Gamma transfer function for Smpte 428 fn smpte428_from_linear(linear: f64) -> f64 { const POWER_VALUE: f64 = 1.0f64 / 2.6f64; f_pow(0.91655527974030934f64 * linear.max(0.), POWER_VALUE) } #[inline] /// Gamma transfer function for Smpte 428 fn smpte428_from_linearf(linear: f32) -> f32 { const POWER_VALUE: f32 = 1.0 / 2.6; dirty_powf(0.91655527974030934 * linear.max(0.), POWER_VALUE) } #[inline] /// Linear transfer function for Smpte 240 pub(crate) fn smpte240_to_linear(gamma: f64) -> f64 { if gamma < 0.0 { 0.0 } else if gamma < 4.0 * 0.022821585529445 { gamma / 4.0 } else if gamma < 1.0 { f_pow((gamma + 0.111572195921731) / 1.111572195921731, 1.0 / 0.45) } else { 1.0 } } #[inline] /// Linear transfer function for Smpte 240 pub(crate) fn smpte240_to_linearf_extended(gamma: f32) -> f32 { if gamma < 4.0 * 0.022821585529445 { gamma / 4.0 } else { dirty_powf((gamma + 0.111572195921731) / 1.111572195921731, 1.0 / 0.45) } } #[inline] /// Gamma 
transfer function for Smpte 240 fn smpte240_from_linear(linear: f64) -> f64 { if linear < 0.0 { 0.0 } else if linear < 0.022821585529445 { linear * 4.0 } else if linear < 1.0 { fmla(1.111572195921731, f_pow(linear, 0.45), -0.111572195921731) } else { 1.0 } } #[inline] /// Gamma transfer function for Smpte 240 fn smpte240_from_linearf_extended(linear: f32) -> f32 { if linear < 0.022821585529445 { linear * 4.0 } else { fmla(1.111572195921731, f_powf(linear, 0.45), -0.111572195921731) } } #[inline] /// Gamma transfer function for Log100 fn log100_from_linear(linear: f64) -> f64 { if linear <= 0.01f64 { 0. } else { 1. + f_log10(linear.min(1.)) / 2.0 } } #[inline] /// Gamma transfer function for Log100 fn log100_from_linearf(linear: f32) -> f32 { if linear <= 0.01 { 0. } else { 1. + f_log10f(linear.min(1.)) / 2.0 } } #[inline] /// Linear transfer function for Log100 pub(crate) fn log100_to_linear(gamma: f64) -> f64 { // The function is non-bijective so choose the middle of [0, 0.00316227766f]. const MID_INTERVAL: f64 = 0.01 / 2.; if gamma <= 0. { MID_INTERVAL } else { f_exp10(2. * (gamma.min(1.) - 1.)) } } #[inline] /// Linear transfer function for Log100 pub(crate) fn log100_to_linearf(gamma: f32) -> f32 { // The function is non-bijective so choose the middle of [0, 0.00316227766f]. const MID_INTERVAL: f32 = 0.01 / 2.; if gamma <= 0. { MID_INTERVAL } else { f_exp10f(2. * (gamma.min(1.) - 1.)) } } #[inline] /// Linear transfer function for Log100Sqrt10 pub(crate) fn log100_sqrt10_to_linear(gamma: f64) -> f64 { // The function is non-bijective so choose the middle of [0, 0.00316227766f]. const MID_INTERVAL: f64 = 0.00316227766 / 2.; if gamma <= 0. { MID_INTERVAL } else { f_exp10(2.5 * (gamma.min(1.) - 1.)) } } #[inline] /// Linear transfer function for Log100Sqrt10 pub(crate) fn log100_sqrt10_to_linearf(gamma: f32) -> f32 { // The function is non-bijective so choose the middle of [0, 0.00316227766f]. const MID_INTERVAL: f32 = 0.00316227766 / 2.; if gamma <= 0. 
{ MID_INTERVAL } else { f_exp10f(2.5 * (gamma.min(1.) - 1.)) } } #[inline] /// Gamma transfer function for Log100Sqrt10 fn log100_sqrt10_from_linear(linear: f64) -> f64 { if linear <= 0.00316227766 { 0.0 } else { 1.0 + f_log10(linear.min(1.)) / 2.5 } } #[inline] /// Gamma transfer function for Log100Sqrt10 fn log100_sqrt10_from_linearf(linear: f32) -> f32 { if linear <= 0.00316227766 { 0.0 } else { 1.0 + f_log10f(linear.min(1.)) / 2.5 } } #[inline] /// Gamma transfer function for Bt.1361 fn bt1361_from_linear(linear: f64) -> f64 { if linear < -0.25 { -0.25 } else if linear < 0.0 { fmla( -0.27482420670236, f_pow(-4.0 * linear, 0.45), 0.02482420670236, ) } else if linear < 0.018053968510807 { linear * 4.5 } else if linear < 1.0 { fmla(1.09929682680944, f_pow(linear, 0.45), -0.09929682680944) } else { 1.0 } } #[inline] /// Gamma transfer function for Bt.1361 fn bt1361_from_linearf(linear: f32) -> f32 { if linear < -0.25 { -0.25 } else if linear < 0.0 { fmla( -0.27482420670236, dirty_powf(-4.0 * linear, 0.45), 0.02482420670236, ) } else if linear < 0.018053968510807 { linear * 4.5 } else if linear < 1.0 { fmla( 1.09929682680944, dirty_powf(linear, 0.45), -0.09929682680944, ) } else { 1.0 } } #[inline] /// Linear transfer function for Bt.1361 pub(crate) fn bt1361_to_linear(gamma: f64) -> f64 { if gamma < -0.25f64 { -0.25f64 } else if gamma < 0.0f64 { f_pow( (gamma - 0.02482420670236f64) / -0.27482420670236f64, 1.0f64 / 0.45f64, ) / -4.0f64 } else if gamma < 4.5 * 0.018053968510807 { gamma / 4.5 } else if gamma < 1.0 { f_pow((gamma + 0.09929682680944) / 1.09929682680944, 1.0 / 0.45) } else { 1.0f64 } } #[inline] /// Linear transfer function for Bt.1361 fn bt1361_to_linearf(gamma: f32) -> f32 { if gamma < -0.25 { -0.25 } else if gamma < 0.0 { dirty_powf((gamma - 0.02482420670236) / -0.27482420670236, 1.0 / 0.45) / -4.0 } else if gamma < 4.5 * 0.018053968510807 { gamma / 4.5 } else if gamma < 1.0 { dirty_powf((gamma + 0.09929682680944) / 1.09929682680944, 1.0 / 0.45) } 
else { 1.0 } } #[inline(always)] /// Pure gamma transfer function for gamma 2.2 fn pure_gamma_function(x: f64, gamma: f64) -> f64 { if x <= 0f64 { 0f64 } else if x >= 1f64 { 1f64 } else { f_pow(x, gamma) } } #[inline(always)] /// Pure gamma transfer function for gamma 2.2 fn pure_gamma_function_f(x: f32, gamma: f32) -> f32 { if x <= 0. { 0. } else { dirty_powf(x, gamma) } } #[inline] pub(crate) fn iec61966_to_linear(gamma: f64) -> f64 { if gamma < -4.5f64 * 0.018053968510807f64 { f_pow( (-gamma + 0.09929682680944f64) / -1.09929682680944f64, 1.0 / 0.45, ) } else if gamma < 4.5f64 * 0.018053968510807f64 { gamma / 4.5 } else { f_pow( (gamma + 0.09929682680944f64) / 1.09929682680944f64, 1.0 / 0.45, ) } } #[inline] fn iec61966_to_linearf(gamma: f32) -> f32 { if gamma < -4.5 * 0.018053968510807 { dirty_powf((-gamma + 0.09929682680944) / -1.09929682680944, 1.0 / 0.45) } else if gamma < 4.5 * 0.018053968510807 { gamma / 4.5 } else { dirty_powf((gamma + 0.09929682680944) / 1.09929682680944, 1.0 / 0.45) } } #[inline] fn iec61966_from_linear(v: f64) -> f64 { if v < -0.018053968510807f64 { fmla(-1.09929682680944f64, f_pow(-v, 0.45), 0.09929682680944f64) } else if v < 0.018053968510807f64 { v * 4.5f64 } else { fmla(1.09929682680944f64, f_pow(v, 0.45), -0.09929682680944f64) } } #[inline] fn iec61966_from_linearf(v: f32) -> f32 { if v < -0.018053968510807 { fmla(-1.09929682680944, dirty_powf(-v, 0.45), 0.09929682680944) } else if v < 0.018053968510807 { v * 4.5 } else { fmla(1.09929682680944, dirty_powf(v, 0.45), -0.09929682680944) } } #[inline] /// Pure gamma transfer function for gamma 2.2 fn gamma2p2_from_linear(linear: f64) -> f64 { pure_gamma_function(linear, 1f64 / 2.2f64) } #[inline] /// Pure gamma transfer function for gamma 2.2 fn gamma2p2_from_linear_f(linear: f32) -> f32 { pure_gamma_function_f(linear, 1. 
/ 2.2) } #[inline] /// Linear transfer function for gamma 2.2 fn gamma2p2_to_linear(gamma: f64) -> f64 { pure_gamma_function(gamma, 2.2f64) } #[inline] /// Linear transfer function for gamma 2.2 fn gamma2p2_to_linear_f(gamma: f32) -> f32 { pure_gamma_function_f(gamma, 2.2) } #[inline] /// Pure gamma transfer function for gamma 2.8 fn gamma2p8_from_linear(linear: f64) -> f64 { pure_gamma_function(linear, 1f64 / 2.8f64) } #[inline] /// Pure gamma transfer function for gamma 2.8 fn gamma2p8_from_linear_f(linear: f32) -> f32 { pure_gamma_function_f(linear, 1. / 2.8) } #[inline] /// Linear transfer function for gamma 2.8 fn gamma2p8_to_linear(gamma: f64) -> f64 { pure_gamma_function(gamma, 2.8f64) } #[inline] /// Linear transfer function for gamma 2.8 fn gamma2p8_to_linear_f(gamma: f32) -> f32 { pure_gamma_function_f(gamma, 2.8) } #[inline] /// Linear transfer function for PQ pub(crate) fn pq_to_linear(gamma: f64) -> f64 { if gamma > 0.0 { let pow_gamma = f_pow(gamma, 1.0 / 78.84375); let num = (pow_gamma - 0.8359375).max(0.); let den = mlaf(18.8515625, -18.6875, pow_gamma).max(f64::MIN); f_pow(num / den, 1.0 / 0.1593017578125) } else { 0.0 } } #[inline] /// Linear transfer function for PQ pub(crate) fn pq_to_linearf(gamma: f32) -> f32 { if gamma > 0.0 { let pow_gamma = f_powf(gamma, 1.0 / 78.84375); let num = (pow_gamma - 0.8359375).max(0.); let den = mlaf(18.8515625, -18.6875, pow_gamma).max(f32::MIN); f_powf(num / den, 1.0 / 0.1593017578125) } else { 0.0 } } #[inline] /// Gamma transfer function for PQ fn pq_from_linear(linear: f64) -> f64 { if linear > 0.0 { let linear = linear.clamp(0., 1.); let pow_linear = f_pow(linear, 0.1593017578125); let num = fmla(0.1640625, pow_linear, -0.1640625); let den = mlaf(1.0, 18.6875, pow_linear); f_pow(1.0 + num / den, 78.84375) } else { 0.0 } } #[inline] /// Gamma transfer function for PQ pub(crate) fn pq_from_linearf(linear: f32) -> f32 { if linear > 0.0 { let linear = linear.max(0.); let pow_linear = f_powf(linear, 
0.1593017578125); let num = fmla(0.1640625, pow_linear, -0.1640625); let den = mlaf(1.0, 18.6875, pow_linear); f_powf(1.0 + num / den, 78.84375) } else { 0.0 } } #[inline] /// Linear transfer function for HLG pub(crate) fn hlg_to_linear(gamma: f64) -> f64 { if gamma < 0.0 { return 0.0; } if gamma <= 0.5 { f_pow((gamma * gamma) * (1.0 / 3.0), 1.2) } else { f_pow( (f_exp((gamma - 0.55991073) / 0.17883277) + 0.28466892) / 12.0, 1.2, ) } } #[inline] /// Linear transfer function for HLG pub(crate) fn hlg_to_linearf(gamma: f32) -> f32 { if gamma < 0.0 { return 0.0; } if gamma <= 0.5 { f_powf((gamma * gamma) * (1.0 / 3.0), 1.2) } else { f_powf( (f_expf((gamma - 0.55991073) / 0.17883277) + 0.28466892) / 12.0, 1.2, ) } } #[inline] /// Gamma transfer function for HLG fn hlg_from_linear(linear: f64) -> f64 { // Scale from extended SDR range to [0.0, 1.0]. let mut linear = linear.clamp(0., 1.); // Inverse OOTF followed by OETF see Table 5 and Note 5i in ITU-R BT.2100-2 page 7-8. linear = f_pow(linear, 1.0 / 1.2); if linear < 0.0 { 0.0 } else if linear <= (1.0 / 12.0) { (3.0 * linear).sqrt() } else { fmla( 0.17883277, f_log(fmla(12.0, linear, -0.28466892)), 0.55991073, ) } } #[inline] /// Gamma transfer function for HLG fn hlg_from_linearf(linear: f32) -> f32 { // Scale from extended SDR range to [0.0, 1.0]. let mut linear = linear.max(0.); // Inverse OOTF followed by OETF see Table 5 and Note 5i in ITU-R BT.2100-2 page 7-8. linear = f_powf(linear, 1.0 / 1.2); if linear < 0.0 { 0.0 } else if linear <= (1.0 / 12.0) { (3.0 * linear).sqrt() } else { 0.17883277 * f_logf(12.0 * linear - 0.28466892) + 0.55991073 } } #[inline] fn trc_linear(v: f64) -> f64 { v.min(1.).max(0.) 
} impl TransferCharacteristics { #[inline] pub fn linearize(self, v: f64) -> f64 { match self { TransferCharacteristics::Reserved => 0f64, TransferCharacteristics::Bt709 | TransferCharacteristics::Bt601 | TransferCharacteristics::Bt202010bit | TransferCharacteristics::Bt202012bit => rec709_to_linear(v), TransferCharacteristics::Unspecified => 0f64, TransferCharacteristics::Bt470M => gamma2p2_to_linear(v), TransferCharacteristics::Bt470Bg => gamma2p8_to_linear(v), TransferCharacteristics::Smpte240 => smpte240_to_linear(v), TransferCharacteristics::Linear => trc_linear(v), TransferCharacteristics::Log100 => log100_to_linear(v), TransferCharacteristics::Log100sqrt10 => log100_sqrt10_to_linear(v), TransferCharacteristics::Iec61966 => iec61966_to_linear(v), TransferCharacteristics::Bt1361 => bt1361_to_linear(v), TransferCharacteristics::Srgb => srgb_to_linear(v), TransferCharacteristics::Smpte2084 => pq_to_linear(v), TransferCharacteristics::Smpte428 => smpte428_to_linear(v), TransferCharacteristics::Hlg => hlg_to_linear(v), } } #[inline] pub fn gamma(self, v: f64) -> f64 { match self { TransferCharacteristics::Reserved => 0f64, TransferCharacteristics::Bt709 | TransferCharacteristics::Bt601 | TransferCharacteristics::Bt202010bit | TransferCharacteristics::Bt202012bit => rec709_from_linear(v), TransferCharacteristics::Unspecified => 0f64, TransferCharacteristics::Bt470M => gamma2p2_from_linear(v), TransferCharacteristics::Bt470Bg => gamma2p8_from_linear(v), TransferCharacteristics::Smpte240 => smpte240_from_linear(v), TransferCharacteristics::Linear => trc_linear(v), TransferCharacteristics::Log100 => log100_from_linear(v), TransferCharacteristics::Log100sqrt10 => log100_sqrt10_from_linear(v), TransferCharacteristics::Iec61966 => iec61966_from_linear(v), TransferCharacteristics::Bt1361 => bt1361_from_linear(v), TransferCharacteristics::Srgb => srgb_from_linear(v), TransferCharacteristics::Smpte2084 => pq_from_linear(v), TransferCharacteristics::Smpte428 => 
smpte428_from_linear(v), TransferCharacteristics::Hlg => hlg_from_linear(v), } } pub(crate) fn extended_gamma_tristimulus(self) -> fn(Rgb) -> Rgb { match self { TransferCharacteristics::Reserved => |x| Rgb::new(x.r, x.g, x.b), TransferCharacteristics::Bt709 | TransferCharacteristics::Bt601 | TransferCharacteristics::Bt202010bit | TransferCharacteristics::Bt202012bit => |x| { Rgb::new( rec709_from_linearf_extended(x.r), rec709_from_linearf_extended(x.g), rec709_from_linearf_extended(x.b), ) }, TransferCharacteristics::Unspecified => |x| Rgb::new(x.r, x.g, x.b), TransferCharacteristics::Bt470M => |x| { Rgb::new( gamma2p2_from_linear_f(x.r), gamma2p2_from_linear_f(x.g), gamma2p2_from_linear_f(x.b), ) }, TransferCharacteristics::Bt470Bg => |x| { Rgb::new( gamma2p8_from_linear_f(x.r), gamma2p8_from_linear_f(x.g), gamma2p8_from_linear_f(x.b), ) }, TransferCharacteristics::Smpte240 => |x| { Rgb::new( smpte240_from_linearf_extended(x.r), smpte240_from_linearf_extended(x.g), smpte240_from_linearf_extended(x.b), ) }, TransferCharacteristics::Linear => |x| Rgb::new(x.r, x.g, x.b), TransferCharacteristics::Log100 => |x| { Rgb::new( log100_from_linearf(x.r), log100_from_linearf(x.g), log100_from_linearf(x.b), ) }, TransferCharacteristics::Log100sqrt10 => |x| { Rgb::new( log100_sqrt10_from_linearf(x.r), log100_sqrt10_from_linearf(x.g), log100_sqrt10_from_linearf(x.b), ) }, TransferCharacteristics::Iec61966 => |x| { Rgb::new( iec61966_from_linearf(x.r), iec61966_from_linearf(x.g), iec61966_from_linearf(x.b), ) }, TransferCharacteristics::Bt1361 => |x| { Rgb::new( bt1361_from_linearf(x.r), bt1361_from_linearf(x.g), bt1361_from_linearf(x.b), ) }, TransferCharacteristics::Srgb => |x| { Rgb::new( srgb_from_linear_extended(x.r), srgb_from_linear_extended(x.g), srgb_from_linear_extended(x.b), ) }, TransferCharacteristics::Smpte2084 => |x| { Rgb::new( pq_from_linearf(x.r), pq_from_linearf(x.g), pq_from_linearf(x.b), ) }, TransferCharacteristics::Smpte428 => |x| { Rgb::new( 
smpte428_from_linearf(x.r), smpte428_from_linearf(x.g), smpte428_from_linearf(x.b), ) }, TransferCharacteristics::Hlg => |x| { Rgb::new( hlg_from_linearf(x.r), hlg_from_linearf(x.g), hlg_from_linearf(x.b), ) }, } } pub(crate) fn extended_gamma_single(self) -> fn(f32) -> f32 { match self { TransferCharacteristics::Reserved => |x| x, TransferCharacteristics::Bt709 | TransferCharacteristics::Bt601 | TransferCharacteristics::Bt202010bit | TransferCharacteristics::Bt202012bit => |x| rec709_from_linearf_extended(x), TransferCharacteristics::Unspecified => |x| x, TransferCharacteristics::Bt470M => |x| gamma2p2_from_linear_f(x), TransferCharacteristics::Bt470Bg => |x| gamma2p8_from_linear_f(x), TransferCharacteristics::Smpte240 => |x| smpte240_from_linearf_extended(x), TransferCharacteristics::Linear => |x| x, TransferCharacteristics::Log100 => |x| log100_from_linearf(x), TransferCharacteristics::Log100sqrt10 => |x| log100_sqrt10_from_linearf(x), TransferCharacteristics::Iec61966 => |x| iec61966_from_linearf(x), TransferCharacteristics::Bt1361 => |x| bt1361_from_linearf(x), TransferCharacteristics::Srgb => |x| srgb_from_linear_extended(x), TransferCharacteristics::Smpte2084 => |x| pq_from_linearf(x), TransferCharacteristics::Smpte428 => |x| smpte428_from_linearf(x), TransferCharacteristics::Hlg => |x| hlg_from_linearf(x), } } pub(crate) fn extended_linear_tristimulus(self) -> fn(Rgb) -> Rgb { match self { TransferCharacteristics::Reserved => |x| Rgb::new(x.r, x.g, x.b), TransferCharacteristics::Bt709 | TransferCharacteristics::Bt601 | TransferCharacteristics::Bt202010bit | TransferCharacteristics::Bt202012bit => |x| { Rgb::new( rec709_to_linearf_extended(x.r), rec709_to_linearf_extended(x.g), rec709_to_linearf_extended(x.b), ) }, TransferCharacteristics::Unspecified => |x| Rgb::new(x.r, x.g, x.b), TransferCharacteristics::Bt470M => |x| { Rgb::new( gamma2p2_to_linear_f(x.r), gamma2p2_to_linear_f(x.g), gamma2p2_to_linear_f(x.b), ) }, TransferCharacteristics::Bt470Bg => |x| { 
Rgb::new( gamma2p8_to_linear_f(x.r), gamma2p8_to_linear_f(x.g), gamma2p8_to_linear_f(x.b), ) }, TransferCharacteristics::Smpte240 => |x| { Rgb::new( smpte240_to_linearf_extended(x.r), smpte240_to_linearf_extended(x.g), smpte240_to_linearf_extended(x.b), ) }, TransferCharacteristics::Linear => |x| Rgb::new(x.r, x.g, x.b), TransferCharacteristics::Log100 => |x| { Rgb::new( log100_to_linearf(x.r), log100_to_linearf(x.g), log100_to_linearf(x.b), ) }, TransferCharacteristics::Log100sqrt10 => |x| { Rgb::new( log100_sqrt10_to_linearf(x.r), log100_sqrt10_to_linearf(x.g), log100_sqrt10_to_linearf(x.b), ) }, TransferCharacteristics::Iec61966 => |x| { Rgb::new( iec61966_to_linearf(x.r), iec61966_to_linearf(x.g), iec61966_to_linearf(x.b), ) }, TransferCharacteristics::Bt1361 => |x| { Rgb::new( bt1361_to_linearf(x.r), bt1361_to_linearf(x.g), bt1361_to_linearf(x.b), ) }, TransferCharacteristics::Srgb => |x| { Rgb::new( srgb_to_linearf_extended(x.r), srgb_to_linearf_extended(x.g), srgb_to_linearf_extended(x.b), ) }, TransferCharacteristics::Smpte2084 => { |x| Rgb::new(pq_to_linearf(x.r), pq_to_linearf(x.g), pq_to_linearf(x.b)) } TransferCharacteristics::Smpte428 => |x| { Rgb::new( smpte428_to_linearf_extended(x.r), smpte428_to_linearf_extended(x.g), smpte428_to_linearf_extended(x.b), ) }, TransferCharacteristics::Hlg => |x| { Rgb::new( hlg_to_linearf(x.r), hlg_to_linearf(x.g), hlg_to_linearf(x.b), ) }, } } pub(crate) fn extended_linear_single(self) -> fn(f32) -> f32 { match self { TransferCharacteristics::Reserved => |x| x, TransferCharacteristics::Bt709 | TransferCharacteristics::Bt601 | TransferCharacteristics::Bt202010bit | TransferCharacteristics::Bt202012bit => |x| rec709_to_linearf_extended(x), TransferCharacteristics::Unspecified => |x| x, TransferCharacteristics::Bt470M => |x| gamma2p2_to_linear_f(x), TransferCharacteristics::Bt470Bg => |x| gamma2p8_to_linear_f(x), TransferCharacteristics::Smpte240 => |x| smpte240_to_linearf_extended(x), TransferCharacteristics::Linear => 
|x| x, TransferCharacteristics::Log100 => |x| log100_to_linearf(x), TransferCharacteristics::Log100sqrt10 => |x| log100_sqrt10_to_linearf(x), TransferCharacteristics::Iec61966 => |x| iec61966_to_linearf(x), TransferCharacteristics::Bt1361 => |x| bt1361_to_linearf(x), TransferCharacteristics::Srgb => |x| srgb_to_linearf_extended(x), TransferCharacteristics::Smpte2084 => |x| pq_to_linearf(x), TransferCharacteristics::Smpte428 => |x| smpte428_to_linearf_extended(x), TransferCharacteristics::Hlg => |x| hlg_to_linearf(x), } } pub(crate) fn make_linear_table< T: PointeeSizeExpressible, const N: usize, const BIT_DEPTH: usize, >( &self, ) -> Box<[f32; N]> { let mut gamma_table = Box::new([0f32; N]); let max_value = if T::FINITE { (1 << BIT_DEPTH) - 1 } else { T::NOT_FINITE_LINEAR_TABLE_SIZE - 1 }; let cap_values = if T::FINITE { (1u32 << BIT_DEPTH) as usize } else { T::NOT_FINITE_LINEAR_TABLE_SIZE }; assert!(cap_values <= N, "Invalid lut table construction"); let scale_value = 1f64 / max_value as f64; for (i, g) in gamma_table.iter_mut().enumerate().take(cap_values) { *g = self.linearize(i as f64 * scale_value) as f32; } gamma_table } pub(crate) fn make_gamma_table< T: Default + Copy + 'static + PointeeSizeExpressible, const BUCKET: usize, const N: usize, >( &self, bit_depth: usize, ) -> Box<[T; BUCKET]> where f32: AsPrimitive, { let mut table = Box::new([T::default(); BUCKET]); let max_range = 1f64 / (N - 1) as f64; let max_value = ((1 << bit_depth) - 1) as f64; if T::FINITE { for (v, output) in table.iter_mut().take(N).enumerate() { *output = ((self.gamma(v as f64 * max_range) * max_value) as f32) .round() .as_(); } } else { for (v, output) in table.iter_mut().take(N).enumerate() { *output = (self.gamma(v as f64 * max_range) as f32).as_(); } } table } } #[cfg(test)] mod tests { use super::*; #[test] fn srgb_test() { let srgb_0 = srgb_to_linear(0.5); let srgb_1 = srgb_from_linear(srgb_0); assert!((0.5 - srgb_1).abs() < 1e-9f64); } #[test] fn log100_sqrt10_test() { let 
srgb_0 = log100_sqrt10_to_linear(0.5); let srgb_1 = log100_sqrt10_from_linear(srgb_0); assert_eq!(0.5, srgb_1); } #[test] fn log100_test() { let srgb_0 = log100_to_linear(0.5); let srgb_1 = log100_from_linear(srgb_0); assert_eq!(0.5, srgb_1); } #[test] fn iec61966_test() { let srgb_0 = iec61966_to_linear(0.5); let srgb_1 = iec61966_from_linear(srgb_0); assert!((0.5 - srgb_1).abs() < 1e-9f64); } #[test] fn smpte240_test() { let srgb_0 = smpte240_to_linear(0.5); let srgb_1 = smpte240_from_linear(srgb_0); assert!((0.5 - srgb_1).abs() < 1e-9f64); } #[test] fn smpte428_test() { let srgb_0 = smpte428_to_linear(0.5); let srgb_1 = smpte428_from_linear(srgb_0); assert!((0.5 - srgb_1).abs() < 1e-9f64); } #[test] fn rec709_test() { let srgb_0 = rec709_to_linear(0.5); let srgb_1 = rec709_from_linear(srgb_0); assert!((0.5 - srgb_1).abs() < 1e-9f64); } #[test] fn rec709f_test() { let srgb_0 = rec709_to_linearf_extended(0.5); let srgb_1 = rec709_from_linearf_extended(srgb_0); assert!((0.5 - srgb_1).abs() < 1e-5f32); } #[test] fn srgbf_test() { let srgb_0 = srgb_to_linearf_extended(0.5); let srgb_1 = srgb_from_linear_extended(srgb_0); assert!((0.5 - srgb_1).abs() < 1e-5f32); } #[test] fn hlg_test() { let z0 = hlg_to_linear(0.5); let z1 = hlg_from_linear(z0); assert!((0.5 - z1).abs() < 1e-5f64); } #[test] fn pq_test() { let z0 = pq_to_linear(0.5); let z1 = pq_from_linear(z0); assert!((0.5 - z1).abs() < 1e-5f64); } #[test] fn pqf_test() { let z0 = pq_to_linearf(0.5); let z1 = pq_from_linearf(z0); assert!((0.5 - z1).abs() < 1e-5f32); } #[test] fn iec_test() { let z0 = iec61966_to_linear(0.5); let z1 = iec61966_from_linear(z0); assert!((0.5 - z1).abs() < 1e-5f64); } #[test] fn bt1361_test() { let z0 = bt1361_to_linear(0.5); let z1 = bt1361_from_linear(z0); assert!((0.5 - z1).abs() < 1e-5f64); } } moxcms-0.7.7/src/gamut.rs000064400000000000000000000054711046102023000134260ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. 
* // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ use crate::Rgb; #[inline] fn filmlike_clip_rgb_tone(r: &mut f32, g: &mut f32, b: &mut f32, l: f32) { let new_r = r.min(l); let new_b = b.min(l); let new_g = new_b + ((new_r - new_b) * (*g - *b) / (*r - *b)); *r = new_r; *g = new_g; *b = new_b; } /// Soft clipping out-of-bounds values in S-curve /// /// Works only on highlights, negative values are skipped #[inline] pub fn filmlike_clip(rgb: Rgb) -> Rgb { const L: f32 = 1.; let mut rgb = rgb; if rgb.r >= rgb.g { if rgb.g > rgb.b { filmlike_clip_rgb_tone(&mut rgb.r, &mut rgb.g, &mut rgb.b, L); } else if rgb.b > rgb.r { filmlike_clip_rgb_tone(&mut rgb.b, &mut rgb.r, &mut rgb.g, L); } else if rgb.b > rgb.g { filmlike_clip_rgb_tone(&mut rgb.r, &mut rgb.b, &mut rgb.g, L); } else { Rgb::new(rgb.r.min(L), rgb.g.min(L), rgb.g); } } else if rgb.r >= rgb.b { filmlike_clip_rgb_tone(&mut rgb.g, &mut rgb.r, &mut rgb.b, L); } else if rgb.b > rgb.g { filmlike_clip_rgb_tone(&mut rgb.b, &mut rgb.g, &mut rgb.r, L); } else { filmlike_clip_rgb_tone(&mut rgb.g, &mut rgb.b, &mut rgb.r, L); } rgb } moxcms-0.7.7/src/helpers.rs000064400000000000000000000176621046102023000137600ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. 
* // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ use crate::matan::{ does_curve_have_discontinuity, is_curve_ascending, is_curve_degenerated, is_curve_descending, is_curve_linear8, is_curve_linear16, is_curve_monotonic, }; use crate::reader::{ s15_fixed16_number_to_double, uint8_number_to_float_fast, uint16_number_to_float_fast, }; use crate::{CmsError, LutStore, Matrix3d, ToneReprCurve, Vector3d}; impl LutStore { pub fn to_clut_f32(&self) -> Vec { match self { LutStore::Store8(store) => store .iter() .map(|x| uint8_number_to_float_fast(*x)) .collect(), LutStore::Store16(store) => store .iter() .map(|x| uint16_number_to_float_fast(*x as u32)) .collect(), } } pub(crate) fn is_degenerated(&self, entries: usize, channel: usize) -> bool { let start = entries * channel; let end = start + entries; match &self { LutStore::Store8(v) => is_curve_degenerated(&v[start..end]), LutStore::Store16(v) => is_curve_degenerated(&v[start..end]), } } pub(crate) fn is_monotonic(&self, entries: usize, channel: usize) -> bool { let start = entries * channel; let end = start + entries; match &self { LutStore::Store8(v) => is_curve_monotonic(&v[start..end]), LutStore::Store16(v) => is_curve_monotonic(&v[start..end]), } } pub(crate) fn 
have_discontinuities(&self, entries: usize, channel: usize) -> bool { let start = entries * channel; let end = start + entries; match &self { LutStore::Store8(v) => does_curve_have_discontinuity(&v[start..end]), LutStore::Store16(v) => does_curve_have_discontinuity(&v[start..end]), } } #[allow(dead_code)] pub(crate) fn is_linear(&self, entries: usize, channel: usize) -> bool { let start = entries * channel; let end = start + entries; match &self { LutStore::Store8(v) => is_curve_linear8(&v[start..end]), LutStore::Store16(v) => is_curve_linear16(&v[start..end]), } } #[allow(dead_code)] pub(crate) fn is_descending(&self, entries: usize, channel: usize) -> bool { let start = entries * channel; let end = start + entries; match &self { LutStore::Store8(v) => is_curve_descending(&v[start..end]), LutStore::Store16(v) => is_curve_descending(&v[start..end]), } } #[allow(dead_code)] pub(crate) fn is_ascending(&self, entries: usize, channel: usize) -> bool { let start = entries * channel; let end = start + entries; match &self { LutStore::Store8(v) => is_curve_ascending(&v[start..end]), LutStore::Store16(v) => is_curve_ascending(&v[start..end]), } } } impl ToneReprCurve { pub(crate) fn is_linear(&self) -> bool { match &self { ToneReprCurve::Lut(lut) => { if lut.is_empty() { return true; } if lut.len() == 1 { let gamma = 1. / crate::trc::u8_fixed_8number_to_float(lut[0]); if (gamma - 1.).abs() < 1e-4 { return true; } } is_curve_linear16(lut) } ToneReprCurve::Parametric(parametric) => { if parametric.is_empty() { return true; } if parametric.len() == 1 && parametric[0] == 1. 
{ return true; } false } } } pub(crate) fn is_monotonic(&self) -> bool { match &self { ToneReprCurve::Lut(lut) => is_curve_monotonic(lut), ToneReprCurve::Parametric(_) => true, } } pub(crate) fn is_degenerated(&self) -> bool { match &self { ToneReprCurve::Lut(lut) => is_curve_degenerated(lut), ToneReprCurve::Parametric(_) => false, } } pub(crate) fn have_discontinuities(&self) -> bool { match &self { ToneReprCurve::Lut(lut) => does_curve_have_discontinuity(lut), ToneReprCurve::Parametric(_) => false, } } } pub(crate) fn read_matrix_3d(arr: &[u8]) -> Result { if arr.len() < 36 { return Err(CmsError::InvalidProfile); } let m_tag = &arr[..36]; let e00 = i32::from_be_bytes([m_tag[0], m_tag[1], m_tag[2], m_tag[3]]); let e01 = i32::from_be_bytes([m_tag[4], m_tag[5], m_tag[6], m_tag[7]]); let e02 = i32::from_be_bytes([m_tag[8], m_tag[9], m_tag[10], m_tag[11]]); let e10 = i32::from_be_bytes([m_tag[12], m_tag[13], m_tag[14], m_tag[15]]); let e11 = i32::from_be_bytes([m_tag[16], m_tag[17], m_tag[18], m_tag[19]]); let e12 = i32::from_be_bytes([m_tag[20], m_tag[21], m_tag[22], m_tag[23]]); let e20 = i32::from_be_bytes([m_tag[24], m_tag[25], m_tag[26], m_tag[27]]); let e21 = i32::from_be_bytes([m_tag[28], m_tag[29], m_tag[30], m_tag[31]]); let e22 = i32::from_be_bytes([m_tag[32], m_tag[33], m_tag[34], m_tag[35]]); Ok(Matrix3d { v: [ [ s15_fixed16_number_to_double(e00), s15_fixed16_number_to_double(e01), s15_fixed16_number_to_double(e02), ], [ s15_fixed16_number_to_double(e10), s15_fixed16_number_to_double(e11), s15_fixed16_number_to_double(e12), ], [ s15_fixed16_number_to_double(e20), s15_fixed16_number_to_double(e21), s15_fixed16_number_to_double(e22), ], ], }) } pub(crate) fn read_vector_3d(arr: &[u8]) -> Result { if arr.len() < 12 { return Err(CmsError::InvalidProfile); } let m_tag = &arr[..12]; let b0 = i32::from_be_bytes([m_tag[0], m_tag[1], m_tag[2], m_tag[3]]); let b1 = i32::from_be_bytes([m_tag[4], m_tag[5], m_tag[6], m_tag[7]]); let b2 = i32::from_be_bytes([m_tag[8], 
m_tag[9], m_tag[10], m_tag[11]]); Ok(Vector3d { v: [ s15_fixed16_number_to_double(b0), s15_fixed16_number_to_double(b1), s15_fixed16_number_to_double(b2), ], }) } moxcms-0.7.7/src/ictcp.rs000064400000000000000000000143461046102023000134140ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ use crate::gamma::{pq_from_linearf, pq_to_linearf}; use crate::{Matrix3f, Rgb, Vector3f, Xyz}; const CROSSTALK: Matrix3f = Matrix3f { v: [[0.92, 0.04, 0.04], [0.04, 0.92, 0.04], [0.04, 0.04, 0.92]], }; const HPE_LMS: Matrix3f = Matrix3f { v: [ [0.4002, 0.7076, -0.0808], [-0.2263, 1.1653, 0.0457], [0f32, 0f32, 0.9182], ], }; const XYZ_TO_LMS: Matrix3f = CROSSTALK.mat_mul_const(HPE_LMS); const LMS_TO_XYZ: Matrix3f = XYZ_TO_LMS.inverse(); const L_LMS_TO_ICTCP: Matrix3f = Matrix3f { v: [ [2048. / 4096., 2048. / 4096., 0.], [6610. / 4096., -13613. / 4096., 7003. / 4096.], [17933. / 4096., -17390. / 4096., -543. / 4096.], ], }; const ICTCP_TO_L_LMS: Matrix3f = L_LMS_TO_ICTCP.inverse(); #[derive(Copy, Clone, Default, PartialOrd, PartialEq)] pub struct ICtCp { /// Lightness pub i: f32, /// Tritan pub ct: f32, /// Protan pub cp: f32, } impl ICtCp { #[inline] pub const fn new(i: f32, ct: f32, cp: f32) -> ICtCp { ICtCp { i, ct, cp } } /// Converts XYZ D65 to ICtCp #[inline] pub fn from_xyz(xyz: Xyz) -> ICtCp { let lms = XYZ_TO_LMS.mul_vector(xyz.to_vector()); let lin_l = pq_from_linearf(lms.v[0]); let lin_m = pq_from_linearf(lms.v[1]); let lin_s = pq_from_linearf(lms.v[2]); let ictcp = L_LMS_TO_ICTCP.mul_vector(Vector3f { v: [lin_l, lin_m, lin_s], }); ICtCp { i: ictcp.v[0], ct: ictcp.v[1], cp: ictcp.v[2], } } /// Converts to [ICtCp] from linear light [Rgb] /// /// Precompute forward matrix by [ICtCp::prepare_to_lms]. /// D65 white point is assumed. 
#[inline] pub fn from_linear_rgb(rgb: Rgb, matrix: Matrix3f) -> ICtCp { let lms = matrix.mul_vector(rgb.to_vector()); let lin_l = pq_from_linearf(lms.v[0]); let lin_m = pq_from_linearf(lms.v[1]); let lin_s = pq_from_linearf(lms.v[2]); let ictcp = L_LMS_TO_ICTCP.mul_vector(Vector3f { v: [lin_l, lin_m, lin_s], }); ICtCp { i: ictcp.v[0], ct: ictcp.v[1], cp: ictcp.v[2], } } /// Converts [ICtCp] to [Rgb] /// /// Precompute forward matrix by [ICtCp::prepare_to_lms] and then inverse it #[inline] pub fn to_linear_rgb(&self, matrix: Matrix3f) -> Rgb { let l_lms = ICTCP_TO_L_LMS.mul_vector(Vector3f { v: [self.i, self.ct, self.cp], }); let gamma_l = pq_to_linearf(l_lms.v[0]); let gamma_m = pq_to_linearf(l_lms.v[1]); let gamma_s = pq_to_linearf(l_lms.v[2]); let lms = matrix.mul_vector(Vector3f { v: [gamma_l, gamma_m, gamma_s], }); Rgb { r: lms.v[0], g: lms.v[1], b: lms.v[2], } } /// Converts ICtCp to XYZ D65 #[inline] pub fn to_xyz(&self) -> Xyz { let l_lms = ICTCP_TO_L_LMS.mul_vector(Vector3f { v: [self.i, self.ct, self.cp], }); let gamma_l = pq_to_linearf(l_lms.v[0]); let gamma_m = pq_to_linearf(l_lms.v[1]); let gamma_s = pq_to_linearf(l_lms.v[2]); let lms = LMS_TO_XYZ.mul_vector(Vector3f { v: [gamma_l, gamma_m, gamma_s], }); Xyz { x: lms.v[0], y: lms.v[1], z: lms.v[2], } } /// Prepares RGB->LMS matrix #[inline] pub const fn prepare_to_lms(rgb_to_xyz: Matrix3f) -> Matrix3f { XYZ_TO_LMS.mat_mul_const(rgb_to_xyz) } } #[cfg(test)] mod tests { use super::*; #[test] fn check_roundtrip() { let xyz = Xyz::new(0.5, 0.4, 0.3); let ictcp = ICtCp::from_xyz(xyz); let r_xyz = ictcp.to_xyz(); assert!((r_xyz.x - xyz.x).abs() < 1e-4); assert!((r_xyz.y - xyz.y).abs() < 1e-4); assert!((r_xyz.z - xyz.z).abs() < 1e-4); } #[test] fn check_roundtrip_rgb() { let rgb_to_xyz = Matrix3f { v: [ [0.67345345, 0.165661961, 0.125096574], [0.27903071, 0.675341845, 0.045627553], [-0.00193137419, 0.0299795717, 0.797140181], ], }; let prepared_matrix = ICtCp::prepare_to_lms(rgb_to_xyz); let inversed_matrix = 
prepared_matrix.inverse(); let rgb = Rgb::new(0.5, 0.4, 0.3); let ictcp = ICtCp::from_linear_rgb(rgb, prepared_matrix); let r_xyz = ictcp.to_linear_rgb(inversed_matrix); assert!((r_xyz.r - rgb.r).abs() < 1e-4); assert!((r_xyz.g - rgb.g).abs() < 1e-4); assert!((r_xyz.b - rgb.b).abs() < 1e-4); } } moxcms-0.7.7/src/jzazbz.rs000064400000000000000000000263271046102023000136260ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ use crate::Xyz; use crate::jzczhz::Jzczhz; use crate::mlaf::mlaf; use num_traits::Pow; use pxfm::{dirty_powf, f_cbrtf, f_powf}; use std::ops::{ Add, AddAssign, Div, DivAssign, Index, IndexMut, Mul, MulAssign, Neg, Sub, SubAssign, }; #[inline] fn perceptual_quantizer(x: f32) -> f32 { if x <= 0. { return 0.; } let xx = dirty_powf(x * 1e-4, 0.1593017578125); let rs = dirty_powf( mlaf(0.8359375, 18.8515625, xx) / mlaf(1., 18.6875, xx), 134.034375, ); if rs.is_nan() { return 0.; } rs } #[inline] fn perceptual_quantizer_inverse(x: f32) -> f32 { if x <= 0. { return 0.; } let xx = dirty_powf(x, 7.460772656268214e-03); let rs = 1e4 * dirty_powf( (0.8359375 - xx) / mlaf(-18.8515625, 18.6875, xx), 6.277394636015326, ); if rs.is_nan() { return 0.; } rs } #[repr(C)] #[derive(Debug, Copy, Clone, PartialOrd, PartialEq, Default)] /// Represents Jzazbz pub struct Jzazbz { /// Jz(lightness) generally expects to be between `0.0..1.0`. pub jz: f32, /// Az generally expects to be between `-0.5..0.5`. pub az: f32, /// Bz generally expects to be between `-0.5..0.5`. pub bz: f32, } impl Jzazbz { /// Constructs new instance #[inline] pub fn new(jz: f32, az: f32, bz: f32) -> Jzazbz { Jzazbz { jz, az, bz } } /// Creates new [Jzazbz] from CIE [Xyz]. /// /// JzAzBz is defined in D65 white point, adapt XYZ if needed first. #[inline] pub fn from_xyz(xyz: Xyz) -> Jzazbz { Self::from_xyz_with_display_luminance(xyz, 200.) 
} /// Creates new [Jzazbz] from CIE [Xyz]. /// /// JzAzBz is defined in D65 white point, adapt XYZ if needed first. #[inline] pub fn from_xyz_with_display_luminance(xyz: Xyz, display_luminance: f32) -> Jzazbz { let abs_xyz = xyz * display_luminance; let lp = perceptual_quantizer(mlaf( mlaf(0.674207838 * abs_xyz.x, 0.382799340, abs_xyz.y), -0.047570458, abs_xyz.z, )); let mp = perceptual_quantizer(mlaf( mlaf(0.149284160 * abs_xyz.x, 0.739628340, abs_xyz.y), 0.083327300, abs_xyz.z, )); let sp = perceptual_quantizer(mlaf( mlaf(0.070941080 * abs_xyz.x, 0.174768000, abs_xyz.y), 0.670970020, abs_xyz.z, )); let iz = 0.5 * (lp + mp); let az = mlaf(mlaf(3.524000 * lp, -4.066708, mp), 0.542708, sp); let bz = mlaf(mlaf(0.199076 * lp, 1.096799, mp), -1.295875, sp); let jz = (0.44 * iz) / mlaf(1., -0.56, iz) - 1.6295499532821566e-11; Jzazbz::new(jz, az, bz) } /// Converts [Jzazbz] to [Xyz] D65 #[inline] pub fn to_xyz(&self, display_luminance: f32) -> Xyz { let jz = self.jz + 1.6295499532821566e-11; let iz = jz / mlaf(0.44f32, 0.56, jz); let l = perceptual_quantizer_inverse(mlaf( mlaf(iz, 1.386050432715393e-1, self.az), 5.804731615611869e-2, self.bz, )); let m = perceptual_quantizer_inverse(mlaf( mlaf(iz, -1.386050432715393e-1, self.az), -5.804731615611891e-2, self.bz, )); let s = perceptual_quantizer_inverse(mlaf( mlaf(iz, -9.601924202631895e-2, self.az), -8.118918960560390e-1, self.bz, )); let x = mlaf( mlaf(1.661373055774069e+00 * l, -9.145230923250668e-01, m), 2.313620767186147e-01, s, ); let y = mlaf( mlaf(-3.250758740427037e-01 * l, 1.571847038366936e+00, m), -2.182538318672940e-01, s, ); let z = mlaf( mlaf(-9.098281098284756e-02 * l, -3.127282905230740e-01, m), 1.522766561305260e+00, s, ); let rel_luminance = 1f32 / display_luminance; Xyz::new(x, y, z) * rel_luminance } /// Converts into *Jzczhz* #[inline] pub fn to_jzczhz(&self) -> Jzczhz { Jzczhz::from_jzazbz(*self) } #[inline] pub fn euclidean_distance(&self, other: Self) -> f32 { let djz = self.jz - other.jz; let daz 
= self.az - other.az; let dbz = self.bz - other.bz; (djz * djz + daz * daz + dbz * dbz).sqrt() } #[inline] pub fn taxicab_distance(&self, other: Self) -> f32 { let djz = self.jz - other.jz; let daz = self.az - other.az; let dbz = self.bz - other.bz; djz.abs() + daz.abs() + dbz.abs() } } impl Index for Jzazbz { type Output = f32; #[inline] fn index(&self, index: usize) -> &f32 { match index { 0 => &self.jz, 1 => &self.az, 2 => &self.bz, _ => panic!("Index out of bounds for Jzazbz"), } } } impl IndexMut for Jzazbz { #[inline] fn index_mut(&mut self, index: usize) -> &mut f32 { match index { 0 => &mut self.jz, 1 => &mut self.az, 2 => &mut self.bz, _ => panic!("Index out of bounds for Jzazbz"), } } } impl Add for Jzazbz { type Output = Jzazbz; #[inline] fn add(self, rhs: f32) -> Self::Output { Jzazbz::new(self.jz + rhs, self.az + rhs, self.bz + rhs) } } impl Sub for Jzazbz { type Output = Jzazbz; #[inline] fn sub(self, rhs: f32) -> Self::Output { Jzazbz::new(self.jz - rhs, self.az - rhs, self.bz - rhs) } } impl Mul for Jzazbz { type Output = Jzazbz; #[inline] fn mul(self, rhs: f32) -> Self::Output { Jzazbz::new(self.jz * rhs, self.az * rhs, self.bz * rhs) } } impl Div for Jzazbz { type Output = Jzazbz; #[inline] fn div(self, rhs: f32) -> Self::Output { Jzazbz::new(self.jz / rhs, self.az / rhs, self.bz / rhs) } } impl Add for Jzazbz { type Output = Jzazbz; #[inline] fn add(self, rhs: Jzazbz) -> Self::Output { Jzazbz::new(self.jz + rhs.jz, self.az + rhs.az, self.bz + rhs.bz) } } impl Sub for Jzazbz { type Output = Jzazbz; #[inline] fn sub(self, rhs: Jzazbz) -> Self::Output { Jzazbz::new(self.jz - rhs.jz, self.az - rhs.az, self.bz - rhs.bz) } } impl Mul for Jzazbz { type Output = Jzazbz; #[inline] fn mul(self, rhs: Jzazbz) -> Self::Output { Jzazbz::new(self.jz * rhs.jz, self.az * rhs.az, self.bz * rhs.bz) } } impl Div for Jzazbz { type Output = Jzazbz; #[inline] fn div(self, rhs: Jzazbz) -> Self::Output { Jzazbz::new(self.jz / rhs.jz, self.az / rhs.az, self.bz / rhs.bz) } 
} impl AddAssign for Jzazbz { #[inline] fn add_assign(&mut self, rhs: Jzazbz) { self.jz += rhs.jz; self.az += rhs.az; self.bz += rhs.bz; } } impl SubAssign for Jzazbz { #[inline] fn sub_assign(&mut self, rhs: Jzazbz) { self.jz -= rhs.jz; self.az -= rhs.az; self.bz -= rhs.bz; } } impl MulAssign for Jzazbz { #[inline] fn mul_assign(&mut self, rhs: Jzazbz) { self.jz *= rhs.jz; self.az *= rhs.az; self.bz *= rhs.bz; } } impl DivAssign for Jzazbz { #[inline] fn div_assign(&mut self, rhs: Jzazbz) { self.jz /= rhs.jz; self.az /= rhs.az; self.bz /= rhs.bz; } } impl AddAssign for Jzazbz { #[inline] fn add_assign(&mut self, rhs: f32) { self.jz += rhs; self.az += rhs; self.bz += rhs; } } impl SubAssign for Jzazbz { #[inline] fn sub_assign(&mut self, rhs: f32) { self.jz -= rhs; self.az -= rhs; self.bz -= rhs; } } impl MulAssign for Jzazbz { #[inline] fn mul_assign(&mut self, rhs: f32) { self.jz *= rhs; self.az *= rhs; self.bz *= rhs; } } impl DivAssign for Jzazbz { #[inline] fn div_assign(&mut self, rhs: f32) { self.jz /= rhs; self.az /= rhs; self.bz /= rhs; } } impl Neg for Jzazbz { type Output = Jzazbz; #[inline] fn neg(self) -> Self::Output { Jzazbz::new(-self.jz, -self.az, -self.bz) } } impl Jzazbz { #[inline] pub fn sqrt(&self) -> Jzazbz { Jzazbz::new(self.jz.sqrt(), self.az.sqrt(), self.bz.sqrt()) } #[inline] pub fn cbrt(&self) -> Jzazbz { Jzazbz::new(f_cbrtf(self.jz), f_cbrtf(self.az), f_cbrtf(self.bz)) } } impl Pow for Jzazbz { type Output = Jzazbz; #[inline] fn pow(self, rhs: f32) -> Self::Output { Jzazbz::new( f_powf(self.jz, rhs), f_powf(self.az, rhs), f_powf(self.bz, rhs), ) } } impl Pow for Jzazbz { type Output = Jzazbz; #[inline] fn pow(self, rhs: Jzazbz) -> Self::Output { Jzazbz::new( f_powf(self.jz, rhs.jz), f_powf(self.az, self.az), f_powf(self.bz, self.bz), ) } } #[cfg(test)] mod tests { use super::*; #[test] fn jzazbz_round() { let xyz = Xyz::new(0.5, 0.4, 0.3); let jzazbz = Jzazbz::from_xyz_with_display_luminance(xyz, 253f32); let old_xyz = 
jzazbz.to_xyz(253f32); assert!( (xyz.x - old_xyz.x).abs() <= 1e-3, "{:?} != {:?}", xyz, old_xyz ); assert!( (xyz.y - old_xyz.y).abs() <= 1e-3, "{:?} != {:?}", xyz, old_xyz ); assert!( (xyz.z - old_xyz.z).abs() <= 1e-3, "{:?} != {:?}", xyz, old_xyz ); } } moxcms-0.7.7/src/jzczhz.rs000064400000000000000000000231321046102023000136250ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ use crate::Xyz; use crate::jzazbz::Jzazbz; use num_traits::Pow; use pxfm::{f_atan2f, f_cbrtf, f_hypot3f, f_hypotf, f_powf, f_sincosf, f_sinf}; use std::ops::{ Add, AddAssign, Div, DivAssign, Index, IndexMut, Mul, MulAssign, Neg, Sub, SubAssign, }; /// Represents Jzazbz in polar coordinates as Jzczhz #[repr(C)] #[derive(Debug, Copy, Clone, PartialOrd, PartialEq)] pub struct Jzczhz { /// Jz(lightness) generally expects to be between `0.0..1.0`. pub jz: f32, /// Cz generally expects to be between `-1.0..1.0`. pub cz: f32, /// Hz generally expects to be between `-1.0..1.0`. pub hz: f32, } impl Jzczhz { /// Creates new instance of Jzczhz #[inline] pub fn new(jz: f32, cz: f32, hz: f32) -> Jzczhz { Jzczhz { jz, cz, hz } } /// Converts Jzazbz to polar coordinates Jzczhz #[inline] pub fn from_jzazbz(jzazbz: Jzazbz) -> Jzczhz { let cz = f_hypotf(jzazbz.az, jzazbz.bz); let hz = f_atan2f(jzazbz.bz, jzazbz.az); Jzczhz::new(jzazbz.jz, cz, hz) } /// Converts Jzczhz into Jzazbz #[inline] pub fn to_jzazbz(&self) -> Jzazbz { let sincos = f_sincosf(self.hz); let az = self.cz * sincos.1; let bz = self.cz * sincos.0; Jzazbz::new(self.jz, az, bz) } /// Converts Jzczhz into Jzazbz #[inline] pub fn to_jzazbz_with_luminance(&self) -> Jzazbz { let sincos = f_sincosf(self.hz); let az = self.cz * sincos.1; let bz = self.cz * sincos.0; Jzazbz::new(self.jz, az, bz) } /// Converts Jzczhz to *Xyz* #[inline] pub fn to_xyz(&self, display_luminance: f32) -> Xyz { let jzazbz = self.to_jzazbz(); jzazbz.to_xyz(display_luminance) } /// Converts [Xyz] to [Jzczhz] #[inline] pub fn from_xyz(xyz: Xyz) -> Jzczhz { let jzazbz = Jzazbz::from_xyz(xyz); Jzczhz::from_jzazbz(jzazbz) } /// Converts [Xyz] to [Jzczhz] #[inline] pub fn from_xyz_with_display_luminance(xyz: Xyz, luminance: f32) -> Jzczhz { let jzazbz = Jzazbz::from_xyz_with_display_luminance(xyz, luminance); Jzczhz::from_jzazbz(jzazbz) } /// Computes distance for *Jzczhz* #[inline] pub fn distance(&self, other: Jzczhz) -> f32 { let djz = self.jz - 
other.jz; let dcz = self.cz - other.cz; let dhz = self.hz - other.hz; let dh = 2. * (self.cz * other.cz).sqrt() * f_sinf(dhz * 0.5); f_hypot3f(djz, dcz, dh) } #[inline] pub fn euclidean_distance(&self, other: Self) -> f32 { let djz = self.jz - other.jz; let dhz = self.hz - other.hz; let dcz = self.cz - other.cz; (djz * djz + dhz * dhz + dcz * dcz).sqrt() } #[inline] pub fn taxicab_distance(&self, other: Self) -> f32 { let djz = self.jz - other.jz; let dhz = self.hz - other.hz; let dcz = self.cz - other.cz; djz.abs() + dhz.abs() + dcz.abs() } } impl Index for Jzczhz { type Output = f32; #[inline] fn index(&self, index: usize) -> &f32 { match index { 0 => &self.jz, 1 => &self.cz, 2 => &self.hz, _ => panic!("Index out of bounds for Jzczhz"), } } } impl IndexMut for Jzczhz { #[inline] fn index_mut(&mut self, index: usize) -> &mut f32 { match index { 0 => &mut self.jz, 1 => &mut self.cz, 2 => &mut self.hz, _ => panic!("Index out of bounds for Jzczhz"), } } } impl Add for Jzczhz { type Output = Jzczhz; #[inline] fn add(self, rhs: f32) -> Self::Output { Jzczhz::new(self.jz + rhs, self.cz + rhs, self.hz + rhs) } } impl Sub for Jzczhz { type Output = Jzczhz; #[inline] fn sub(self, rhs: f32) -> Self::Output { Jzczhz::new(self.jz - rhs, self.cz - rhs, self.hz - rhs) } } impl Mul for Jzczhz { type Output = Jzczhz; #[inline] fn mul(self, rhs: f32) -> Self::Output { Jzczhz::new(self.jz * rhs, self.cz * rhs, self.hz * rhs) } } impl Div for Jzczhz { type Output = Jzczhz; #[inline] fn div(self, rhs: f32) -> Self::Output { Jzczhz::new(self.jz / rhs, self.cz / rhs, self.hz / rhs) } } impl Add for Jzczhz { type Output = Jzczhz; #[inline] fn add(self, rhs: Jzczhz) -> Self::Output { Jzczhz::new(self.jz + rhs.jz, self.cz + rhs.cz, self.hz + rhs.hz) } } impl Sub for Jzczhz { type Output = Jzczhz; #[inline] fn sub(self, rhs: Jzczhz) -> Self::Output { Jzczhz::new(self.jz - rhs.jz, self.cz - rhs.cz, self.hz - rhs.hz) } } impl Mul for Jzczhz { type Output = Jzczhz; #[inline] fn mul(self, rhs: 
Jzczhz) -> Self::Output { Jzczhz::new(self.jz * rhs.jz, self.cz * rhs.cz, self.hz * rhs.hz) } } impl Div for Jzczhz { type Output = Jzczhz; #[inline] fn div(self, rhs: Jzczhz) -> Self::Output { Jzczhz::new(self.jz / rhs.jz, self.cz / rhs.cz, self.hz / rhs.hz) } } impl AddAssign for Jzczhz { #[inline] fn add_assign(&mut self, rhs: Jzczhz) { self.jz += rhs.jz; self.cz += rhs.cz; self.hz += rhs.hz; } } impl SubAssign for Jzczhz { #[inline] fn sub_assign(&mut self, rhs: Jzczhz) { self.jz -= rhs.jz; self.cz -= rhs.cz; self.hz -= rhs.hz; } } impl MulAssign for Jzczhz { #[inline] fn mul_assign(&mut self, rhs: Jzczhz) { self.jz *= rhs.jz; self.cz *= rhs.cz; self.hz *= rhs.hz; } } impl DivAssign for Jzczhz { #[inline] fn div_assign(&mut self, rhs: Jzczhz) { self.jz /= rhs.jz; self.cz /= rhs.cz; self.hz /= rhs.hz; } } impl AddAssign for Jzczhz { #[inline] fn add_assign(&mut self, rhs: f32) { self.jz += rhs; self.cz += rhs; self.hz += rhs; } } impl SubAssign for Jzczhz { #[inline] fn sub_assign(&mut self, rhs: f32) { self.jz -= rhs; self.cz -= rhs; self.hz -= rhs; } } impl MulAssign for Jzczhz { #[inline] fn mul_assign(&mut self, rhs: f32) { self.jz *= rhs; self.cz *= rhs; self.hz *= rhs; } } impl DivAssign for Jzczhz { #[inline] fn div_assign(&mut self, rhs: f32) { self.jz /= rhs; self.cz /= rhs; self.hz /= rhs; } } impl Jzczhz { #[inline] pub fn sqrt(&self) -> Jzczhz { Jzczhz::new(self.jz.sqrt(), self.cz.sqrt(), self.hz.sqrt()) } #[inline] pub fn cbrt(&self) -> Jzczhz { Jzczhz::new(f_cbrtf(self.jz), f_cbrtf(self.cz), f_cbrtf(self.hz)) } } impl Pow for Jzczhz { type Output = Jzczhz; #[inline] fn pow(self, rhs: f32) -> Self::Output { Jzczhz::new( f_powf(self.jz, rhs), f_powf(self.cz, rhs), f_powf(self.hz, rhs), ) } } impl Pow for Jzczhz { type Output = Jzczhz; #[inline] fn pow(self, rhs: Jzczhz) -> Self::Output { Jzczhz::new( f_powf(self.jz, rhs.jz), f_powf(self.cz, self.cz), f_powf(self.hz, self.hz), ) } } impl Neg for Jzczhz { type Output = Jzczhz; #[inline] fn neg(self) -> 
Self::Output { Jzczhz::new(-self.jz, -self.cz, -self.hz) } } #[cfg(test)] mod tests { use super::*; #[test] fn jzczhz_round() { let xyz = Xyz::new(0.5, 0.4, 0.3); let jzczhz = Jzczhz::from_xyz_with_display_luminance(xyz, 253.); let old_xyz = jzczhz.to_xyz(253f32); assert!( (xyz.x - old_xyz.x).abs() <= 1e-3, "{:?} != {:?}", xyz, old_xyz ); assert!( (xyz.y - old_xyz.y).abs() <= 1e-3, "{:?} != {:?}", xyz, old_xyz ); assert!( (xyz.z - old_xyz.z).abs() <= 1e-3, "{:?} != {:?}", xyz, old_xyz ); } } moxcms-0.7.7/src/lab.rs000064400000000000000000000176471046102023000130570ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. 
/// Inverse of the Lab companding curve `f` defined in this module.
///
/// Inputs at or below `24/116` are mapped through the linear segment
/// `(108/841) * (t - 16/116)`; larger inputs are cubed.
#[inline(always)]
const fn f_1(t: f32) -> f32 {
    const KNEE: f32 = 24.0 / 116.0;
    const SLOPE: f32 = 108.0 / 841.0;
    const OFFSET: f32 = 16.0 / 116.0;

    // Linear toe first; everything above the knee falls through to the cubic part.
    if t <= KNEE {
        return SLOPE * (t - OFFSET);
    }
    t * t * t
}
} else { f_cbrtf(t) } } impl Lab { /// Converts to CIE Lab from CIE XYZ for PCS encoding #[inline] pub fn from_pcs_xyz(xyz: Xyz) -> Self { const WP: Xyz = Chromaticity::D50.to_xyz(); let device_x = (xyz.x as f64 * (1.0f64 + 32767.0f64 / 32768.0f64) / WP.x as f64) as f32; let device_y = (xyz.y as f64 * (1.0f64 + 32767.0f64 / 32768.0f64) / WP.y as f64) as f32; let device_z = (xyz.z as f64 * (1.0f64 + 32767.0f64 / 32768.0f64) / WP.z as f64) as f32; let fx = f(device_x); let fy = f(device_y); let fz = f(device_z); let lb = mlaf(-16.0, 116.0, fy); let a = 500.0 * (fx - fy); let b = 200.0 * (fy - fz); let l = lb / 100.0; let a = (a + 128.0) / 255.0; let b = (b + 128.0) / 255.0; Self::new(l, a, b) } /// Converts to CIE Lab from CIE XYZ #[inline] pub fn from_xyz(xyz: Xyz) -> Self { const WP: Xyz = Chromaticity::D50.to_xyz(); let device_x = (xyz.x as f64 * (1.0f64 + 32767.0f64 / 32768.0f64) / WP.x as f64) as f32; let device_y = (xyz.y as f64 * (1.0f64 + 32767.0f64 / 32768.0f64) / WP.y as f64) as f32; let device_z = (xyz.z as f64 * (1.0f64 + 32767.0f64 / 32768.0f64) / WP.z as f64) as f32; let fx = f(device_x); let fy = f(device_y); let fz = f(device_z); let lb = mlaf(-16.0, 116.0, fy); let a = 500.0 * (fx - fy); let b = 200.0 * (fy - fz); Self::new(lb, a, b) } /// Converts CIE [Lab] into CIE [Xyz] for PCS encoding #[inline] pub fn to_pcs_xyz(self) -> Xyz { let device_l = self.l * 100.0; let device_a = fmla(self.a, 255.0, -128.0); let device_b = fmla(self.b, 255.0, -128.0); let y = (device_l + 16.0) / 116.0; const WP: Xyz = Chromaticity::D50.to_xyz(); let x = f_1(mlaf(y, 0.002, device_a)) * WP.x; let y1 = f_1(y) * WP.y; let z = f_1(mlaf(y, -0.005, device_b)) * WP.z; let x = (x as f64 / (1.0f64 + 32767.0f64 / 32768.0f64)) as f32; let y = (y1 as f64 / (1.0f64 + 32767.0f64 / 32768.0f64)) as f32; let z = (z as f64 / (1.0f64 + 32767.0f64 / 32768.0f64)) as f32; Xyz::new(x, y, z) } /// Converts CIE [Lab] into CIE [Xyz] #[inline] pub fn to_xyz(self) -> Xyz { let device_l = self.l; 
let device_a = self.a; let device_b = self.b; let y = (device_l + 16.0) / 116.0; const WP: Xyz = Chromaticity::D50.to_xyz(); let x = f_1(mlaf(y, 0.002, device_a)) * WP.x; let y1 = f_1(y) * WP.y; let z = f_1(mlaf(y, -0.005, device_b)) * WP.z; let x = (x as f64 / (1.0f64 + 32767.0f64 / 32768.0f64)) as f32; let y = (y1 as f64 / (1.0f64 + 32767.0f64 / 32768.0f64)) as f32; let z = (z as f64 / (1.0f64 + 32767.0f64 / 32768.0f64)) as f32; Xyz::new(x, y, z) } /// Desaturates out of gamut PCS encoded LAB pub fn desaturate_pcs(self) -> Lab { if self.l < 0. { return Lab::new(0., 0., 0.); } let mut new_lab = self; if new_lab.l > 1. { new_lab.l = 1.; } let amax = 1.0; let amin = 0.0; let bmin = 0.0; let bmax = 1.0; if self.a < amin || self.a > amax || self.b < bmin || self.b > bmax { if self.a == 0.0 { // Is hue exactly 90? // atan will not work, so clamp here new_lab.b = if new_lab.b < bmin { bmin } else { bmax }; return Lab::new(self.l, self.a, self.b); } let lch = LCh::from_lab(new_lab); let slope = new_lab.b / new_lab.a; let h = lch.h * (180.0 / std::f32::consts::PI); // There are 4 zones if (0. ..45.).contains(&h) || (315. ..=360.).contains(&h) { // clip by amax new_lab.a = amax; new_lab.b = amax * slope; } else if (45. ..135.).contains(&h) { // clip by bmax new_lab.b = bmax; new_lab.a = bmax / slope; } else if (135. ..225.).contains(&h) { // clip by amin new_lab.a = amin; new_lab.b = amin * slope; } else if (225. 
..315.).contains(&h) { // clip by bmin new_lab.b = bmin; new_lab.a = bmin / slope; } } new_lab } } #[cfg(test)] mod tests { use super::*; #[test] fn round_trip() { let xyz = Xyz::new(0.1, 0.2, 0.3); let lab = Lab::from_xyz(xyz); let rolled_back = lab.to_xyz(); let dx = (xyz.x - rolled_back.x).abs(); let dy = (xyz.y - rolled_back.y).abs(); let dz = (xyz.z - rolled_back.z).abs(); assert!(dx < 1e-5); assert!(dy < 1e-5); assert!(dz < 1e-5); } #[test] fn round_pcs_trip() { let xyz = Xyz::new(0.1, 0.2, 0.3); let lab = Lab::from_pcs_xyz(xyz); let rolled_back = lab.to_pcs_xyz(); let dx = (xyz.x - rolled_back.x).abs(); let dy = (xyz.y - rolled_back.y).abs(); let dz = (xyz.z - rolled_back.z).abs(); assert!(dx < 1e-5); assert!(dy < 1e-5); assert!(dz < 1e-5); } } moxcms-0.7.7/src/lib.rs000064400000000000000000000114341046102023000130530ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #![allow(clippy::manual_clamp, clippy::excessive_precision)] #![cfg_attr(docsrs, feature(doc_cfg))] #![deny(unreachable_pub)] #![deny( clippy::print_stdout, clippy::print_stderr, clippy::print_literal, clippy::print_in_format_impl )] #![allow(stable_features)] #![cfg_attr( not(any(feature = "avx", feature = "sse", feature = "avx512", feature = "neon")), forbid(unsafe_code) )] #![cfg_attr(all(feature = "avx512", target_arch = "x86_64"), feature(cfg_version))] #![cfg_attr( all(feature = "avx512", target_arch = "x86_64"), feature(avx512_target_feature) )] #![cfg_attr( all(feature = "avx512", target_arch = "x86_64"), feature(stdarch_x86_avx512) )] mod chad; mod cicp; mod conversions; mod dat; mod defaults; mod err; mod gamma; mod gamut; mod ictcp; mod jzazbz; mod jzczhz; mod lab; mod luv; /// One of main intent is to provide fast math available in const context /// ULP most of the methods <= 0.5 mod math; mod matrix; mod mlaf; mod nd_array; mod oklab; mod oklch; mod profile; mod reader; mod rgb; mod safe_math; mod tag; mod transform; mod trc; mod writer; mod yrg; // Simple math analysis module mod chromaticity; mod dt_ucs; mod helpers; mod lut_hint; mod matan; mod srlab2; mod xyy; pub use chad::{ adapt_to_d50, adapt_to_d50_d, adapt_to_illuminant, adapt_to_illuminant_d, adapt_to_illuminant_xyz, adapt_to_illuminant_xyz_d, adaption_matrix, adaption_matrix_d, }; pub use chromaticity::Chromaticity; pub use cicp::{CicpColorPrimaries, 
ColorPrimaries, MatrixCoefficients, TransferCharacteristics}; pub use dat::ColorDateTime; pub use defaults::{ HLG_LUT_TABLE, PQ_LUT_TABLE, WHITE_POINT_D50, WHITE_POINT_D60, WHITE_POINT_D65, WHITE_POINT_DCI_P3, }; pub use dt_ucs::{DtUchHcb, DtUchHsb, DtUchJch}; pub use err::{CmsError, MalformedSize}; pub use gamut::filmlike_clip; pub use ictcp::ICtCp; pub use jzazbz::Jzazbz; pub use jzczhz::Jzczhz; pub use lab::Lab; pub use luv::{LCh, Luv}; pub use math::rounding_div_ceil; pub use matrix::{ BT2020_MATRIX, DISPLAY_P3_MATRIX, Matrix3, Matrix3d, Matrix3f, Matrix4f, SRGB_MATRIX, Vector3, Vector3d, Vector3f, Vector3i, Vector3u, Vector4, Vector4d, Vector4f, Vector4i, Xyz, Xyzd, }; pub use nd_array::{Cube, Hypercube}; pub use oklab::Oklab; pub use oklch::Oklch; pub use profile::{ CicpProfile, ColorProfile, DataColorSpace, DescriptionString, LocalizableString, LutDataType, LutMultidimensionalType, LutStore, LutType, LutWarehouse, Measurement, MeasurementGeometry, ParsingOptions, ProfileClass, ProfileSignature, ProfileText, ProfileVersion, RenderingIntent, StandardIlluminant, StandardObserver, TechnologySignatures, ViewingConditions, }; pub use rgb::{FusedExp, FusedExp2, FusedExp10, FusedLog, FusedLog2, FusedLog10, FusedPow, Rgb}; pub use srlab2::Srlab2; pub use transform::{ BarycentricWeightScale, InPlaceStage, InterpolationMethod, Layout, PointeeSizeExpressible, Stage, Transform8BitExecutor, Transform16BitExecutor, TransformExecutor, TransformF32BitExecutor, TransformF64BitExecutor, TransformOptions, }; pub use trc::{GammaLutInterpolate, ToneCurveEvaluator, ToneReprCurve, curve_from_gamma}; pub use xyy::{XyY, XyYRepresentable}; pub use yrg::{Ych, Yrg, cie_y_1931_to_cie_y_2006}; moxcms-0.7.7/src/lut_hint.rs000064400000000000000000000104121046102023000141260ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved. 
* // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ use crate::LutWarehouse; impl LutWarehouse { /// Method tests if mathematical fusion on LUT table is allowed. /// If it's not, full brute-force pass in [Katana] is required. 
/// Struct representing a color in CIE LUV, a.k.a. L\*u\*v\*, color space
#[repr(C)]
#[derive(Debug, Copy, Clone, Default, PartialOrd)]
pub struct Luv {
    /// The L\* value (achromatic luminance) of the colour in 0–100 range.
    pub l: f32,
    // NOTE(review): the usual CIE 1976 convention has positive u* pointing towards red
    // and negative towards green, the opposite of the wording below. Confirm before
    // relying on the sign description.
    /// The u\* value of the colour.
    ///
    /// Together with v\* value, it defines chromaticity of the colour. The u\*
    /// coordinate represents colour’s position on red-green axis with negative
    /// values indicating more red and positive more green colour. Typical
    /// values are in -134–220 range (but exact range for ‘valid’ colours
    /// depends on luminance and v\* value).
    pub u: f32,
    /// The v\* value of the colour.
    ///
    /// Together with u\* value, it defines chromaticity of the colour. The v\*
    /// coordinate represents colour’s position on blue-yellow axis with
    /// negative values indicating more blue and positive more yellow colour.
    /// Typical values are in -140–122 range (but exact range for ‘valid’
    /// colours depends on luminance and u\* value).
    pub v: f32,
}
pub h: f32, } use crate::mlaf::mlaf; use crate::{Chromaticity, Lab, Xyz}; use num_traits::Pow; use pxfm::{f_atan2f, f_cbrtf, f_hypotf, f_powf, f_sincosf}; use std::ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Sub, SubAssign}; pub(crate) const LUV_WHITE_U_PRIME: f32 = 4.0f32 * Chromaticity::D50.to_xyz().y / (Chromaticity::D50.to_xyz().x + 15.0 * Chromaticity::D50.to_xyz().y + 3.0 * Chromaticity::D50.to_xyz().z); pub(crate) const LUV_WHITE_V_PRIME: f32 = 9.0f32 * Chromaticity::D50.to_xyz().y / (Chromaticity::D50.to_xyz().x + 15.0 * Chromaticity::D50.to_xyz().y + 3.0 * Chromaticity::D50.to_xyz().z); pub(crate) const LUV_CUTOFF_FORWARD_Y: f32 = (6f32 / 29f32) * (6f32 / 29f32) * (6f32 / 29f32); pub(crate) const LUV_MULTIPLIER_FORWARD_Y: f32 = (29f32 / 3f32) * (29f32 / 3f32) * (29f32 / 3f32); pub(crate) const LUV_MULTIPLIER_INVERSE_Y: f32 = (3f32 / 29f32) * (3f32 / 29f32) * (3f32 / 29f32); impl Luv { /// Converts CIE XYZ to CIE Luv using D50 white point #[inline] #[allow(clippy::manual_clamp)] pub fn from_xyz(xyz: Xyz) -> Self { let [x, y, z] = [xyz.x, xyz.y, xyz.z]; let den = mlaf(mlaf(x, 15.0, y), 3.0, z); let l = (if y < LUV_CUTOFF_FORWARD_Y { LUV_MULTIPLIER_FORWARD_Y * y } else { 116. * f_cbrtf(y) - 16. }) .min(100.) .max(0.); let (u, v); if den != 0f32 { let u_prime = 4. * x / den; let v_prime = 9. * y / den; u = 13. * l * (u_prime - LUV_WHITE_U_PRIME); v = 13. * l * (v_prime - LUV_WHITE_V_PRIME); } else { u = 0.; v = 0.; } Luv { l, u, v } } /// To [Xyz] using D50 colorimetry #[inline] pub fn to_xyz(&self) -> Xyz { if self.l <= 0. { return Xyz::new(0., 0., 0.); } let l13 = 1. / (13. * self.l); let u = mlaf(LUV_WHITE_U_PRIME, self.u, l13); let v = mlaf(LUV_WHITE_V_PRIME, self.v, l13); let y = if self.l > 8. { let jx = (self.l + 16.) / 116.; jx * jx * jx } else { self.l * LUV_MULTIPLIER_INVERSE_Y }; let (x, z); if v != 0. { let den = 1. / (4. * v); x = y * 9. 
/// Constructors and conversions for the cylindrical [`LCh`] representation.
///
/// The same struct is used for both LCh(uv) (derived from [`Luv`]) and LCh(ab)
/// (derived from [`Lab`]); the `*_lab` methods handle the latter.
impl LCh {
    /// Creates a colour from lightness `l`, chroma `c` and hue `h`.
    #[inline]
    pub const fn new(l: f32, c: f32, h: f32) -> Self {
        LCh { l, c, h }
    }

    /// Converts [`Luv`] to LCh(uv).
    ///
    /// Chroma is the hypotenuse of `(u, v)` and hue is `atan2(v, u)`.
    #[inline]
    pub fn from_luv(luv: Luv) -> Self {
        LCh {
            l: luv.l,
            c: f_hypotf(luv.u, luv.v),
            h: f_atan2f(luv.v, luv.u),
        }
    }

    /// Converts [`Lab`] to LCh(ab).
    ///
    /// Chroma is the hypotenuse of `(a, b)` and hue is `atan2(b, a)`.
    #[inline]
    pub fn from_lab(lab: Lab) -> Self {
        LCh {
            l: lab.l,
            c: f_hypotf(lab.a, lab.b),
            h: f_atan2f(lab.b, lab.a),
        }
    }

    /// Computes LCh(uv) from [`Xyz`] by going through [`Luv::from_xyz`].
    #[inline]
    pub fn from_xyz(xyz: Xyz) -> Self {
        Self::from_luv(Luv::from_xyz(xyz))
    }

    /// Computes LCh(ab) from [`Xyz`] by going through [`Lab::from_xyz`].
    #[inline]
    pub fn from_xyz_lab(xyz: Xyz) -> Self {
        Self::from_lab(Lab::from_xyz(xyz))
    }

    /// Converts LCh(uv) to [`Xyz`] by going through [`Luv`].
    #[inline]
    pub fn to_xyz(&self) -> Xyz {
        self.to_luv().to_xyz()
    }

    /// Converts LCh(ab) to [`Xyz`] by going through [`Lab`].
    #[inline]
    pub fn to_xyz_lab(&self) -> Xyz {
        self.to_lab().to_xyz()
    }

    /// Converts LCh(uv) back to [`Luv`], keeping `l` unchanged.
    #[inline]
    pub fn to_luv(&self) -> Luv {
        // presumably `f_sincosf` yields `(sin, cos)`: element 1 scales `u`, element 0 scales `v`
        let sincos = f_sincosf(self.h);
        Luv {
            l: self.l,
            u: self.c * sincos.1,
            v: self.c * sincos.0,
        }
    }

    /// Converts LCh(ab) back to [`Lab`], keeping `l` unchanged.
    #[inline]
    pub fn to_lab(&self) -> Lab {
        // presumably `f_sincosf` yields `(sin, cos)`: element 1 scales `a`, element 0 scales `b`
        let sincos = f_sincosf(self.h);
        Lab {
            l: self.l,
            a: self.c * sincos.1,
            b: self.c * sincos.0,
        }
    }
}
#[inline] fn eq(&self, other: &Self) -> bool { if self.l != other.l { false } else if self.l == 0.0 { true } else if self.c != other.c { false } else if self.c == 0.0 { true } else { use std::f32::consts::TAU; self.h.rem_euclid(TAU) == other.h.rem_euclid(TAU) } } } impl Luv { #[inline] pub fn euclidean_distance(&self, other: Luv) -> f32 { let dl = self.l - other.l; let du = self.u - other.u; let dv = self.v - other.v; (dl * dl + du * du + dv * dv).sqrt() } } impl LCh { #[inline] pub fn euclidean_distance(&self, other: LCh) -> f32 { let dl = self.l - other.l; let dc = self.c - other.c; let dh = self.h - other.h; (dl * dl + dc * dc + dh * dh).sqrt() } } impl Luv { #[inline] pub const fn taxicab_distance(&self, other: Self) -> f32 { let dl = self.l - other.l; let du = self.u - other.u; let dv = self.v - other.v; dl.abs() + du.abs() + dv.abs() } } impl LCh { #[inline] pub const fn taxicab_distance(&self, other: Self) -> f32 { let dl = self.l - other.l; let dc = self.c - other.c; let dh = self.h - other.h; dl.abs() + dc.abs() + dh.abs() } } impl Add for Luv { type Output = Luv; #[inline] fn add(self, rhs: Luv) -> Luv { Luv::new(self.l + rhs.l, self.u + rhs.u, self.v + rhs.v) } } impl Add for LCh { type Output = LCh; #[inline] fn add(self, rhs: LCh) -> LCh { LCh::new(self.l + rhs.l, self.c + rhs.c, self.h + rhs.h) } } impl Sub for Luv { type Output = Luv; #[inline] fn sub(self, rhs: Luv) -> Luv { Luv::new(self.l - rhs.l, self.u - rhs.u, self.v - rhs.v) } } impl Sub for LCh { type Output = LCh; #[inline] fn sub(self, rhs: LCh) -> LCh { LCh::new(self.l - rhs.l, self.c - rhs.c, self.h - rhs.h) } } impl Mul for Luv { type Output = Luv; #[inline] fn mul(self, rhs: Luv) -> Luv { Luv::new(self.l * rhs.l, self.u * rhs.u, self.v * rhs.v) } } impl Mul for LCh { type Output = LCh; #[inline] fn mul(self, rhs: LCh) -> LCh { LCh::new(self.l * rhs.l, self.c * rhs.c, self.h * rhs.h) } } impl Div for Luv { type Output = Luv; #[inline] fn div(self, rhs: Luv) -> Luv { Luv::new(self.l / 
rhs.l, self.u / rhs.u, self.v / rhs.v) } } impl Div for LCh { type Output = LCh; #[inline] fn div(self, rhs: LCh) -> LCh { LCh::new(self.l / rhs.l, self.c / rhs.c, self.h / rhs.h) } } impl Add for Luv { type Output = Luv; #[inline] fn add(self, rhs: f32) -> Self::Output { Luv::new(self.l + rhs, self.u + rhs, self.v + rhs) } } impl Add for LCh { type Output = LCh; #[inline] fn add(self, rhs: f32) -> Self::Output { LCh::new(self.l + rhs, self.c + rhs, self.h + rhs) } } impl Sub for Luv { type Output = Luv; #[inline] fn sub(self, rhs: f32) -> Self::Output { Luv::new(self.l - rhs, self.u - rhs, self.v - rhs) } } impl Sub for LCh { type Output = LCh; #[inline] fn sub(self, rhs: f32) -> Self::Output { LCh::new(self.l - rhs, self.c - rhs, self.h - rhs) } } impl Mul for Luv { type Output = Luv; #[inline] fn mul(self, rhs: f32) -> Self::Output { Luv::new(self.l * rhs, self.u * rhs, self.v * rhs) } } impl Mul for LCh { type Output = LCh; #[inline] fn mul(self, rhs: f32) -> Self::Output { LCh::new(self.l * rhs, self.c * rhs, self.h * rhs) } } impl Div for Luv { type Output = Luv; #[inline] fn div(self, rhs: f32) -> Self::Output { Luv::new(self.l / rhs, self.u / rhs, self.v / rhs) } } impl Div for LCh { type Output = LCh; #[inline] fn div(self, rhs: f32) -> Self::Output { LCh::new(self.l / rhs, self.c / rhs, self.h / rhs) } } impl AddAssign for Luv { #[inline] fn add_assign(&mut self, rhs: Luv) { self.l += rhs.l; self.u += rhs.u; self.v += rhs.v; } } impl AddAssign for LCh { #[inline] fn add_assign(&mut self, rhs: LCh) { self.l += rhs.l; self.c += rhs.c; self.h += rhs.h; } } impl SubAssign for Luv { #[inline] fn sub_assign(&mut self, rhs: Luv) { self.l -= rhs.l; self.u -= rhs.u; self.v -= rhs.v; } } impl SubAssign for LCh { #[inline] fn sub_assign(&mut self, rhs: LCh) { self.l -= rhs.l; self.c -= rhs.c; self.h -= rhs.h; } } impl MulAssign for Luv { #[inline] fn mul_assign(&mut self, rhs: Luv) { self.l *= rhs.l; self.u *= rhs.u; self.v *= rhs.v; } } impl MulAssign for LCh { 
#[inline] fn mul_assign(&mut self, rhs: LCh) { self.l *= rhs.l; self.c *= rhs.c; self.h *= rhs.h; } } impl DivAssign for Luv { #[inline] fn div_assign(&mut self, rhs: Luv) { self.l /= rhs.l; self.u /= rhs.u; self.v /= rhs.v; } } impl DivAssign for LCh { #[inline] fn div_assign(&mut self, rhs: LCh) { self.l /= rhs.l; self.c /= rhs.c; self.h /= rhs.h; } } impl AddAssign for Luv { #[inline] fn add_assign(&mut self, rhs: f32) { self.l += rhs; self.u += rhs; self.v += rhs; } } impl AddAssign for LCh { #[inline] fn add_assign(&mut self, rhs: f32) { self.l += rhs; self.c += rhs; self.h += rhs; } } impl SubAssign for Luv { #[inline] fn sub_assign(&mut self, rhs: f32) { self.l -= rhs; self.u -= rhs; self.v -= rhs; } } impl SubAssign for LCh { #[inline] fn sub_assign(&mut self, rhs: f32) { self.l -= rhs; self.c -= rhs; self.h -= rhs; } } impl MulAssign for Luv { #[inline] fn mul_assign(&mut self, rhs: f32) { self.l *= rhs; self.u *= rhs; self.v *= rhs; } } impl MulAssign for LCh { #[inline] fn mul_assign(&mut self, rhs: f32) { self.l *= rhs; self.c *= rhs; self.h *= rhs; } } impl DivAssign for Luv { #[inline] fn div_assign(&mut self, rhs: f32) { self.l /= rhs; self.u /= rhs; self.v /= rhs; } } impl DivAssign for LCh { #[inline] fn div_assign(&mut self, rhs: f32) { self.l /= rhs; self.c /= rhs; self.h /= rhs; } } impl Neg for LCh { type Output = LCh; #[inline] fn neg(self) -> Self::Output { LCh::new(-self.l, -self.c, -self.h) } } impl Neg for Luv { type Output = Luv; #[inline] fn neg(self) -> Self::Output { Luv::new(-self.l, -self.u, -self.v) } } impl Pow for Luv { type Output = Luv; #[inline] fn pow(self, rhs: f32) -> Self::Output { Luv::new( f_powf(self.l, rhs), f_powf(self.u, rhs), f_powf(self.v, rhs), ) } } impl Pow for LCh { type Output = LCh; #[inline] fn pow(self, rhs: f32) -> Self::Output { LCh::new( f_powf(self.l, rhs), f_powf(self.c, rhs), f_powf(self.h, rhs), ) } } impl Pow for Luv { type Output = Luv; #[inline] fn pow(self, rhs: Luv) -> Self::Output { Luv::new( 
f_powf(self.l, rhs.l), f_powf(self.u, rhs.u), f_powf(self.v, rhs.v), ) } } impl Pow for LCh { type Output = LCh; #[inline] fn pow(self, rhs: LCh) -> Self::Output { LCh::new( f_powf(self.l, rhs.l), f_powf(self.c, rhs.c), f_powf(self.h, rhs.h), ) } } impl Luv { #[inline] pub fn sqrt(&self) -> Luv { Luv::new(self.l.sqrt(), self.u.sqrt(), self.v.sqrt()) } #[inline] pub fn cbrt(&self) -> Luv { Luv::new(f_cbrtf(self.l), f_cbrtf(self.u), f_cbrtf(self.v)) } } impl LCh { #[inline] pub fn sqrt(&self) -> LCh { LCh::new( if self.l < 0. { 0. } else { self.l.sqrt() }, if self.c < 0. { 0. } else { self.c.sqrt() }, if self.h < 0. { 0. } else { self.h.sqrt() }, ) } #[inline] pub fn cbrt(&self) -> LCh { LCh::new(f_cbrtf(self.l), f_cbrtf(self.c), f_cbrtf(self.h)) } } #[cfg(test)] mod tests { use super::*; #[test] fn round_trip_luv() { let xyz = Xyz::new(0.1, 0.2, 0.3); let lab = Luv::from_xyz(xyz); let rolled_back = lab.to_xyz(); let dx = (xyz.x - rolled_back.x).abs(); let dy = (xyz.y - rolled_back.y).abs(); let dz = (xyz.z - rolled_back.z).abs(); assert!(dx < 1e-5); assert!(dy < 1e-5); assert!(dz < 1e-5); } #[test] fn round_trip_lch() { let xyz = Xyz::new(0.1, 0.2, 0.3); let luv = Luv::from_xyz(xyz); let lab = LCh::from_luv(luv); let rolled_back = lab.to_luv(); let dx = (luv.l - rolled_back.l).abs(); let dy = (luv.u - rolled_back.u).abs(); let dz = (luv.v - rolled_back.v).abs(); assert!(dx < 1e-4); assert!(dy < 1e-4); assert!(dz < 1e-4); } } moxcms-0.7.7/src/matan/curve_shape.rs000064400000000000000000000053331046102023000157120ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. 
/// Checks whether a 16-bit curve is (close to) the identity ramp `0..=65535`.
///
/// Every sample is compared with the ideal value for its position; the curve counts as
/// linear when no sample deviates by more than `0x0f`.
///
/// An empty curve is reported as linear, and a single-entry curve is compared against
/// position `0`.
pub(crate) fn is_curve_linear16(curve: &[u16]) -> bool {
    // `len - 1` is the index of the last sample. Clamp it to at least 1 so an empty slice
    // does not underflow (which panics in debug builds) and a one-entry slice does not
    // divide by zero.
    let last_index = curve.len().saturating_sub(1).max(1);
    let scale = 1. / last_index as f32 * 65535.;
    curve.iter().enumerate().all(|(index, &value)| {
        let quantized = (index as f32 * scale).round() as u16;
        (quantized as i32 - value as i32).abs() <= 0x0f
    })
}

/// Returns `true` when the first sample is strictly greater than the last one.
///
/// Only the two end points are inspected; slices with fewer than two samples are never
/// considered descending.
pub(crate) fn is_curve_descending<T: PartialOrd>(v: &[T]) -> bool {
    match v {
        [first, .., last] => first > last,
        _ => false,
    }
}

/// Returns `true` when the first sample is strictly less than the last one.
///
/// Only the two end points are inspected; slices with fewer than two samples are never
/// considered ascending.
pub(crate) fn is_curve_ascending<T: PartialOrd>(v: &[T]) -> bool {
    match v {
        [first, .., last] => first < last,
        _ => false,
    }
}

/// Checks whether an 8-bit curve is (close to) the identity ramp `0..=255`.
///
/// Every sample is compared with the ideal value for its position; the curve counts as
/// linear when no sample deviates by more than `0x03`.
///
/// An empty curve is reported as linear, and a single-entry curve is compared against
/// position `0`.
pub(crate) fn is_curve_linear8(curve: &[u8]) -> bool {
    // Same guard as in `is_curve_linear16`: avoid `0 - 1` underflow and division by zero.
    let last_index = curve.len().saturating_sub(1).max(1);
    let scale = 1. / last_index as f32 * 255.;
    curve.iter().enumerate().all(|(index, &value)| {
        let quantized = (index as f32 * scale).round() as u16;
        (quantized as i32 - value as i32).abs() <= 0x03
    })
}
*/ #[derive(Copy, Clone, Default, Debug)] struct DegenerationAmount { leading: usize, trailing: usize, } /// Counts amount of duplicates on each side of curve fn count_leading_trailing_duplicated(lut: &[T]) -> DegenerationAmount { if lut.is_empty() { return DegenerationAmount::default(); } let first = lut.first().unwrap(); let last = lut.last().unwrap(); let leading = lut.iter().take_while(|&v| v.eq(first)).count(); let trailing = lut.iter().rev().take_while(|&v| v.eq(last)).count(); DegenerationAmount { leading, trailing } } /// Finds out if curve is degenerated on the sides. pub(crate) fn is_curve_degenerated(v: &[T]) -> bool { if v.is_empty() || v.len() < 2 { return false; } let degeneration_amount = count_leading_trailing_duplicated(v); if degeneration_amount.trailing <= 1 && degeneration_amount.leading <= 1 { return false; } let leading_percentage = degeneration_amount.leading; let trailing_percentage = degeneration_amount.trailing; ((leading_percentage / 20) > 0) || ((trailing_percentage / 20) > 0) } moxcms-0.7.7/src/matan/discontinuity.rs000064400000000000000000000053271046102023000163160ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. 
* // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ use num_traits::AsPrimitive; pub(crate) trait DiscontinuitySpike { const SPIKE: f64; } impl DiscontinuitySpike for u8 { const SPIKE: f64 = 16.0; } impl DiscontinuitySpike for u16 { const SPIKE: f64 = 2100.; } impl DiscontinuitySpike for f32 { const SPIKE: f64 = 0.07; } /// Searches LUT curve for discontinuity pub(crate) fn does_curve_have_discontinuity< T: Copy + PartialEq + DiscontinuitySpike + AsPrimitive + 'static, >( curve: &[T], ) -> bool { if curve.len() < 2 { return false; } let threshold: f64 = T::SPIKE; let mut discontinuities = 0u64; let mut previous_element: f64 = curve[0].as_(); let diff: f64 = (curve[1].as_() - previous_element).abs(); if diff > threshold { discontinuities += 1; } for element in curve.iter().skip(1) { let new_diff: f64 = (element.as_() - previous_element).abs(); if new_diff > threshold { discontinuities += 1; if discontinuities > 3 { break; } } previous_element = element.as_(); } discontinuities > 3 } moxcms-0.7.7/src/matan/mod.rs000064400000000000000000000037341046102023000141700ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved. 
* // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ mod curve_shape; mod degeneration; mod discontinuity; mod monotonic; mod slope_limit; pub(crate) use curve_shape::{ is_curve_ascending, is_curve_descending, is_curve_linear8, is_curve_linear16, }; pub(crate) use degeneration::is_curve_degenerated; pub(crate) use discontinuity::does_curve_have_discontinuity; pub(crate) use monotonic::is_curve_monotonic; moxcms-0.7.7/src/matan/monotonic.rs000064400000000000000000000045331046102023000154140ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ use crate::matan::is_curve_ascending; /// Finds out if curve is monotonic. pub(crate) fn is_curve_monotonic(lut: &[T]) -> bool { if lut.len() < 2 { return true; } let is_ascending = is_curve_ascending(lut); let mut violations = 0usize; if is_ascending { for (current, previous) in lut.iter().skip(1).zip(lut.iter().take(lut.len() - 1)) { if current.lt(previous) { violations += 1; } } } else { for (current, previous) in lut.iter().skip(1).zip(lut.iter().take(lut.len() - 1)) { if current.gt(previous) { violations += 1; } } } (violations as f64 / lut.len() as f64) < 0.05 } moxcms-0.7.7/src/matan/slope_limit.rs000064400000000000000000000063371046102023000157330ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. 
Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #![allow(dead_code)] use crate::PointeeSizeExpressible; use crate::matan::is_curve_descending; use num_traits::AsPrimitive; pub(crate) fn limit_slope + PartialOrd + PointeeSizeExpressible>( curve: &mut [T], value_cap: f32, ) where f32: AsPrimitive, { let at_begin = (curve.len() as f32 * 0.02 + 0.5).floor() as usize; // Cutoff at 2% if at_begin == 0 { return; } let at_end = curve.len() - at_begin - 1; // And 98% let (begin_val, end_val) = if is_curve_descending(curve) { (value_cap, 0.) 
} else { (0., value_cap) }; let val = curve[at_begin].as_(); let slope = (val - begin_val) / at_begin as f32; let beta = val - slope * at_begin as f32; if T::FINITE { for v in curve.iter_mut().take(at_begin) { *v = (v.as_() * slope + beta) .round() .min(value_cap) .max(0.0) .as_(); } } else { for v in curve.iter_mut().take(at_begin) { *v = (v.as_() * slope + beta).min(value_cap).max(0.0).as_(); } } let val = curve[at_end].as_(); let slope = (end_val - val) / at_begin as f32; let beta = val - slope * at_end as f32; if T::FINITE { for v in curve.iter_mut().skip(at_end) { *v = (v.as_() * slope + beta) .round() .min(value_cap) .max(0.0) .as_(); } } else { for v in curve.iter_mut().skip(at_end) { *v = (v.as_() * slope + beta).min(value_cap).max(0.0).as_(); } } } moxcms-0.7.7/src/math/mod.rs000064400000000000000000000047001046102023000140130ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #![allow(clippy::approx_constant, clippy::manual_range_contains)] use num_traits::Num; #[inline(always)] pub const fn rounding_div_ceil(value: i32, div: i32) -> i32 { (value + div - 1) / div } // Generic function for max #[inline(always)] pub(crate) fn m_max(a: T, b: T) -> T { if a > b { a } else { b } } // Generic function for min #[inline(always)] pub(crate) fn m_min(a: T, b: T) -> T { if a < b { a } else { b } } #[inline] pub(crate) fn m_clamp(a: T, min: T, max: T) -> T { if a > max { max } else if a >= min { a } else { // a < min or a is NaN min } } pub trait FusedMultiplyAdd { fn mla(&self, b: T, c: T) -> T; } pub(crate) trait FusedMultiplyNegAdd { fn neg_mla(&self, b: T, c: T) -> T; } moxcms-0.7.7/src/matrix.rs000064400000000000000000001036571046102023000136220ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. 
Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ use crate::math::{FusedMultiplyAdd, FusedMultiplyNegAdd}; use crate::mlaf::{mlaf, neg_mlaf}; use crate::reader::s15_fixed16_number_to_double; use num_traits::{AsPrimitive, MulAdd}; use std::ops::{Add, Div, Mul, Neg, Shr, Sub}; /// Vector math helper #[repr(transparent)] #[derive(Copy, Clone, Debug, Default)] pub struct Vector3 { pub v: [T; 3], } /// Vector math helper #[repr(transparent)] #[derive(Copy, Clone, Debug, Default)] pub struct Vector4 { pub v: [T; 4], } pub type Vector4f = Vector4; pub type Vector4d = Vector4; pub type Vector4i = Vector4; pub type Vector3f = Vector3; pub type Vector3d = Vector3; pub type Vector3i = Vector3; pub type Vector3u = Vector3; impl PartialEq for Vector3 where T: AsPrimitive, { #[inline(always)] fn eq(&self, other: &Self) -> bool { const TOLERANCE: f32 = 0.0001f32; let dx = (self.v[0].as_() - other.v[0].as_()).abs(); let dy = (self.v[1].as_() - other.v[1].as_()).abs(); let dz = (self.v[2].as_() - other.v[2].as_()).abs(); dx < TOLERANCE && dy < TOLERANCE && dz < TOLERANCE } } impl Vector3 { #[inline(always)] pub fn to_(self) -> Vector3 where T: AsPrimitive, { Vector3 { v: [self.v[0].as_(), self.v[1].as_(), self.v[2].as_()], } } } impl Mul> for Vector3 where T: Mul + Copy, { type Output = Vector3; #[inline(always)] fn mul(self, rhs: Vector3) -> Self::Output { Self { v: [ self.v[0] * rhs.v[0], self.v[1] * rhs.v[1], self.v[2] * rhs.v[2], ], } } } impl Shr for Vector3 where T: Shr, { type Output = Vector3; fn shr(self, rhs: i32) -> Self::Output { Self { v: [self.v[0] >> rhs, self.v[1] >> rhs, self.v[2] >> rhs], } } } impl Shr for Vector4 where T: Shr, { type Output = Vector4; fn shr(self, rhs: i32) -> Self::Output { Self { v: [ self.v[0] >> rhs, self.v[1] >> rhs, self.v[2] >> rhs, self.v[3] >> rhs, ], } } } impl Mul> for Vector4 where T: Mul + Copy, { type Output = Vector4; #[inline(always)] fn mul(self, rhs: Vector4) -> Self::Output { Self { v: [ self.v[0] * rhs.v[0], self.v[1] * rhs.v[1], self.v[2] * rhs.v[2], self.v[3] * 
rhs.v[3], ], } } } impl Mul for Vector3 where T: Mul + Copy, { type Output = Vector3; #[inline(always)] fn mul(self, rhs: T) -> Self::Output { Self { v: [self.v[0] * rhs, self.v[1] * rhs, self.v[2] * rhs], } } } impl Vector3 { #[inline(always)] const fn const_mul_vector(self, v: Vector3f) -> Vector3f { Vector3f { v: [self.v[0] * v.v[0], self.v[1] * v.v[1], self.v[2] * v.v[2]], } } } impl Vector3d { #[inline(always)] const fn const_mul_vector(self, v: Vector3d) -> Vector3d { Vector3d { v: [self.v[0] * v.v[0], self.v[1] * v.v[1], self.v[2] * v.v[2]], } } } impl Vector3 { pub fn cast(&self) -> Vector3 where T: AsPrimitive, { Vector3:: { v: [self.v[0].as_(), self.v[1].as_(), self.v[2].as_()], } } } impl Mul for Vector4 where T: Mul + Copy, { type Output = Vector4; #[inline(always)] fn mul(self, rhs: T) -> Self::Output { Self { v: [ self.v[0] * rhs, self.v[1] * rhs, self.v[2] * rhs, self.v[3] * rhs, ], } } } impl + Add + MulAdd> FusedMultiplyAdd> for Vector3 { #[inline(always)] fn mla(&self, b: Vector3, c: Vector3) -> Vector3 { let x0 = mlaf(self.v[0], b.v[0], c.v[0]); let x1 = mlaf(self.v[1], b.v[1], c.v[1]); let x2 = mlaf(self.v[2], b.v[2], c.v[2]); Vector3 { v: [x0, x1, x2] } } } impl + Add + MulAdd + Neg> FusedMultiplyNegAdd> for Vector3 { #[inline(always)] fn neg_mla(&self, b: Vector3, c: Vector3) -> Vector3 { let x0 = neg_mlaf(self.v[0], b.v[0], c.v[0]); let x1 = neg_mlaf(self.v[1], b.v[1], c.v[1]); let x2 = neg_mlaf(self.v[2], b.v[2], c.v[2]); Vector3 { v: [x0, x1, x2] } } } impl + Add + MulAdd> FusedMultiplyAdd> for Vector4 { #[inline(always)] fn mla(&self, b: Vector4, c: Vector4) -> Vector4 { let x0 = mlaf(self.v[0], b.v[0], c.v[0]); let x1 = mlaf(self.v[1], b.v[1], c.v[1]); let x2 = mlaf(self.v[2], b.v[2], c.v[2]); let x3 = mlaf(self.v[3], b.v[3], c.v[3]); Vector4 { v: [x0, x1, x2, x3], } } } impl + Add + MulAdd + Neg> FusedMultiplyNegAdd> for Vector4 { #[inline(always)] fn neg_mla(&self, b: Vector4, c: Vector4) -> Vector4 { let x0 = neg_mlaf(self.v[0], 
b.v[0], c.v[0]); let x1 = neg_mlaf(self.v[1], b.v[1], c.v[1]); let x2 = neg_mlaf(self.v[2], b.v[2], c.v[2]); let x3 = neg_mlaf(self.v[3], b.v[3], c.v[3]); Vector4 { v: [x0, x1, x2, x3], } } } impl From for Vector3 where T: Copy, { fn from(value: T) -> Self { Self { v: [value, value, value], } } } impl From for Vector4 where T: Copy, { fn from(value: T) -> Self { Self { v: [value, value, value, value], } } } impl Add> for Vector3 where T: Add + Copy, { type Output = Vector3; #[inline(always)] fn add(self, rhs: Vector3) -> Self::Output { Self { v: [ self.v[0] + rhs.v[0], self.v[1] + rhs.v[1], self.v[2] + rhs.v[2], ], } } } impl Add> for Vector4 where T: Add + Copy, { type Output = Vector4; #[inline(always)] fn add(self, rhs: Vector4) -> Self::Output { Self { v: [ self.v[0] + rhs.v[0], self.v[1] + rhs.v[1], self.v[2] + rhs.v[2], self.v[3] + rhs.v[3], ], } } } impl Add for Vector3 where T: Add + Copy, { type Output = Vector3; #[inline(always)] fn add(self, rhs: T) -> Self::Output { Self { v: [self.v[0] + rhs, self.v[1] + rhs, self.v[2] + rhs], } } } impl Add for Vector4 where T: Add + Copy, { type Output = Vector4; #[inline(always)] fn add(self, rhs: T) -> Self::Output { Self { v: [ self.v[0] + rhs, self.v[1] + rhs, self.v[2] + rhs, self.v[3] + rhs, ], } } } impl Sub> for Vector3 where T: Sub + Copy, { type Output = Vector3; #[inline(always)] fn sub(self, rhs: Vector3) -> Self::Output { Self { v: [ self.v[0] - rhs.v[0], self.v[1] - rhs.v[1], self.v[2] - rhs.v[2], ], } } } impl Sub> for Vector4 where T: Sub + Copy, { type Output = Vector4; #[inline(always)] fn sub(self, rhs: Vector4) -> Self::Output { Self { v: [ self.v[0] - rhs.v[0], self.v[1] - rhs.v[1], self.v[2] - rhs.v[2], self.v[3] - rhs.v[3], ], } } } /// Matrix math helper #[repr(C)] #[derive(Copy, Clone, Debug, Default)] pub struct Matrix3f { pub v: [[f32; 3]; 3], } /// Matrix math helper #[repr(C)] #[derive(Copy, Clone, Debug, Default)] pub struct Matrix3d { pub v: [[f64; 3]; 3], } #[repr(C)] #[derive(Copy, 
Clone, Debug, Default)]
pub struct Matrix3<T> {
    pub v: [[T; 3]; 3],
}

impl<T: Copy> Matrix3<T> {
    /// Returns the transposed matrix (rows and columns swapped).
    #[inline]
    #[allow(dead_code)]
    pub(crate) fn transpose(&self) -> Matrix3<T> {
        Matrix3 {
            v: [
                [self.v[0][0], self.v[1][0], self.v[2][0]],
                [self.v[0][1], self.v[1][1], self.v[2][1]],
                [self.v[0][2], self.v[1][2], self.v[2][2]],
            ],
        }
    }
}

/// 4x4 single-precision matrix stored as four rows.
#[repr(C)]
#[derive(Copy, Clone, Debug, Default)]
pub struct Matrix4f {
    pub v: [[f32; 4]; 4],
}

// NOTE(review): the three constants below are presumably RGB -> XYZ colorant
// matrices for the named colour spaces — confirm against the callers.
pub const SRGB_MATRIX: Matrix3d = Matrix3d {
    v: [
        [
            s15_fixed16_number_to_double(0x6FA2),
            s15_fixed16_number_to_double(0x6299),
            s15_fixed16_number_to_double(0x24A0),
        ],
        [
            s15_fixed16_number_to_double(0x38F5),
            s15_fixed16_number_to_double(0xB785),
            s15_fixed16_number_to_double(0x0F84),
        ],
        [
            s15_fixed16_number_to_double(0x0390),
            s15_fixed16_number_to_double(0x18DA),
            s15_fixed16_number_to_double(0xB6CF),
        ],
    ],
};

pub const DISPLAY_P3_MATRIX: Matrix3d = Matrix3d {
    v: [
        [0.515102, 0.291965, 0.157153],
        [0.241182, 0.692236, 0.0665819],
        [-0.00104941, 0.0418818, 0.784378],
    ],
};

pub const BT2020_MATRIX: Matrix3d = Matrix3d {
    v: [
        [0.673459, 0.165661, 0.125100],
        [0.279033, 0.675338, 0.0456288],
        [-0.00193139, 0.0299794, 0.797162],
    ],
};

impl Matrix4f {
    /// Determinant by cofactor expansion along the first row.
    ///
    /// Returns `None` only when the determinant itself is exactly zero, the
    /// same convention `Matrix3f::determinant` uses.
    ///
    /// A singular 3x3 minor merely contributes `0` to the expansion. The
    /// previous code propagated the minor's `None` with `?`, so any matrix
    /// having a single singular minor along the first row (the identity
    /// matrix included) was wrongly reported as having no determinant.
    #[inline]
    pub fn determinant(&self) -> Option<f32> {
        let [r0, r1, r2, r3] = self.v;

        // Determinant of the minor that keeps rows 1..=3 and the given columns.
        let minor = |c0: usize, c1: usize, c2: usize| -> f32 {
            Matrix3f {
                v: [
                    [r1[c0], r1[c1], r1[c2]],
                    [r2[c0], r2[c1], r2[c2]],
                    [r3[c0], r3[c1], r3[c2]],
                ],
            }
            .determinant()
            .unwrap_or(0.)
        };

        // Apply cofactor expansion on the first row
        let det = r0[0] * minor(1, 2, 3) - r0[1] * minor(0, 2, 3) + r0[2] * minor(0, 1, 3)
            - r0[3] * minor(0, 1, 2);
        if det == 0. {
            return None;
        }
        Some(det)
    }
}

impl Matrix3f {
    /// Returns the transposed matrix (rows and columns swapped).
    #[inline]
    pub fn transpose(&self) -> Matrix3f {
        Matrix3f {
            v: [
                [self.v[0][0], self.v[1][0], self.v[2][0]],
                [self.v[0][1], self.v[1][1], self.v[2][1]],
                [self.v[0][2], self.v[1][2], self.v[2][2]],
            ],
        }
    }

    pub const IDENTITY: Matrix3f = Matrix3f {
        v: [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]],
    };

    /// Approximate equality: `true` when no element differs from its
    /// counterpart in `other` by more than `0.001`.
    #[inline]
    pub const fn test_equality(&self, other: Matrix3f) -> bool {
        const TOLERANCE: f32 = 0.001f32;
        let diff_r_x = (self.v[0][0] - other.v[0][0]).abs();
        let diff_r_y = (self.v[0][1] - other.v[0][1]).abs();
        let diff_r_z = (self.v[0][2] - other.v[0][2]).abs();

        if diff_r_x > TOLERANCE || diff_r_y > TOLERANCE || diff_r_z > TOLERANCE {
            return false;
        }

        let diff_g_x = (self.v[1][0] - other.v[1][0]).abs();
        let diff_g_y = (self.v[1][1] - other.v[1][1]).abs();
        let diff_g_z = (self.v[1][2] - other.v[1][2]).abs();

        if diff_g_x > TOLERANCE || diff_g_y > TOLERANCE || diff_g_z > TOLERANCE {
            return false;
        }

        let diff_b_x = (self.v[2][0] - other.v[2][0]).abs();
        let diff_b_y = (self.v[2][1] - other.v[2][1]).abs();
        let diff_b_z = (self.v[2][2] - other.v[2][2]).abs();

        if diff_b_x > TOLERANCE || diff_b_y > TOLERANCE || diff_b_z > TOLERANCE {
            return false;
        }

        true
    }

    /// Determinant by the rule of Sarrus; `None` when it is exactly zero.
    #[inline]
    pub const fn determinant(&self) -> Option<f32> {
        let v = self.v;
        let a0 = v[0][0] * v[1][1] * v[2][2];
        let a1 = v[0][1] * v[1][2] * v[2][0];
        let a2 = v[0][2] * v[1][0] * v[2][1];

        let s0 = v[0][2] * v[1][1] * v[2][0];
        let s1 = v[0][1] * v[1][0] * v[2][2];
        let s2 = v[0][0] * v[1][2] * v[2][1];

        let j = a0 + a1 + a2 - s0 - s1 - s2;
        if j == 0. {
            return None;
        }
        Some(j)
    }

    /// Inverse of the matrix; falls back to `Matrix3f::IDENTITY` when
    /// `determinant` returns `None`.
    #[inline]
    pub const fn inverse(&self) -> Self {
        let v = self.v;
        let det = self.determinant();
        match det {
            None => Matrix3f::IDENTITY,
            Some(determinant) => {
                let det = 1.
/ determinant; let a = v[0][0]; let b = v[0][1]; let c = v[0][2]; let d = v[1][0]; let e = v[1][1]; let f = v[1][2]; let g = v[2][0]; let h = v[2][1]; let i = v[2][2]; Matrix3f { v: [ [ (e * i - f * h) * det, (c * h - b * i) * det, (b * f - c * e) * det, ], [ (f * g - d * i) * det, (a * i - c * g) * det, (c * d - a * f) * det, ], [ (d * h - e * g) * det, (b * g - a * h) * det, (a * e - b * d) * det, ], ], } } } } #[inline] pub fn mul_row(&self, rhs: f32) -> Self { if R == 0 { Self { v: [(Vector3f { v: self.v[0] } * rhs).v, self.v[1], self.v[2]], } } else if R == 1 { Self { v: [self.v[0], (Vector3f { v: self.v[1] } * rhs).v, self.v[2]], } } else if R == 2 { Self { v: [self.v[0], self.v[1], (Vector3f { v: self.v[2] } * rhs).v], } } else { unimplemented!() } } #[inline] pub const fn mul_row_vector(&self, rhs: Vector3f) -> Self { if R == 0 { Self { v: [ (Vector3f { v: self.v[0] }.const_mul_vector(rhs)).v, self.v[1], self.v[2], ], } } else if R == 1 { Self { v: [ self.v[0], (Vector3f { v: self.v[1] }.const_mul_vector(rhs)).v, self.v[2], ], } } else if R == 2 { Self { v: [ self.v[0], self.v[1], (Vector3f { v: self.v[2] }.const_mul_vector(rhs)).v, ], } } else { unimplemented!() } } #[inline] pub const fn mul_vector(&self, other: Vector3f) -> Vector3f { let x = self.v[0][1] * other.v[1] + self.v[0][2] * other.v[2] + self.v[0][0] * other.v[0]; let y = self.v[1][0] * other.v[0] + self.v[1][1] * other.v[1] + self.v[1][2] * other.v[2]; let z = self.v[2][0] * other.v[0] + self.v[2][1] * other.v[1] + self.v[2][2] * other.v[2]; Vector3f { v: [x, y, z] } } /// Multiply using FMA #[inline] pub fn f_mul_vector(&self, other: Vector3f) -> Vector3f { let x = mlaf( mlaf(self.v[0][1] * other.v[1], self.v[0][2], other.v[2]), self.v[0][0], other.v[0], ); let y = mlaf( mlaf(self.v[1][0] * other.v[0], self.v[1][1], other.v[1]), self.v[1][2], other.v[2], ); let z = mlaf( mlaf(self.v[2][0] * other.v[0], self.v[2][1], other.v[1]), self.v[2][2], other.v[2], ); Vector3f { v: [x, y, z] } } 
/// Multiplies `self` by `other`, accumulating every row-by-column dot product with `mlaf`.
    #[inline]
    pub fn mat_mul(&self, other: Matrix3f) -> Self {
        let mut result = Matrix3f::default();
        for i in 0..3 {
            for j in 0..3 {
                result.v[i][j] = mlaf(
                    mlaf(self.v[i][0] * other.v[0][j], self.v[i][1], other.v[1][j]),
                    self.v[i][2],
                    other.v[2][j],
                );
            }
        }
        result
    }

    /// `const` matrix product written with plain `*` / `+` and `while` loops.
    #[inline]
    pub const fn mat_mul_const(&self, other: Matrix3f) -> Self {
        let mut result = Matrix3f { v: [[0f32; 3]; 3] };
        let mut i = 0usize;
        while i < 3 {
            let mut j = 0usize;
            while j < 3 {
                result.v[i][j] = self.v[i][0] * other.v[0][j]
                    + self.v[i][1] * other.v[1][j]
                    + self.v[i][2] * other.v[2][j];
                j += 1;
            }
            i += 1;
        }
        result
    }

    /// Converts every element to `f64`.
    #[inline]
    pub const fn to_f64(&self) -> Matrix3d {
        Matrix3d {
            v: [
                [
                    self.v[0][0] as f64,
                    self.v[0][1] as f64,
                    self.v[0][2] as f64,
                ],
                [
                    self.v[1][0] as f64,
                    self.v[1][1] as f64,
                    self.v[1][2] as f64,
                ],
                [
                    self.v[2][0] as f64,
                    self.v[2][1] as f64,
                    self.v[2][2] as f64,
                ],
            ],
        }
    }
}

impl Matrix3d {
    /// Returns the transposed matrix (rows and columns swapped).
    #[inline]
    pub fn transpose(&self) -> Matrix3d {
        Matrix3d {
            v: [
                [self.v[0][0], self.v[1][0], self.v[2][0]],
                [self.v[0][1], self.v[1][1], self.v[2][1]],
                [self.v[0][2], self.v[1][2], self.v[2][2]],
            ],
        }
    }

    /// The 3x3 identity matrix.
    pub const IDENTITY: Matrix3d = Matrix3d {
        v: [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]],
    };

    /// Approximate equality: `true` when no element differs from its counterpart in
    /// `other` by more than `0.001`.
    #[inline]
    pub const fn test_equality(&self, other: Matrix3d) -> bool {
        const TOLERANCE: f64 = 0.001f64;
        let diff_r_x = (self.v[0][0] - other.v[0][0]).abs();
        let diff_r_y = (self.v[0][1] - other.v[0][1]).abs();
        let diff_r_z = (self.v[0][2] - other.v[0][2]).abs();

        if diff_r_x > TOLERANCE || diff_r_y > TOLERANCE || diff_r_z > TOLERANCE {
            return false;
        }

        let diff_g_x = (self.v[1][0] - other.v[1][0]).abs();
        let diff_g_y = (self.v[1][1] - other.v[1][1]).abs();
        let diff_g_z = (self.v[1][2] - other.v[1][2]).abs();

        if diff_g_x > TOLERANCE || diff_g_y > TOLERANCE || diff_g_z > TOLERANCE {
            return false;
        }

        let diff_b_x = (self.v[2][0] - other.v[2][0]).abs();
        let diff_b_y = (self.v[2][1] - other.v[2][1]).abs();
        let diff_b_z = (self.v[2][2] - other.v[2][2]).abs();

        if diff_b_x > TOLERANCE || diff_b_y > TOLERANCE || diff_b_z > TOLERANCE {
            return false;
        }

        true
    }

    /// Computes the determinant by cofactor expansion (rule of Sarrus).
    ///
    /// Returns `None` when the determinant is exactly zero, `Some(det)` otherwise.
    #[inline]
    pub const fn determinant(&self) -> Option<f64> {
        let v = self.v;
        let a0 = v[0][0] * v[1][1] * v[2][2];
        let a1 = v[0][1] * v[1][2] * v[2][0];
        let a2 = v[0][2] * v[1][0] * v[2][1];

        let s0 = v[0][2] * v[1][1] * v[2][0];
        let s1 = v[0][1] * v[1][0] * v[2][2];
        let s2 = v[0][0] * v[1][2] * v[2][1];

        let j = a0 + a1 + a2 - s0 - s1 - s2;
        if j == 0. {
            return None;
        }
        Some(j)
    }

    /// Inverts the matrix through its adjugate scaled by `1 / determinant`.
    ///
    /// When `determinant()` yields `None` (singular matrix) the identity matrix is
    /// returned instead of an error.
    #[inline]
    pub const fn inverse(&self) -> Self {
        let v = self.v;
        let det = self.determinant();
        match det {
            None => Matrix3d::IDENTITY,
            Some(determinant) => {
                let det = 1. / determinant;
                let a = v[0][0];
                let b = v[0][1];
                let c = v[0][2];
                let d = v[1][0];
                let e = v[1][1];
                let f = v[1][2];
                let g = v[2][0];
                let h = v[2][1];
                let i = v[2][2];

                Matrix3d {
                    v: [
                        [
                            (e * i - f * h) * det,
                            (c * h - b * i) * det,
                            (b * f - c * e) * det,
                        ],
                        [
                            (f * g - d * i) * det,
                            (a * i - c * g) * det,
                            (c * d - a * f) * det,
                        ],
                        [
                            (d * h - e * g) * det,
                            (b * g - a * h) * det,
                            (a * e - b * d) * det,
                        ],
                    ],
                }
            }
        }
    }

    /// Returns a copy with row `R` multiplied by the scalar `rhs`.
    ///
    /// # Panics
    ///
    /// Hits `unimplemented!()` when `R` is not 0, 1 or 2.
    // NOTE(review): the `const R` declaration was missing from the extracted text; the
    // `usize` type is reconstructed from the `R == 0/1/2` comparisons - confirm.
    #[inline]
    pub fn mul_row<const R: usize>(&self, rhs: f64) -> Self {
        if R == 0 {
            Self {
                v: [(Vector3d { v: self.v[0] } * rhs).v, self.v[1], self.v[2]],
            }
        } else if R == 1 {
            Self {
                v: [self.v[0], (Vector3d { v: self.v[1] } * rhs).v, self.v[2]],
            }
        } else if R == 2 {
            Self {
                v: [self.v[0], self.v[1], (Vector3d { v: self.v[2] } * rhs).v],
            }
        } else {
            unimplemented!()
        }
    }

    /// Returns a copy with row `R` combined with `rhs` through `const_mul_vector`.
    ///
    /// # Panics
    ///
    /// Hits `unimplemented!()` when `R` is not 0, 1 or 2.
    // NOTE(review): `const R: usize` reconstructed as for `mul_row` - confirm.
    #[inline]
    pub const fn mul_row_vector<const R: usize>(&self, rhs: Vector3d) -> Self {
        if R == 0 {
            Self {
                v: [
                    (Vector3d { v: self.v[0] }.const_mul_vector(rhs)).v,
                    self.v[1],
                    self.v[2],
                ],
            }
        } else if R == 1 {
            Self {
                v: [
                    self.v[0],
                    (Vector3d { v: self.v[1] }.const_mul_vector(rhs)).v,
                    self.v[2],
                ],
            }
        } else if R == 2 {
            Self {
                v: [
                    self.v[0],
                    self.v[1],
                    (Vector3d { v: self.v[2] }.const_mul_vector(rhs)).v,
                ],
            }
        } else {
            unimplemented!()
        }
    }

    /// Matrix-by-column-vector product: each output component is the dot product of the
    /// matching row with `other`.
    #[inline]
    pub const fn mul_vector(&self, other: Vector3d) -> Vector3d {
        let x = self.v[0][1] * other.v[1] + self.v[0][2] * other.v[2] + self.v[0][0] * other.v[0];
        let y = self.v[1][0] * other.v[0] + self.v[1][1] * other.v[1] + self.v[1][2] * other.v[2];
        let z = self.v[2][0] * other.v[0] + self.v[2][1] * other.v[1] + self.v[2][2] * other.v[2];
        Vector3::<f64> { v: [x, y, z] }
    }

    /// Multiplies `self` by `other`, accumulating every row-by-column dot product with `mlaf`.
    #[inline]
    pub fn mat_mul(&self, other: Matrix3d) -> Self {
        let mut result = Matrix3d::default();
        for i in 0..3 {
            for j in 0..3 {
                result.v[i][j] = mlaf(
                    mlaf(self.v[i][0] * other.v[0][j], self.v[i][1], other.v[1][j]),
                    self.v[i][2],
                    other.v[2][j],
                );
            }
        }
        result
    }

    /// `const` matrix product written with plain `*` / `+` and `while` loops.
    #[inline]
    pub const fn mat_mul_const(&self, other: Matrix3d) -> Self {
        let mut result = Matrix3d { v: [[0.; 3]; 3] };
        let mut i = 0usize;
        while i < 3 {
            let mut j = 0usize;
            while j < 3 {
                result.v[i][j] = self.v[i][0] * other.v[0][j]
                    + self.v[i][1] * other.v[1][j]
                    + self.v[i][2] * other.v[2][j];
                j += 1;
            }
            i += 1;
        }
        result
    }

    /// Converts every element to `f32` (plain `as` cast).
    #[inline]
    pub const fn to_f32(&self) -> Matrix3f {
        Matrix3f {
            v: [
                [
                    self.v[0][0] as f32,
                    self.v[0][1] as f32,
                    self.v[0][2] as f32,
                ],
                [
                    self.v[1][0] as f32,
                    self.v[1][1] as f32,
                    self.v[1][2] as f32,
                ],
                [
                    self.v[2][0] as f32,
                    self.v[2][1] as f32,
                    self.v[2][2] as f32,
                ],
            ],
        }
    }
}

/// `a * b` on matrices forwards to [`Matrix3f::mat_mul`].
impl Mul<Matrix3f> for Matrix3f {
    type Output = Matrix3f;

    #[inline]
    fn mul(self, rhs: Matrix3f) -> Self::Output {
        self.mat_mul(rhs)
    }
}

/// `a * b` on matrices forwards to [`Matrix3d::mat_mul`].
impl Mul<Matrix3d> for Matrix3d {
    type Output = Matrix3d;

    #[inline]
    fn mul(self, rhs: Matrix3d) -> Self::Output {
        self.mat_mul(rhs)
    }
}

/// Holds CIE XYZ representation
#[repr(C)]
#[derive(Clone, Debug, Copy, Default)]
pub struct Xyz {
    pub x: f32,
    pub y: f32,
    pub z: f32,
}

impl Xyz {
    /// Converts to `[x, y, Y]`: `x` and `y` are the components divided by `x + y + z`,
    /// the third entry is the untouched `y` field.
    ///
    /// When the component sum is exactly zero the result is `[0, 0, y]`.
    #[inline]
    pub fn to_xyy(&self) -> [f32; 3] {
        let sums = self.x + self.y + self.z;
        if sums == 0. {
            return [0., 0., self.y];
        }
        let x = self.x / sums;
        let y = self.y / sums;
        let yb = self.y;
        [x, y, yb]
    }

    /// Builds an `Xyz` from `[x, y, Y]`.
    ///
    /// When `xyy[1]` is zero the scale factor is forced to zero, so `x` and `z` come out
    /// as zero instead of dividing by zero.
    #[inline]
    pub fn from_xyy(xyy: [f32; 3]) -> Xyz {
        let reciprocal = if xyy[1] != 0. {
            1. / xyy[1] * xyy[2]
        } else {
            0.
        };
        let x = xyy[0] * reciprocal;
        let y = xyy[2];
        let z = (1. - xyy[0] - xyy[1]) * reciprocal;
        Xyz { x, y, z }
    }
}

/// Holds CIE XYZ representation, in double precision
#[repr(C)]
#[derive(Clone, Debug, Copy, Default)]
pub struct Xyzd {
    pub x: f64,
    pub y: f64,
    pub z: f64,
}

/// Generates the shared API of an XYZ triple type.
///
/// * `$xyz_name` - the struct to implement for (fields `x`, `y`, `z`).
/// * `$im_type`  - its scalar type.
/// * `$matrix`   - the matching 3x3 matrix type.
macro_rules! define_xyz {
    ($xyz_name:ident, $im_type: ident, $matrix: ident) => {
        /// Approximate equality: every component must differ by less than `0.0001`.
        impl PartialEq for $xyz_name {
            #[inline]
            fn eq(&self, other: &Self) -> bool {
                const TOLERANCE: $im_type = 0.0001;
                let dx = (self.x - other.x).abs();
                let dy = (self.y - other.y).abs();
                let dz = (self.z - other.z).abs();
                dx < TOLERANCE && dy < TOLERANCE && dz < TOLERANCE
            }
        }

        impl $xyz_name {
            /// Creates a value from its three components.
            #[inline]
            pub const fn new(x: $im_type, y: $im_type, z: $im_type) -> Self {
                Self { x, y, z }
            }

            /// Returns the components as a single-precision vector.
            #[inline]
            pub const fn to_vector(self) -> Vector3f {
                Vector3f {
                    v: [self.x as f32, self.y as f32, self.z as f32],
                }
            }

            /// Returns the components as a double-precision vector.
            #[inline]
            pub const fn to_vector_d(self) -> Vector3d {
                Vector3d {
                    v: [self.x as f64, self.y as f64, self.z as f64],
                }
            }

            /// Multiplies `matrix` by this value treated as a column vector.
            #[inline]
            pub fn matrix_mul(&self, matrix: $matrix) -> Self {
                let x = mlaf(
                    mlaf(self.x * matrix.v[0][0], self.y, matrix.v[0][1]),
                    self.z,
                    matrix.v[0][2],
                );
                let y = mlaf(
                    mlaf(self.x * matrix.v[1][0], self.y, matrix.v[1][1]),
                    self.z,
                    matrix.v[1][2],
                );
                let z = mlaf(
                    mlaf(self.x * matrix.v[2][0], self.y, matrix.v[2][1]),
                    self.z,
                    matrix.v[2][2],
                );
                Self::new(x, y, z)
            }

            /// Applies the matrix `rgb_to_xyz` to the column vector `(r, g, b)`.
            #[inline]
            pub fn from_linear_rgb(rgb: crate::Rgb<$im_type>, rgb_to_xyz: $matrix) -> Self {
                let r = rgb.r;
                let g = rgb.g;
                let b = rgb.b;

                let transform = rgb_to_xyz;

                let new_r = mlaf(
                    mlaf(r * transform.v[0][0], g, transform.v[0][1]),
                    b,
                    transform.v[0][2],
                );

                let new_g = mlaf(
                    mlaf(r * transform.v[1][0], g, transform.v[1][1]),
                    b,
                    transform.v[1][2],
                );

                let new_b = mlaf(
                    mlaf(r * transform.v[2][0], g, transform.v[2][1]),
                    b,
                    transform.v[2][2],
                );

                $xyz_name::new(new_r, new_g, new_b)
            }

            /// Rescales the value so that `y == 1`.
            ///
            /// When `y` is exactly zero the result is `(0, 1, 0)`.
            #[inline]
            pub fn normalize(self) -> Self {
                if self.y == 0. {
                    return Self {
                        x: 0.,
                        y: 1.0,
                        z: 0.0,
                    };
                }
                let reciprocal = 1. / self.y;
                Self {
                    x: self.x * reciprocal,
                    y: 1.0,
                    z: self.z * reciprocal,
                }
            }

            /// Applies the supplied matrix to the column vector `(x, y, z)` and returns
            /// the result as an RGB triple.
            ///
            /// The matrix is used as given; no inversion happens here.
            #[inline]
            pub fn to_linear_rgb(self, rgb_to_xyz: $matrix) -> crate::Rgb<$im_type> {
                let x = self.x;
                let y = self.y;
                let z = self.z;

                let transform = rgb_to_xyz;

                let new_r = mlaf(
                    mlaf(x * transform.v[0][0], y, transform.v[0][1]),
                    z,
                    transform.v[0][2],
                );

                let new_g = mlaf(
                    mlaf(x * transform.v[1][0], y, transform.v[1][1]),
                    z,
                    transform.v[1][2],
                );

                let new_b = mlaf(
                    mlaf(x * transform.v[2][0], y, transform.v[2][1]),
                    z,
                    transform.v[2][2],
                );

                crate::Rgb::<$im_type>::new(new_r, new_g, new_b)
            }
        }

        /// Component-wise scaling by a scalar.
        impl Mul<$im_type> for $xyz_name {
            type Output = $xyz_name;

            #[inline]
            fn mul(self, rhs: $im_type) -> Self::Output {
                Self {
                    x: self.x * rhs,
                    y: self.y * rhs,
                    z: self.z * rhs,
                }
            }
        }

        /// `value * matrix` forwards to `matrix_mul`.
        impl Mul<$matrix> for $xyz_name {
            type Output = $xyz_name;

            #[inline]
            fn mul(self, rhs: $matrix) -> Self::Output {
                self.matrix_mul(rhs)
            }
        }

        /// Component-wise product.
        impl Mul<$xyz_name> for $xyz_name {
            type Output = $xyz_name;

            #[inline]
            fn mul(self, rhs: $xyz_name) -> Self::Output {
                Self {
                    x: self.x * rhs.x,
                    y: self.y * rhs.y,
                    z: self.z * rhs.z,
                }
            }
        }

        /// Component-wise quotient.
        impl Div<$xyz_name> for $xyz_name {
            type Output = $xyz_name;

            #[inline]
            fn div(self, rhs: $xyz_name) -> Self::Output {
                Self {
                    x: self.x / rhs.x,
                    y: self.y / rhs.y,
                    z: self.z / rhs.z,
                }
            }
        }

        /// Component-wise division by a scalar.
        impl Div<$im_type> for $xyz_name {
            type Output = $xyz_name;

            #[inline]
            fn div(self, rhs: $im_type) -> Self::Output {
                Self {
                    x: self.x / rhs,
                    y: self.y / rhs,
                    z: self.z / rhs,
                }
            }
        }
    };
}

impl Xyz {
    /// Widens the components to `f64`.
    pub fn to_xyzd(self) -> Xyzd {
        Xyzd {
            x: self.x as f64,
            y: self.y as f64,
            z: self.z as f64,
        }
    }
}

impl Xyzd {
    /// Narrows the components to `f32` (plain `as` cast).
    pub fn to_xyz(self) -> Xyz {
        Xyz {
            x: self.x as f32,
            y: self.y as f32,
            z: self.z as f32,
        }
    }

    /// Returns a copy of `self`; mirrors `Xyz::to_xyzd` so both types expose the same call.
    pub fn to_xyzd(self) -> Xyzd {
        Xyzd {
            x: self.x,
            y: self.y,
            z: self.z,
        }
    }
}

define_xyz!(Xyz, f32, Matrix3f);
define_xyz!(Xyzd, f64, Matrix3d);
moxcms-0.7.7/src/mlaf.rs000064400000000000000000000053751046102023000132330ustar 00000000000000/*
 * // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved.
* // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/
use num_traits::MulAdd;
use std::ops::{Add, Mul, Neg};

/// Multiply-accumulate: returns `acc + a * b`.
///
/// This variant is compiled when the target statically enables FMA (x86 / x86_64 with
/// the `fma` target feature, or aarch64 with `neon`) and goes through `MulAdd::mul_add`.
#[cfg(any(
    all(
        any(target_arch = "x86", target_arch = "x86_64"),
        target_feature = "fma"
    ),
    all(target_arch = "aarch64", target_feature = "neon")
))]
#[inline(always)]
pub(crate) fn mlaf<T: Copy + Mul<T, Output = T> + Add<T, Output = T> + MulAdd<T, Output = T>>(
    acc: T,
    a: T,
    b: T,
) -> T {
    MulAdd::mul_add(a, b, acc)
}

/// Multiply-accumulate: returns `acc + a * b`.
///
/// Fallback for every other target: a separate multiply followed by an add. The
/// `MulAdd` bound is kept so both variants share one signature.
#[inline(always)]
#[cfg(not(any(
    all(
        any(target_arch = "x86", target_arch = "x86_64"),
        target_feature = "fma"
    ),
    all(target_arch = "aarch64", target_feature = "neon")
)))]
pub(crate) fn mlaf<T: Copy + Mul<T, Output = T> + Add<T, Output = T> + MulAdd<T, Output = T>>(
    acc: T,
    a: T,
    b: T,
) -> T {
    acc + a * b
}

/// Negated multiply-accumulate: returns `acc - a * b`, implemented as `mlaf(acc, a, -b)`.
#[inline(always)]
pub(crate) fn neg_mlaf<
    T: Copy + Mul<T, Output = T> + Add<T, Output = T> + MulAdd<T, Output = T> + Neg<Output = T>,
>(
    acc: T,
    a: T,
    b: T,
) -> T {
    mlaf(acc, a, -b)
}

/// Same operation as [`mlaf`] with the accumulator passed last: returns `a * b + acc`.
#[inline(always)]
pub(crate) fn fmla<T: Copy + Mul<T, Output = T> + Add<T, Output = T> + MulAdd<T, Output = T>>(
    a: T,
    b: T,
    acc: T,
) -> T {
    mlaf(acc, a, b)
}
moxcms-0.7.7/src/nd_array.rs000064400000000000000000001164321046102023000141100ustar 00000000000000/*
 * // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved.
 * //
 * // Redistribution and use in source and binary forms, with or without modification,
 * // are permitted provided that the following conditions are met:
 * //
 * // 1.  Redistributions of source code must retain the above copyright notice, this
 * // list of conditions and the following disclaimer.
 * //
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
 * // this list of conditions and the following disclaimer in the documentation
 * // and/or other materials provided with the distribution.
 * //
 * // 3.  Neither the name of the copyright holder nor the names of its
 * // contributors may be used to endorse or promote products derived from
 * // this software without specific prior written permission.
 * //
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * // DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
use crate::math::{FusedMultiplyAdd, FusedMultiplyNegAdd};
use crate::mlaf::{mlaf, neg_mlaf};
use crate::{Vector3f, Vector4f};
use std::ops::{Add, Mul, Sub};

// NOTE(review): the `<f32>` trait argument was lost in extraction and is reconstructed
// from the method signature below - confirm against the trait in `crate::math`.
impl FusedMultiplyAdd<f32> for f32 {
    /// Forwards to `mlaf(*self, b, c)`.
    #[inline(always)]
    fn mla(&self, b: f32, c: f32) -> f32 {
        mlaf(*self, b, c)
    }
}

impl FusedMultiplyNegAdd<f32> for f32 {
    /// Forwards to `neg_mlaf(*self, b, c)`.
    #[inline(always)]
    fn neg_mla(&self, b: f32, c: f32) -> f32 {
        neg_mlaf(*self, b, c)
    }
}

/// Linear interpolation between `a` and `b` with weight `t`.
///
/// Evaluated as `a.neg_mla(a, t).mla(b, t)`, i.e. two chained multiply-accumulate steps
/// rather than `a + (b - a) * t`.
#[inline(always)]
pub(crate) fn lerp<
    T: Mul<Output = T>
        + Sub<Output = T>
        + Add<Output = T>
        + From<f32>
        + Copy
        + FusedMultiplyAdd<T>
        + FusedMultiplyNegAdd<T>,
>(
    a: T,
    b: T,
    t: T,
) -> T {
    a.neg_mla(a, t).mla(b, t)
}

/// 4D CLUT helper.
///
/// Represents hypercube.
pub struct Hypercube<'a> {
    // Flat table of samples; every fetch reads 3 or 4 consecutive floats from it.
    array: &'a [f32],
    // Sample-index strides for the x, y and z axes; the w axis has stride 1.
    x_stride: u32,
    y_stride: u32,
    z_stride: u32,
    // Number of grid points along x, y, z, w.
    grid_size: [u8; 4],
}

/// Reads one sample of type `T` at integer grid coordinates `(x, y, z, w)`.
trait Fetcher4<T> {
    fn fetch(&self, x: i32, y: i32, z: i32, w: i32) -> T;
}

impl Hypercube<'_> {
    /// Wraps `array` as a hypercube with the same `grid_size` on all four axes.
    ///
    /// Strides are computed from the full `grid_size`, while the stored per-axis size is
    /// `grid_size as u8`.
    pub fn new(array: &[f32], grid_size: usize) -> Hypercube<'_> {
        let z_stride = grid_size as u32;
        let y_stride = z_stride * z_stride;
        let x_stride = z_stride * z_stride * z_stride;
        Hypercube {
            array,
            x_stride,
            y_stride,
            z_stride,
            grid_size: [
                grid_size as u8,
                grid_size as u8,
                grid_size as u8,
                grid_size as u8,
            ],
        }
    }

    /// Wraps `array` as a hypercube with an individual grid size per axis.
    pub fn new_hypercube(array: &[f32], grid_size: [u8; 4]) -> Hypercube<'_> {
        let z_stride = grid_size[3] as u32;
        let y_stride = z_stride * grid_size[2] as u32;
        let x_stride = y_stride * grid_size[1] as u32;
        Hypercube {
            array,
            x_stride,
            y_stride,
            z_stride,
            grid_size,
        }
    }
}

/// Fetcher that reads three floats per grid point.
struct Fetch4Vec3<'a> {
    array: &'a [f32],
    x_stride: u32,
    y_stride: u32,
    z_stride: u32,
}

/// Fetcher that reads four floats per grid point.
struct Fetch4Vec4<'a> {
    array: &'a [f32],
    x_stride: u32,
    y_stride: u32,
    z_stride: u32,
}

impl Fetcher4<Vector3f> for Fetch4Vec3<'_> {
    /// Panics (slice indexing) when the computed range lies outside `array`.
    #[inline(always)]
    fn fetch(&self, x: i32, y: i32, z: i32, w: i32) -> Vector3f {
        let start = (x as u32 * self.x_stride
            + y as u32 * self.y_stride
            + z as u32 * self.z_stride
            + w as u32) as usize
            * 3;
        let k = &self.array[start..start + 3];
        Vector3f {
            v: [k[0], k[1], k[2]],
        }
    }
}

impl Fetcher4<Vector4f> for Fetch4Vec4<'_> {
    /// Panics (slice indexing) when the computed range lies outside `array`.
    #[inline(always)]
    fn fetch(&self, x: i32, y: i32, z: i32, w: i32) -> Vector4f {
        let start = (x as u32 * self.x_stride
            + y as u32 * self.y_stride
            + z as u32 * self.z_stride
            + w as u32) as usize
            * 4;
        let k = &self.array[start..start + 4];
        Vector4f {
            v: [k[0], k[1], k[2], k[3]],
        }
    }
}

// NOTE(review): generic arguments in this impl (`From<f32>`, `Output = T`,
// `FusedMultiplyAdd<T>`, `Fetcher4<T>`) were lost in extraction and are reconstructed
// from how `T` is used - confirm against the traits in `crate::math`.
impl Hypercube<'_> {
    /// Quadlinear interpolation.
    ///
    /// Inputs are clamped to `[0, 1]` and scaled by `grid_size - 1`; the sixteen
    /// surrounding grid points are blended with `lerp` along x, then y, then z, then w.
    #[inline(always)]
    fn quadlinear<
        T: From<f32>
            + Add<T, Output = T>
            + Mul<T, Output = T>
            + FusedMultiplyAdd<T>
            + Sub<T, Output = T>
            + Copy
            + FusedMultiplyNegAdd<T>,
    >(
        &self,
        lin_x: f32,
        lin_y: f32,
        lin_z: f32,
        lin_w: f32,
        r: impl Fetcher4<T>,
    ) -> T {
        let lin_x = lin_x.max(0.0).min(1.0);
        let lin_y = lin_y.max(0.0).min(1.0);
        let lin_z = lin_z.max(0.0).min(1.0);
        let lin_w = lin_w.max(0.0).min(1.0);

        let scale_x = (self.grid_size[0] as i32 - 1) as f32;
        let scale_y = (self.grid_size[1] as i32 - 1) as f32;
        let scale_z = (self.grid_size[2] as i32 - 1) as f32;
        let scale_w = (self.grid_size[3] as i32 - 1) as f32;

        // Lower grid index on each axis.
        let x = (lin_x * scale_x).floor() as i32;
        let y = (lin_y * scale_y).floor() as i32;
        let z = (lin_z * scale_z).floor() as i32;
        let w = (lin_w * scale_w).floor() as i32;

        // Upper grid index; equals the lower one when the position is on a grid point.
        let x_n = (lin_x * scale_x).ceil() as i32;
        let y_n = (lin_y * scale_y).ceil() as i32;
        let z_n = (lin_z * scale_z).ceil() as i32;
        let w_n = (lin_w * scale_w).ceil() as i32;

        // Fractional offset inside the cell.
        let x_d = T::from(lin_x * scale_x - x as f32);
        let y_d = T::from(lin_y * scale_y - y as f32);
        let z_d = T::from(lin_z * scale_z - z as f32);
        let w_d = T::from(lin_w * scale_w - w as f32);

        // Trilinear result on the lower w slice.
        let r_x1 = lerp(r.fetch(x, y, z, w), r.fetch(x_n, y, z, w), x_d);
        let r_x2 = lerp(r.fetch(x, y_n, z, w), r.fetch(x_n, y_n, z, w), x_d);
        let r_y1 = lerp(r_x1, r_x2, y_d);
        let r_x3 = lerp(r.fetch(x, y, z_n, w), r.fetch(x_n, y, z_n, w), x_d);
        let r_x4 = lerp(r.fetch(x, y_n, z_n, w), r.fetch(x_n, y_n, z_n, w), x_d);
        let r_y2 = lerp(r_x3, r_x4, y_d);
        let r_z1 = lerp(r_y1, r_y2, z_d);

        // Trilinear result on the upper w slice.
        let r_x1 = lerp(r.fetch(x, y, z, w_n), r.fetch(x_n, y, z, w_n), x_d);
        let r_x2 = lerp(r.fetch(x, y_n, z, w_n), r.fetch(x_n, y_n, z, w_n), x_d);
        let r_y1 = lerp(r_x1, r_x2, y_d);
        let r_x3 = lerp(r.fetch(x, y, z_n, w_n), r.fetch(x_n, y, z_n, w_n), x_d);
        let r_x4 = lerp(r.fetch(x, y_n, z_n, w_n), r.fetch(x_n, y_n, z_n, w_n), x_d);
        let r_y2 = lerp(r_x3, r_x4, y_d);
        let r_z2 = lerp(r_y1, r_y2, z_d);
        lerp(r_z1, r_z2, w_d)
    }

    /// Quadlinear interpolation reading three floats per grid point.
    #[inline]
    pub fn quadlinear_vec3(&self, lin_x: f32, lin_y: f32, lin_z: f32, lin_w: f32) -> Vector3f {
        self.quadlinear(
            lin_x,
            lin_y,
            lin_z,
            lin_w,
            Fetch4Vec3 {
                array: self.array,
                x_stride: self.x_stride,
                y_stride: self.y_stride,
                z_stride: self.z_stride,
            },
        )
    }

    /// Quadlinear interpolation reading four floats per grid point.
    #[inline]
    pub fn quadlinear_vec4(&self, lin_x: f32, lin_y: f32, lin_z: f32, lin_w: f32) -> Vector4f {
        self.quadlinear(
            lin_x,
            lin_y,
            lin_z,
            lin_w,
            Fetch4Vec4 {
                array: self.array,
                x_stride: self.x_stride,
                y_stride: self.y_stride,
                z_stride: self.z_stride,
            },
        )
    }

    /// Pyramid interpolation in x/y/z on the two neighbouring w slices, followed by a
    /// linear blend of the two slice results along w.
    ///
    /// The branch taken inside a slice depends on which of the fractional offsets
    /// (`dr`, `dg`, `db`) is the smallest.
    #[cfg(feature = "options")]
    #[cfg_attr(docsrs, doc(cfg(feature = "options")))]
    #[inline(always)]
    fn pyramid<
        T: From<f32>
            + Add<T, Output = T>
            + Mul<T, Output = T>
            + FusedMultiplyAdd<T>
            + Sub<T, Output = T>
            + Copy
            + FusedMultiplyNegAdd<T>,
    >(
        &self,
        lin_x: f32,
        lin_y: f32,
        lin_z: f32,
        lin_w: f32,
        r: impl Fetcher4<T>,
    ) -> T {
        let lin_x = lin_x.max(0.0).min(1.0);
        let lin_y = lin_y.max(0.0).min(1.0);
        let lin_z = lin_z.max(0.0).min(1.0);
        let lin_w = lin_w.max(0.0).min(1.0);

        let scale_x = (self.grid_size[0] as i32 - 1) as f32;
        let scale_y = (self.grid_size[1] as i32 - 1) as f32;
        let scale_z = (self.grid_size[2] as i32 - 1) as f32;
        let scale_w = (self.grid_size[3] as i32 - 1) as f32;

        let x = (lin_x * scale_x).floor() as i32;
        let y = (lin_y * scale_y).floor() as i32;
        let z = (lin_z * scale_z).floor() as i32;
        let w = (lin_w * scale_w).floor() as i32;

        let x_n = (lin_x * scale_x).ceil() as i32;
        let y_n = (lin_y * scale_y).ceil() as i32;
        let z_n = (lin_z * scale_z).ceil() as i32;
        let w_n = (lin_w * scale_w).ceil() as i32;

        let dr = lin_x * scale_x - x as f32;
        let dg = lin_y * scale_y - y as f32;
        let db = lin_z * scale_z - z as f32;
        let dw = lin_w * scale_w - w as f32;

        // Lower w slice.
        let c0 = r.fetch(x, y, z, w);

        let w0 = if dr > db && dg > db {
            let x0 = r.fetch(x_n, y_n, z_n, w);
            let x1 = r.fetch(x_n, y_n, z, w);
            let x2 = r.fetch(x_n, y, z, w);
            let x3 = r.fetch(x, y_n, z, w);

            let c1 = x0 - x1;
            let c2 = x2 - c0;
            let c3 = x3 - c0;
            let c4 = c0 - x3 - x2 + x1;

            let s0 = c0.mla(c1, T::from(db));
            let s1 = s0.mla(c2, T::from(dr));
            let s2 = s1.mla(c3, T::from(dg));
            s2.mla(c4, T::from(dr * dg))
        } else if db > dr && dg > dr {
            let x0 = r.fetch(x, y, z_n, w);
            let x1 = r.fetch(x_n, y_n, z_n, w);
            let x2 = r.fetch(x, y_n, z_n, w);
            let x3 = r.fetch(x, y_n, z, w);

            let c1 = x0 - c0;
            let c2 = x1 - x2;
            let c3 = x3 - c0;
            let c4 = c0 - x3 - x0 + x2;

            let s0 = c0.mla(c1, T::from(db));
            let s1 = s0.mla(c2, T::from(dr));
            let s2 = s1.mla(c3, T::from(dg));
            s2.mla(c4, T::from(dg * db))
        } else {
            let x0 = r.fetch(x, y, z_n, w);
            let x1 = r.fetch(x_n, y, z, w);
            let x2 = r.fetch(x_n, y, z_n, w);
            let x3 = r.fetch(x_n, y_n, z_n, w);

            let c1 = x0 - c0;
            let c2 = x1 - c0;
            let c3 = x3 - x2;
            let c4 = c0 - x1 - x0 + x2;

            let s0 = c0.mla(c1, T::from(db));
            let s1 = s0.mla(c2, T::from(dr));
            let s2 = s1.mla(c3, T::from(dg));
            s2.mla(c4, T::from(db * dr))
        };

        // Upper w slice: same computation with `w_n`.
        let c0 = r.fetch(x, y, z, w_n);

        let w1 = if dr > db && dg > db {
            let x0 = r.fetch(x_n, y_n, z_n, w_n);
            let x1 = r.fetch(x_n, y_n, z, w_n);
            let x2 = r.fetch(x_n, y, z, w_n);
            let x3 = r.fetch(x, y_n, z, w_n);

            let c1 = x0 - x1;
            let c2 = x2 - c0;
            let c3 = x3 - c0;
            let c4 = c0 - x3 - x2 + x1;

            let s0 = c0.mla(c1, T::from(db));
            let s1 = s0.mla(c2, T::from(dr));
            let s2 = s1.mla(c3, T::from(dg));
            s2.mla(c4, T::from(dr * dg))
        } else if db > dr && dg > dr {
            let x0 = r.fetch(x, y, z_n, w_n);
            let x1 = r.fetch(x_n, y_n, z_n, w_n);
            let x2 = r.fetch(x, y_n, z_n, w_n);
            let x3 = r.fetch(x, y_n, z, w_n);

            let c1 = x0 - c0;
            let c2 = x1 - x2;
            let c3 = x3 - c0;
            let c4 = c0 - x3 - x0 + x2;

            let s0 = c0.mla(c1, T::from(db));
            let s1 = s0.mla(c2, T::from(dr));
            let s2 = s1.mla(c3, T::from(dg));
            s2.mla(c4, T::from(dg * db))
        } else {
            let x0 = r.fetch(x, y, z_n, w_n);
            let x1 = r.fetch(x_n, y, z, w_n);
            let x2 = r.fetch(x_n, y, z_n, w_n);
            let x3 = r.fetch(x_n, y_n, z_n, w_n);

            let c1 = x0 - c0;
            let c2 = x1 - c0;
            let c3 = x3 - x2;
            let c4 = c0 - x1 - x0 + x2;

            let s0 = c0.mla(c1, T::from(db));
            let s1 = s0.mla(c2, T::from(dr));
            let s2 = s1.mla(c3, T::from(dg));
            s2.mla(c4, T::from(db * dr))
        };
        // Linear blend of the two slices along w.
        w0.neg_mla(w0, T::from(dw)).mla(w1, T::from(dw))
    }

    /// Pyramid interpolation reading three floats per grid point.
    #[cfg(feature = "options")]
    #[cfg_attr(docsrs, doc(cfg(feature = "options")))]
    #[inline]
    pub fn pyramid_vec3(&self, lin_x: f32, lin_y: f32, lin_z: f32, lin_w: f32) -> Vector3f {
        self.pyramid(
            lin_x,
            lin_y,
            lin_z,
            lin_w,
            Fetch4Vec3 {
                array: self.array,
                x_stride: self.x_stride,
                y_stride: self.y_stride,
                z_stride: self.z_stride,
            },
        )
    }

    /// Pyramid interpolation reading four floats per grid point.
    #[cfg(feature = "options")]
    #[cfg_attr(docsrs, doc(cfg(feature = "options")))]
    #[inline]
    pub fn pyramid_vec4(&self, lin_x: f32, lin_y: f32, lin_z: f32, lin_w: f32) -> Vector4f {
        self.pyramid(
            lin_x,
            lin_y,
            lin_z,
            lin_w,
            Fetch4Vec4 {
                array: self.array,
                x_stride: self.x_stride,
                y_stride: self.y_stride,
                z_stride: self.z_stride,
            },
        )
    }

    /// Prism interpolation in x/y/z on the two neighbouring w slices, followed by a
    /// linear blend of the two slice results along w.
    ///
    /// The branch taken inside a slice depends on whether `db >= dr`.
    #[cfg(feature = "options")]
    #[cfg_attr(docsrs, doc(cfg(feature = "options")))]
    #[inline(always)]
    fn prism<
        T: From<f32>
            + Add<T, Output = T>
            + Mul<T, Output = T>
            + FusedMultiplyAdd<T>
            + Sub<T, Output = T>
            + Copy
            + FusedMultiplyNegAdd<T>,
    >(
        &self,
        lin_x: f32,
        lin_y: f32,
        lin_z: f32,
        lin_w: f32,
        r: impl Fetcher4<T>,
    ) -> T {
        let lin_x = lin_x.max(0.0).min(1.0);
        let lin_y = lin_y.max(0.0).min(1.0);
        let lin_z = lin_z.max(0.0).min(1.0);
        let lin_w = lin_w.max(0.0).min(1.0);

        let scale_x = (self.grid_size[0] as i32 - 1) as f32;
        let scale_y = (self.grid_size[1] as i32 - 1) as f32;
        let scale_z = (self.grid_size[2] as i32 - 1) as f32;
        let scale_w = (self.grid_size[3] as i32 - 1) as f32;

        let x = (lin_x * scale_x).floor() as i32;
        let y = (lin_y * scale_y).floor() as i32;
        let z = (lin_z * scale_z).floor() as i32;
        let w = (lin_w * scale_w).floor() as i32;

        let x_n = (lin_x * scale_x).ceil() as i32;
        let y_n = (lin_y * scale_y).ceil() as i32;
        let z_n = (lin_z * scale_z).ceil() as i32;
        let w_n = (lin_w * scale_w).ceil() as i32;

        let dr = lin_x * scale_x - x as f32;
        let dg = lin_y * scale_y - y as f32;
        let db = lin_z * scale_z - z as f32;
        let dw = lin_w * scale_w - w as f32;

        // Lower w slice.
        let c0 = r.fetch(x, y, z, w);

        let w0 = if db >= dr {
            let x0 = r.fetch(x, y, z_n, w);
            let x1 = r.fetch(x_n, y, z_n, w);
            let x2 = r.fetch(x, y_n, z, w);
            let x3 = r.fetch(x, y_n, z_n, w);
            let x4 = r.fetch(x_n, y_n, z_n, w);

            let c1 = x0 - c0;
            let c2 = x1 - x0;
            let c3 = x2 - c0;
            let c4 = c0 - x2 - x0 + x3;
            let c5 = x0 - x3 - x1 + x4;

            let s0 = c0.mla(c1, T::from(db));
            let s1 = s0.mla(c2, T::from(dr));
            let s2 = s1.mla(c3, T::from(dg));
            let s3 = s2.mla(c4, T::from(dg * db));
            s3.mla(c5, T::from(dr * dg))
        } else {
            let x0 = r.fetch(x_n, y, z, w);
            let x1 = r.fetch(x_n, y, z_n, w);
            let x2 = r.fetch(x, y_n, z, w);
            let x3 = r.fetch(x_n, y_n, z, w);
            let x4 = r.fetch(x_n, y_n, z_n, w);

            let c1 = x1 - x0;
            let c2 = x0 - c0;
            let c3 = x2 - c0;
            let c4 = x0 - x3 - x1 + x4;
            let c5 = c0 - x2 - x0 + x3;

            let s0 = c0.mla(c1, T::from(db));
            let s1 = s0.mla(c2, T::from(dr));
            let s2 = s1.mla(c3, T::from(dg));
            let s3 = s2.mla(c4, T::from(dg * db));
            s3.mla(c5, T::from(dr * dg))
        };

        // Upper w slice: same computation with `w_n`.
        let c0 = r.fetch(x, y, z, w_n);

        let w1 = if db >= dr {
            let x0 = r.fetch(x, y, z_n, w_n);
            let x1 = r.fetch(x_n, y, z_n, w_n);
            let x2 = r.fetch(x, y_n, z, w_n);
            let x3 = r.fetch(x, y_n, z_n, w_n);
            let x4 = r.fetch(x_n, y_n, z_n, w_n);

            let c1 = x0 - c0;
            let c2 = x1 - x0;
            let c3 = x2 - c0;
            let c4 = c0 - x2 - x0 + x3;
            let c5 = x0 - x3 - x1 + x4;

            let s0 = c0.mla(c1, T::from(db));
            let s1 = s0.mla(c2, T::from(dr));
            let s2 = s1.mla(c3, T::from(dg));
            let s3 = s2.mla(c4, T::from(dg * db));
            s3.mla(c5, T::from(dr * dg))
        } else {
            let x0 = r.fetch(x_n, y, z, w_n);
            let x1 = r.fetch(x_n, y, z_n, w_n);
            let x2 = r.fetch(x, y_n, z, w_n);
            let x3 = r.fetch(x_n, y_n, z, w_n);
            let x4 = r.fetch(x_n, y_n, z_n, w_n);

            let c1 = x1 - x0;
            let c2 = x0 - c0;
            let c3 = x2 - c0;
            let c4 = x0 - x3 - x1 + x4;
            let c5 = c0 - x2 - x0 + x3;

            let s0 = c0.mla(c1, T::from(db));
            let s1 = s0.mla(c2, T::from(dr));
            let s2 = s1.mla(c3, T::from(dg));
            let s3 = s2.mla(c4, T::from(dg * db));
            s3.mla(c5, T::from(dr * dg))
        };
        // Linear blend of the two slices along w.
        w0.neg_mla(w0, T::from(dw)).mla(w1, T::from(dw))
    }

    /// Prism interpolation reading three floats per grid point.
    #[cfg(feature = "options")]
    #[cfg_attr(docsrs, doc(cfg(feature = "options")))]
    #[inline]
    pub fn prism_vec3(&self, lin_x: f32, lin_y: f32, lin_z: f32, lin_w: f32) -> Vector3f {
        self.prism(
            lin_x,
            lin_y,
            lin_z,
            lin_w,
            Fetch4Vec3 {
                array: self.array,
                x_stride: self.x_stride,
                y_stride: self.y_stride,
                z_stride: self.z_stride,
            },
        )
    }

    /// Prism interpolation reading four floats per grid point.
    #[cfg(feature = "options")]
    #[cfg_attr(docsrs, doc(cfg(feature = "options")))]
    #[inline]
    pub fn prism_vec4(&self, lin_x: f32, lin_y: f32, lin_z: f32, lin_w: f32) -> Vector4f {
        self.prism(
            lin_x,
            lin_y,
            lin_z,
            lin_w,
            Fetch4Vec4 {
                array: self.array,
                x_stride: self.x_stride,
                y_stride: self.y_stride,
                z_stride: self.z_stride,
            },
        )
    }

    #[cfg(feature = "options")]
#[cfg_attr(docsrs, doc(cfg(feature = "options")))] #[inline(always)] fn tetra< T: From + Add + Mul + FusedMultiplyAdd + Sub + Copy + FusedMultiplyNegAdd, >( &self, lin_x: f32, lin_y: f32, lin_z: f32, lin_w: f32, r: impl Fetcher4, ) -> T { let lin_x = lin_x.max(0.0).min(1.0); let lin_y = lin_y.max(0.0).min(1.0); let lin_z = lin_z.max(0.0).min(1.0); let lin_w = lin_w.max(0.0).min(1.0); let scale_x = (self.grid_size[0] as i32 - 1) as f32; let scale_y = (self.grid_size[1] as i32 - 1) as f32; let scale_z = (self.grid_size[2] as i32 - 1) as f32; let scale_w = (self.grid_size[3] as i32 - 1) as f32; let x = (lin_x * scale_x).floor() as i32; let y = (lin_y * scale_y).floor() as i32; let z = (lin_z * scale_z).floor() as i32; let w = (lin_w * scale_w).floor() as i32; let x_n = (lin_x * scale_x).ceil() as i32; let y_n = (lin_y * scale_y).ceil() as i32; let z_n = (lin_z * scale_z).ceil() as i32; let w_n = (lin_w * scale_w).ceil() as i32; let rx = lin_x * scale_x - x as f32; let ry = lin_y * scale_y - y as f32; let rz = lin_z * scale_z - z as f32; let rw = lin_w * scale_w - w as f32; let c0 = r.fetch(x, y, z, w); let c2; let c1; let c3; if rx >= ry { if ry >= rz { //rx >= ry && ry >= rz c1 = r.fetch(x_n, y, z, w) - c0; c2 = r.fetch(x_n, y_n, z, w) - r.fetch(x_n, y, z, w); c3 = r.fetch(x_n, y_n, z_n, w) - r.fetch(x_n, y_n, z, w); } else if rx >= rz { //rx >= rz && rz >= ry c1 = r.fetch(x_n, y, z, w) - c0; c2 = r.fetch(x_n, y_n, z_n, w) - r.fetch(x_n, y, z_n, w); c3 = r.fetch(x_n, y, z_n, w) - r.fetch(x_n, y, z, w); } else { //rz > rx && rx >= ry c1 = r.fetch(x_n, y, z_n, w) - r.fetch(x, y, z_n, w); c2 = r.fetch(x_n, y_n, z_n, w) - r.fetch(x_n, y, z_n, w); c3 = r.fetch(x, y, z_n, w) - c0; } } else if rx >= rz { //ry > rx && rx >= rz c1 = r.fetch(x_n, y_n, z, w) - r.fetch(x, y_n, z, w); c2 = r.fetch(x, y_n, z, w) - c0; c3 = r.fetch(x_n, y_n, z_n, w) - r.fetch(x_n, y_n, z, w); } else if ry >= rz { //ry >= rz && rz > rx c1 = r.fetch(x_n, y_n, z_n, w) - r.fetch(x, y_n, z_n, w); c2 = 
r.fetch(x, y_n, z, w) - c0; c3 = r.fetch(x, y_n, z_n, w) - r.fetch(x, y_n, z, w); } else { //rz > ry && ry > rx c1 = r.fetch(x_n, y_n, z_n, w) - r.fetch(x, y_n, z_n, w); c2 = r.fetch(x, y_n, z_n, w) - r.fetch(x, y, z_n, w); c3 = r.fetch(x, y, z_n, w) - c0; } let s0 = c0.mla(c1, T::from(rx)); let s1 = s0.mla(c2, T::from(ry)); let w0 = s1.mla(c3, T::from(rz)); let c0 = r.fetch(x, y, z, w_n); let c2; let c1; let c3; if rx >= ry { if ry >= rz { //rx >= ry && ry >= rz c1 = r.fetch(x_n, y, z, w_n) - c0; c2 = r.fetch(x_n, y_n, z, w_n) - r.fetch(x_n, y, z, w_n); c3 = r.fetch(x_n, y_n, z_n, w_n) - r.fetch(x_n, y_n, z, w_n); } else if rx >= rz { //rx >= rz && rz >= ry c1 = r.fetch(x_n, y, z, w_n) - c0; c2 = r.fetch(x_n, y_n, z_n, w_n) - r.fetch(x_n, y, z_n, w_n); c3 = r.fetch(x_n, y, z_n, w_n) - r.fetch(x_n, y, z, w_n); } else { //rz > rx && rx >= ry c1 = r.fetch(x_n, y, z_n, w_n) - r.fetch(x, y, z_n, w_n); c2 = r.fetch(x_n, y_n, z_n, w_n) - r.fetch(x_n, y, z_n, w_n); c3 = r.fetch(x, y, z_n, w_n) - c0; } } else if rx >= rz { //ry > rx && rx >= rz c1 = r.fetch(x_n, y_n, z, w_n) - r.fetch(x, y_n, z, w_n); c2 = r.fetch(x, y_n, z, w_n) - c0; c3 = r.fetch(x_n, y_n, z_n, w_n) - r.fetch(x_n, y_n, z, w_n); } else if ry >= rz { //ry >= rz && rz > rx c1 = r.fetch(x_n, y_n, z_n, w_n) - r.fetch(x, y_n, z_n, w_n); c2 = r.fetch(x, y_n, z, w_n) - c0; c3 = r.fetch(x, y_n, z_n, w_n) - r.fetch(x, y_n, z, w_n); } else { //rz > ry && ry > rx c1 = r.fetch(x_n, y_n, z_n, w_n) - r.fetch(x, y_n, z_n, w_n); c2 = r.fetch(x, y_n, z_n, w_n) - r.fetch(x, y, z_n, w_n); c3 = r.fetch(x, y, z_n, w_n) - c0; } let s0 = c0.mla(c1, T::from(rx)); let s1 = s0.mla(c2, T::from(ry)); let w1 = s1.mla(c3, T::from(rz)); w0.neg_mla(w0, T::from(rw)).mla(w1, T::from(rw)) } #[cfg(feature = "options")] #[cfg_attr(docsrs, doc(cfg(feature = "options")))] #[inline] pub fn tetra_vec3(&self, lin_x: f32, lin_y: f32, lin_z: f32, lin_w: f32) -> Vector3f { self.tetra( lin_x, lin_y, lin_z, lin_w, Fetch4Vec3 { array: self.array, 
x_stride: self.x_stride, y_stride: self.y_stride, z_stride: self.z_stride, }, ) } #[cfg(feature = "options")] #[cfg_attr(docsrs, doc(cfg(feature = "options")))] #[inline] pub fn tetra_vec4(&self, lin_x: f32, lin_y: f32, lin_z: f32, lin_w: f32) -> Vector4f { self.tetra( lin_x, lin_y, lin_z, lin_w, Fetch4Vec4 { array: self.array, x_stride: self.x_stride, y_stride: self.y_stride, z_stride: self.z_stride, }, ) } } /// 3D CLUT helper /// /// Represents hexahedron. pub struct Cube<'a> { array: &'a [f32], x_stride: u32, y_stride: u32, grid_size: [u8; 3], } pub(crate) trait ArrayFetch { fn fetch(&self, x: i32, y: i32, z: i32) -> T; } struct ArrayFetchVector3f<'a> { array: &'a [f32], x_stride: u32, y_stride: u32, } impl ArrayFetch for ArrayFetchVector3f<'_> { #[inline(always)] fn fetch(&self, x: i32, y: i32, z: i32) -> Vector3f { let start = (x as u32 * self.x_stride + y as u32 * self.y_stride + z as u32) as usize * 3; let k = &self.array[start..start + 3]; Vector3f { v: [k[0], k[1], k[2]], } } } struct ArrayFetchVector4f<'a> { array: &'a [f32], x_stride: u32, y_stride: u32, } impl ArrayFetch for ArrayFetchVector4f<'_> { #[inline(always)] fn fetch(&self, x: i32, y: i32, z: i32) -> Vector4f { let start = (x as u32 * self.x_stride + y as u32 * self.y_stride + z as u32) as usize * 4; let k = &self.array[start..start + 4]; Vector4f { v: [k[0], k[1], k[2], k[3]], } } } impl Cube<'_> { pub fn new(array: &[f32], grid_size: usize) -> Cube<'_> { let y_stride = grid_size; let x_stride = y_stride * y_stride; Cube { array, x_stride: x_stride as u32, y_stride: y_stride as u32, grid_size: [grid_size as u8, grid_size as u8, grid_size as u8], } } pub fn new_cube(array: &[f32], grid_size: [u8; 3]) -> Cube<'_> { let y_stride = grid_size[2] as u32; let x_stride = y_stride * grid_size[1] as u32; Cube { array, x_stride, y_stride, grid_size, } } #[inline(always)] fn trilinear< T: Copy + From + Sub + Mul + Add + FusedMultiplyNegAdd + FusedMultiplyAdd, >( &self, lin_x: f32, lin_y: f32, lin_z: f32, 
fetch: impl ArrayFetch, ) -> T { let lin_x = lin_x.max(0.0).min(1.0); let lin_y = lin_y.max(0.0).min(1.0); let lin_z = lin_z.max(0.0).min(1.0); let scale_x = (self.grid_size[0] as i32 - 1) as f32; let scale_y = (self.grid_size[1] as i32 - 1) as f32; let scale_z = (self.grid_size[2] as i32 - 1) as f32; let x = (lin_x * scale_x).floor() as i32; let y = (lin_y * scale_y).floor() as i32; let z = (lin_z * scale_z).floor() as i32; let x_n = (lin_x * scale_x).ceil() as i32; let y_n = (lin_y * scale_y).ceil() as i32; let z_n = (lin_z * scale_z).ceil() as i32; let x_d = T::from(lin_x * scale_x - x as f32); let y_d = T::from(lin_y * scale_y - y as f32); let z_d = T::from(lin_z * scale_z - z as f32); let c000 = fetch.fetch(x, y, z); let c100 = fetch.fetch(x_n, y, z); let c010 = fetch.fetch(x, y_n, z); let c110 = fetch.fetch(x_n, y_n, z); let c001 = fetch.fetch(x, y, z_n); let c101 = fetch.fetch(x_n, y, z_n); let c011 = fetch.fetch(x, y_n, z_n); let c111 = fetch.fetch(x_n, y_n, z_n); let c00 = c000.neg_mla(c000, x_d).mla(c100, x_d); let c10 = c010.neg_mla(c010, x_d).mla(c110, x_d); let c01 = c001.neg_mla(c001, x_d).mla(c101, x_d); let c11 = c011.neg_mla(c011, x_d).mla(c111, x_d); let c0 = c00.neg_mla(c00, y_d).mla(c10, y_d); let c1 = c01.neg_mla(c01, y_d).mla(c11, y_d); c0.neg_mla(c0, z_d).mla(c1, z_d) } #[cfg(feature = "options")] #[inline] fn pyramid< T: Copy + From + Sub + Mul + Add + FusedMultiplyAdd, >( &self, lin_x: f32, lin_y: f32, lin_z: f32, fetch: impl ArrayFetch, ) -> T { let lin_x = lin_x.max(0.0).min(1.0); let lin_y = lin_y.max(0.0).min(1.0); let lin_z = lin_z.max(0.0).min(1.0); let scale_x = (self.grid_size[0] as i32 - 1) as f32; let scale_y = (self.grid_size[1] as i32 - 1) as f32; let scale_z = (self.grid_size[2] as i32 - 1) as f32; let x = (lin_x * scale_x).floor() as i32; let y = (lin_y * scale_y).floor() as i32; let z = (lin_z * scale_z).floor() as i32; let x_n = (lin_x * scale_x).ceil() as i32; let y_n = (lin_y * scale_y).ceil() as i32; let z_n = (lin_z * 
scale_z).ceil() as i32;
        let dr = lin_x * scale_x - x as f32;
        let dg = lin_y * scale_y - y as f32;
        let db = lin_z * scale_z - z as f32;
        let c0 = fetch.fetch(x, y, z);
        if dr > db && dg > db {
            let x0 = fetch.fetch(x_n, y_n, z_n);
            let x1 = fetch.fetch(x_n, y_n, z);
            let x2 = fetch.fetch(x_n, y, z);
            let x3 = fetch.fetch(x, y_n, z);
            let c1 = x0 - x1;
            let c2 = x2 - c0;
            let c3 = x3 - c0;
            let c4 = c0 - x3 - x2 + x1;
            let s0 = c0.mla(c1, T::from(db));
            let s1 = s0.mla(c2, T::from(dr));
            let s2 = s1.mla(c3, T::from(dg));
            s2.mla(c4, T::from(dr * dg))
        } else if db > dr && dg > dr {
            let x0 = fetch.fetch(x, y, z_n);
            let x1 = fetch.fetch(x_n, y_n, z_n);
            let x2 = fetch.fetch(x, y_n, z_n);
            let x3 = fetch.fetch(x, y_n, z);
            let c1 = x0 - c0;
            let c2 = x1 - x2;
            let c3 = x3 - c0;
            let c4 = c0 - x3 - x0 + x2;
            let s0 = c0.mla(c1, T::from(db));
            let s1 = s0.mla(c2, T::from(dr));
            let s2 = s1.mla(c3, T::from(dg));
            s2.mla(c4, T::from(dg * db))
        } else {
            let x0 = fetch.fetch(x, y, z_n);
            let x1 = fetch.fetch(x_n, y, z);
            let x2 = fetch.fetch(x_n, y, z_n);
            let x3 = fetch.fetch(x_n, y_n, z_n);
            let c1 = x0 - c0;
            let c2 = x1 - c0;
            let c3 = x3 - x2;
            let c4 = c0 - x1 - x0 + x2;
            let s0 = c0.mla(c1, T::from(db));
            let s1 = s0.mla(c2, T::from(dr));
            let s2 = s1.mla(c3, T::from(dg));
            s2.mla(c4, T::from(db * dr))
        }
    }

    /// Interpolates the grid at a normalized position using three difference terms.
    ///
    /// Every coordinate is clamped to `[0, 1]` and scaled by `grid_size - 1` on its axis.
    /// The ordering of the fractional offsets selects which grid nodes are fetched; the
    /// result is the base node plus the three node differences weighted by the offsets.
    ///
    /// # Arguments
    /// * `lin_x`, `lin_y`, `lin_z` - normalized lookup coordinates
    /// * `fetch` - accessor returning the grid node at integer coordinates
    #[cfg(feature = "options")]
    #[inline]
    fn tetra<
        T: Copy
            + From<f32>
            + Sub<T, Output = T>
            + Mul<T, Output = T>
            + Add<T, Output = T>
            + FusedMultiplyAdd<T>,
    >(
        &self,
        lin_x: f32,
        lin_y: f32,
        lin_z: f32,
        fetch: impl ArrayFetch<T>,
    ) -> T {
        // Position on the grid, in node units.
        let gx = lin_x.max(0.0).min(1.0) * (self.grid_size[0] as i32 - 1) as f32;
        let gy = lin_y.max(0.0).min(1.0) * (self.grid_size[1] as i32 - 1) as f32;
        let gz = lin_z.max(0.0).min(1.0) * (self.grid_size[2] as i32 - 1) as f32;

        // Lower and upper node index on every axis.
        let (x, y, z) = (gx.floor() as i32, gy.floor() as i32, gz.floor() as i32);
        let (x_n, y_n, z_n) = (gx.ceil() as i32, gy.ceil() as i32, gz.ceil() as i32);

        // Fractional offsets from the lower node.
        let rx = gx - x as f32;
        let ry = gy - y as f32;
        let rz = gz - z as f32;

        // Node names below read `p<x><y><z>`, where 1 means the upper index on that axis.
        let base = fetch.fetch(x, y, z);
        let far = fetch.fetch(x_n, y_n, z_n);

        let (along_x, along_y, along_z) = if rx >= ry {
            if ry >= rz {
                // rx >= ry && ry >= rz
                let p100 = fetch.fetch(x_n, y, z);
                let p110 = fetch.fetch(x_n, y_n, z);
                (p100 - base, p110 - p100, far - p110)
            } else if rx >= rz {
                // rx >= rz && rz >= ry
                let p100 = fetch.fetch(x_n, y, z);
                let p101 = fetch.fetch(x_n, y, z_n);
                (p100 - base, far - p101, p101 - p100)
            } else {
                // rz > rx && rx >= ry
                let p001 = fetch.fetch(x, y, z_n);
                let p101 = fetch.fetch(x_n, y, z_n);
                (p101 - p001, far - p101, p001 - base)
            }
        } else if rx >= rz {
            // ry > rx && rx >= rz
            let p010 = fetch.fetch(x, y_n, z);
            let p110 = fetch.fetch(x_n, y_n, z);
            (p110 - p010, p010 - base, far - p110)
        } else if ry >= rz {
            // ry >= rz && rz > rx
            let p010 = fetch.fetch(x, y_n, z);
            let p011 = fetch.fetch(x, y_n, z_n);
            (far - p011, p010 - base, p011 - p010)
        } else {
            // rz > ry && ry > rx
            let p001 = fetch.fetch(x, y, z_n);
            let p011 = fetch.fetch(x, y_n, z_n);
            (far - p011, p011 - p001, p001 - base)
        };

        base.mla(along_x, T::from(rx))
            .mla(along_y, T::from(ry))
            .mla(along_z, T::from(rz))
    }

    /// Interpolates the grid at a normalized position using five difference terms.
    ///
    /// Every coordinate is clamped to `[0, 1]` and scaled by `grid_size - 1` on its axis.
    /// Comparing the z offset with the x offset selects which set of six grid nodes is
    /// combined.
    ///
    /// # Arguments
    /// * `lin_x`, `lin_y`, `lin_z` - normalized lookup coordinates
    /// * `fetch` - accessor returning the grid node at integer coordinates
    #[cfg(feature = "options")]
    #[inline]
    fn prism<
        T: Copy
            + From<f32>
            + Sub<T, Output = T>
            + Mul<T, Output = T>
            + Add<T, Output = T>
            + FusedMultiplyAdd<T>,
    >(
        &self,
        lin_x: f32,
        lin_y: f32,
        lin_z: f32,
        fetch: impl ArrayFetch<T>,
    ) -> T {
        // Position on the grid, in node units.
        let gx = lin_x.max(0.0).min(1.0) * (self.grid_size[0] as i32 - 1) as f32;
        let gy = lin_y.max(0.0).min(1.0) * (self.grid_size[1] as i32 - 1) as f32;
        let gz = lin_z.max(0.0).min(1.0) * (self.grid_size[2] as i32 - 1) as f32;

        // Lower and upper node index on every axis.
        let (x, y, z) = (gx.floor() as i32, gy.floor() as i32, gz.floor() as i32);
        let (x_n, y_n, z_n) = (gx.ceil() as i32, gy.ceil() as i32, gz.ceil() as i32);

        // Fractional offsets from the lower node.
        let dr = gx - x as f32;
        let dg = gy - y as f32;
        let db = gz - z as f32;

        // Node names below read `p<x><y><z>`, where 1 means the upper index on that axis.
        let base = fetch.fetch(x, y, z);

        let (k_b, k_r, k_g, k_gb, k_rg) = if db >= dr {
            let p001 = fetch.fetch(x, y, z_n);
            let p101 = fetch.fetch(x_n, y, z_n);
            let p010 = fetch.fetch(x, y_n, z);
            let p011 = fetch.fetch(x, y_n, z_n);
            let p111 = fetch.fetch(x_n, y_n, z_n);
            (
                p001 - base,
                p101 - p001,
                p010 - base,
                base - p010 - p001 + p011,
                p001 - p011 - p101 + p111,
            )
        } else {
            let p100 = fetch.fetch(x_n, y, z);
            let p101 = fetch.fetch(x_n, y, z_n);
            let p010 = fetch.fetch(x, y_n, z);
            let p110 = fetch.fetch(x_n, y_n, z);
            let p111 = fetch.fetch(x_n, y_n, z_n);
            (
                p101 - p100,
                p100 - base,
                p010 - base,
                p100 - p110 - p101 + p111,
                base - p010 - p100 + p110,
            )
        };

        base.mla(k_b, T::from(db))
            .mla(k_r, T::from(dr))
            .mla(k_g, T::from(dg))
            .mla(k_gb, T::from(dg * db))
            .mla(k_rg, T::from(dr * dg))
    }

    /// Runs `trilinear` over this grid with an [`ArrayFetchVector3f`] accessor.
    pub fn trilinear_vec3(&self, lin_x: f32, lin_y: f32, lin_z: f32) -> Vector3f {
        let fetcher = ArrayFetchVector3f {
            array: self.array,
            x_stride: self.x_stride,
            y_stride: self.y_stride,
        };
        self.trilinear(lin_x, lin_y, lin_z, fetcher)
    }

    /// Runs `prism` over this grid with an [`ArrayFetchVector3f`] accessor.
    #[cfg(feature = "options")]
    #[cfg_attr(docsrs, doc(cfg(feature = "options")))]
    pub fn prism_vec3(&self, lin_x: f32, lin_y: f32, lin_z: f32) -> Vector3f {
        let fetcher = ArrayFetchVector3f {
            array: self.array,
            x_stride: self.x_stride,
            y_stride: self.y_stride,
        };
        self.prism(lin_x, lin_y, lin_z, fetcher)
    }

    /// Runs `pyramid` over this grid with an [`ArrayFetchVector3f`] accessor.
    #[cfg(feature = "options")]
    #[cfg_attr(docsrs, doc(cfg(feature = "options")))]
    pub fn pyramid_vec3(&self, lin_x: f32, lin_y: f32, lin_z: f32) -> Vector3f {
        let fetcher = ArrayFetchVector3f {
            array: self.array,
            x_stride: self.x_stride,
            y_stride: self.y_stride,
        };
        self.pyramid(lin_x, lin_y, lin_z, fetcher)
    }

    /// Runs `tetra` over this grid with an [`ArrayFetchVector3f`] accessor.
    #[cfg(feature = "options")]
    #[cfg_attr(docsrs, doc(cfg(feature = "options")))]
    pub fn tetra_vec3(&self, lin_x: f32, lin_y: f32, lin_z: f32) -> Vector3f {
        let fetcher = ArrayFetchVector3f {
            array: self.array,
            x_stride: self.x_stride,
            y_stride: self.y_stride,
        };
        self.tetra(lin_x, lin_y, lin_z, fetcher)
    }

    /// Runs `trilinear` over this grid with an [`ArrayFetchVector4f`] accessor.
    pub fn trilinear_vec4(&self, lin_x: f32, lin_y: f32, lin_z: f32) -> Vector4f {
        let fetcher = ArrayFetchVector4f {
            array: self.array,
            x_stride: self.x_stride,
            y_stride: self.y_stride,
        };
        self.trilinear(lin_x, lin_y, lin_z, fetcher)
    }

    /// Runs `tetra` over this grid with an [`ArrayFetchVector4f`] accessor.
    #[cfg(feature = "options")]
    pub fn tetra_vec4(&self, lin_x: f32, lin_y: f32, lin_z: f32) -> Vector4f {
        let fetcher = ArrayFetchVector4f {
            array: self.array,
            x_stride: self.x_stride,
            y_stride: self.y_stride,
        };
        self.tetra(lin_x, lin_y, lin_z, fetcher)
    }

    /// Runs `pyramid` over this grid with an [`ArrayFetchVector4f`] accessor.
    #[cfg(feature = "options")]
    #[cfg_attr(docsrs, doc(cfg(feature = "options")))]
    pub fn pyramid_vec4(&self, lin_x: f32, lin_y: f32, lin_z: f32) -> Vector4f {
        let fetcher = ArrayFetchVector4f {
            array: self.array,
            x_stride: self.x_stride,
            y_stride: self.y_stride,
        };
        self.pyramid(lin_x, lin_y, lin_z, fetcher)
    }

    /// Runs `prism` over this grid with an [`ArrayFetchVector4f`] accessor.
    #[cfg(feature = "options")]
    #[cfg_attr(docsrs, doc(cfg(feature = "options")))]
    pub fn prism_vec4(&self, lin_x: f32, lin_y: f32, lin_z: f32) -> Vector4f {
        let fetcher = ArrayFetchVector4f {
            array: self.array,
            x_stride: self.x_stride,
            y_stride: self.y_stride,
        };
        self.prism(lin_x, lin_y, lin_z, fetcher)
    }
}
moxcms-0.7.7/src/oklab.rs000064400000000000000000000176151046102023000134040ustar 00000000000000/*
 * // Copyright 2024 (c) the Radzivon Bartoshyk. All rights reserved.
 * //
 * // Use of this source code is governed by a BSD-style
 * // license that can be found in the LICENSE file.
*/ use crate::Rgb; use crate::mlaf::mlaf; use num_traits::Pow; use pxfm::{f_cbrtf, f_powf}; use std::ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Sub, SubAssign}; #[repr(C)] #[derive(Debug, Copy, Clone, PartialOrd, PartialEq)] /// Struct that represent *Oklab* colorspace pub struct Oklab { /// All values in Oklab intended to be normalized \[0; 1\] pub l: f32, /// A value range \[-0.5; 0.5\] pub a: f32, /// B value range \[-0.5; 0.5\] pub b: f32, } impl Oklab { #[inline] pub const fn new(l: f32, a: f32, b: f32) -> Oklab { Oklab { l, a, b } } #[inline] /// Convert Linear Rgb to [Oklab] pub fn from_linear_rgb(rgb: Rgb) -> Oklab { Self::linear_rgb_to_oklab(rgb) } #[inline] fn linear_rgb_to_oklab(rgb: Rgb) -> Oklab { let l = mlaf( mlaf(0.4122214708f32 * rgb.r, 0.5363325363f32, rgb.g), 0.0514459929f32, rgb.b, ); let m = mlaf( mlaf(0.2119034982f32 * rgb.r, 0.6806995451f32, rgb.g), 0.1073969566f32, rgb.b, ); let s = mlaf( mlaf(0.0883024619f32 * rgb.r, 0.2817188376f32, rgb.g), 0.6299787005f32, rgb.b, ); let l_cone = f_cbrtf(l); let m_cone = f_cbrtf(m); let s_cone = f_cbrtf(s); Oklab { l: mlaf( mlaf(0.2104542553f32 * l_cone, 0.7936177850f32, m_cone), -0.0040720468f32, s_cone, ), a: mlaf( mlaf(1.9779984951f32 * l_cone, -2.4285922050f32, m_cone), 0.4505937099f32, s_cone, ), b: mlaf( mlaf(0.0259040371f32 * l_cone, 0.7827717662f32, m_cone), -0.8086757660f32, s_cone, ), } } #[inline] /// Converts to linear RGB pub fn to_linear_rgb(&self) -> Rgb { let l_ = mlaf( mlaf(self.l, 0.3963377774f32, self.a), 0.2158037573f32, self.b, ); let m_ = mlaf( mlaf(self.l, -0.1055613458f32, self.a), -0.0638541728f32, self.b, ); let s_ = mlaf( mlaf(self.l, -0.0894841775f32, self.a), -1.2914855480f32, self.b, ); let l = l_ * l_ * l_; let m = m_ * m_ * m_; let s = s_ * s_ * s_; Rgb::new( mlaf( mlaf(4.0767416621f32 * l, -3.3077115913f32, m), 0.2309699292f32, s, ), mlaf( mlaf(-1.2684380046f32 * l, 2.6097574011f32, m), -0.3413193965f32, s, ), mlaf( mlaf(-0.0041960863f32 * l, 
-0.7034186147f32, m), 1.7076147010f32, s, ), ) } #[inline] pub fn hybrid_distance(&self, other: Self) -> f32 { let lax = self.l - other.l; let dax = self.a - other.a; let bax = self.b - other.b; (dax * dax + bax * bax).sqrt() + lax.abs() } } impl Oklab { pub fn euclidean_distance(&self, other: Self) -> f32 { let lax = self.l - other.l; let dax = self.a - other.a; let bax = self.b - other.b; (lax * lax + dax * dax + bax * bax).sqrt() } } impl Oklab { pub fn taxicab_distance(&self, other: Self) -> f32 { let lax = self.l - other.l; let dax = self.a - other.a; let bax = self.b - other.b; lax.abs() + dax.abs() + bax.abs() } } impl Add for Oklab { type Output = Oklab; #[inline] fn add(self, rhs: Self) -> Oklab { Oklab::new(self.l + rhs.l, self.a + rhs.a, self.b + rhs.b) } } impl Add for Oklab { type Output = Oklab; #[inline] fn add(self, rhs: f32) -> Oklab { Oklab::new(self.l + rhs, self.a + rhs, self.b + rhs) } } impl AddAssign for Oklab { #[inline] fn add_assign(&mut self, rhs: Oklab) { self.l += rhs.l; self.a += rhs.a; self.b += rhs.b; } } impl AddAssign for Oklab { #[inline] fn add_assign(&mut self, rhs: f32) { self.l += rhs; self.a += rhs; self.b += rhs; } } impl Mul for Oklab { type Output = Oklab; #[inline] fn mul(self, rhs: f32) -> Self::Output { Oklab::new(self.l * rhs, self.a * rhs, self.b * rhs) } } impl Mul for Oklab { type Output = Oklab; #[inline] fn mul(self, rhs: Oklab) -> Self::Output { Oklab::new(self.l * rhs.l, self.a * rhs.a, self.b * rhs.b) } } impl MulAssign for Oklab { #[inline] fn mul_assign(&mut self, rhs: f32) { self.l *= rhs; self.a *= rhs; self.b *= rhs; } } impl MulAssign for Oklab { #[inline] fn mul_assign(&mut self, rhs: Oklab) { self.l *= rhs.l; self.a *= rhs.a; self.b *= rhs.b; } } impl Sub for Oklab { type Output = Oklab; #[inline] fn sub(self, rhs: f32) -> Self::Output { Oklab::new(self.l - rhs, self.a - rhs, self.b - rhs) } } impl Sub for Oklab { type Output = Oklab; #[inline] fn sub(self, rhs: Oklab) -> Self::Output { 
Oklab::new(self.l - rhs.l, self.a - rhs.a, self.b - rhs.b) } } impl SubAssign for Oklab { #[inline] fn sub_assign(&mut self, rhs: f32) { self.l -= rhs; self.a -= rhs; self.b -= rhs; } } impl SubAssign for Oklab { #[inline] fn sub_assign(&mut self, rhs: Oklab) { self.l -= rhs.l; self.a -= rhs.a; self.b -= rhs.b; } } impl Div for Oklab { type Output = Oklab; #[inline] fn div(self, rhs: f32) -> Self::Output { Oklab::new(self.l / rhs, self.a / rhs, self.b / rhs) } } impl Div for Oklab { type Output = Oklab; #[inline] fn div(self, rhs: Oklab) -> Self::Output { Oklab::new(self.l / rhs.l, self.a / rhs.a, self.b / rhs.b) } } impl DivAssign for Oklab { #[inline] fn div_assign(&mut self, rhs: f32) { self.l /= rhs; self.a /= rhs; self.b /= rhs; } } impl DivAssign for Oklab { #[inline] fn div_assign(&mut self, rhs: Oklab) { self.l /= rhs.l; self.a /= rhs.a; self.b /= rhs.b; } } impl Neg for Oklab { type Output = Oklab; #[inline] fn neg(self) -> Self::Output { Oklab::new(-self.l, -self.a, -self.b) } } impl Pow for Oklab { type Output = Oklab; #[inline] fn pow(self, rhs: f32) -> Self::Output { Oklab::new( f_powf(self.l, rhs), f_powf(self.a, rhs), f_powf(self.b, rhs), ) } } impl Pow for Oklab { type Output = Oklab; #[inline] fn pow(self, rhs: Oklab) -> Self::Output { Oklab::new( f_powf(self.l, rhs.l), f_powf(self.a, rhs.a), f_powf(self.b, rhs.b), ) } } impl Oklab { #[inline] pub fn sqrt(&self) -> Oklab { Oklab::new(self.l.sqrt(), self.a.sqrt(), self.b.sqrt()) } #[inline] pub fn cbrt(&self) -> Oklab { Oklab::new(f_cbrtf(self.l), f_cbrtf(self.a), f_cbrtf(self.b)) } } #[cfg(test)] mod tests { use super::*; #[test] fn round_trip() { let xyz = Rgb::new(0.1, 0.2, 0.3); let lab = Oklab::from_linear_rgb(xyz); let rolled_back = lab.to_linear_rgb(); let dx = (xyz.r - rolled_back.r).abs(); let dy = (xyz.g - rolled_back.g).abs(); let dz = (xyz.b - rolled_back.b).abs(); assert!(dx < 1e-5); assert!(dy < 1e-5); assert!(dz < 1e-5); } } 
moxcms-0.7.7/src/oklch.rs000064400000000000000000000144121046102023000134040ustar 00000000000000/* * // Copyright 2024 (c) the Radzivon Bartoshyk. All rights reserved. * // * // Use of this source code is governed by a BSD-style * // license that can be found in the LICENSE file. */ use crate::{Oklab, Rgb}; use num_traits::Pow; use pxfm::{f_atan2f, f_cbrtf, f_hypotf, f_powf, f_sincosf}; use std::ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Sub, SubAssign}; /// Represents *Oklch* colorspace #[repr(C)] #[derive(Copy, Clone, PartialOrd, PartialEq)] pub struct Oklch { /// Lightness pub l: f32, /// Chroma pub c: f32, /// Hue pub h: f32, } impl Oklch { /// Creates new instance #[inline] pub const fn new(l: f32, c: f32, h: f32) -> Oklch { Oklch { l, c, h } } /// Converts Linear [Rgb] into [Oklch] /// /// # Arguments /// `transfer_function` - Transfer function into linear colorspace and its inverse #[inline] pub fn from_linear_rgb(rgb: Rgb) -> Oklch { let oklab = Oklab::from_linear_rgb(rgb); Oklch::from_oklab(oklab) } /// Converts [Oklch] into linear [Rgb] #[inline] pub fn to_linear_rgb(&self) -> Rgb { let oklab = self.to_oklab(); oklab.to_linear_rgb() } /// Converts *Oklab* to *Oklch* #[inline] pub fn from_oklab(oklab: Oklab) -> Oklch { let chroma = f_hypotf(oklab.b, oklab.a); let hue = f_atan2f(oklab.b, oklab.a); Oklch::new(oklab.l, chroma, hue) } /// Converts *Oklch* to *Oklab* #[inline] pub fn to_oklab(&self) -> Oklab { let l = self.l; let sincos = f_sincosf(self.h); let a = self.c * sincos.1; let b = self.c * sincos.0; Oklab::new(l, a, b) } } impl Oklch { #[inline] pub fn euclidean_distance(&self, other: Self) -> f32 { let dl = self.l - other.l; let dc = self.c - other.c; let dh = self.h - other.h; (dl * dl + dc * dc + dh * dh).sqrt() } } impl Oklch { #[inline] pub fn taxicab_distance(&self, other: Self) -> f32 { let dl = self.l - other.l; let dc = self.c - other.c; let dh = self.h - other.h; dl.abs() + dc.abs() + dh.abs() } } impl Add for Oklch { type 
Output = Oklch; #[inline] fn add(self, rhs: Self) -> Oklch { Oklch::new(self.l + rhs.l, self.c + rhs.c, self.h + rhs.h) } } impl Add for Oklch { type Output = Oklch; #[inline] fn add(self, rhs: f32) -> Oklch { Oklch::new(self.l + rhs, self.c + rhs, self.h + rhs) } } impl AddAssign for Oklch { #[inline] fn add_assign(&mut self, rhs: Oklch) { self.l += rhs.l; self.c += rhs.c; self.h += rhs.h; } } impl AddAssign for Oklch { #[inline] fn add_assign(&mut self, rhs: f32) { self.l += rhs; self.c += rhs; self.h += rhs; } } impl Mul for Oklch { type Output = Oklch; #[inline] fn mul(self, rhs: f32) -> Self::Output { Oklch::new(self.l * rhs, self.c * rhs, self.h * rhs) } } impl Mul for Oklch { type Output = Oklch; #[inline] fn mul(self, rhs: Oklch) -> Self::Output { Oklch::new(self.l * rhs.l, self.c * rhs.c, self.h * rhs.h) } } impl MulAssign for Oklch { #[inline] fn mul_assign(&mut self, rhs: f32) { self.l *= rhs; self.c *= rhs; self.h *= rhs; } } impl MulAssign for Oklch { #[inline] fn mul_assign(&mut self, rhs: Oklch) { self.l *= rhs.l; self.c *= rhs.c; self.h *= rhs.h; } } impl Sub for Oklch { type Output = Oklch; #[inline] fn sub(self, rhs: f32) -> Self::Output { Oklch::new(self.l - rhs, self.c - rhs, self.h - rhs) } } impl Sub for Oklch { type Output = Oklch; #[inline] fn sub(self, rhs: Oklch) -> Self::Output { Oklch::new(self.l - rhs.l, self.c - rhs.c, self.h - rhs.h) } } impl SubAssign for Oklch { #[inline] fn sub_assign(&mut self, rhs: f32) { self.l -= rhs; self.c -= rhs; self.h -= rhs; } } impl SubAssign for Oklch { #[inline] fn sub_assign(&mut self, rhs: Oklch) { self.l -= rhs.l; self.c -= rhs.c; self.h -= rhs.h; } } impl Div for Oklch { type Output = Oklch; #[inline] fn div(self, rhs: f32) -> Self::Output { Oklch::new(self.l / rhs, self.c / rhs, self.h / rhs) } } impl Div for Oklch { type Output = Oklch; #[inline] fn div(self, rhs: Oklch) -> Self::Output { Oklch::new(self.l / rhs.l, self.c / rhs.c, self.h / rhs.h) } } impl DivAssign for Oklch { #[inline] fn 
div_assign(&mut self, rhs: f32) { self.l /= rhs; self.c /= rhs; self.h /= rhs; } } impl DivAssign for Oklch { #[inline] fn div_assign(&mut self, rhs: Oklch) { self.l /= rhs.l; self.c /= rhs.c; self.h /= rhs.h; } } impl Neg for Oklch { type Output = Oklch; #[inline] fn neg(self) -> Self::Output { Oklch::new(-self.l, -self.c, -self.h) } } impl Pow for Oklch { type Output = Oklch; #[inline] fn pow(self, rhs: f32) -> Self::Output { Oklch::new( f_powf(self.l, rhs), f_powf(self.c, rhs), f_powf(self.h, rhs), ) } } impl Pow for Oklch { type Output = Oklch; #[inline] fn pow(self, rhs: Oklch) -> Self::Output { Oklch::new( f_powf(self.l, rhs.l), f_powf(self.c, rhs.c), f_powf(self.h, rhs.h), ) } } impl Oklch { #[inline] pub fn sqrt(&self) -> Oklch { Oklch::new(self.l.sqrt(), self.c.sqrt(), self.h.sqrt()) } #[inline] pub fn cbrt(&self) -> Oklch { Oklch::new(f_cbrtf(self.l), f_cbrtf(self.c), f_cbrtf(self.h)) } } #[cfg(test)] mod tests { use super::*; #[test] fn round_trip() { let xyz = Rgb::new(0.1, 0.2, 0.3); let lab = Oklch::from_linear_rgb(xyz); let rolled_back = lab.to_linear_rgb(); let dx = (xyz.r - rolled_back.r).abs(); let dy = (xyz.g - rolled_back.g).abs(); let dz = (xyz.b - rolled_back.b).abs(); assert!(dx < 1e-5); assert!(dy < 1e-5); assert!(dz < 1e-5); } } moxcms-0.7.7/src/profile.rs000064400000000000000000001475431046102023000137600ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. 
Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ use crate::chad::BRADFORD_D; use crate::cicp::{ CicpColorPrimaries, ColorPrimaries, MatrixCoefficients, TransferCharacteristics, }; use crate::dat::ColorDateTime; use crate::err::CmsError; use crate::matrix::{Matrix3f, Xyz}; use crate::reader::s15_fixed16_number_to_float; use crate::safe_math::{SafeAdd, SafeMul}; use crate::tag::{TAG_SIZE, Tag}; use crate::trc::ToneReprCurve; use crate::{Chromaticity, Layout, Matrix3d, Vector3d, XyY, Xyzd, adapt_to_d50_d}; use std::io::Read; const MAX_PROFILE_SIZE: usize = 1024 * 1024 * 10; // 10 MB max, for Fogra39 etc #[repr(u32)] #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum ProfileSignature { Acsp, } impl TryFrom for ProfileSignature { type Error = CmsError; #[inline] fn try_from(value: u32) -> Result { if value == u32::from_ne_bytes(*b"acsp").to_be() { return Ok(ProfileSignature::Acsp); } Err(CmsError::InvalidProfile) } } impl From for u32 { #[inline] fn from(value: ProfileSignature) -> Self { match value { ProfileSignature::Acsp => u32::from_ne_bytes(*b"acsp").to_be(), } } } #[repr(u32)] #[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Ord, PartialOrd)] pub enum ProfileVersion { V2_0 = 0x02000000, V2_1 = 0x02100000, V2_2 = 0x02200000, V2_3 = 0x02300000, V2_4 = 0x02400000, V4_0 = 0x04000000, V4_1 = 0x04100000, V4_2 = 0x04200000, V4_3 = 0x04300000, #[default] V4_4 = 0x04400000, Unknown, } impl TryFrom for ProfileVersion { type Error = CmsError; fn try_from(value: u32) -> Result { match value { 0x02000000 => Ok(ProfileVersion::V2_0), 0x02100000 => Ok(ProfileVersion::V2_1), 0x02200000 => Ok(ProfileVersion::V2_2), 0x02300000 => Ok(ProfileVersion::V2_3), 0x02400000 => Ok(ProfileVersion::V2_4), 0x04000000 => Ok(ProfileVersion::V4_0), 0x04100000 => Ok(ProfileVersion::V4_1), 0x04200000 => Ok(ProfileVersion::V4_2), 0x04300000 => Ok(ProfileVersion::V4_3), 0x04400000 => Ok(ProfileVersion::V4_3), _ => Err(CmsError::InvalidProfile), } } } impl From for u32 { fn from(value: ProfileVersion) -> Self { match value { 
ProfileVersion::V2_0 => 0x02000000, ProfileVersion::V2_1 => 0x02100000, ProfileVersion::V2_2 => 0x02200000, ProfileVersion::V2_3 => 0x02300000, ProfileVersion::V2_4 => 0x02400000, ProfileVersion::V4_0 => 0x04000000, ProfileVersion::V4_1 => 0x04100000, ProfileVersion::V4_2 => 0x04200000, ProfileVersion::V4_3 => 0x04300000, ProfileVersion::V4_4 => 0x04400000, ProfileVersion::Unknown => 0x02000000, } } } #[repr(u32)] #[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Default, Hash)] pub enum DataColorSpace { #[default] Xyz, Lab, Luv, YCbr, Yxy, Rgb, Gray, Hsv, Hls, Cmyk, Cmy, Color2, Color3, Color4, Color5, Color6, Color7, Color8, Color9, Color10, Color11, Color12, Color13, Color14, Color15, } impl DataColorSpace { #[inline] pub fn check_layout(self, layout: Layout) -> Result<(), CmsError> { let unsupported: bool = match self { DataColorSpace::Xyz => layout != Layout::Rgb, DataColorSpace::Lab => layout != Layout::Rgb, DataColorSpace::Luv => layout != Layout::Rgb, DataColorSpace::YCbr => layout != Layout::Rgb, DataColorSpace::Yxy => layout != Layout::Rgb, DataColorSpace::Rgb => layout != Layout::Rgb && layout != Layout::Rgba, DataColorSpace::Gray => layout != Layout::Gray && layout != Layout::GrayAlpha, DataColorSpace::Hsv => layout != Layout::Rgb, DataColorSpace::Hls => layout != Layout::Rgb, DataColorSpace::Cmyk => layout != Layout::Rgba, DataColorSpace::Cmy => layout != Layout::Rgb, DataColorSpace::Color2 => layout != Layout::GrayAlpha, DataColorSpace::Color3 => layout != Layout::Rgb, DataColorSpace::Color4 => layout != Layout::Rgba, DataColorSpace::Color5 => layout != Layout::Inks5, DataColorSpace::Color6 => layout != Layout::Inks6, DataColorSpace::Color7 => layout != Layout::Inks7, DataColorSpace::Color8 => layout != Layout::Inks8, DataColorSpace::Color9 => layout != Layout::Inks9, DataColorSpace::Color10 => layout != Layout::Inks10, DataColorSpace::Color11 => layout != Layout::Inks11, DataColorSpace::Color12 => layout != Layout::Inks12, 
DataColorSpace::Color13 => layout != Layout::Inks13, DataColorSpace::Color14 => layout != Layout::Inks14, DataColorSpace::Color15 => layout != Layout::Inks15, }; if unsupported { Err(CmsError::InvalidLayout) } else { Ok(()) } } pub(crate) fn is_three_channels(self) -> bool { matches!( self, DataColorSpace::Xyz | DataColorSpace::Lab | DataColorSpace::Luv | DataColorSpace::YCbr | DataColorSpace::Yxy | DataColorSpace::Rgb | DataColorSpace::Hsv | DataColorSpace::Hls | DataColorSpace::Cmy | DataColorSpace::Color3 ) } } #[repr(u32)] #[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Default)] pub enum ProfileClass { InputDevice, #[default] DisplayDevice, OutputDevice, DeviceLink, ColorSpace, Abstract, Named, } impl TryFrom for ProfileClass { type Error = CmsError; fn try_from(value: u32) -> Result { if value == u32::from_ne_bytes(*b"scnr").to_be() { return Ok(ProfileClass::InputDevice); } else if value == u32::from_ne_bytes(*b"mntr").to_be() { return Ok(ProfileClass::DisplayDevice); } else if value == u32::from_ne_bytes(*b"prtr").to_be() { return Ok(ProfileClass::OutputDevice); } else if value == u32::from_ne_bytes(*b"link").to_be() { return Ok(ProfileClass::DeviceLink); } else if value == u32::from_ne_bytes(*b"spac").to_be() { return Ok(ProfileClass::ColorSpace); } else if value == u32::from_ne_bytes(*b"abst").to_be() { return Ok(ProfileClass::Abstract); } else if value == u32::from_ne_bytes(*b"nmcl").to_be() { return Ok(ProfileClass::Named); } Err(CmsError::InvalidProfile) } } impl From for u32 { fn from(val: ProfileClass) -> Self { match val { ProfileClass::InputDevice => u32::from_ne_bytes(*b"scnr").to_be(), ProfileClass::DisplayDevice => u32::from_ne_bytes(*b"mntr").to_be(), ProfileClass::OutputDevice => u32::from_ne_bytes(*b"prtr").to_be(), ProfileClass::DeviceLink => u32::from_ne_bytes(*b"link").to_be(), ProfileClass::ColorSpace => u32::from_ne_bytes(*b"spac").to_be(), ProfileClass::Abstract => u32::from_ne_bytes(*b"abst").to_be(), ProfileClass::Named => 
u32::from_ne_bytes(*b"nmcl").to_be(), } } } #[derive(Debug, Clone)] pub enum LutStore { Store8(Vec), Store16(Vec), } #[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] pub enum LutType { Lut8, Lut16, LutMab, LutMba, } impl TryFrom for LutType { type Error = CmsError; fn try_from(value: u32) -> Result { if value == u32::from_ne_bytes(*b"mft1").to_be() { return Ok(LutType::Lut8); } else if value == u32::from_ne_bytes(*b"mft2").to_be() { return Ok(LutType::Lut16); } else if value == u32::from_ne_bytes(*b"mAB ").to_be() { return Ok(LutType::LutMab); } else if value == u32::from_ne_bytes(*b"mBA ").to_be() { return Ok(LutType::LutMba); } Err(CmsError::InvalidProfile) } } impl From for u32 { fn from(val: LutType) -> Self { match val { LutType::Lut8 => u32::from_ne_bytes(*b"mft1").to_be(), LutType::Lut16 => u32::from_ne_bytes(*b"mft2").to_be(), LutType::LutMab => u32::from_ne_bytes(*b"mAB ").to_be(), LutType::LutMba => u32::from_ne_bytes(*b"mBA ").to_be(), } } } impl TryFrom for DataColorSpace { type Error = CmsError; fn try_from(value: u32) -> Result { if value == u32::from_ne_bytes(*b"XYZ ").to_be() { return Ok(DataColorSpace::Xyz); } else if value == u32::from_ne_bytes(*b"Lab ").to_be() { return Ok(DataColorSpace::Lab); } else if value == u32::from_ne_bytes(*b"Luv ").to_be() { return Ok(DataColorSpace::Luv); } else if value == u32::from_ne_bytes(*b"YCbr").to_be() { return Ok(DataColorSpace::YCbr); } else if value == u32::from_ne_bytes(*b"Yxy ").to_be() { return Ok(DataColorSpace::Yxy); } else if value == u32::from_ne_bytes(*b"RGB ").to_be() { return Ok(DataColorSpace::Rgb); } else if value == u32::from_ne_bytes(*b"GRAY").to_be() { return Ok(DataColorSpace::Gray); } else if value == u32::from_ne_bytes(*b"HSV ").to_be() { return Ok(DataColorSpace::Hsv); } else if value == u32::from_ne_bytes(*b"HLS ").to_be() { return Ok(DataColorSpace::Hls); } else if value == u32::from_ne_bytes(*b"CMYK").to_be() { return Ok(DataColorSpace::Cmyk); } else if value == 
u32::from_ne_bytes(*b"CMY ").to_be() { return Ok(DataColorSpace::Cmy); } else if value == u32::from_ne_bytes(*b"2CLR").to_be() { return Ok(DataColorSpace::Color2); } else if value == u32::from_ne_bytes(*b"3CLR").to_be() { return Ok(DataColorSpace::Color3); } else if value == u32::from_ne_bytes(*b"4CLR").to_be() { return Ok(DataColorSpace::Color4); } else if value == u32::from_ne_bytes(*b"5CLR").to_be() { return Ok(DataColorSpace::Color5); } else if value == u32::from_ne_bytes(*b"6CLR").to_be() { return Ok(DataColorSpace::Color6); } else if value == u32::from_ne_bytes(*b"7CLR").to_be() { return Ok(DataColorSpace::Color7); } else if value == u32::from_ne_bytes(*b"8CLR").to_be() { return Ok(DataColorSpace::Color8); } else if value == u32::from_ne_bytes(*b"9CLR").to_be() { return Ok(DataColorSpace::Color9); } else if value == u32::from_ne_bytes(*b"ACLR").to_be() { return Ok(DataColorSpace::Color10); } else if value == u32::from_ne_bytes(*b"BCLR").to_be() { return Ok(DataColorSpace::Color11); } else if value == u32::from_ne_bytes(*b"CCLR").to_be() { return Ok(DataColorSpace::Color12); } else if value == u32::from_ne_bytes(*b"DCLR").to_be() { return Ok(DataColorSpace::Color13); } else if value == u32::from_ne_bytes(*b"ECLR").to_be() { return Ok(DataColorSpace::Color14); } else if value == u32::from_ne_bytes(*b"FCLR").to_be() { return Ok(DataColorSpace::Color15); } Err(CmsError::InvalidProfile) } } impl From for u32 { fn from(val: DataColorSpace) -> Self { match val { DataColorSpace::Xyz => u32::from_ne_bytes(*b"XYZ ").to_be(), DataColorSpace::Lab => u32::from_ne_bytes(*b"Lab ").to_be(), DataColorSpace::Luv => u32::from_ne_bytes(*b"Luv ").to_be(), DataColorSpace::YCbr => u32::from_ne_bytes(*b"YCbr").to_be(), DataColorSpace::Yxy => u32::from_ne_bytes(*b"Yxy ").to_be(), DataColorSpace::Rgb => u32::from_ne_bytes(*b"RGB ").to_be(), DataColorSpace::Gray => u32::from_ne_bytes(*b"GRAY").to_be(), DataColorSpace::Hsv => u32::from_ne_bytes(*b"HSV ").to_be(), DataColorSpace::Hls => 
u32::from_ne_bytes(*b"HLS ").to_be(), DataColorSpace::Cmyk => u32::from_ne_bytes(*b"CMYK").to_be(), DataColorSpace::Cmy => u32::from_ne_bytes(*b"CMY ").to_be(), DataColorSpace::Color2 => u32::from_ne_bytes(*b"2CLR").to_be(), DataColorSpace::Color3 => u32::from_ne_bytes(*b"3CLR").to_be(), DataColorSpace::Color4 => u32::from_ne_bytes(*b"4CLR").to_be(), DataColorSpace::Color5 => u32::from_ne_bytes(*b"5CLR").to_be(), DataColorSpace::Color6 => u32::from_ne_bytes(*b"6CLR").to_be(), DataColorSpace::Color7 => u32::from_ne_bytes(*b"7CLR").to_be(), DataColorSpace::Color8 => u32::from_ne_bytes(*b"8CLR").to_be(), DataColorSpace::Color9 => u32::from_ne_bytes(*b"9CLR").to_be(), DataColorSpace::Color10 => u32::from_ne_bytes(*b"ACLR").to_be(), DataColorSpace::Color11 => u32::from_ne_bytes(*b"BCLR").to_be(), DataColorSpace::Color12 => u32::from_ne_bytes(*b"CCLR").to_be(), DataColorSpace::Color13 => u32::from_ne_bytes(*b"DCLR").to_be(), DataColorSpace::Color14 => u32::from_ne_bytes(*b"ECLR").to_be(), DataColorSpace::Color15 => u32::from_ne_bytes(*b"FCLR").to_be(), } } } #[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq)] pub enum TechnologySignatures { FilmScanner, DigitalCamera, ReflectiveScanner, InkJetPrinter, ThermalWaxPrinter, ElectrophotographicPrinter, ElectrostaticPrinter, DyeSublimationPrinter, PhotographicPaperPrinter, FilmWriter, VideoMonitor, VideoCamera, ProjectionTelevision, CathodeRayTubeDisplay, PassiveMatrixDisplay, ActiveMatrixDisplay, LiquidCrystalDisplay, OrganicLedDisplay, PhotoCd, PhotographicImageSetter, Gravure, OffsetLithography, Silkscreen, Flexography, MotionPictureFilmScanner, MotionPictureFilmRecorder, DigitalMotionPictureCamera, DigitalCinemaProjector, Unknown(u32), } impl From for TechnologySignatures { fn from(value: u32) -> Self { if value == u32::from_ne_bytes(*b"fscn").to_be() { return TechnologySignatures::FilmScanner; } else if value == u32::from_ne_bytes(*b"dcam").to_be() { return TechnologySignatures::DigitalCamera; } else if value == 
u32::from_ne_bytes(*b"rscn").to_be() { return TechnologySignatures::ReflectiveScanner; } else if value == u32::from_ne_bytes(*b"ijet").to_be() { return TechnologySignatures::InkJetPrinter; } else if value == u32::from_ne_bytes(*b"twax").to_be() { return TechnologySignatures::ThermalWaxPrinter; } else if value == u32::from_ne_bytes(*b"epho").to_be() { return TechnologySignatures::ElectrophotographicPrinter; } else if value == u32::from_ne_bytes(*b"esta").to_be() { return TechnologySignatures::ElectrostaticPrinter; } else if value == u32::from_ne_bytes(*b"dsub").to_be() { return TechnologySignatures::DyeSublimationPrinter; } else if value == u32::from_ne_bytes(*b"rpho").to_be() { return TechnologySignatures::PhotographicPaperPrinter; } else if value == u32::from_ne_bytes(*b"fprn").to_be() { return TechnologySignatures::FilmWriter; } else if value == u32::from_ne_bytes(*b"vidm").to_be() { return TechnologySignatures::VideoMonitor; } else if value == u32::from_ne_bytes(*b"vidc").to_be() { return TechnologySignatures::VideoCamera; } else if value == u32::from_ne_bytes(*b"pjtv").to_be() { return TechnologySignatures::ProjectionTelevision; } else if value == u32::from_ne_bytes(*b"CRT ").to_be() { return TechnologySignatures::CathodeRayTubeDisplay; } else if value == u32::from_ne_bytes(*b"PMD ").to_be() { return TechnologySignatures::PassiveMatrixDisplay; } else if value == u32::from_ne_bytes(*b"AMD ").to_be() { return TechnologySignatures::ActiveMatrixDisplay; } else if value == u32::from_ne_bytes(*b"LCD ").to_be() { return TechnologySignatures::LiquidCrystalDisplay; } else if value == u32::from_ne_bytes(*b"OLED").to_be() { return TechnologySignatures::OrganicLedDisplay; } else if value == u32::from_ne_bytes(*b"KPCD").to_be() { return TechnologySignatures::PhotoCd; } else if value == u32::from_ne_bytes(*b"imgs").to_be() { return TechnologySignatures::PhotographicImageSetter; } else if value == u32::from_ne_bytes(*b"grav").to_be() { return TechnologySignatures::Gravure; } 
else if value == u32::from_ne_bytes(*b"offs").to_be() { return TechnologySignatures::OffsetLithography; } else if value == u32::from_ne_bytes(*b"silk").to_be() { return TechnologySignatures::Silkscreen; } else if value == u32::from_ne_bytes(*b"flex").to_be() { return TechnologySignatures::Flexography; } else if value == u32::from_ne_bytes(*b"mpfs").to_be() { return TechnologySignatures::MotionPictureFilmScanner; } else if value == u32::from_ne_bytes(*b"mpfr").to_be() { return TechnologySignatures::MotionPictureFilmRecorder; } else if value == u32::from_ne_bytes(*b"dmpc").to_be() { return TechnologySignatures::DigitalMotionPictureCamera; } else if value == u32::from_ne_bytes(*b"dcpj").to_be() { return TechnologySignatures::DigitalCinemaProjector; } TechnologySignatures::Unknown(value) } } #[derive(Debug, Clone)] pub enum LutWarehouse { Lut(LutDataType), Multidimensional(LutMultidimensionalType), } #[derive(Debug, Clone)] pub struct LutDataType { // used by lut8Type/lut16Type (mft2) only pub num_input_channels: u8, pub num_output_channels: u8, pub num_clut_grid_points: u8, pub matrix: Matrix3d, pub num_input_table_entries: u16, pub num_output_table_entries: u16, pub input_table: LutStore, pub clut_table: LutStore, pub output_table: LutStore, pub lut_type: LutType, } impl LutDataType { pub(crate) fn has_same_kind(&self) -> bool { matches!( (&self.input_table, &self.clut_table, &self.output_table), ( LutStore::Store8(_), LutStore::Store8(_), LutStore::Store8(_) ) | ( LutStore::Store16(_), LutStore::Store16(_), LutStore::Store16(_) ) ) } } #[derive(Debug, Clone)] pub struct LutMultidimensionalType { pub num_input_channels: u8, pub num_output_channels: u8, pub grid_points: [u8; 16], pub clut: Option, pub a_curves: Vec, pub b_curves: Vec, pub m_curves: Vec, pub matrix: Matrix3d, pub bias: Vector3d, } #[repr(u32)] #[derive(Clone, Copy, Debug, Default, Ord, PartialOrd, Eq, PartialEq, Hash)] pub enum RenderingIntent { AbsoluteColorimetric = 3, Saturation = 2, 
RelativeColorimetric = 1,
    #[default]
    Perceptual = 0,
}

impl TryFrom<u32> for RenderingIntent {
    type Error = CmsError;

    /// Maps the numeric rendering intent (0..=3) onto [`RenderingIntent`].
    ///
    /// # Errors
    ///
    /// Returns [`CmsError::InvalidRenderingIntent`] for any other value.
    #[inline]
    fn try_from(value: u32) -> Result<Self, Self::Error> {
        match value {
            0 => Ok(RenderingIntent::Perceptual),
            1 => Ok(RenderingIntent::RelativeColorimetric),
            2 => Ok(RenderingIntent::Saturation),
            3 => Ok(RenderingIntent::AbsoluteColorimetric),
            _ => Err(CmsError::InvalidRenderingIntent),
        }
    }
}

impl From<RenderingIntent> for u32 {
    /// Returns the numeric code of the intent; inverse of `TryFrom<u32>`.
    #[inline]
    fn from(value: RenderingIntent) -> Self {
        match value {
            RenderingIntent::AbsoluteColorimetric => 3,
            RenderingIntent::Saturation => 2,
            RenderingIntent::RelativeColorimetric => 1,
            RenderingIntent::Perceptual => 0,
        }
    }
}

/// ICC Header
#[repr(C)]
#[derive(Debug, Clone, Copy)]
pub(crate) struct ProfileHeader {
    pub size: u32,                         // Size of the profile (computed)
    pub cmm_type: u32,                     // Preferred CMM type (ignored)
    pub version: ProfileVersion,           // Version (4.3 or 4.4 if CICP is included)
    pub profile_class: ProfileClass,       // Display device profile
    pub data_color_space: DataColorSpace,  // RGB input color space
    pub pcs: DataColorSpace,               // Profile connection space
    pub creation_date_time: ColorDateTime, // Date and time
    pub signature: ProfileSignature,       // Profile signature
    pub platform: u32,                     // Platform target (ignored)
    pub flags: u32,                        // Flags (not embedded, can be used independently)
    pub device_manufacturer: u32,          // Device manufacturer (ignored)
    pub device_model: u32,                 // Device model (ignored)
    pub device_attributes: [u8; 8],        // Device attributes (ignored)
    pub rendering_intent: RenderingIntent, // Relative colorimetric rendering intent
    pub illuminant: Xyz,                   // D50 standard illuminant X
    pub creator: u32,                      // Profile creator (ignored)
    pub profile_id: [u8; 16],              // Profile id checksum (ignored)
    pub reserved: [u8; 28],                // Reserved (ignored)
    pub tag_count: u32,                    // Technically not part of header, but required
}

impl ProfileHeader {
    /// Builds a default header for a V4.3 RGB display profile with an XYZ PCS,
    /// perceptual intent and a D50 illuminant; every other field is zeroed.
    ///
    /// * `size`: value stored in the `size` field.
    #[allow(dead_code)]
    pub(crate) fn new(size: u32) -> Self {
        Self {
            size,
            cmm_type: 0,
            version: ProfileVersion::V4_3,
            profile_class: ProfileClass::DisplayDevice,
            data_color_space: DataColorSpace::Rgb,
            pcs: DataColorSpace::Xyz,
            creation_date_time: ColorDateTime::default(),
            signature: ProfileSignature::Acsp,
            platform: 0,
            flags: 0x00000000,
            device_manufacturer: 0,
            device_model: 0,
            device_attributes: [0; 8],
            rendering_intent: RenderingIntent::Perceptual,
            illuminant: Chromaticity::D50.to_xyz(),
            creator: 0,
            profile_id: [0; 16],
            reserved: [0; 28],
            tag_count: 0,
        }
    }

    /// Creates profile from the buffer
    ///
    /// Reads the first `size_of::<ProfileHeader>()` bytes of `slice` and decodes
    /// every multi-byte field as big-endian.
    ///
    /// # Errors
    ///
    /// Returns [`CmsError::InvalidProfile`] when `slice` is shorter than the
    /// header; errors raised by the `try_from` / `new_from_slice` conversions of
    /// the individual fields are propagated unchanged.
    pub(crate) fn new_from_slice(slice: &[u8]) -> Result<Self, CmsError> {
        if slice.len() < size_of::<ProfileHeader>() {
            return Err(CmsError::InvalidProfile);
        }
        let mut cursor = std::io::Cursor::new(slice);
        let mut buffer = [0u8; size_of::<ProfileHeader>()];
        cursor
            .read_exact(&mut buffer)
            .map_err(|_| CmsError::InvalidProfile)?;

        // Big-endian readers over the header bytes. Every offset used below is a
        // constant that lies inside `buffer`.
        let be_u32 = |at: usize| {
            u32::from_be_bytes([buffer[at], buffer[at + 1], buffer[at + 2], buffer[at + 3]])
        };
        let be_i32 = |at: usize| {
            i32::from_be_bytes([buffer[at], buffer[at + 1], buffer[at + 2], buffer[at + 3]])
        };

        let header = Self {
            size: be_u32(0),
            cmm_type: be_u32(4),
            version: ProfileVersion::try_from(be_u32(8))?,
            profile_class: ProfileClass::try_from(be_u32(12))?,
            data_color_space: DataColorSpace::try_from(be_u32(16))?,
            pcs: DataColorSpace::try_from(be_u32(20))?,
            creation_date_time: ColorDateTime::new_from_slice(buffer[24..36].try_into().unwrap())?,
            signature: ProfileSignature::try_from(be_u32(36))?,
            platform: be_u32(40),
            flags: be_u32(44),
            device_manufacturer: be_u32(48),
            device_model: be_u32(52),
            device_attributes: buffer[56..64].try_into().unwrap(),
            rendering_intent: RenderingIntent::try_from(be_u32(64))?,
            illuminant: Xyz::new(
                s15_fixed16_number_to_float(be_i32(68)),
                s15_fixed16_number_to_float(be_i32(72)),
                s15_fixed16_number_to_float(be_i32(76)),
            ),
            creator: be_u32(80),
            profile_id: buffer[84..100].try_into().unwrap(),
            reserved: buffer[100..128].try_into().unwrap(),
            tag_count: be_u32(128),
        };
        Ok(header)
    }
}

/// A [Coding Independent Code Point](https://en.wikipedia.org/wiki/Coding-independent_code_points).
#[repr(C)]
#[derive(Debug, Clone, Copy)]
pub struct CicpProfile {
    pub color_primaries: CicpColorPrimaries,
    pub transfer_characteristics: TransferCharacteristics,
    pub matrix_coefficients: MatrixCoefficients,
    pub full_range: bool,
}

#[derive(Debug, Clone)]
pub struct LocalizableString {
    /// An ISO 639-1 value is expected; any text w. more than two symbols will be truncated
    pub language: String,
    /// An ISO 3166-1 value is expected; any text w.
more than two symbols will be truncated pub country: String, pub value: String, } impl LocalizableString { /// Creates new localizable string /// /// # Arguments /// /// * `language`: an ISO 639-1 value is expected, any text more than 2 symbols will be truncated /// * `country`: an ISO 3166-1 value is expected, any text more than 2 symbols will be truncated /// * `value`: String value /// pub fn new(language: String, country: String, value: String) -> Self { Self { language, country, value, } } } #[derive(Debug, Clone)] pub struct DescriptionString { pub ascii_string: String, pub unicode_language_code: u32, pub unicode_string: String, pub script_code_code: i8, pub mac_string: String, } #[derive(Debug, Clone)] pub enum ProfileText { PlainString(String), Localizable(Vec), Description(DescriptionString), } impl ProfileText { pub(crate) fn has_values(&self) -> bool { match self { ProfileText::PlainString(_) => true, ProfileText::Localizable(lc) => !lc.is_empty(), ProfileText::Description(_) => true, } } } #[derive(Debug, Clone, Copy)] pub enum StandardObserver { D50, D65, Unknown, } impl From for StandardObserver { fn from(value: u32) -> Self { if value == 1 { return StandardObserver::D50; } else if value == 2 { return StandardObserver::D65; } StandardObserver::Unknown } } #[derive(Debug, Clone, Copy)] pub struct ViewingConditions { pub illuminant: Xyz, pub surround: Xyz, pub observer: StandardObserver, } #[derive(Debug, Clone, Copy)] pub enum MeasurementGeometry { Unknown, /// 0°:45° or 45°:0° D45to45, /// 0°:d or d:0° D0to0, } impl From for MeasurementGeometry { fn from(value: u32) -> Self { if value == 1 { Self::D45to45 } else if value == 2 { Self::D0to0 } else { Self::Unknown } } } #[derive(Debug, Clone, Copy)] pub enum StandardIlluminant { Unknown, D50, D65, D93, F2, D55, A, EquiPower, F8, } impl From for StandardIlluminant { fn from(value: u32) -> Self { match value { 1 => StandardIlluminant::D50, 2 => StandardIlluminant::D65, 3 => StandardIlluminant::D93, 4 => 
StandardIlluminant::F2, 5 => StandardIlluminant::D55, 6 => StandardIlluminant::A, 7 => StandardIlluminant::EquiPower, 8 => StandardIlluminant::F8, _ => Self::Unknown, } } } impl From for u32 { fn from(value: StandardIlluminant) -> Self { match value { StandardIlluminant::Unknown => 0u32, StandardIlluminant::D50 => 1u32, StandardIlluminant::D65 => 2u32, StandardIlluminant::D93 => 3, StandardIlluminant::F2 => 4, StandardIlluminant::D55 => 5, StandardIlluminant::A => 6, StandardIlluminant::EquiPower => 7, StandardIlluminant::F8 => 8, } } } #[derive(Debug, Clone, Copy)] pub struct Measurement { pub observer: StandardObserver, pub backing: Xyz, pub geometry: MeasurementGeometry, pub flare: f32, pub illuminant: StandardIlluminant, } /// ICC Profile representation #[repr(C)] #[derive(Debug, Clone, Default)] pub struct ColorProfile { pub pcs: DataColorSpace, pub color_space: DataColorSpace, pub profile_class: ProfileClass, pub rendering_intent: RenderingIntent, pub red_colorant: Xyzd, pub green_colorant: Xyzd, pub blue_colorant: Xyzd, pub white_point: Xyzd, pub black_point: Option, pub media_white_point: Option, pub luminance: Option, pub measurement: Option, pub red_trc: Option, pub green_trc: Option, pub blue_trc: Option, pub gray_trc: Option, pub cicp: Option, pub chromatic_adaptation: Option, pub lut_a_to_b_perceptual: Option, pub lut_a_to_b_colorimetric: Option, pub lut_a_to_b_saturation: Option, pub lut_b_to_a_perceptual: Option, pub lut_b_to_a_colorimetric: Option, pub lut_b_to_a_saturation: Option, pub gamut: Option, pub copyright: Option, pub description: Option, pub device_manufacturer: Option, pub device_model: Option, pub char_target: Option, pub viewing_conditions: Option, pub viewing_conditions_description: Option, pub technology: Option, pub calibration_date: Option, /// Version for internal and viewing purposes only. /// On encoding added value to profile will always be V4. 
pub(crate) version_internal: ProfileVersion,
}

/// Limits applied while parsing a profile.
#[derive(Debug, Clone, Copy, PartialOrd, PartialEq, Hash)]
pub struct ParsingOptions {
    /// Maximum allowed profile size in bytes
    pub max_profile_size: usize,
    /// Maximum allowed CLUT size in bytes
    pub max_allowed_clut_size: usize,
    /// Maximum allowed TRC size in elements count
    pub max_allowed_trc_size: usize,
}

impl Default for ParsingOptions {
    fn default() -> Self {
        Self {
            max_profile_size: MAX_PROFILE_SIZE,
            max_allowed_clut_size: 10_000_000,
            max_allowed_trc_size: 40_000,
        }
    }
}

impl ColorProfile {
    /// Returns profile version
    pub fn version(&self) -> ProfileVersion {
        self.version_internal
    }

    /// Parses a profile from `slice` using the default [`ParsingOptions`].
    ///
    /// # Errors
    ///
    /// See [`ColorProfile::new_from_slice_with_options`].
    pub fn new_from_slice(slice: &[u8]) -> Result<ColorProfile, CmsError> {
        Self::new_from_slice_with_options(slice, Default::default())
    }

    /// Parses a profile from `slice`, honouring the limits in `options`.
    ///
    /// The header is decoded first, then every entry of the tag table is
    /// visited. Tags with an unrecognised signature are skipped; recognised
    /// tags are decoded by the matching `read_*` helper and stored in the
    /// corresponding field of the profile.
    ///
    /// # Errors
    ///
    /// Returns [`CmsError::InvalidProfile`] when `slice.len()` is not below
    /// `options.max_profile_size` or when the tag table does not fit into
    /// `slice`. Errors produced by the header parser and by the individual tag
    /// readers are propagated unchanged.
    pub fn new_from_slice_with_options(
        slice: &[u8],
        options: ParsingOptions,
    ) -> Result<ColorProfile, CmsError> {
        let header = ProfileHeader::new_from_slice(slice)?;
        let tags_count = header.tag_count as usize;
        if slice.len() >= options.max_profile_size {
            return Err(CmsError::InvalidProfile);
        }
        // The tag table starts right after the header; checked arithmetic guards
        // against a hostile `tag_count`.
        let tags_end = tags_count
            .safe_mul(TAG_SIZE)?
            .safe_add(size_of::<ProfileHeader>())?;
        if slice.len() < tags_end {
            return Err(CmsError::InvalidProfile);
        }
        let tags_slice = &slice[size_of::<ProfileHeader>()..tags_end];
        let mut profile = ColorProfile {
            rendering_intent: header.rendering_intent,
            pcs: header.pcs,
            profile_class: header.profile_class,
            color_space: header.data_color_space,
            white_point: header.illuminant.to_xyzd(),
            version_internal: header.version,
            ..Default::default()
        };
        let color_space = profile.color_space;
        for record in tags_slice.chunks_exact(TAG_SIZE) {
            // Each table record holds: signature, offset of the data, size of the data.
            let tag_value = u32::from_be_bytes([record[0], record[1], record[2], record[3]]);
            let tag_entry = u32::from_be_bytes([record[4], record[5], record[6], record[7]]);
            let tag_size =
                u32::from_be_bytes([record[8], record[9], record[10], record[11]]) as usize;
            let entry = tag_entry as usize;
            // Just ignore unknown tags
            let Ok(tag) = Tag::try_from(tag_value) else {
                continue;
            };
            match tag {
                Tag::RedXyz => {
                    if color_space == DataColorSpace::Rgb {
                        profile.red_colorant = Self::read_xyz_tag(slice, entry, tag_size)?;
                    }
                }
                Tag::GreenXyz => {
                    if color_space == DataColorSpace::Rgb {
                        profile.green_colorant = Self::read_xyz_tag(slice, entry, tag_size)?;
                    }
                }
                Tag::BlueXyz => {
                    if color_space == DataColorSpace::Rgb {
                        profile.blue_colorant = Self::read_xyz_tag(slice, entry, tag_size)?;
                    }
                }
                Tag::RedToneReproduction => {
                    if color_space == DataColorSpace::Rgb {
                        profile.red_trc =
                            Self::read_trc_tag_s(slice, entry, tag_size, &options)?;
                    }
                }
                Tag::GreenToneReproduction => {
                    if color_space == DataColorSpace::Rgb {
                        profile.green_trc =
                            Self::read_trc_tag_s(slice, entry, tag_size, &options)?;
                    }
                }
                Tag::BlueToneReproduction => {
                    if color_space == DataColorSpace::Rgb {
                        profile.blue_trc =
                            Self::read_trc_tag_s(slice, entry, tag_size, &options)?;
                    }
                }
                Tag::GreyToneReproduction => {
                    if color_space == DataColorSpace::Gray {
                        profile.gray_trc =
                            Self::read_trc_tag_s(slice, entry, tag_size, &options)?;
                    }
                }
                Tag::MediaWhitePoint => {
                    profile.media_white_point =
                        Self::read_xyz_tag(slice, entry, tag_size).map(Some)?;
                }
                Tag::Luminance => {
                    profile.luminance = Self::read_xyz_tag(slice, entry, tag_size).map(Some)?;
                }
                Tag::Measurement => {
                    profile.measurement = Self::read_meas_tag(slice, entry, tag_size)?;
                }
                Tag::CodeIndependentPoints => {
                    // This tag may be present when the data colour space in the profile header is RGB, YCbCr, or XYZ, and the
                    // profile class in the profile header is Input or Display. The tag shall not be present for other data colour spaces
                    // or profile classes indicated in the profile header.
                    if (profile.profile_class == ProfileClass::InputDevice
                        || profile.profile_class == ProfileClass::DisplayDevice)
                        && (profile.color_space == DataColorSpace::Rgb
                            || profile.color_space == DataColorSpace::YCbr
                            || profile.color_space == DataColorSpace::Xyz)
                    {
                        profile.cicp = Self::read_cicp_tag(slice, entry, tag_size)?;
                    }
                }
                Tag::ChromaticAdaptation => {
                    profile.chromatic_adaptation = Self::read_chad_tag(slice, entry, tag_size)?;
                }
                Tag::BlackPoint => {
                    profile.black_point = Self::read_xyz_tag(slice, entry, tag_size).map(Some)?;
                }
                Tag::DeviceToPcsLutPerceptual => {
                    profile.lut_a_to_b_perceptual =
                        Self::read_lut_tag(slice, tag_entry, tag_size, &options)?;
                }
                Tag::DeviceToPcsLutColorimetric => {
                    profile.lut_a_to_b_colorimetric =
                        Self::read_lut_tag(slice, tag_entry, tag_size, &options)?;
                }
                Tag::DeviceToPcsLutSaturation => {
                    profile.lut_a_to_b_saturation =
                        Self::read_lut_tag(slice, tag_entry, tag_size, &options)?;
                }
                Tag::PcsToDeviceLutPerceptual => {
                    profile.lut_b_to_a_perceptual =
                        Self::read_lut_tag(slice, tag_entry, tag_size, &options)?;
                }
                Tag::PcsToDeviceLutColorimetric => {
                    profile.lut_b_to_a_colorimetric =
                        Self::read_lut_tag(slice, tag_entry, tag_size, &options)?;
                }
                Tag::PcsToDeviceLutSaturation => {
                    profile.lut_b_to_a_saturation =
                        Self::read_lut_tag(slice, tag_entry, tag_size, &options)?;
                }
                Tag::Gamut => {
                    profile.gamut = Self::read_lut_tag(slice, tag_entry, tag_size, &options)?;
                }
                Tag::Copyright => {
                    profile.copyright = Self::read_string_tag(slice, entry, tag_size)?;
                }
                Tag::ProfileDescription => {
                    profile.description = Self::read_string_tag(slice, entry, tag_size)?;
                }
                Tag::ViewingConditionsDescription => {
                    profile.viewing_conditions_description =
                        Self::read_string_tag(slice, entry, tag_size)?;
                }
                Tag::DeviceModel => {
                    profile.device_model = Self::read_string_tag(slice, entry, tag_size)?;
                }
                Tag::DeviceManufacturer => {
                    profile.device_manufacturer = Self::read_string_tag(slice, entry, tag_size)?;
                }
                Tag::CharTarget => {
                    profile.char_target = Self::read_string_tag(slice, entry, tag_size)?;
                }
                // Recognised, but nothing is stored for it.
                Tag::Chromaticity => {}
                Tag::ObserverConditions => {
                    profile.viewing_conditions =
                        Self::read_viewing_conditions(slice, entry, tag_size)?;
                }
                Tag::Technology => {
                    profile.technology = Self::read_tech_tag(slice, entry, tag_size)?;
                }
                Tag::CalibrationDateTime => {
                    profile.calibration_date = Self::read_date_time_tag(slice, entry, tag_size)?;
                }
            }
        }

        Ok(profile)
    }
}

impl ColorProfile {
    #[inline]
pub fn colorant_matrix(&self) -> Matrix3d {
        // One colorant per column: red, green, blue.
        let (r, g, b) = (self.red_colorant, self.green_colorant, self.blue_colorant);
        Matrix3d {
            v: [[r.x, g.x, b.x], [r.y, g.y, b.y], [r.z, g.z, b.z]],
        }
    }

    /// Computes colorants matrix. Returns not transposed matrix.
    ///
    /// To work on `const` context this method does have restrictions.
    /// If invalid values were provided it may return invalid matrix or NaNs.
    pub const fn colorants_matrix(white_point: XyY, primaries: ColorPrimaries) -> Matrix3d {
        let r = primaries.red.to_xyzd();
        let g = primaries.green.to_xyzd();
        let b = primaries.blue.to_xyzd();
        let primaries_xyz = Matrix3d {
            v: [[r.x, g.x, b.x], [r.y, g.y, b.y], [r.z, g.z, b.z]],
        };
        adapt_to_d50_d(
            ColorProfile::rgb_to_xyz_d(primaries_xyz, white_point.to_xyzd()),
            white_point,
        )
    }

    /// Updates RGB triple colorimetry from 3 [Chromaticity] and white point
    pub const fn update_rgb_colorimetry(&mut self, white_point: XyY, primaries: ColorPrimaries) {
        let r = primaries.red.to_xyzd();
        let g = primaries.green.to_xyzd();
        let b = primaries.blue.to_xyzd();
        // The adaptation matrix is recorded before the colorants are recomputed.
        self.chromatic_adaptation = Some(BRADFORD_D);
        self.update_rgb_colorimetry_triplet(white_point, r, g, b)
    }

    /// Updates RGB triple colorimetry from 3 [Xyzd] and white point
    ///
    /// To work on `const` context this method does have restrictions.
    /// If invalid values were provided it may return invalid matrix or NaNs.
pub const fn update_rgb_colorimetry_triplet( &mut self, white_point: XyY, red_xyz: Xyzd, green_xyz: Xyzd, blue_xyz: Xyzd, ) { let xyz_matrix = Matrix3d { v: [ [red_xyz.x, green_xyz.x, blue_xyz.x], [red_xyz.y, green_xyz.y, blue_xyz.y], [red_xyz.z, green_xyz.z, blue_xyz.z], ], }; let colorants = ColorProfile::rgb_to_xyz_d(xyz_matrix, white_point.to_xyzd()); let colorants = adapt_to_d50_d(colorants, white_point); self.update_colorants(colorants); } pub(crate) const fn update_colorants(&mut self, colorants: Matrix3d) { // note: there's a transpose type of operation going on here self.red_colorant.x = colorants.v[0][0]; self.red_colorant.y = colorants.v[1][0]; self.red_colorant.z = colorants.v[2][0]; self.green_colorant.x = colorants.v[0][1]; self.green_colorant.y = colorants.v[1][1]; self.green_colorant.z = colorants.v[2][1]; self.blue_colorant.x = colorants.v[0][2]; self.blue_colorant.y = colorants.v[1][2]; self.blue_colorant.z = colorants.v[2][2]; } /// Updates RGB triple colorimetry from CICP pub fn update_rgb_colorimetry_from_cicp(&mut self, cicp: CicpProfile) -> bool { self.cicp = Some(cicp); if !cicp.color_primaries.has_chromaticity() || !cicp.transfer_characteristics.has_transfer_curve() { return false; } let primaries_xy: ColorPrimaries = match cicp.color_primaries.try_into() { Ok(primaries) => primaries, Err(_) => return false, }; let white_point: Chromaticity = match cicp.color_primaries.white_point() { Ok(v) => v, Err(_) => return false, }; self.update_rgb_colorimetry(white_point.to_xyyb(), primaries_xy); let red_trc: ToneReprCurve = match cicp.transfer_characteristics.try_into() { Ok(trc) => trc, Err(_) => return false, }; self.green_trc = Some(red_trc.clone()); self.blue_trc = Some(red_trc.clone()); self.red_trc = Some(red_trc); false } pub const fn rgb_to_xyz(&self, xyz_matrix: Matrix3f, wp: Xyz) -> Matrix3f { let xyz_inverse = xyz_matrix.inverse(); let s = xyz_inverse.mul_vector(wp.to_vector()); let mut v = xyz_matrix.mul_row_vector::<0>(s); v = 
v.mul_row_vector::<1>(s); v.mul_row_vector::<2>(s) } ///TODO: make primary instead of [rgb_to_xyz] in the next major version pub(crate) const fn rgb_to_xyz_static(xyz_matrix: Matrix3f, wp: Xyz) -> Matrix3f { let xyz_inverse = xyz_matrix.inverse(); let s = xyz_inverse.mul_vector(wp.to_vector()); let mut v = xyz_matrix.mul_row_vector::<0>(s); v = v.mul_row_vector::<1>(s); v.mul_row_vector::<2>(s) } /// If Primaries is invalid will return invalid matrix on const context. /// This assumes not transposed matrix and returns not transposed matrix. pub const fn rgb_to_xyz_d(xyz_matrix: Matrix3d, wp: Xyzd) -> Matrix3d { let xyz_inverse = xyz_matrix.inverse(); let s = xyz_inverse.mul_vector(wp.to_vector_d()); let mut v = xyz_matrix.mul_row_vector::<0>(s); v = v.mul_row_vector::<1>(s); v = v.mul_row_vector::<2>(s); v } pub fn rgb_to_xyz_matrix(&self) -> Matrix3d { let xyz_matrix = self.colorant_matrix(); let white_point = Chromaticity::D50.to_xyzd(); ColorProfile::rgb_to_xyz_d(xyz_matrix, white_point) } /// Computes transform matrix RGB -> XYZ -> RGB /// Current profile is used as source, other as destination pub fn transform_matrix(&self, dest: &ColorProfile) -> Matrix3d { let source = self.rgb_to_xyz_matrix(); let dst = dest.rgb_to_xyz_matrix(); let dest_inverse = dst.inverse(); dest_inverse.mat_mul(source) } /// Returns volume of colors stored in profile pub fn profile_volume(&self) -> Option { let red_prim = self.red_colorant; let green_prim = self.green_colorant; let blue_prim = self.blue_colorant; let tetrahedral_vertices = Matrix3d { v: [ [red_prim.x, red_prim.y, red_prim.z], [green_prim.x, green_prim.y, green_prim.z], [blue_prim.x, blue_prim.y, blue_prim.z], ], }; let det = tetrahedral_vertices.determinant()?; Some((det / 6.0f64) as f32) } pub(crate) fn has_device_to_pcs_lut(&self) -> bool { self.lut_a_to_b_perceptual.is_some() || self.lut_a_to_b_saturation.is_some() || self.lut_a_to_b_colorimetric.is_some() } pub(crate) fn has_pcs_to_device_lut(&self) -> bool { 
self.lut_b_to_a_perceptual.is_some() || self.lut_b_to_a_saturation.is_some() || self.lut_b_to_a_colorimetric.is_some() } } #[cfg(test)] mod tests { use super::*; use std::fs; #[test] fn test_gray() { if let Ok(gray_icc) = fs::read("./assets/Generic Gray Gamma 2.2 Profile.icc") { let f_p = ColorProfile::new_from_slice(&gray_icc).unwrap(); assert!(f_p.gray_trc.is_some()); } } #[test] fn test_perceptual() { if let Ok(srgb_perceptual_icc) = fs::read("./assets/srgb_perceptual.icc") { let f_p = ColorProfile::new_from_slice(&srgb_perceptual_icc).unwrap(); assert_eq!(f_p.pcs, DataColorSpace::Lab); assert_eq!(f_p.color_space, DataColorSpace::Rgb); assert_eq!(f_p.version(), ProfileVersion::V4_2); assert!(f_p.lut_a_to_b_perceptual.is_some()); assert!(f_p.lut_b_to_a_perceptual.is_some()); } } #[test] fn test_us_swop_coated() { if let Ok(us_swop_coated) = fs::read("./assets/us_swop_coated.icc") { let f_p = ColorProfile::new_from_slice(&us_swop_coated).unwrap(); assert_eq!(f_p.pcs, DataColorSpace::Lab); assert_eq!(f_p.color_space, DataColorSpace::Cmyk); assert_eq!(f_p.version(), ProfileVersion::V2_0); assert!(f_p.lut_a_to_b_perceptual.is_some()); assert!(f_p.lut_b_to_a_perceptual.is_some()); assert!(f_p.lut_a_to_b_colorimetric.is_some()); assert!(f_p.lut_b_to_a_colorimetric.is_some()); assert!(f_p.gamut.is_some()); assert!(f_p.copyright.is_some()); assert!(f_p.description.is_some()); } } #[test] fn test_matrix_shaper() { if let Ok(matrix_shaper) = fs::read("./assets/Display P3.icc") { let f_p = ColorProfile::new_from_slice(&matrix_shaper).unwrap(); assert_eq!(f_p.pcs, DataColorSpace::Xyz); assert_eq!(f_p.color_space, DataColorSpace::Rgb); assert_eq!(f_p.version(), ProfileVersion::V4_0); assert!(f_p.red_trc.is_some()); assert!(f_p.blue_trc.is_some()); assert!(f_p.green_trc.is_some()); assert_ne!(f_p.red_colorant, Xyzd::default()); assert_ne!(f_p.blue_colorant, Xyzd::default()); assert_ne!(f_p.green_colorant, Xyzd::default()); assert!(f_p.copyright.is_some()); 
assert!(f_p.description.is_some()); } } } moxcms-0.7.7/src/reader.rs000064400000000000000000001062151046102023000135510ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ use crate::err::try_vec; use crate::helpers::{read_matrix_3d, read_vector_3d}; use crate::profile::LutDataType; use crate::safe_math::{SafeAdd, SafeMul, SafePowi}; use crate::tag::{TAG_SIZE, TagTypeDefinition}; use crate::{ CicpColorPrimaries, CicpProfile, CmsError, ColorDateTime, ColorProfile, DescriptionString, LocalizableString, LutMultidimensionalType, LutStore, LutType, LutWarehouse, Matrix3d, Matrix3f, MatrixCoefficients, Measurement, MeasurementGeometry, ParsingOptions, ProfileText, StandardIlluminant, StandardObserver, TechnologySignatures, ToneReprCurve, TransferCharacteristics, Vector3d, ViewingConditions, Xyz, Xyzd, }; /// Produces the nearest float to `a` with a maximum error of 1/1024 which /// happens for large values like 0x40000040. #[inline] pub(crate) const fn s15_fixed16_number_to_float(a: i32) -> f32 { a as f32 / 65536. } #[inline] pub(crate) const fn s15_fixed16_number_to_double(a: i32) -> f64 { a as f64 / 65536. } #[inline] pub(crate) const fn uint16_number_to_float(a: u32) -> f32 { a as f32 / 65536. } #[inline] pub(crate) const fn uint16_number_to_float_fast(a: u32) -> f32 { a as f32 * (1. / 65536.) } // #[inline] // pub(crate) fn uint8_number_to_float(a: u8) -> f32 { // a as f32 / 255.0 // } #[inline] pub(crate) fn uint8_number_to_float_fast(a: u8) -> f32 { a as f32 * (1. 
/ 255.0) } fn utf16be_to_utf16(slice: &[u8]) -> Result, CmsError> { let mut vec = try_vec![0u16; slice.len() / 2]; for (dst, chunk) in vec.iter_mut().zip(slice.chunks_exact(2)) { *dst = u16::from_be_bytes([chunk[0], chunk[1]]); } Ok(vec) } impl ColorProfile { #[inline] pub(crate) fn read_lut_type( slice: &[u8], entry: usize, tag_size: usize, ) -> Result { let tag_size = if tag_size == 0 { TAG_SIZE } else { tag_size }; let last_tag_offset = tag_size.safe_add(entry)?; if last_tag_offset > slice.len() { return Err(CmsError::InvalidProfile); } let tag = &slice[entry..last_tag_offset]; if tag.len() < 48 { return Err(CmsError::InvalidProfile); } let tag_type = u32::from_be_bytes([tag[0], tag[1], tag[2], tag[3]]); LutType::try_from(tag_type) } #[inline] pub(crate) fn read_viewing_conditions( slice: &[u8], entry: usize, tag_size: usize, ) -> Result, CmsError> { if tag_size < 36 { return Ok(None); } if slice.len() < entry.safe_add(36)? { return Err(CmsError::InvalidProfile); } let tag = &slice[entry..entry.safe_add(36)?]; let tag_type = TagTypeDefinition::from(u32::from_be_bytes([tag[0], tag[1], tag[2], tag[3]])); // Ignore unknown if tag_type != TagTypeDefinition::DefViewingConditions { return Ok(None); } let illuminant_x = i32::from_be_bytes([tag[8], tag[9], tag[10], tag[11]]); let illuminant_y = i32::from_be_bytes([tag[12], tag[13], tag[14], tag[15]]); let illuminant_z = i32::from_be_bytes([tag[16], tag[17], tag[18], tag[19]]); let surround_x = i32::from_be_bytes([tag[20], tag[21], tag[22], tag[23]]); let surround_y = i32::from_be_bytes([tag[24], tag[25], tag[26], tag[27]]); let surround_z = i32::from_be_bytes([tag[28], tag[29], tag[30], tag[31]]); let illuminant_type = u32::from_be_bytes([tag[32], tag[33], tag[34], tag[35]]); let illuminant = Xyz::new( s15_fixed16_number_to_float(illuminant_x), s15_fixed16_number_to_float(illuminant_y), s15_fixed16_number_to_float(illuminant_z), ); let surround = Xyz::new( s15_fixed16_number_to_float(surround_x), 
s15_fixed16_number_to_float(surround_y),
            s15_fixed16_number_to_float(surround_z),
        );

        let observer = StandardObserver::from(illuminant_type);

        Ok(Some(ViewingConditions {
            illuminant,
            surround,
            observer,
        }))
    }

    /// Reads a textual tag that starts at byte `entry` of `slice`.
    ///
    /// Three tag types are handled: `Text`, `MultiLocalizedUnicode` and
    /// `Description`. Any other type yields `Ok(None)`.
    ///
    /// * `tag_size` - declared size of the tag; `0` is replaced by `TAG_SIZE`.
    ///
    /// Returns `Err(CmsError::InvalidProfile)` when the declared tag runs past
    /// `slice` or a fixed-size header is truncated, and `Ok(None)` when the tag
    /// is too short to carry a type or a string record points outside the tag.
    /// Errors from `safe_add` and `utf16be_to_utf16` are propagated.
    pub(crate) fn read_string_tag(
        slice: &[u8],
        entry: usize,
        tag_size: usize,
    ) -> Result, CmsError> {
        let tag_size = if tag_size == 0 { TAG_SIZE } else { tag_size };
        if tag_size < 4 {
            return Ok(None);
        }
        let last_tag_offset = tag_size.safe_add(entry)?;
        if last_tag_offset > slice.len() {
            return Err(CmsError::InvalidProfile);
        }
        let tag = &slice[entry..last_tag_offset];
        if tag.len() < 8 {
            return Ok(None);
        }
        // The first four bytes of the tag carry its type signature.
        let tag_type =
            TagTypeDefinition::from(u32::from_be_bytes([tag[0], tag[1], tag[2], tag[3]]));
        // Ignore unknown
        if tag_type == TagTypeDefinition::Text {
            // Everything after the 8-byte header is decoded as (lossy) UTF-8.
            let sliced_from_to_end = &tag[8..tag.len()];
            let str = String::from_utf8_lossy(sliced_from_to_end);
            return Ok(Some(ProfileText::PlainString(str.to_string())));
        } else if tag_type == TagTypeDefinition::MultiLocalizedUnicode {
            // 16 bytes of header plus one 12-byte record are required.
            if tag.len() < 28 {
                return Err(CmsError::InvalidProfile);
            }
            // let record_size = u32::from_be_bytes([tag[12], tag[13], tag[14], tag[15]]) as usize;
            // // Record size is reserved to be 12.
            // if record_size != 12 {
            //     return Err(CmsError::InvalidIcc);
            // }
            let records_count = u32::from_be_bytes([tag[8], tag[9], tag[10], tag[11]]) as usize;
            // The first record (bytes 16..28) is always read, whatever `records_count` says.
            let primary_language_code = String::from_utf8_lossy(&[tag[16], tag[17]]).to_string();
            let primary_country_code = String::from_utf8_lossy(&[tag[18], tag[19]]).to_string();
            let first_string_record_length =
                u32::from_be_bytes([tag[20], tag[21], tag[22], tag[23]]) as usize;
            let first_record_offset =
                u32::from_be_bytes([tag[24], tag[25], tag[26], tag[27]]) as usize;

            if tag.len() < first_record_offset.safe_add(first_string_record_length)? {
                return Ok(None);
            }

            let resliced =
                &tag[first_record_offset..first_record_offset + first_string_record_length];
            let cvt = utf16be_to_utf16(resliced)?;
            let string_record = String::from_utf16_lossy(&cvt);

            let mut records = vec![LocalizableString {
                language: primary_language_code,
                country: primary_country_code,
                value: string_record,
            }];

            // Remaining records follow the first one back to back, 12 bytes each.
            for record in 1..records_count {
                // Localizable header must be at least 12 bytes
                // Both branches evaluate to 28 + 12 * (record - 1).
                let localizable_header_offset = if record == 1 {
                    28
                } else {
                    28 + 12 * (record - 1)
                };
                if tag.len() < localizable_header_offset + 12 {
                    return Err(CmsError::InvalidProfile);
                }
                let choked = &tag[localizable_header_offset..localizable_header_offset + 12];

                let language_code = String::from_utf8_lossy(&[choked[0], choked[1]]).to_string();
                let country_code = String::from_utf8_lossy(&[choked[2], choked[3]]).to_string();
                let record_length =
                    u32::from_be_bytes([choked[4], choked[5], choked[6], choked[7]]) as usize;
                let string_offset =
                    u32::from_be_bytes([choked[8], choked[9], choked[10], choked[11]]) as usize;

                // A string that points outside the tag drops the whole tag.
                if tag.len() < string_offset.safe_add(record_length)? {
                    return Ok(None);
                }
                let resliced = &tag[string_offset..string_offset + record_length];
                let cvt = utf16be_to_utf16(resliced)?;
                let string_record = String::from_utf16_lossy(&cvt);
                records.push(LocalizableString {
                    country: country_code,
                    language: language_code,
                    value: string_record,
                });
            }

            return Ok(Some(ProfileText::Localizable(records)));
        } else if tag_type == TagTypeDefinition::Description {
            if tag.len() < 12 {
                return Err(CmsError::InvalidProfile);
            }
            // ASCII part: 4-byte length at offset 8, bytes start at offset 12.
            let ascii_length = u32::from_be_bytes([tag[8], tag[9], tag[10], tag[11]]) as usize;
            if tag.len() < 12.safe_add(ascii_length)? {
                return Err(CmsError::InvalidProfile);
            }
            let sliced = &tag[12..12 + ascii_length];
            let ascii_string = String::from_utf8_lossy(sliced).to_string();

            // Unicode part: 4-byte language code and 4-byte length right after the ASCII bytes.
            let mut last_position = 12 + ascii_length;
            if tag.len() < last_position + 8 {
                return Err(CmsError::InvalidProfile);
            }
            let uc = &tag[last_position..last_position + 8];
            let unicode_code = u32::from_be_bytes([uc[0], uc[1], uc[2], uc[3]]);
            // The stored length is doubled to get the byte count of the UTF-16 data.
            let unicode_length = u32::from_be_bytes([uc[4], uc[5], uc[6], uc[7]]) as usize * 2;
            if tag.len() < unicode_length.safe_add(8)?.safe_add(last_position)? {
                return Ok(None);
            }

            last_position += 8;
            let uc = &tag[last_position..last_position + unicode_length];
            let wc = utf16be_to_utf16(uc)?;
            let unicode_string = String::from_utf16_lossy(&wc).to_string();

            // last_position += unicode_length;
            //
            // if tag.len() < last_position + 2 {
            //     return Err(CmsError::InvalidIcc);
            // }
            // let uc = &tag[last_position..last_position + 2];
            // let script_code = uc[0];
            // let mac_length = uc[1] as usize;
            // last_position += 2;
            // if tag.len() < last_position + mac_length {
            //     return Err(CmsError::InvalidIcc);
            // }
            //
            // let uc = &tag[last_position..last_position + unicode_length];
            // let wc = utf16be_to_utf16(uc);
            // let mac_string = String::from_utf16_lossy(&wc).to_string();

            // The Mac script part is not parsed; placeholders are stored instead.
            return Ok(Some(ProfileText::Description(DescriptionString {
                ascii_string,
                unicode_language_code: unicode_code,
                unicode_string,
                mac_string: "".to_string(),
                script_code_code: -1,
            })));
        }
        Ok(None)
    }

    /// Copies raw table bytes into an owned store.
    ///
    /// `Lut16` input is decoded as big-endian `u16` pairs (a trailing odd byte
    /// is dropped by `chunks_exact`), `Lut8` input is copied byte for byte.
    /// Any other `lut_type` hits `unreachable!`.
    #[inline]
    fn read_lut_table_f32(table: &[u8], lut_type: LutType) -> Result {
        if lut_type == LutType::Lut16 {
            let mut clut = try_vec![0u16; table.len() / 2];
            for (src, dst) in table.chunks_exact(2).zip(clut.iter_mut()) {
                *dst = u16::from_be_bytes([src[0], src[1]]);
            }
            Ok(LutStore::Store16(clut))
        } else if lut_type == LutType::Lut8 {
            let mut clut = try_vec![0u8; table.len()];
            for (&src, dst) in table.iter().zip(clut.iter_mut()) {
                *dst = src;
            }
            Ok(LutStore::Store8(clut))
        } else {
            unreachable!("This should never happen, report to https://github.com/awxkee/moxcms")
        }
    }

    /// Reads `length` consecutive tone curves starting at `offset` in `slice`.
    ///
    /// Each curve is parsed with `read_trc_tag` (tag size `0`, i.e. unbounded)
    /// and the cursor advances by the size that call reports, rounded up to a
    /// multiple of four. Fails with `CmsError::InvalidProfile` when fewer than
    /// 12 bytes remain at the cursor or a curve parses to `None`.
    #[inline]
    fn read_nested_tone_curves(
        slice: &[u8],
        offset: usize,
        length: usize,
        options: &ParsingOptions,
    ) -> Result>, CmsError> {
        let mut curve_offset: usize = offset;
        let mut curves = Vec::new();
        for _ in 0..length {
            if slice.len() < curve_offset.safe_add(12)? {
                return Err(CmsError::InvalidProfile);
            }
            // `read_trc_tag` writes the number of consumed bytes into `tag_size`.
            let mut tag_size = 0usize;
            let new_curve = Self::read_trc_tag(slice, curve_offset, 0, &mut tag_size, options)?;
            match new_curve {
                None => return Err(CmsError::InvalidProfile),
                Some(curve) => curves.push(curve),
            }
            curve_offset += tag_size;
            // 4 byte aligned
            if curve_offset % 4 != 0 {
                curve_offset += 4 - curve_offset % 4;
            }
        }
        Ok(Some(curves))
    }

    /// Reads a `MabLut` / `MbaLut` tag located at `entry` with size `tag_size`.
    ///
    /// * `to_pcs` - selects which channel count (input or output) applies to
    ///   the A curves versus the M and B curves.
    ///
    /// Returns `Ok(None)` when `tag_size` is below 48, when the type signature
    /// is neither `MabLut` nor `MbaLut`, or when both channel counts exceed 4.
    #[inline]
    pub(crate) fn read_lut_abm_type(
        slice: &[u8],
        entry: usize,
        tag_size: usize,
        to_pcs: bool,
        options: &ParsingOptions,
    ) -> Result, CmsError> {
        if tag_size < 48 {
            return Ok(None);
        }
        let last_tag_offset = tag_size.safe_add(entry)?;
        if last_tag_offset > slice.len() {
            return Err(CmsError::InvalidProfile);
        }
        let tag = &slice[entry..last_tag_offset];
        if tag.len() < 48 {
            return Err(CmsError::InvalidProfile);
        }
        let tag_type = u32::from_be_bytes([tag[0], tag[1], tag[2], tag[3]]);
        let tag_type_definition = TagTypeDefinition::from(tag_type);
        if tag_type_definition != TagTypeDefinition::MabLut
            && tag_type_definition != TagTypeDefinition::MbaLut
        {
            return Ok(None);
        }
        let in_channels = tag[8];
        let out_channels = tag[9];
        if in_channels > 4 && out_channels > 4 {
            return Ok(None);
        }
        // Offsets of the optional elements, relative to the start of the tag;
        // zero means the element is absent.
        let a_curve_offset = u32::from_be_bytes([tag[28], tag[29], tag[30], tag[31]]) as usize;
        let clut_offset = u32::from_be_bytes([tag[24], tag[25], tag[26], tag[27]]) as usize;
        let m_curve_offset = u32::from_be_bytes([tag[20], tag[21], tag[22], tag[23]]) as usize;
        let matrix_offset = u32::from_be_bytes([tag[16], tag[17], tag[18], tag[19]]) as usize;
        let b_curve_offset = u32::from_be_bytes([tag[12], tag[13], tag[14], tag[15]]) as usize;

        let transform: Matrix3d;
        let bias: Vector3d;
        if matrix_offset != 0 {
            let matrix_end =
matrix_offset.safe_add(12 * 4)?;
            if tag.len() < matrix_end {
                return Err(CmsError::InvalidProfile);
            }

            // 12 four-byte values: nine matrix entries followed by three bias entries.
            let m_tag = &tag[matrix_offset..matrix_end];

            bias = read_vector_3d(&m_tag[36..48])?;
            transform = read_matrix_3d(m_tag)?;
        } else {
            transform = Matrix3d::IDENTITY;
            bias = Vector3d::default();
        }

        let mut grid_points: [u8; 16] = [0; 16];

        let clut_table: Option = if clut_offset != 0 {
            // Check if CLUT formed correctly
            if clut_offset.safe_add(20)? > tag.len() {
                return Err(CmsError::InvalidProfile);
            }

            // CLUT header: 16 per-dimension grid sizes, then the entry width at byte 16.
            let clut_sizes_slice = &tag[clut_offset..clut_offset.safe_add(16)?];
            for (&s, v) in clut_sizes_slice.iter().zip(grid_points.iter_mut()) {
                *v = s;
            }

            // Checked multiplication: with more than four input channels the
            // product of grid sizes can exceed `u32`, which previously panicked
            // in debug builds and silently wrapped in release builds.
            let mut clut_size = 1u32;
            for &i in grid_points.iter().take(in_channels as usize) {
                clut_size = clut_size.safe_mul(i as u32)?;
            }
            clut_size = clut_size.safe_mul(out_channels as u32)?;

            if clut_size == 0 {
                return Err(CmsError::InvalidProfile);
            }

            if clut_size > 10_000_000 {
                return Err(CmsError::InvalidProfile);
            }

            let clut_offset20 = clut_offset.safe_add(20)?;

            let clut_header = &tag[clut_offset..clut_offset20];
            let entry_size = clut_header[16];
            if entry_size != 1 && entry_size != 2 {
                return Err(CmsError::InvalidProfile);
            }

            let clut_end =
                clut_offset20.safe_add(clut_size.safe_mul(entry_size as u32)? as usize)?;

            if tag.len() < clut_end {
                return Err(CmsError::InvalidProfile);
            }

            let shaped_clut_table = &tag[clut_offset20..clut_end];
            Some(Self::read_lut_table_f32(
                shaped_clut_table,
                if entry_size == 1 {
                    LutType::Lut8
                } else {
                    LutType::Lut16
                },
            )?)
        } else {
            None
        };

        // A curves use the input channel count when converting to PCS,
        // otherwise the output channel count.
        let a_curves = if a_curve_offset == 0 {
            Vec::new()
        } else {
            Self::read_nested_tone_curves(
                tag,
                a_curve_offset,
                if to_pcs {
                    in_channels as usize
                } else {
                    out_channels as usize
                },
                options,
            )?
            .ok_or(CmsError::InvalidProfile)?
        };

        // M and B curves use the opposite side from the A curves.
        let m_curves = if m_curve_offset == 0 {
            Vec::new()
        } else {
            Self::read_nested_tone_curves(
                tag,
                m_curve_offset,
                if to_pcs {
                    out_channels as usize
                } else {
                    in_channels as usize
                },
                options,
            )?
            .ok_or(CmsError::InvalidProfile)?
        };

        let b_curves = if b_curve_offset == 0 {
            Vec::new()
        } else {
            Self::read_nested_tone_curves(
                tag,
                b_curve_offset,
                if to_pcs {
                    out_channels as usize
                } else {
                    in_channels as usize
                },
                options,
            )?
            .ok_or(CmsError::InvalidProfile)?
        };

        let wh = LutWarehouse::Multidimensional(LutMultidimensionalType {
            num_input_channels: in_channels,
            num_output_channels: out_channels,
            matrix: transform,
            clut: clut_table,
            a_curves,
            b_curves,
            m_curves,
            grid_points,
            bias,
        });
        Ok(Some(wh))
    }

    /// Reads a `Lut8` / `Lut16` tag located at `entry` with size `tag_size`.
    ///
    /// Layout consumed, in order: the 3x3 matrix at bytes 12..48, the input
    /// tables, the CLUT and the output tables. `Lut16` additionally stores the
    /// input/output table entry counts at bytes 48..52; `Lut8` always uses 256.
    ///
    /// Returns `Ok(None)` when `tag_size` is below 48 and
    /// `Err(CmsError::InvalidProfile)` when any table does not fit into the
    /// tag, the entry counts are outside `2..=4096`, the channel layout is
    /// rejected, or the CLUT size is outside
    /// `1..=parsing_options.max_allowed_clut_size`.
    ///
    /// # Panics
    ///
    /// Panics if the tag signature resolves to a `LutType` other than `Lut8`
    /// or `Lut16`; callers are expected to dispatch on the type first.
    #[inline]
    pub(crate) fn read_lut_a_to_b_type(
        slice: &[u8],
        entry: usize,
        tag_size: usize,
        parsing_options: &ParsingOptions,
    ) -> Result, CmsError> {
        if tag_size < 48 {
            return Ok(None);
        }
        let last_tag_offset = tag_size.safe_add(entry)?;
        if last_tag_offset > slice.len() {
            return Err(CmsError::InvalidProfile);
        }
        let tag = &slice[entry..last_tag_offset];
        if tag.len() < 48 {
            return Err(CmsError::InvalidProfile);
        }
        let tag_type = u32::from_be_bytes([tag[0], tag[1], tag[2], tag[3]]);
        let lut_type = LutType::try_from(tag_type)?;
        assert!(lut_type == LutType::Lut8 || lut_type == LutType::Lut16);

        if lut_type == LutType::Lut16 && tag.len() < 52 {
            return Err(CmsError::InvalidProfile);
        }

        let num_input_table_entries: u16 = match lut_type {
            LutType::Lut8 => 256,
            LutType::Lut16 => u16::from_be_bytes([tag[48], tag[49]]),
            _ => unreachable!(),
        };
        let num_output_table_entries: u16 = match lut_type {
            LutType::Lut8 => 256,
            LutType::Lut16 => u16::from_be_bytes([tag[50], tag[51]]),
            _ => unreachable!(),
        };

        if !(2..=4096).contains(&num_input_table_entries)
            || !(2..=4096).contains(&num_output_table_entries)
        {
            return Err(CmsError::InvalidProfile);
        }

        // Offset of the first input table and width of one table entry in bytes.
        let input_offset: usize = match lut_type {
            LutType::Lut8 => 48,
            LutType::Lut16 => 52,
            _ => unreachable!(),
        };
        let entry_size: usize = match lut_type {
            LutType::Lut8 => 1,
            LutType::Lut16 => 2,
            _ => unreachable!(),
        };

        let in_chan = tag[8];
        let out_chan = tag[9];
        let is_3_to_4 = in_chan == 3 || out_chan == 4;
        let is_4_to_3 = in_chan == 4 || out_chan == 3;
        if !is_3_to_4 && !is_4_to_3 {
            return Err(CmsError::InvalidProfile);
        }
        let grid_points = tag[10];
        let clut_size = (grid_points as u32).safe_powi(in_chan as u32)? as usize;

        if !(1..=parsing_options.max_allowed_clut_size).contains(&clut_size) {
            return Err(CmsError::InvalidProfile);
        }

        assert!(tag.len() >= 48);

        let transform = read_matrix_3d(&tag[12..48])?;

        let lut_input_size = num_input_table_entries.safe_mul(in_chan as u16)? as usize;

        let linearization_table_end = lut_input_size
            .safe_mul(entry_size)?
            .safe_add(input_offset)?;
        if tag.len() < linearization_table_end {
            return Err(CmsError::InvalidProfile);
        }
        let shaped_input_table = &tag[input_offset..linearization_table_end];
        let linearization_table = Self::read_lut_table_f32(shaped_input_table, lut_type)?;

        // The CLUT follows the input tables directly.
        let clut_offset = linearization_table_end;

        let clut_data_size = clut_size
            .safe_mul(out_chan as usize)?
            .safe_mul(entry_size)?;

        if tag.len() < clut_offset.safe_add(clut_data_size)? {
            return Err(CmsError::InvalidProfile);
        }

        let shaped_clut_table = &tag[clut_offset..clut_offset + clut_data_size];
        let clut_table = Self::read_lut_table_f32(shaped_clut_table, lut_type)?;

        // The output tables follow the CLUT. Their end must be validated like
        // the other tables, otherwise a truncated tag panics on the slice below.
        let output_offset = clut_offset.safe_add(clut_data_size)?;

        let output_size = (num_output_table_entries as usize).safe_mul(out_chan as usize)?;

        let output_end = output_offset.safe_add(output_size.safe_mul(entry_size)?)?;
        if tag.len() < output_end {
            return Err(CmsError::InvalidProfile);
        }

        let shaped_output_table = &tag[output_offset..output_end];
        let gamma_table = Self::read_lut_table_f32(shaped_output_table, lut_type)?;

        let wh = LutWarehouse::Lut(LutDataType {
            num_input_table_entries,
            num_output_table_entries,
            num_input_channels: in_chan,
            num_output_channels: out_chan,
            num_clut_grid_points: grid_points,
            matrix: transform,
            input_table: linearization_table,
            clut_table,
            output_table: gamma_table,
            lut_type,
        });
        Ok(Some(wh))
    }

    pub(crate) fn read_lut_tag(
        slice: &[u8],
        tag_entry: u32,
        tag_size: usize,
        parsing_options: &ParsingOptions,
    ) -> Result, CmsError> {
        let lut_type = Self::read_lut_type(slice, tag_entry as usize, tag_size)?;
        Ok(if lut_type == LutType::Lut8 || lut_type ==
LutType::Lut16 {
            Self::read_lut_a_to_b_type(slice, tag_entry as usize, tag_size, parsing_options)?
        } else if lut_type == LutType::LutMba || lut_type == LutType::LutMab {
            Self::read_lut_abm_type(
                slice,
                tag_entry as usize,
                tag_size,
                lut_type == LutType::LutMab,
                parsing_options,
            )?
        } else {
            None
        })
    }

    /// Reads a tone curve tag when the caller does not need the consumed size.
    ///
    /// Thin wrapper around `read_trc_tag` that discards `read_size`.
    pub(crate) fn read_trc_tag_s(
        slice: &[u8],
        entry: usize,
        tag_size: usize,
        options: &ParsingOptions,
    ) -> Result, CmsError> {
        let mut _empty = 0usize;
        Self::read_trc_tag(slice, entry, tag_size, &mut _empty, options)
    }

    /// Reads a tone reproduction curve (`LutToneCurve` or `ParametricToneCurve`)
    /// that starts at byte `entry` of `slice`.
    ///
    /// * `tag_size` - declared tag size; `0` means "up to the end of `slice`".
    /// * `read_size` - on success receives the number of bytes the curve
    ///   occupies, counted from `entry`; nested curve readers use it to find
    ///   the next curve.
    ///
    /// Returns `Ok(None)` when fewer than four bytes are available at `entry`
    /// or a non-zero `tag_size` is below `TAG_SIZE`.
    ///
    /// # Errors
    ///
    /// * `CmsError::MalformedTrcCurve` - data is truncated, the parametric
    ///   function type is above 4, or the tag type is not a tone curve.
    /// * `CmsError::CurveLutIsTooLarge` - entry count exceeds
    ///   `options.max_allowed_trc_size`.
    /// * `CmsError::ParametricCurveZeroDivision` - function type 1 or 2 with
    ///   the second parameter equal to zero.
    pub(crate) fn read_trc_tag(
        slice: &[u8],
        entry: usize,
        tag_size: usize,
        read_size: &mut usize,
        options: &ParsingOptions,
    ) -> Result, CmsError> {
        if slice.len() < entry.safe_add(4)? {
            return Ok(None);
        }
        let small_tag = &slice[entry..entry + 4];
        // We require always recognize tone curves.
        let curve_type = TagTypeDefinition::from(u32::from_be_bytes([
            small_tag[0],
            small_tag[1],
            small_tag[2],
            small_tag[3],
        ]));
        if tag_size != 0 && tag_size < TAG_SIZE {
            return Ok(None);
        }
        // Checked addition, consistent with the other tag readers.
        let last_tag_offset = if tag_size != 0 {
            tag_size.safe_add(entry)?
        } else {
            slice.len()
        };
        if last_tag_offset > slice.len() {
            return Err(CmsError::MalformedTrcCurve("Data exhausted".to_string()));
        }
        let tag = &slice[entry..last_tag_offset];
        if tag.len() < TAG_SIZE {
            return Err(CmsError::MalformedTrcCurve("Data exhausted".to_string()));
        }
        if curve_type == TagTypeDefinition::LutToneCurve {
            let entry_count = u32::from_be_bytes([tag[8], tag[9], tag[10], tag[11]]) as usize;
            if entry_count == 0 {
                // An empty curve still occupies its 12-byte header. Report it so
                // that nested curve readers advance to the next curve instead of
                // parsing this one again.
                *read_size = 12;
                return Ok(Some(ToneReprCurve::Lut(vec![])));
            }
            if entry_count > options.max_allowed_trc_size {
                return Err(CmsError::CurveLutIsTooLarge);
            }
            let curve_end = entry_count.safe_mul(size_of::())?.safe_add(12)?;
            if tag.len() < curve_end {
                return Err(CmsError::MalformedTrcCurve(
                    "Curve end ends to early".to_string(),
                ));
            }
            let curve_sliced = &tag[12..curve_end];
            let mut curve_values = try_vec![0u16; entry_count];
            for (value, curve_value) in curve_sliced.chunks_exact(2).zip(curve_values.iter_mut()) {
                let gamma_s15 = u16::from_be_bytes([value[0], value[1]]);
                *curve_value = gamma_s15;
            }
            *read_size = curve_end;
            Ok(Some(ToneReprCurve::Lut(curve_values)))
        } else if curve_type == TagTypeDefinition::ParametricToneCurve {
            // For parametric curves this is the function type (0..=4), not a length.
            let entry_count = u16::from_be_bytes([tag[8], tag[9]]) as usize;
            if entry_count > 4 {
                return Err(CmsError::MalformedTrcCurve(
                    "Parametric curve has unknown entries count".to_string(),
                ));
            }

            // Number of parameters stored for each function type.
            const COUNT_TO_LENGTH: [usize; 5] = [1, 3, 4, 5, 7]; //PARAMETRIC_CURVE_TYPE

            if tag.len() < 12 + COUNT_TO_LENGTH[entry_count] * size_of::() {
                return Err(CmsError::MalformedTrcCurve(
                    "Parametric curve has unknown entries count exhaust data too early".to_string(),
                ));
            }
            let curve_sliced = &tag[12..12 + COUNT_TO_LENGTH[entry_count] * size_of::()];
            let mut params = try_vec![0f32; COUNT_TO_LENGTH[entry_count]];
            for (value, param_value) in curve_sliced.chunks_exact(4).zip(params.iter_mut()) {
                let parametric_value = i32::from_be_bytes([value[0], value[1], value[2], value[3]]);
                *param_value = s15_fixed16_number_to_float(parametric_value);
            }
            if entry_count == 1 || entry_count == 2 {
                // we have a type 1 or type 2 function that has a division by `a`
                let a: f32 = params[1];
                if a == 0.0 {
                    return Err(CmsError::ParametricCurveZeroDivision);
                }
            }
            *read_size = 12 + COUNT_TO_LENGTH[entry_count] * 4;
            Ok(Some(ToneReprCurve::Parametric(params)))
        } else {
            Err(CmsError::MalformedTrcCurve(
                "Unknown parametric curve tag".to_string(),
            ))
        }
    }

    /// Reads a `S15Fixed16Array` tag holding a 3x3 matrix.
    ///
    /// Returns `Ok(None)` when `tag_size` is below 8 or the payload after the
    /// 8-byte header does not hold nine 4-byte values. Returns
    /// `Err(CmsError::InvalidProfile)` when the tag runs past `slice`, has a
    /// different type signature, or fails the final length check.
    #[inline]
    pub(crate) fn read_chad_tag(
        slice: &[u8],
        entry: usize,
        tag_size: usize,
    ) -> Result, CmsError> {
        let last_tag_offset = tag_size.safe_add(entry)?;
        if last_tag_offset > slice.len() {
            return Err(CmsError::InvalidProfile);
        }
        if slice[entry..].len() < 8 {
            return Err(CmsError::InvalidProfile);
        }
        if tag_size < 8 {
            return Ok(None);
        }
        if (tag_size - 8) / 4 != 9 {
            return Ok(None);
        }
        let tag0 = &slice[entry..entry.safe_add(8)?];
        let c_type =
            TagTypeDefinition::from(u32::from_be_bytes([tag0[0], tag0[1], tag0[2], tag0[3]]));
        if c_type != TagTypeDefinition::S15Fixed16Array {
            return Err(CmsError::InvalidProfile);
        }
        if slice.len() < 9 * size_of::() + 8 {
            return Err(CmsError::InvalidProfile);
        }
        // Payload starts right after the 8-byte type header.
        let tag = &slice[entry + 8..last_tag_offset];
        if tag.len() != size_of::() {
            return Err(CmsError::InvalidProfile);
        }
        let matrix = read_matrix_3d(tag)?;
        Ok(Some(matrix))
    }

    /// Reads a `Signature` tag and converts its value to `TechnologySignatures`.
    ///
    /// Returns `Ok(None)` for any other tag type and
    /// `Err(CmsError::InvalidProfile)` when `tag_size` is below `TAG_SIZE` or
    /// the tag runs past `slice`.
    #[inline]
    pub(crate) fn read_tech_tag(
        slice: &[u8],
        entry: usize,
        tag_size: usize,
    ) -> Result, CmsError> {
        if tag_size < TAG_SIZE {
            return Err(CmsError::InvalidProfile);
        }
        let last_tag_offset = tag_size.safe_add(entry)?;
        if last_tag_offset > slice.len() {
            return Err(CmsError::InvalidProfile);
        }
        let tag = &slice[entry..entry.safe_add(12)?];
        let tag_type = u32::from_be_bytes([tag[0], tag[1], tag[2], tag[3]]);
        let def = TagTypeDefinition::from(tag_type);
        if def == TagTypeDefinition::Signature {
            // The signature value sits right after the 8-byte type header.
            let sig = u32::from_be_bytes([tag[8], tag[9], tag[10], tag[11]]);
            let tech_sig = TechnologySignatures::from(sig);
            return Ok(Some(tech_sig));
        }
        Ok(None)
    }

    /// Reads a `DateTime` tag located at `entry`.
    ///
    /// Returns `Ok(None)` when `tag_size` is below 20 or the tag has another
    /// type, and `Err(CmsError::InvalidProfile)` when the tag runs past
    /// `slice`. Errors from `ColorDateTime::new_from_slice` are propagated.
    #[inline]
    pub(crate) fn read_date_time_tag(
        slice: &[u8],
        entry: usize,
        tag_size: usize,
    ) -> Result, CmsError> {
        if tag_size < 20 {
            return Ok(None);
        }
        let last_tag_offset = tag_size.safe_add(entry)?;
        if last_tag_offset > slice.len() {
            return Err(CmsError::InvalidProfile);
        }
        let tag = &slice[entry..entry.safe_add(20)?];
        let tag_type = u32::from_be_bytes([tag[0], tag[1], tag[2], tag[3]]);
        let def = TagTypeDefinition::from(tag_type);
        if def == TagTypeDefinition::DateTime {
            // The 12 date/time bytes follow the 8-byte header *of this tag*.
            // Indexing `slice` here would read bytes 8..20 of the whole file
            // and ignore `entry`.
            let tag_value = &tag[8..20];
            let time = ColorDateTime::new_from_slice(tag_value)?;
            return Ok(Some(time));
        }
        Ok(None)
    }

    #[inline]
    pub(crate) fn read_meas_tag(
        slice: &[u8],
        entry: usize,
        tag_size: usize,
    ) -> Result, CmsError> {
        if tag_size < TAG_SIZE {
            return Ok(None);
        }
        let last_tag_offset = tag_size.safe_add(entry)?;
        if last_tag_offset > slice.len() {
            return Err(CmsError::InvalidProfile);
        }
        let tag = &slice[entry..entry + 12];
        let tag_type = u32::from_be_bytes([tag[0], tag[1], tag[2], tag[3]]);
        let def =
TagTypeDefinition::from(tag_type);
        if def != TagTypeDefinition::Measurement {
            return Ok(None);
        }
        // A measurement record needs 36 bytes counted from `entry`.
        if 36 + entry > slice.len() {
            return Err(CmsError::InvalidProfile);
        }
        let tag = &slice[entry..entry + 36];
        let observer =
            StandardObserver::from(u32::from_be_bytes([tag[8], tag[9], tag[10], tag[11]]));
        // Three s15Fixed16 values at bytes 12..24 form the XYZ backing.
        let q15_16_x = i32::from_be_bytes([tag[12], tag[13], tag[14], tag[15]]);
        let q15_16_y = i32::from_be_bytes([tag[16], tag[17], tag[18], tag[19]]);
        let q15_16_z = i32::from_be_bytes([tag[20], tag[21], tag[22], tag[23]]);
        let x = s15_fixed16_number_to_float(q15_16_x);
        let y = s15_fixed16_number_to_float(q15_16_y);
        let z = s15_fixed16_number_to_float(q15_16_z);
        let xyz = Xyz::new(x, y, z);
        let geometry =
            MeasurementGeometry::from(u32::from_be_bytes([tag[24], tag[25], tag[26], tag[27]]));
        let flare =
            uint16_number_to_float(u32::from_be_bytes([tag[28], tag[29], tag[30], tag[31]]));
        let illuminant =
            StandardIlluminant::from(u32::from_be_bytes([tag[32], tag[33], tag[34], tag[35]]));
        Ok(Some(Measurement {
            flare,
            illuminant,
            geometry,
            observer,
            backing: xyz,
        }))
    }

    /// Reads an `Xyz` tag located at `entry` as double-precision coordinates.
    ///
    /// Returns `Xyzd::default()` when `tag_size` is below `TAG_SIZE` or the
    /// tag has another type. Returns `Err(CmsError::InvalidProfile)` when the
    /// tag runs past `slice` or is shorter than 20 bytes.
    #[inline]
    pub(crate) fn read_xyz_tag(
        slice: &[u8],
        entry: usize,
        tag_size: usize,
    ) -> Result {
        if tag_size < TAG_SIZE {
            return Ok(Xyzd::default());
        }
        let last_tag_offset = tag_size.safe_add(entry)?;
        if last_tag_offset > slice.len() {
            return Err(CmsError::InvalidProfile);
        }
        let tag = &slice[entry..entry + 12];
        let tag_type = u32::from_be_bytes([tag[0], tag[1], tag[2], tag[3]]);
        let def = TagTypeDefinition::from(tag_type);
        if def != TagTypeDefinition::Xyz {
            return Ok(Xyzd::default());
        }

        // Re-slice to the full declared tag: 8-byte header plus three 4-byte values.
        let tag = &slice[entry..last_tag_offset];
        if tag.len() < 20 {
            return Err(CmsError::InvalidProfile);
        }
        let q15_16_x = i32::from_be_bytes([tag[8], tag[9], tag[10], tag[11]]);
        let q15_16_y = i32::from_be_bytes([tag[12], tag[13], tag[14], tag[15]]);
        let q15_16_z = i32::from_be_bytes([tag[16], tag[17], tag[18], tag[19]]);
        let x = s15_fixed16_number_to_double(q15_16_x);
        let y = s15_fixed16_number_to_double(q15_16_y);
        let z = s15_fixed16_number_to_double(q15_16_z);
        Ok(Xyzd { x, y, z })
    }

    /// Reads a `Cicp` tag located at `entry`.
    ///
    /// Bytes 8..12 of the tag hold, in order: color primaries, transfer
    /// characteristics, matrix coefficients and the full-range flag (set when
    /// the byte equals 1).
    ///
    /// Returns `Ok(None)` when `tag_size` is below `TAG_SIZE` or the tag has
    /// another type. Fails with `CmsError::InvalidProfile` when the tag runs
    /// past `slice` or is shorter than 12 bytes; errors from the `try_from`
    /// conversions are propagated.
    #[inline]
    pub(crate) fn read_cicp_tag(
        slice: &[u8],
        entry: usize,
        tag_size: usize,
    ) -> Result, CmsError> {
        if tag_size < TAG_SIZE {
            return Ok(None);
        }
        let last_tag_offset = tag_size.safe_add(entry)?;
        if last_tag_offset > slice.len() {
            return Err(CmsError::InvalidProfile);
        }
        let tag = &slice[entry..last_tag_offset];
        if tag.len() < 12 {
            return Err(CmsError::InvalidProfile);
        }
        let tag_type = u32::from_be_bytes([tag[0], tag[1], tag[2], tag[3]]);
        let def = TagTypeDefinition::from(tag_type);
        if def != TagTypeDefinition::Cicp {
            return Ok(None);
        }
        let primaries = CicpColorPrimaries::try_from(tag[8])?;
        let transfer_characteristics = TransferCharacteristics::try_from(tag[9])?;
        let matrix_coefficients = MatrixCoefficients::try_from(tag[10])?;
        let full_range = tag[11] == 1;
        Ok(Some(CicpProfile {
            color_primaries: primaries,
            transfer_characteristics,
            matrix_coefficients,
            full_range,
        }))
    }
}
moxcms-0.7.7/src/rgb.rs000064400000000000000000000377621046102023000130730ustar 00000000000000/* * // Copyright 2024 (c) the Radzivon Bartoshyk. All rights reserved. * // * // Use of this source code is governed by a BSD-style * // license that can be found in the LICENSE file.
*/
use crate::math::{FusedMultiplyAdd, m_clamp, m_max, m_min};
use crate::mlaf::mlaf;
use crate::{Matrix3f, Vector3, Xyz};
use num_traits::{AsPrimitive, Bounded, Float, Num, Pow, Signed};
use pxfm::{
    f_exp, f_exp2, f_exp2f, f_exp10, f_exp10f, f_expf, f_log, f_log2, f_log2f, f_log10, f_log10f,
    f_logf, f_pow, f_powf,
};
use std::cmp::Ordering;
use std::ops::{Add, AddAssign, Div, DivAssign, Index, IndexMut, Mul, MulAssign, Neg, Sub};

#[repr(C)]
#[derive(Debug, PartialOrd, PartialEq, Clone, Copy, Default)]
/// Represents any RGB values
///
/// The struct is `#[repr(C)]`, so the fields are laid out in declaration
/// order: `r`, `g`, `b`.
pub struct Rgb {
    /// Red component
    pub r: T,
    /// Green component
    pub g: T,
    /// Blue component
    pub b: T,
}

impl Rgb {
    /// Creates a value from its three components.
    pub fn new(r: T, g: T, b: T) -> Rgb {
        Rgb { r, g, b }
    }
}

impl Rgb
where
    T: Copy,
{
    /// Creates a value with all three components set to `v`.
    pub fn dup(v: T) -> Rgb {
        Rgb { r: v, g: v, b: v }
    }

    /// Converts into a `Vector3` holding `[r, g, b]`.
    #[inline]
    pub const fn to_vector(self) -> Vector3 {
        Vector3 {
            v: [self.r, self.g, self.b],
        }
    }
}

impl Rgb {
    /// Multiplies `matrix` by this color treated as a column vector.
    ///
    /// Row `i` of the matrix produces output component `i`.
    // NOTE(review): `mlaf(acc, a, b)` is presumably `acc + a * b` - confirm
    // against `crate::mlaf`.
    #[inline(always)]
    pub fn apply(&self, matrix: Matrix3f) -> Rgb {
        let new_r = mlaf(
            mlaf(self.r * matrix.v[0][0], self.g, matrix.v[0][1]),
            self.b,
            matrix.v[0][2],
        );

        let new_g = mlaf(
            mlaf(self.r * matrix.v[1][0], self.g, matrix.v[1][1]),
            self.b,
            matrix.v[1][2],
        );

        let new_b = mlaf(
            mlaf(self.r * matrix.v[2][0], self.g, matrix.v[2][1]),
            self.b,
            matrix.v[2][2],
        );

        Rgb {
            r: new_r,
            g: new_g,
            b: new_b,
        }
    }

    /// Applies `matrix` (see `apply`) and relabels the result as `Xyz`.
    #[inline(always)]
    pub fn to_xyz(&self, matrix: Matrix3f) -> Xyz {
        let new_self = self.apply(matrix);
        Xyz {
            x: new_self.r,
            y: new_self.g,
            z: new_self.b,
        }
    }

    /// Returns `true` when any component lies outside `0.0..=1.0`.
    ///
    /// A NaN component is also reported as out of gamut, because it is not
    /// contained in the range.
    #[inline(always)]
    pub fn is_out_of_gamut(&self) -> bool {
        !(0.0..=1.0).contains(&self.r)
            || !(0.0..=1.0).contains(&self.g)
            || !(0.0..=1.0).contains(&self.b)
    }
}

/// Component access by position: 0 is red, 1 is green, 2 is blue.
/// Any other index panics.
impl Index for Rgb {
    type Output = T;

    fn index(&self, index: usize) -> &T {
        match index {
            0 => &self.r,
            1 => &self.g,
            2 => &self.b,
            _ => panic!("Index out of bounds for Rgb"),
        }
    }
}

/// Mutable component access by position: 0 is red, 1 is green, 2 is blue.
/// Any other index panics.
impl IndexMut for Rgb {
    fn index_mut(&mut self, index: usize) -> &mut T {
        match index {
            0 => &mut self.r,
            1 => &mut self.g,
            2 => &mut self.b,
            _ => panic!("Index out of bounds for RGB"),
        }
    }
}
// Generates the constant constructors for a floating-point component type:
// black is all zeros, white is all ones.
macro_rules! generated_float_definition_rgb {
    ($T: ty) => {
        impl Rgb<$T> {
            #[inline]
            pub fn zeroed() -> Rgb<$T> {
                Rgb::<$T>::new(0., 0., 0.)
            }

            #[inline]
            pub fn ones() -> Rgb<$T> {
                Rgb::<$T>::new(1., 1., 1.)
            }

            #[inline]
            pub fn white() -> Rgb<$T> {
                Rgb::<$T>::ones()
            }

            #[inline]
            pub fn black() -> Rgb<$T> {
                Rgb::<$T>::zeroed()
            }
        }
    };
}

generated_float_definition_rgb!(f32);
generated_float_definition_rgb!(f64);

// Generates the constant constructors for an integer component type:
// black is all zeros, white is the type's maximum in every channel.
macro_rules! generated_integral_definition_rgb {
    ($T: ty) => {
        impl Rgb<$T> {
            #[inline]
            pub fn zeroed() -> Rgb<$T> {
                Rgb::<$T>::new(0, 0, 0)
            }

            #[inline]
            pub fn capped() -> Rgb<$T> {
                Rgb::<$T>::new(<$T>::MAX, <$T>::MAX, <$T>::MAX)
            }

            #[inline]
            pub fn white() -> Rgb<$T> {
                Rgb::<$T>::capped()
            }

            #[inline]
            pub fn black() -> Rgb<$T> {
                Rgb::<$T>::new(0, 0, 0)
            }
        }
    };
}

generated_integral_definition_rgb!(u8);
generated_integral_definition_rgb!(u16);
generated_integral_definition_rgb!(i8);
generated_integral_definition_rgb!(i16);
generated_integral_definition_rgb!(i32);
generated_integral_definition_rgb!(u32);

// Per-component math traits. The implementations below forward every channel
// to the matching `pxfm` function (`f_powf`/`f_pow`, `f_log2f`/`f_log2`, ...).

/// Raises each component to the matching component of `power`.
pub trait FusedPow {
    fn f_pow(&self, power: T) -> Self;
}

/// Base-2 logarithm of each component.
pub trait FusedLog2 {
    fn f_log2(&self) -> Self;
}

/// Base-10 logarithm of each component.
pub trait FusedLog10 {
    fn f_log10(&self) -> Self;
}

/// Logarithm of each component, computed with `f_log`/`f_logf`.
pub trait FusedLog {
    fn f_log(&self) -> Self;
}

/// Exponential of each component, computed with `f_exp`/`f_expf`.
pub trait FusedExp {
    fn f_exp(&self) -> Self;
}

/// `2^x` of each component.
pub trait FusedExp2 {
    fn f_exp2(&self) -> Self;
}

/// `10^x` of each component.
pub trait FusedExp10 {
    fn f_exp10(&self) -> Self;
}

// Single-precision variant (uses `f_powf`).
impl FusedPow> for Rgb {
    fn f_pow(&self, power: Rgb) -> Rgb {
        Rgb::new(
            f_powf(self.r, power.r),
            f_powf(self.g, power.g),
            f_powf(self.b, power.b),
        )
    }
}

// Double-precision variant (uses `f_pow`).
impl FusedPow> for Rgb {
    fn f_pow(&self, power: Rgb) -> Rgb {
        Rgb::new(
            f_pow(self.r, power.r),
            f_pow(self.g, power.g),
            f_pow(self.b, power.b),
        )
    }
}

impl FusedLog2> for Rgb {
    #[inline]
    fn f_log2(&self) -> Rgb {
        Rgb::new(f_log2f(self.r), f_log2f(self.g), f_log2f(self.b))
    }
}

impl FusedLog2> for Rgb {
    #[inline]
    fn f_log2(&self) -> Rgb {
        Rgb::new(f_log2(self.r), f_log2(self.g), f_log2(self.b))
    }
}

impl FusedLog> for Rgb {
    #[inline]
    fn f_log(&self) -> Rgb {
        Rgb::new(f_logf(self.r), f_logf(self.g), f_logf(self.b))
    }
}

impl FusedLog> for Rgb {
    #[inline]
    fn f_log(&self) -> Rgb {
        Rgb::new(f_log(self.r), f_log(self.g), f_log(self.b))
    }
}

impl FusedLog10> for Rgb {
    #[inline]
    fn f_log10(&self) -> Rgb {
        Rgb::new(f_log10f(self.r), f_log10f(self.g), f_log10f(self.b))
    }
}

impl FusedLog10> for Rgb {
    #[inline]
    fn f_log10(&self) -> Rgb {
        Rgb::new(f_log10(self.r), f_log10(self.g), f_log10(self.b))
    }
}

impl FusedExp> for Rgb {
    #[inline]
    fn f_exp(&self) -> Rgb {
        Rgb::new(f_expf(self.r), f_expf(self.g), f_expf(self.b))
    }
}

impl FusedExp> for Rgb {
    #[inline]
    fn f_exp(&self) -> Rgb {
        Rgb::new(f_exp(self.r), f_exp(self.g), f_exp(self.b))
    }
}

impl FusedExp2> for Rgb {
    #[inline]
    fn f_exp2(&self) -> Rgb {
        Rgb::new(f_exp2f(self.r), f_exp2f(self.g), f_exp2f(self.b))
    }
}

impl FusedExp2> for Rgb {
    #[inline]
    fn f_exp2(&self) -> Rgb {
        Rgb::new(f_exp2(self.r), f_exp2(self.g), f_exp2(self.b))
    }
}

impl FusedExp10> for Rgb {
    #[inline]
    fn f_exp10(&self) -> Rgb {
        Rgb::new(f_exp10f(self.r), f_exp10f(self.g), f_exp10f(self.b))
    }
}

impl FusedExp10> for Rgb {
    #[inline]
    fn f_exp10(&self) -> Rgb {
        Rgb::new(f_exp10(self.r), f_exp10(self.g), f_exp10(self.b))
    }
}

impl Rgb
where
    T: Copy + AsPrimitive,
{
    /// Euclidean (L2) distance between two colors, computed in `f32`:
    /// the square root of the sum of squared channel differences.
    pub fn euclidean_distance(&self, other: Rgb) -> f32 {
        let dr = self.r.as_() - other.r.as_();
        let dg = self.g.as_() - other.g.as_();
        let db = self.b.as_() - other.b.as_();
        (dr * dr + dg * dg + db * db).sqrt()
    }
}

impl Rgb
where
    T: Copy + AsPrimitive,
{
    /// Taxicab (L1) distance between two colors, computed in `f32`:
    /// the sum of absolute channel differences.
    pub fn taxicab_distance(&self, other: Self) -> f32 {
        let dr = self.r.as_() - other.r.as_();
        let dg = self.g.as_() - other.g.as_();
        let db = self.b.as_() - other.b.as_();
        dr.abs() + dg.abs() + db.abs()
    }
}

// Arithmetic operators. Every operator below is applied channel by channel;
// the scalar variants apply the same right-hand side to all three channels.

impl Add for Rgb
where
    T: Add,
{
    type Output = Rgb;

    #[inline]
    fn add(self, rhs: Self) -> Self::Output {
        Rgb::new(self.r + rhs.r, self.g + rhs.g, self.b + rhs.b)
    }
}

impl Sub for Rgb
where
    T: Sub,
{
    type Output = Rgb;

    #[inline]
    fn sub(self, rhs: Self) -> Self::Output {
        Rgb::new(self.r - rhs.r, self.g - rhs.g, self.b - rhs.b)
    }
}

impl Sub for Rgb
where
    T: Sub,
{
    type Output = Rgb;

    #[inline]
    fn sub(self, rhs: T) -> Self::Output {
        Rgb::new(self.r - rhs, self.g - rhs, self.b - rhs)
    }
}

impl Add for Rgb
where
    T: Add,
{
    type Output = Rgb;

    #[inline]
    fn add(self, rhs: T) -> Self::Output {
        Rgb::new(self.r + rhs, self.g + rhs, self.b + rhs)
    }
}

impl Rgb
where
    T: Signed,
{
    /// Absolute value of every component.
    #[inline]
    pub fn abs(self) -> Self {
        Rgb::new(self.r.abs(), self.g.abs(), self.b.abs())
    }
}

impl Div for Rgb
where
    T: Div,
{
    type Output = Rgb;

    #[inline]
    fn div(self, rhs: Self) -> Self::Output {
        Rgb::new(self.r / rhs.r, self.g / rhs.g, self.b / rhs.b)
    }
}

impl Div for Rgb
where
    T: Div,
{
    type Output = Rgb;

    #[inline]
    fn div(self, rhs: T) -> Self::Output {
        Rgb::new(self.r / rhs, self.g / rhs, self.b / rhs)
    }
}

impl Mul for Rgb
where
    T: Mul,
{
    type Output = Rgb;

    #[inline]
    fn mul(self, rhs: Self) -> Self::Output {
        Rgb::new(self.r * rhs.r, self.g * rhs.g, self.b * rhs.b)
    }
}

impl Mul for Rgb
where
    T: Mul,
{
    type Output = Rgb;

    #[inline]
    fn mul(self, rhs: T) -> Self::Output {
        Rgb::new(self.r * rhs, self.g * rhs, self.b * rhs)
    }
}

impl MulAssign for Rgb
where
    T: MulAssign,
{
    #[inline]
    fn mul_assign(&mut self, rhs: Self) {
        self.r *= rhs.r;
        self.g *= rhs.g;
        self.b *= rhs.b;
    }
}

// Generates `*=` with a scalar right-hand side of the given primitive type.
macro_rules! generated_mul_assign_definition_rgb {
    ($T: ty) => {
        impl MulAssign<$T> for Rgb
        where
            T: MulAssign<$T>,
        {
            #[inline]
            fn mul_assign(&mut self, rhs: $T) {
                self.r *= rhs;
                self.g *= rhs;
                self.b *= rhs;
            }
        }
    };
}

generated_mul_assign_definition_rgb!(i8);
generated_mul_assign_definition_rgb!(u8);
generated_mul_assign_definition_rgb!(u16);
generated_mul_assign_definition_rgb!(i16);
generated_mul_assign_definition_rgb!(u32);
generated_mul_assign_definition_rgb!(i32);
generated_mul_assign_definition_rgb!(f32);
generated_mul_assign_definition_rgb!(f64);

impl AddAssign for Rgb
where
    T: AddAssign,
{
    #[inline]
    fn add_assign(&mut self, rhs: Self) {
        self.r += rhs.r;
        self.g += rhs.g;
        self.b += rhs.b;
    }
}

macro_rules!
generated_add_assign_definition_rgb { ($T: ty) => { impl AddAssign<$T> for Rgb where T: AddAssign<$T>, { #[inline] fn add_assign(&mut self, rhs: $T) { self.r += rhs; self.g += rhs; self.b += rhs; } } }; } generated_add_assign_definition_rgb!(i8); generated_add_assign_definition_rgb!(u8); generated_add_assign_definition_rgb!(u16); generated_add_assign_definition_rgb!(i16); generated_add_assign_definition_rgb!(u32); generated_add_assign_definition_rgb!(i32); generated_add_assign_definition_rgb!(f32); generated_add_assign_definition_rgb!(f64); impl DivAssign for Rgb where T: DivAssign, { #[inline] fn div_assign(&mut self, rhs: Self) { self.r /= rhs.r; self.g /= rhs.g; self.b /= rhs.b; } } macro_rules! generated_div_assign_definition_rgb { ($T: ty) => { impl DivAssign<$T> for Rgb where T: DivAssign<$T>, { #[inline] fn div_assign(&mut self, rhs: $T) { self.r /= rhs; self.g /= rhs; self.b /= rhs; } } }; } generated_div_assign_definition_rgb!(u8); generated_div_assign_definition_rgb!(i8); generated_div_assign_definition_rgb!(u16); generated_div_assign_definition_rgb!(i16); generated_div_assign_definition_rgb!(u32); generated_div_assign_definition_rgb!(i32); generated_div_assign_definition_rgb!(f32); generated_div_assign_definition_rgb!(f64); impl Neg for Rgb where T: Neg, { type Output = Rgb; #[inline] fn neg(self) -> Self::Output { Rgb::new(-self.r, -self.g, -self.b) } } impl Rgb where T: FusedMultiplyAdd, { pub fn mla(&self, b: Rgb, c: Rgb) -> Rgb { Rgb::new( self.r.mla(b.r, c.r), self.g.mla(b.g, c.g), self.b.mla(b.b, c.b), ) } } impl Rgb where T: Num + PartialOrd + Copy + Bounded, { /// Clamp function to clamp each channel within a given range #[inline] #[allow(clippy::manual_clamp)] pub fn clamp(&self, min_value: T, max_value: T) -> Rgb { Rgb::new( m_clamp(self.r, min_value, max_value), m_clamp(self.g, min_value, max_value), m_clamp(self.b, min_value, max_value), ) } /// Min function to define min #[inline] pub fn min(&self, other_min: T) -> Rgb { Rgb::new( 
m_min(self.r, other_min), m_min(self.g, other_min), m_min(self.b, other_min), ) } /// Max function to define max #[inline] pub fn max(&self, other_max: T) -> Rgb { Rgb::new( m_max(self.r, other_max), m_max(self.g, other_max), m_max(self.b, other_max), ) } /// Clamp function to clamp each channel within a given range #[inline] #[allow(clippy::manual_clamp)] pub fn clamp_p(&self, min_value: Rgb, max_value: Rgb) -> Rgb { Rgb::new( m_clamp(self.r, max_value.r, min_value.r), m_clamp(self.g, max_value.g, min_value.g), m_clamp(self.b, max_value.b, min_value.b), ) } /// Min function to define min #[inline] pub fn min_p(&self, other_min: Rgb) -> Rgb { Rgb::new( m_min(self.r, other_min.r), m_min(self.g, other_min.g), m_min(self.b, other_min.b), ) } /// Max function to define max #[inline] pub fn max_p(&self, other_max: Rgb) -> Rgb { Rgb::new( m_max(self.r, other_max.r), m_max(self.g, other_max.g), m_max(self.b, other_max.b), ) } } impl Rgb where T: Float + 'static, f32: AsPrimitive, { #[inline] pub fn sqrt(&self) -> Rgb { let zeros = 0f32.as_(); Rgb::new( if self.r.partial_cmp(&zeros).unwrap_or(Ordering::Less) == Ordering::Less { 0f32.as_() } else { self.r.sqrt() }, if self.g.partial_cmp(&zeros).unwrap_or(Ordering::Less) == Ordering::Less { 0f32.as_() } else { self.g.sqrt() }, if self.b.partial_cmp(&zeros).unwrap_or(Ordering::Less) == Ordering::Less { 0f32.as_() } else { self.b.sqrt() }, ) } #[inline] pub fn cbrt(&self) -> Rgb { Rgb::new(self.r.cbrt(), self.g.cbrt(), self.b.cbrt()) } } impl Pow for Rgb where T: Float, { type Output = Rgb; #[inline] fn pow(self, rhs: T) -> Self::Output { Rgb::::new(self.r.powf(rhs), self.g.powf(rhs), self.b.powf(rhs)) } } impl Pow> for Rgb where T: Float, { type Output = Rgb; #[inline] fn pow(self, rhs: Rgb) -> Self::Output { Rgb::::new(self.r.powf(rhs.r), self.g.powf(rhs.g), self.b.powf(rhs.b)) } } impl Rgb { pub fn cast(self) -> Rgb where T: AsPrimitive, V: Copy + 'static, { Rgb::new(self.r.as_(), self.g.as_(), self.b.as_()) } } impl Rgb 
where T: Float + 'static, { pub fn round(self) -> Rgb { Rgb::new(self.r.round(), self.g.round(), self.b.round()) } } moxcms-0.7.7/src/safe_math.rs000064400000000000000000000071321046102023000142340ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/
use crate::CmsError;
use std::ops::Add;

// NOTE(review): the `Add` bound on all three traits is reconstructed from the
// single `std::ops::Add` import; confirm against the upstream declaration.

/// Addition that reports overflow as [`CmsError::OverflowingError`]
/// instead of wrapping or panicking.
pub(crate) trait SafeAdd<T: Copy + Add<T, Output = T>> {
    fn safe_add(&self, other: T) -> Result<T, CmsError>;
}

/// Multiplication that reports overflow as [`CmsError::OverflowingError`]
/// instead of wrapping or panicking.
pub(crate) trait SafeMul<T: Copy + Add<T, Output = T>> {
    fn safe_mul(&self, other: T) -> Result<T, CmsError>;
}

/// Integer exponentiation that reports overflow as
/// [`CmsError::OverflowingError`] instead of wrapping or panicking.
pub(crate) trait SafePowi<T: Copy + Add<T, Output = T>> {
    fn safe_powi(&self, power: u32) -> Result<T, CmsError>;
}

/// Implements [`SafeAdd`] on top of `checked_add` for each listed integer type.
macro_rules! safe_add_impl {
    ($($type_name: ident),+ $(,)?) => {
        $(
            impl SafeAdd<$type_name> for $type_name {
                #[inline(always)]
                fn safe_add(&self, other: $type_name) -> Result<$type_name, CmsError> {
                    self.checked_add(other).ok_or(CmsError::OverflowingError)
                }
            }
        )+
    };
}

safe_add_impl!(u16, u32, i32, usize, isize);

/// Implements [`SafeMul`] on top of `checked_mul` for each listed integer type.
macro_rules! safe_mul_impl {
    ($($type_name: ident),+ $(,)?) => {
        $(
            impl SafeMul<$type_name> for $type_name {
                #[inline(always)]
                fn safe_mul(&self, other: $type_name) -> Result<$type_name, CmsError> {
                    self.checked_mul(other).ok_or(CmsError::OverflowingError)
                }
            }
        )+
    };
}

safe_mul_impl!(u16, u32, i32, usize, isize);

/// Implements [`SafePowi`] on top of `checked_pow` for each listed integer type.
macro_rules! safe_powi_impl {
    ($($type_name: ident),+ $(,)?) => {
        $(
            impl SafePowi<$type_name> for $type_name {
                #[inline(always)]
                fn safe_powi(&self, power: u32) -> Result<$type_name, CmsError> {
                    self.checked_pow(power).ok_or(CmsError::OverflowingError)
                }
            }
        )+
    };
}

safe_powi_impl!(u8, u16, u32, i32, usize, isize);
moxcms-0.7.7/src/srlab2.rs000064400000000000000000000066231046102023000134760ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2.
Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ use crate::Xyz; use crate::mlaf::mlaf; use pxfm::f_cbrtf; #[inline] fn srlab2_gamma(x: f32) -> f32 { if x <= 216. / 24389. { x * (24389. / 2700.) 
} else { 1.16 * f_cbrtf(x) - 0.16 } } #[inline] fn srlab2_linearize(x: f32) -> f32 { if x <= 0.08 { x * (2700.0 / 24389.0) } else { let zx = (x + 0.16) / 1.16; zx * zx * zx } } #[derive(Copy, Clone, Debug, Default, PartialOrd, PartialEq)] pub struct Srlab2 { pub l: f32, pub a: f32, pub b: f32, } impl Srlab2 { #[inline] pub const fn new(l: f32, a: f32, b: f32) -> Srlab2 { Srlab2 { l, a, b } } #[inline] pub fn from_xyz(xyz: Xyz) -> Srlab2 { let lx = srlab2_gamma(xyz.x); let ly = srlab2_gamma(xyz.y); let lz = srlab2_gamma(xyz.z); let l = mlaf(mlaf(0.629054 * ly, -0.000008, lz), 0.37095, lx); let a = mlaf(mlaf(6.634684 * lx, -7.505078, ly), 0.870328, lz); let b = mlaf(mlaf(0.639569 * lx, 1.084576, ly), -1.724152, lz); Srlab2 { l, a, b } } #[inline] pub fn to_xyz(&self) -> Xyz { let x = mlaf(mlaf(self.l, 0.09041272, self.a), 0.045634452, self.b); let y = mlaf(mlaf(self.l, -0.05331593, self.a), -0.026917785, self.b); let z = mlaf(self.l, -0.58, self.b); let lx = srlab2_linearize(x); let ly = srlab2_linearize(y); let lz = srlab2_linearize(z); Xyz::new(lx, ly, lz) } } #[cfg(test)] mod tests { use super::*; #[test] fn test_srlab2() { let xyz = Xyz::new(0.3, 0.65, 0.66); let srlab2 = Srlab2::from_xyz(xyz); let r_xyz = srlab2.to_xyz(); assert!((r_xyz.x - xyz.x).abs() < 1e-5); assert!((r_xyz.y - xyz.y).abs() < 1e-5); assert!((r_xyz.z - xyz.z).abs() < 1e-5); } } moxcms-0.7.7/src/tag.rs000064400000000000000000000315661046102023000130700ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. 
Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/
use crate::CmsError;

pub(crate) const TAG_SIZE: usize = 12;

/// Tags recognised by this crate, identified by their four-character signatures.
#[derive(Debug, Copy, Clone, PartialEq, Ord, PartialOrd, Eq, Hash)]
pub(crate) enum Tag {
    RedXyz,
    GreenXyz,
    BlueXyz,
    RedToneReproduction,
    GreenToneReproduction,
    BlueToneReproduction,
    GreyToneReproduction,
    MediaWhitePoint,
    CodeIndependentPoints,
    ChromaticAdaptation,
    BlackPoint,
    DeviceToPcsLutPerceptual,
    DeviceToPcsLutColorimetric,
    DeviceToPcsLutSaturation,
    PcsToDeviceLutPerceptual,
    PcsToDeviceLutColorimetric,
    PcsToDeviceLutSaturation,
    ProfileDescription,
    Copyright,
    ViewingConditionsDescription,
    DeviceManufacturer,
    DeviceModel,
    Gamut,
    Luminance,
    Measurement,
    Chromaticity,
    ObserverConditions,
    CharTarget,
    Technology,
    CalibrationDateTime,
}

impl TryFrom<u32> for Tag {
    type Error = CmsError;

    /// Decodes a tag signature whose most significant byte is the first
    /// character of the four-character code.
    ///
    /// # Errors
    ///
    /// Returns [`CmsError::UnknownTag`] carrying `value` when the signature is
    /// not one of the known tags.
    fn try_from(value: u32) -> Result<Self, Self::Error> {
        let tag = match &value.to_be_bytes() {
            b"rXYZ" => Self::RedXyz,
            b"gXYZ" => Self::GreenXyz,
            b"bXYZ" => Self::BlueXyz,
            b"rTRC" => Self::RedToneReproduction,
            b"gTRC" => Self::GreenToneReproduction,
            b"bTRC" => Self::BlueToneReproduction,
            b"kTRC" => Self::GreyToneReproduction,
            b"wtpt" => Self::MediaWhitePoint,
            b"cicp" => Self::CodeIndependentPoints,
            b"chad" => Self::ChromaticAdaptation,
            b"bkpt" => Self::BlackPoint,
            b"A2B0" => Self::DeviceToPcsLutPerceptual,
            b"A2B1" => Self::DeviceToPcsLutColorimetric,
            b"A2B2" => Self::DeviceToPcsLutSaturation,
            b"B2A0" => Self::PcsToDeviceLutPerceptual,
            b"B2A1" => Self::PcsToDeviceLutColorimetric,
            b"B2A2" => Self::PcsToDeviceLutSaturation,
            b"desc" => Self::ProfileDescription,
            b"cprt" => Self::Copyright,
            b"vued" => Self::ViewingConditionsDescription,
            b"dmnd" => Self::DeviceManufacturer,
            b"dmdd" => Self::DeviceModel,
            b"gamt" => Self::Gamut,
            b"lumi" => Self::Luminance,
            b"meas" => Self::Measurement,
            b"chrm" => Self::Chromaticity,
            b"view" => Self::ObserverConditions,
            b"targ" => Self::CharTarget,
            b"tech" => Self::Technology,
            b"calt" => Self::CalibrationDateTime,
            _ => return Err(CmsError::UnknownTag(value)),
        };
        Ok(tag)
    }
}

impl From<Tag> for u32 {
    /// Encodes the tag as its four-character signature, first character in the
    /// most significant byte.
    fn from(value: Tag) -> Self {
        let signature: &[u8; 4] = match value {
            Tag::RedXyz => b"rXYZ",
            Tag::GreenXyz => b"gXYZ",
            Tag::BlueXyz => b"bXYZ",
            Tag::RedToneReproduction => b"rTRC",
            Tag::GreenToneReproduction => b"gTRC",
            Tag::BlueToneReproduction => b"bTRC",
            Tag::GreyToneReproduction => b"kTRC",
            Tag::MediaWhitePoint => b"wtpt",
            Tag::CodeIndependentPoints => b"cicp",
            Tag::ChromaticAdaptation => b"chad",
            Tag::BlackPoint => b"bkpt",
            Tag::DeviceToPcsLutPerceptual => b"A2B0",
            Tag::DeviceToPcsLutColorimetric => b"A2B1",
            Tag::DeviceToPcsLutSaturation => b"A2B2",
            Tag::PcsToDeviceLutPerceptual => b"B2A0",
            Tag::PcsToDeviceLutColorimetric => b"B2A1",
            Tag::PcsToDeviceLutSaturation => b"B2A2",
            Tag::ProfileDescription => b"desc",
            Tag::Copyright => b"cprt",
            Tag::ViewingConditionsDescription => b"vued",
            Tag::DeviceManufacturer => b"dmnd",
            Tag::DeviceModel => b"dmdd",
            Tag::Gamut => b"gamt",
            Tag::Luminance => b"lumi",
            Tag::Measurement => b"meas",
            Tag::Chromaticity => b"chrm",
            Tag::ObserverConditions => b"view",
            Tag::CharTarget => b"targ",
            Tag::Technology => b"tech",
            Tag::CalibrationDateTime => b"calt",
        };
        u32::from_be_bytes(*signature)
    }
}

/// Tag data types recognised by this crate, plus `NotAllowed` for every
/// signature that is not recognised.
#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
pub(crate) enum TagTypeDefinition {
    Text,
    MultiLocalizedUnicode,
    Description,
    MabLut,
    MbaLut,
    ParametricToneCurve,
    LutToneCurve,
    Xyz,
    MultiProcessElement,
    DefViewingConditions,
    Signature,
    Cicp,
    DateTime,
    S15Fixed16Array,
    U8Array,
    U16Fixed16Array,
    U16Array,
    U32Array,
    U64Array,
    Measurement,
    NotAllowed,
}

impl From<u32> for TagTypeDefinition {
    /// Decodes a type signature whose most significant byte is the first
    /// character of the four-character code. Unrecognised signatures map to
    /// [`TagTypeDefinition::NotAllowed`]; this conversion never fails.
    fn from(value: u32) -> Self {
        match &value.to_be_bytes() {
            b"mluc" => TagTypeDefinition::MultiLocalizedUnicode,
            b"desc" => TagTypeDefinition::Description,
            b"text" => TagTypeDefinition::Text,
            b"mAB " => TagTypeDefinition::MabLut,
            b"mBA " => TagTypeDefinition::MbaLut,
            b"para" => TagTypeDefinition::ParametricToneCurve,
            b"curv" => TagTypeDefinition::LutToneCurve,
            b"XYZ " => TagTypeDefinition::Xyz,
            b"mpet" => TagTypeDefinition::MultiProcessElement,
            b"view" => TagTypeDefinition::DefViewingConditions,
            b"sig " => TagTypeDefinition::Signature,
            b"cicp" => TagTypeDefinition::Cicp,
            b"dtim" => TagTypeDefinition::DateTime,
            b"meas" => TagTypeDefinition::Measurement,
            b"sf32" => TagTypeDefinition::S15Fixed16Array,
            b"uf32" => TagTypeDefinition::U16Fixed16Array,
            b"ui16" => TagTypeDefinition::U16Array,
            b"ui32" => TagTypeDefinition::U32Array,
            b"ui64" => TagTypeDefinition::U64Array,
            b"ui08" => TagTypeDefinition::U8Array,
            _ => TagTypeDefinition::NotAllowed,
        }
    }
}

impl From<TagTypeDefinition> for u32 {
    /// Encodes the type as its four-character signature, first character in the
    /// most significant byte. `NotAllowed` has no signature and encodes as `0`.
    fn from(value: TagTypeDefinition) -> Self {
        let signature: &[u8; 4] = match value {
            TagTypeDefinition::MultiLocalizedUnicode => b"mluc",
            TagTypeDefinition::Description => b"desc",
            TagTypeDefinition::Text => b"text",
            TagTypeDefinition::MabLut => b"mAB ",
            TagTypeDefinition::MbaLut => b"mBA ",
            TagTypeDefinition::ParametricToneCurve => b"para",
            TagTypeDefinition::LutToneCurve => b"curv",
            TagTypeDefinition::Xyz => b"XYZ ",
            TagTypeDefinition::MultiProcessElement => b"mpet",
            TagTypeDefinition::DefViewingConditions => b"view",
            TagTypeDefinition::Signature => b"sig ",
            TagTypeDefinition::Cicp => b"cicp",
            TagTypeDefinition::DateTime => b"dtim",
            TagTypeDefinition::S15Fixed16Array => b"sf32",
            TagTypeDefinition::U16Fixed16Array => b"uf32",
            TagTypeDefinition::U8Array => b"ui08",
            TagTypeDefinition::U16Array => b"ui16",
            TagTypeDefinition::U32Array => b"ui32",
            TagTypeDefinition::U64Array => b"ui64",
            TagTypeDefinition::Measurement => b"meas",
            TagTypeDefinition::NotAllowed => return 0,
        };
        u32::from_be_bytes(*signature)
    }
}
moxcms-0.7.7/src/transform.rs000064400000000000000000001400131046102023000143140ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1.
Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ use crate::conversions::{ LutBarycentricReduction, RgbXyzFactory, RgbXyzFactoryOpt, ToneReproductionRgbToGray, TransformMatrixShaper, make_gray_to_unfused, make_gray_to_x, make_lut_transform, make_rgb_to_gray, }; use crate::err::CmsError; use crate::trc::GammaLutInterpolate; use crate::{ColorProfile, DataColorSpace, LutWarehouse, RenderingIntent, Vector3f, Xyzd}; use num_traits::AsPrimitive; use std::marker::PhantomData; /// Transformation executor itself pub trait TransformExecutor { /// Count of samples always must match. 
/// If there is N samples of *Cmyk* source then N samples of *Rgb* is expected as an output. fn transform(&self, src: &[V], dst: &mut [V]) -> Result<(), CmsError>; } /// Helper for intermediate transformation stages pub trait Stage { fn transform(&self, src: &[f32], dst: &mut [f32]) -> Result<(), CmsError>; } /// Helper for intermediate transformation stages pub trait InPlaceStage { fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError>; } /// Barycentric interpolation weights size. /// /// Bigger weights increases precision. #[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash, Default)] pub enum BarycentricWeightScale { #[default] /// Low scale weights is enough for common case. /// /// However, it might crush dark zones and gradients. /// Weights increasing costs 5% performance. Low, #[cfg(feature = "options")] High, } /// Declares additional transformation options #[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)] pub struct TransformOptions { pub rendering_intent: RenderingIntent, /// If set it will try to use Transfer Characteristics from CICP /// on transform. This might be more precise and faster. pub allow_use_cicp_transfer: bool, /// Prefers fixed point where implemented as default. /// Most of the applications actually do not need floating point. /// /// Do not change it if you're not sure that extreme precision is required, /// in most cases it is a simple way to spend energy to warming up environment /// a little. /// /// Q2.13 for RGB->XYZ->RGB is used. /// LUT interpolation use Q0.15. pub prefer_fixed_point: bool, /// Interpolation method for 3D LUT /// /// This parameter has no effect on LAB/XYZ interpolation and scene linear RGB. /// /// Technically, it should be assumed to perform cube dividing interpolation: /// - Source colorspace is gamma-encoded (discards scene linear RGB and XYZ). /// - Colorspace is uniform. /// - Colorspace has linear scaling (discards LAB). 
/// - Interpolation doesn't shift hues (discards LAB). /// /// For LAB, XYZ and scene linear RGB `trilinear/quadlinear` always in force. pub interpolation_method: InterpolationMethod, /// Barycentric weights scale. /// /// This value controls LUT weights precision. pub barycentric_weight_scale: BarycentricWeightScale, /// For floating points transform, it will try to detect gamma function on *Matrix Shaper* profiles. /// If gamma function is found, then it will be used instead of LUT table. /// This allows to work with excellent precision with extended range, /// at a cost of execution time. pub allow_extended_range_rgb_xyz: bool, // pub black_point_compensation: bool, } #[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash, Default)] /// Defines the interpolation method. /// /// All methods produce very close results that almost not possible to separate without /// some automation tools. /// /// This implementation chooses the fastest method as default. pub enum InterpolationMethod { /// General Tetrahedron interpolation. /// This is used in lcms2 and others CMS. #[cfg(feature = "options")] Tetrahedral, /// Divides cube into a pyramids and interpolate then in the pyramid. #[cfg(feature = "options")] Pyramid, /// Interpolation by dividing cube into prisms. 
#[cfg(feature = "options")] Prism, /// Trilinear/Quadlinear interpolation #[default] Linear, } impl Default for TransformOptions { fn default() -> Self { Self { rendering_intent: RenderingIntent::default(), allow_use_cicp_transfer: true, prefer_fixed_point: true, interpolation_method: InterpolationMethod::default(), barycentric_weight_scale: BarycentricWeightScale::default(), allow_extended_range_rgb_xyz: false, // black_point_compensation: false, } } } pub type Transform8BitExecutor = dyn TransformExecutor + Send + Sync; pub type Transform16BitExecutor = dyn TransformExecutor + Send + Sync; pub type TransformF32BitExecutor = dyn TransformExecutor + Send + Sync; pub type TransformF64BitExecutor = dyn TransformExecutor + Send + Sync; /// Layout declares a data layout. /// For RGB it shows also the channel order. /// To handle different data bit-depth appropriate executor must be used. /// Cmyk8 uses the same layout as Rgba8. #[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] pub enum Layout { Rgb = 0, Rgba = 1, Gray = 2, GrayAlpha = 3, Inks5 = 4, Inks6 = 5, Inks7 = 6, Inks8 = 7, Inks9 = 8, Inks10 = 9, Inks11 = 10, Inks12 = 11, Inks13 = 12, Inks14 = 13, Inks15 = 14, } impl Layout { /// Returns Red channel index #[inline(always)] pub const fn r_i(self) -> usize { match self { Layout::Rgb => 0, Layout::Rgba => 0, Layout::Gray => unimplemented!(), Layout::GrayAlpha => unimplemented!(), _ => unimplemented!(), } } /// Returns Green channel index #[inline(always)] pub const fn g_i(self) -> usize { match self { Layout::Rgb => 1, Layout::Rgba => 1, Layout::Gray => unimplemented!(), Layout::GrayAlpha => unimplemented!(), _ => unimplemented!(), } } /// Returns Blue channel index #[inline(always)] pub const fn b_i(self) -> usize { match self { Layout::Rgb => 2, Layout::Rgba => 2, Layout::Gray => unimplemented!(), Layout::GrayAlpha => unimplemented!(), _ => unimplemented!(), } } #[inline(always)] pub const fn a_i(self) -> usize { match self { Layout::Rgb => 
unimplemented!(), Layout::Rgba => 3, Layout::Gray => unimplemented!(), Layout::GrayAlpha => 1, _ => unimplemented!(), } } #[inline(always)] pub const fn has_alpha(self) -> bool { match self { Layout::Rgb => false, Layout::Rgba => true, Layout::Gray => false, Layout::GrayAlpha => true, _ => false, } } #[inline] pub const fn channels(self) -> usize { match self { Layout::Rgb => 3, Layout::Rgba => 4, Layout::Gray => 1, Layout::GrayAlpha => 2, Layout::Inks5 => 5, Layout::Inks6 => 6, Layout::Inks7 => 7, Layout::Inks8 => 8, Layout::Inks9 => 9, Layout::Inks10 => 10, Layout::Inks11 => 11, Layout::Inks12 => 12, Layout::Inks13 => 13, Layout::Inks14 => 14, Layout::Inks15 => 15, } } pub(crate) fn from_inks(inks: usize) -> Self { match inks { 1 => Layout::Gray, 2 => Layout::GrayAlpha, 3 => Layout::Rgb, 4 => Layout::Rgba, 5 => Layout::Inks5, 6 => Layout::Inks6, 7 => Layout::Inks7, 8 => Layout::Inks8, 9 => Layout::Inks9, 10 => Layout::Inks10, 11 => Layout::Inks11, 12 => Layout::Inks12, 13 => Layout::Inks13, 14 => Layout::Inks14, 15 => Layout::Inks15, _ => unreachable!("Impossible amount of inks"), } } } impl From for Layout { fn from(value: u8) -> Self { match value { 0 => Layout::Rgb, 1 => Layout::Rgba, 2 => Layout::Gray, 3 => Layout::GrayAlpha, _ => unimplemented!(), } } } impl Layout { #[inline(always)] pub const fn resolve(value: u8) -> Self { match value { 0 => Layout::Rgb, 1 => Layout::Rgba, 2 => Layout::Gray, 3 => Layout::GrayAlpha, 4 => Layout::Inks5, 5 => Layout::Inks6, 6 => Layout::Inks7, 7 => Layout::Inks8, 8 => Layout::Inks9, 9 => Layout::Inks10, 10 => Layout::Inks11, 11 => Layout::Inks12, 12 => Layout::Inks13, 13 => Layout::Inks14, 14 => Layout::Inks15, _ => unimplemented!(), } } } #[doc(hidden)] pub trait PointeeSizeExpressible { fn _as_usize(self) -> usize; const FINITE: bool; const NOT_FINITE_GAMMA_TABLE_SIZE: usize; const NOT_FINITE_LINEAR_TABLE_SIZE: usize; const IS_U8: bool; const IS_U16: bool; } impl PointeeSizeExpressible for u8 { #[inline(always)] fn 
_as_usize(self) -> usize { self as usize } const FINITE: bool = true; const NOT_FINITE_GAMMA_TABLE_SIZE: usize = 1; const NOT_FINITE_LINEAR_TABLE_SIZE: usize = 1; const IS_U8: bool = true; const IS_U16: bool = false; } impl PointeeSizeExpressible for u16 { #[inline(always)] fn _as_usize(self) -> usize { self as usize } const FINITE: bool = true; const NOT_FINITE_GAMMA_TABLE_SIZE: usize = 1; const NOT_FINITE_LINEAR_TABLE_SIZE: usize = 1; const IS_U8: bool = false; const IS_U16: bool = true; } impl PointeeSizeExpressible for f32 { #[inline(always)] fn _as_usize(self) -> usize { const MAX_14_BIT: f32 = ((1 << 14u32) - 1) as f32; ((self * MAX_14_BIT).max(0f32).min(MAX_14_BIT) as u16) as usize } const FINITE: bool = false; const NOT_FINITE_GAMMA_TABLE_SIZE: usize = 32768; const NOT_FINITE_LINEAR_TABLE_SIZE: usize = 1 << 14u32; const IS_U8: bool = false; const IS_U16: bool = false; } impl PointeeSizeExpressible for f64 { #[inline(always)] fn _as_usize(self) -> usize { const MAX_16_BIT: f64 = ((1 << 16u32) - 1) as f64; ((self * MAX_16_BIT).max(0.).min(MAX_16_BIT) as u16) as usize } const FINITE: bool = false; const NOT_FINITE_GAMMA_TABLE_SIZE: usize = 65536; const NOT_FINITE_LINEAR_TABLE_SIZE: usize = 1 << 16; const IS_U8: bool = false; const IS_U16: bool = false; } impl ColorProfile { /// Checks if profile is valid *Matrix Shaper* profile pub fn is_matrix_shaper(&self) -> bool { self.color_space == DataColorSpace::Rgb && self.red_colorant != Xyzd::default() && self.green_colorant != Xyzd::default() && self.blue_colorant != Xyzd::default() && self.red_trc.is_some() && self.green_trc.is_some() && self.blue_trc.is_some() } /// Creates transform between source and destination profile /// Use for 16 bit-depth data bit-depth only. 
pub fn create_transform_16bit(
    &self,
    src_layout: Layout,
    dst_pr: &ColorProfile,
    dst_layout: Layout,
    options: TransformOptions,
) -> Result, CmsError> {
    // Thin wrapper: all the work happens in `create_transform_nbit`.
    self.create_transform_nbit::(src_layout, dst_pr, dst_layout, options)
}

/// Creates a transform from this (source) profile to the destination profile
/// `dst_pr`.
///
/// Use for 12-bit data only.
///
/// `src_layout` and `dst_layout` describe the channel layout of the input and
/// output buffers. Any error from `create_transform_nbit` is returned unchanged.
pub fn create_transform_12bit(
    &self,
    src_layout: Layout,
    dst_pr: &ColorProfile,
    dst_layout: Layout,
    options: TransformOptions,
) -> Result, CmsError> {
    // Thin wrapper: all the work happens in `create_transform_nbit`.
    self.create_transform_nbit::(src_layout, dst_pr, dst_layout, options)
}

/// Creates a transform from this (source) profile to the destination profile
/// `dst_pr`.
///
/// Use for 10-bit data only.
///
/// `src_layout` and `dst_layout` describe the channel layout of the input and
/// output buffers. Any error from `create_transform_nbit` is returned unchanged.
pub fn create_transform_10bit(
    &self,
    src_layout: Layout,
    dst_pr: &ColorProfile,
    dst_layout: Layout,
    options: TransformOptions,
) -> Result, CmsError> {
    // Thin wrapper: all the work happens in `create_transform_nbit`.
    self.create_transform_nbit::(src_layout, dst_pr, dst_layout, options)
}

/// Creates a transform from this (source) profile to the destination profile
/// `dst_pr` for `f32` samples.
///
/// Data has to be normalized into the [0, 1] range.
/// ICC profiles and LUT tables do not exist in infinite precision,
/// so this implementation treats `f32` as 14-bit values.
/// The floating-point transformer works in extended mode, which means the
/// returned data might be negative or greater than 1.
///
/// Any error from `create_transform_nbit` is returned unchanged.
pub fn create_transform_f32(
    &self,
    src_layout: Layout,
    dst_pr: &ColorProfile,
    dst_layout: Layout,
    options: TransformOptions,
) -> Result, CmsError> {
    // Thin wrapper: all the work happens in `create_transform_nbit`.
    self.create_transform_nbit::(src_layout, dst_pr, dst_layout, options)
}

/// Creates a transform from this (source) profile to the destination profile
/// `dst_pr` for `f64` samples.
///
/// Data has to be normalized into the [0, 1] range.
/// ICC profiles and LUT tables do not exist in infinite precision,
/// so this implementation treats `f64` as 16-bit values.
/// The floating-point transformer works in extended mode, which means the
/// returned data might be negative or greater than 1.
pub fn create_transform_f64(
    &self,
    src_layout: Layout,
    dst_pr: &ColorProfile,
    dst_layout: Layout,
    options: TransformOptions,
) -> Result, CmsError> {
    // Thin wrapper: forwards every argument unchanged to the shared generic builder.
    self.create_transform_nbit::(src_layout, dst_pr, dst_layout, options)
}

/// Shared builder behind the public `create_transform_*` entry points.
///
/// Picks a transform implementation from the colour spaces / PCS of `self`
/// (the source profile) and `dst_pr` (the destination profile):
///
/// 1. RGB -> RGB, both matrix-shaper profiles with an XYZ PCS.
/// 2. Gray (with a gray TRC) -> RGB or Gray, XYZ PCS on both sides.
/// 3. RGB -> Gray (with a gray TRC), XYZ PCS on both sides.
/// 4. Three-channel / CMYK / Color4 on both sides with an XYZ or Lab PCS:
///    layouts are validated, then a LUT transform is built.
/// 5. Anything else: handed to `make_lut_transform` without layout validation here.
///
/// Returns `CmsError::InvalidLayout` when a layout does not fit the selected branch;
/// errors from the table builders are propagated with `?`.
///
/// NOTE(review): generic argument lists appear to have been stripped from this copy of
/// the file (e.g. `Result + Send + Sync>, CmsError>`, `make_lut_transform::(`); the code
/// tokens below are left exactly as found.
fn create_transform_nbit<
    T: Copy
        + Default
        + AsPrimitive
        + PointeeSizeExpressible
        + Send
        + Sync
        + AsPrimitive
        + RgbXyzFactory
        + RgbXyzFactoryOpt
        + GammaLutInterpolate,
    const BIT_DEPTH: usize,
    const LINEAR_CAP: usize,
    const GAMMA_CAP: usize,
>(
    &self,
    src_layout: Layout,
    dst_pr: &ColorProfile,
    dst_layout: Layout,
    options: TransformOptions,
) -> Result + Send + Sync>, CmsError>
where
    f32: AsPrimitive,
    u32: AsPrimitive,
    (): LutBarycentricReduction,
    (): LutBarycentricReduction,
{
    // Branch 1: RGB -> RGB between two matrix-shaper profiles.
    if self.color_space == DataColorSpace::Rgb
        && dst_pr.pcs == DataColorSpace::Xyz
        && dst_pr.color_space == DataColorSpace::Rgb
        && self.pcs == DataColorSpace::Xyz
        && self.is_matrix_shaper()
        && dst_pr.is_matrix_shaper()
    {
        // Gray layouts are rejected on both ends of an RGB -> RGB transform.
        if src_layout == Layout::Gray || src_layout == Layout::GrayAlpha {
            return Err(CmsError::InvalidLayout);
        }
        if dst_layout == Layout::Gray || dst_layout == Layout::GrayAlpha {
            return Err(CmsError::InvalidLayout);
        }

        // A LUT on either side takes precedence over the matrix/shaper path.
        if self.has_device_to_pcs_lut() || dst_pr.has_pcs_to_device_lut() {
            return make_lut_transform::(
                src_layout, self, dst_layout, dst_pr, options,
            );
        }

        let transform = self.transform_matrix(dst_pr);

        // Extended-range path: only for non-finite `T` and only when the caller opted in.
        if !T::FINITE && options.allow_extended_range_rgb_xyz {
            if let Some(gamma_evaluator) = dst_pr.try_extended_gamma_evaluator() {
                if let Some(linear_evaluator) = self.try_extended_linearizing_evaluator() {
                    // Both directions have an evaluator: no lookup tables are built.
                    use crate::conversions::{
                        TransformShaperFloatInOut, make_rgb_xyz_rgb_transform_float_in_out,
                    };
                    let p = TransformShaperFloatInOut {
                        linear_evaluator,
                        gamma_evaluator,
                        adaptation_matrix: transform.to_f32(),
                        phantom_data: PhantomData,
                    };
                    return make_rgb_xyz_rgb_transform_float_in_out::(
                        src_layout, dst_layout, p, BIT_DEPTH,
                    );
                }

                // Only the gamma side has an evaluator: linearize through tables.
                let lin_r = self.build_r_linearize_table::(
                    options.allow_use_cicp_transfer,
                )?;
                let lin_g = self.build_g_linearize_table::(
                    options.allow_use_cicp_transfer,
                )?;
                let lin_b = self.build_b_linearize_table::(
                    options.allow_use_cicp_transfer,
                )?;

                use crate::conversions::{
                    TransformShaperRgbFloat, make_rgb_xyz_rgb_transform_float,
                };
                let p = TransformShaperRgbFloat {
                    r_linear: lin_r,
                    g_linear: lin_g,
                    b_linear: lin_b,
                    gamma_evaluator,
                    adaptation_matrix: transform.to_f32(),
                    phantom_data: PhantomData,
                };
                return make_rgb_xyz_rgb_transform_float::(
                    src_layout, dst_layout, p, BIT_DEPTH,
                );
            }
        }

        // When every channel shares one TRC on both profiles, a single
        // linearize/gamma table pair (built from the red channel) is enough.
        if self.are_all_trc_the_same() && dst_pr.are_all_trc_the_same() {
            let linear = self.build_r_linearize_table::(
                options.allow_use_cicp_transfer,
            )?;

            let gamma = dst_pr.build_gamma_table::(
                &dst_pr.red_trc,
                options.allow_use_cicp_transfer,
            )?;

            let profile_transform = crate::conversions::TransformMatrixShaperOptimized {
                linear,
                gamma,
                adaptation_matrix: transform.to_f32(),
            };

            return T::make_optimized_transform::(
                src_layout,
                dst_layout,
                profile_transform,
                options,
            );
        }

        // General case: one linearization table and one gamma table per channel.
        let lin_r = self.build_r_linearize_table::(
            options.allow_use_cicp_transfer,
        )?;
        let lin_g = self.build_g_linearize_table::(
            options.allow_use_cicp_transfer,
        )?;
        let lin_b = self.build_b_linearize_table::(
            options.allow_use_cicp_transfer,
        )?;

        let gamma_r = dst_pr.build_gamma_table::(
            &dst_pr.red_trc,
            options.allow_use_cicp_transfer,
        )?;
        let gamma_g = dst_pr.build_gamma_table::(
            &dst_pr.green_trc,
            options.allow_use_cicp_transfer,
        )?;
        let gamma_b = dst_pr.build_gamma_table::(
            &dst_pr.blue_trc,
            options.allow_use_cicp_transfer,
        )?;

        let profile_transform = TransformMatrixShaper {
            r_linear: lin_r,
            g_linear: lin_g,
            b_linear: lin_b,
            r_gamma: gamma_r,
            g_gamma: gamma_g,
            b_gamma: gamma_b,
            adaptation_matrix: transform.to_f32(),
        };

        T::make_transform::(
            src_layout,
            dst_layout,
            profile_transform,
            options,
        )
    // Branch 2: Gray source (with a gray TRC) to an RGB or Gray destination.
    } else if (self.color_space == DataColorSpace::Gray && self.gray_trc.is_some())
        && (dst_pr.color_space == DataColorSpace::Rgb
            || (dst_pr.color_space == DataColorSpace::Gray && dst_pr.gray_trc.is_some()))
        && self.pcs == DataColorSpace::Xyz
        && dst_pr.pcs == DataColorSpace::Xyz
    {
        // Only the source layout is validated in this branch.
        if src_layout != Layout::GrayAlpha && src_layout != Layout::Gray {
            return Err(CmsError::InvalidLayout);
        }

        if self.has_device_to_pcs_lut() || dst_pr.has_pcs_to_device_lut() {
            return make_lut_transform::(
                src_layout, self, dst_layout, dst_pr, options,
            );
        }

        let gray_linear = self.build_gray_linearize_table::()?;

        if dst_pr.color_space == DataColorSpace::Gray {
            if !T::FINITE && options.allow_extended_range_rgb_xyz {
                if let Some(gamma_evaluator) = dst_pr.try_extended_gamma_evaluator() {
                    if let Some(linear_evaluator) = self.try_extended_linearizing_evaluator() {
                        // Gray -> Gray case extended range
                        use crate::conversions::make_gray_to_one_trc_extended;
                        return make_gray_to_one_trc_extended::(
                            src_layout,
                            dst_layout,
                            linear_evaluator,
                            gamma_evaluator,
                            BIT_DEPTH,
                        );
                    }
                }
            }

            // Gray -> Gray case
            let gray_gamma = dst_pr.build_gamma_table::(
                &dst_pr.gray_trc,
                options.allow_use_cicp_transfer,
            )?;

            make_gray_to_x::(
                src_layout,
                dst_layout,
                &gray_linear,
                &gray_gamma,
                BIT_DEPTH,
                GAMMA_CAP,
            )
        } else {
            #[allow(clippy::collapsible_if)]
            if dst_pr.are_all_trc_the_same() {
                if !T::FINITE && options.allow_extended_range_rgb_xyz {
                    if let Some(gamma_evaluator) = dst_pr.try_extended_gamma_evaluator() {
                        if let Some(linear_evaluator) = self.try_extended_linearizing_evaluator()
                        {
                            // Gray -> RGB where all TRC is the same with extended range
                            use crate::conversions::make_gray_to_one_trc_extended;
                            return make_gray_to_one_trc_extended::(
                                src_layout,
                                dst_layout,
                                linear_evaluator,
                                gamma_evaluator,
                                BIT_DEPTH,
                            );
                        }
                    }
                }

                // Gray -> RGB where all TRC is the same
                let rgb_gamma = dst_pr.build_gamma_table::(
                    &dst_pr.red_trc,
                    options.allow_use_cicp_transfer,
                )?;

                make_gray_to_x::(
                    src_layout,
                    dst_layout,
                    &gray_linear,
                    &rgb_gamma,
                    BIT_DEPTH,
                    GAMMA_CAP,
                )
            } else {
                // Gray -> RGB where all TRC is NOT the same
                if !T::FINITE && options.allow_extended_range_rgb_xyz {
                    if let Some(gamma_evaluator) = dst_pr.try_extended_gamma_evaluator() {
                        if let Some(linear_evaluator) = self.try_extended_linearizing_evaluator()
                        {
                            // Gray -> RGB where all TRC is NOT the same with extended range
                            use crate::conversions::make_gray_to_rgb_extended;
                            return make_gray_to_rgb_extended::(
                                src_layout,
                                dst_layout,
                                linear_evaluator,
                                gamma_evaluator,
                                BIT_DEPTH,
                            );
                        }
                    }
                }

                let red_gamma = dst_pr.build_gamma_table::(
                    &dst_pr.red_trc,
                    options.allow_use_cicp_transfer,
                )?;
                let green_gamma = dst_pr.build_gamma_table::(
                    &dst_pr.green_trc,
                    options.allow_use_cicp_transfer,
                )?;
                let blue_gamma = dst_pr.build_gamma_table::(
                    &dst_pr.blue_trc,
                    options.allow_use_cicp_transfer,
                )?;

                // Copy the linearization table into a zero-initialised, fixed 65536-entry
                // boxed array; `zip` stops at the shorter side, so any tail stays at 0.
                let mut gray_linear2 = Box::new([0f32; 65536]);
                for (dst, src) in gray_linear2.iter_mut().zip(gray_linear.iter()) {
                    *dst = *src;
                }

                make_gray_to_unfused::(
                    src_layout,
                    dst_layout,
                    gray_linear2,
                    red_gamma,
                    green_gamma,
                    blue_gamma,
                    BIT_DEPTH,
                    GAMMA_CAP,
                )
            }
        }
    // Branch 3: RGB source to a Gray destination (with a gray TRC).
    } else if self.color_space == DataColorSpace::Rgb
        && (dst_pr.color_space == DataColorSpace::Gray && dst_pr.gray_trc.is_some())
        && dst_pr.pcs == DataColorSpace::Xyz
        && self.pcs == DataColorSpace::Xyz
    {
        // Source must be an RGB-style layout, destination must be a gray layout.
        if src_layout == Layout::Gray || src_layout == Layout::GrayAlpha {
            return Err(CmsError::InvalidLayout);
        }
        if dst_layout != Layout::Gray && dst_layout != Layout::GrayAlpha {
            return Err(CmsError::InvalidLayout);
        }

        if self.has_device_to_pcs_lut() || dst_pr.has_pcs_to_device_lut() {
            return make_lut_transform::(
                src_layout, self, dst_layout, dst_pr, options,
            );
        }

        let transform = self.transform_matrix(dst_pr).to_f32();

        // Row index 1 of the matrix supplies the RGB -> gray weights
        // (presumably the Y / luminance row - TODO confirm).
        let vector = Vector3f {
            v: [transform.v[1][0], transform.v[1][1], transform.v[1][2]],
        };

        if !T::FINITE && options.allow_extended_range_rgb_xyz {
            if let Some(gamma_evaluator) = dst_pr.try_extended_gamma_evaluator() {
                if let Some(linear_evaluator) = self.try_extended_linearizing_evaluator() {
                    use crate::conversions::make_rgb_to_gray_extended;
                    return Ok(make_rgb_to_gray_extended::(
                        src_layout,
                        dst_layout,
                        linear_evaluator,
                        gamma_evaluator,
                        vector,
                        BIT_DEPTH,
                    ));
                }
            }
        }

        let lin_r = self.build_r_linearize_table::(
            options.allow_use_cicp_transfer,
        )?;
        let lin_g = self.build_g_linearize_table::(
            options.allow_use_cicp_transfer,
        )?;
        let lin_b = self.build_b_linearize_table::(
            options.allow_use_cicp_transfer,
        )?;
        // Despite its name, `gray_linear` is the destination *gamma* table: it is built
        // with `build_gamma_table` and stored in the `gray_gamma` field below.
        let gray_linear = dst_pr.build_gamma_table::(
            &dst_pr.gray_trc,
            options.allow_use_cicp_transfer,
        )?;

        let trc_box = ToneReproductionRgbToGray:: {
            r_linear: lin_r,
            g_linear: lin_g,
            b_linear: lin_b,
            gray_gamma: gray_linear,
        };

        Ok(make_rgb_to_gray::(
            src_layout, dst_layout, trc_box, vector, GAMMA_CAP, BIT_DEPTH,
        ))
    // Branch 4: multi-channel profiles on both sides with an XYZ or Lab PCS.
    } else if (self.color_space.is_three_channels()
        || self.color_space == DataColorSpace::Cmyk
        || self.color_space == DataColorSpace::Color4)
        && (dst_pr.color_space.is_three_channels()
            || dst_pr.color_space == DataColorSpace::Cmyk
            || dst_pr.color_space == DataColorSpace::Color4)
        && (dst_pr.pcs == DataColorSpace::Xyz || dst_pr.pcs == DataColorSpace::Lab)
        && (self.pcs == DataColorSpace::Xyz || self.pcs == DataColorSpace::Lab)
    {
        // Gray layouts are rejected on both ends before the LUT transform is built.
        if src_layout == Layout::Gray || src_layout == Layout::GrayAlpha {
            return Err(CmsError::InvalidLayout);
        }
        if dst_layout == Layout::Gray || dst_layout == Layout::GrayAlpha {
            return Err(CmsError::InvalidLayout);
        }
        make_lut_transform::(
            src_layout, self, dst_layout, dst_pr, options,
        )
    } else {
        // Fallback: no layout validation is done here before calling the LUT builder.
        make_lut_transform::(
            src_layout, self, dst_layout, dst_pr, options,
        )
    }
}

/// Creates transform between source and destination profile
/// Only 8 bit is supported.
pub fn create_transform_8bit( &self, src_layout: Layout, dst_pr: &ColorProfile, dst_layout: Layout, options: TransformOptions, ) -> Result, CmsError> { self.create_transform_nbit::(src_layout, dst_pr, dst_layout, options) } pub(crate) fn get_device_to_pcs(&self, intent: RenderingIntent) -> Option<&LutWarehouse> { match intent { RenderingIntent::AbsoluteColorimetric => self.lut_a_to_b_colorimetric.as_ref(), RenderingIntent::Saturation => self.lut_a_to_b_saturation.as_ref(), RenderingIntent::RelativeColorimetric => self.lut_a_to_b_colorimetric.as_ref(), RenderingIntent::Perceptual => self.lut_a_to_b_perceptual.as_ref(), } } pub(crate) fn get_pcs_to_device(&self, intent: RenderingIntent) -> Option<&LutWarehouse> { match intent { RenderingIntent::AbsoluteColorimetric => self.lut_b_to_a_colorimetric.as_ref(), RenderingIntent::Saturation => self.lut_b_to_a_saturation.as_ref(), RenderingIntent::RelativeColorimetric => self.lut_b_to_a_colorimetric.as_ref(), RenderingIntent::Perceptual => self.lut_b_to_a_perceptual.as_ref(), } } } #[cfg(test)] mod tests { use crate::{ColorProfile, DataColorSpace, Layout, RenderingIntent, TransformOptions}; use rand::Rng; #[test] fn test_transform_rgb8() { let mut srgb_profile = ColorProfile::new_srgb(); let bt2020_profile = ColorProfile::new_bt2020(); let random_point_x = rand::rng().random_range(0..255); let transform = bt2020_profile .create_transform_8bit( Layout::Rgb, &srgb_profile, Layout::Rgb, TransformOptions::default(), ) .unwrap(); let src = vec![random_point_x; 256 * 256 * 3]; let mut dst = vec![random_point_x; 256 * 256 * 3]; transform.transform(&src, &mut dst).unwrap(); let transform = bt2020_profile .create_transform_8bit( Layout::Rgb, &srgb_profile, Layout::Rgb, TransformOptions { ..TransformOptions::default() }, ) .unwrap(); transform.transform(&src, &mut dst).unwrap(); srgb_profile.rendering_intent = RenderingIntent::RelativeColorimetric; let transform = bt2020_profile .create_transform_8bit( Layout::Rgb, &srgb_profile, 
Layout::Rgb, TransformOptions { ..TransformOptions::default() }, ) .unwrap(); transform.transform(&src, &mut dst).unwrap(); srgb_profile.rendering_intent = RenderingIntent::Saturation; let transform = bt2020_profile .create_transform_8bit( Layout::Rgb, &srgb_profile, Layout::Rgb, TransformOptions { ..TransformOptions::default() }, ) .unwrap(); transform.transform(&src, &mut dst).unwrap(); } #[test] fn test_transform_rgba8() { let srgb_profile = ColorProfile::new_srgb(); let bt2020_profile = ColorProfile::new_bt2020(); let random_point_x = rand::rng().random_range(0..255); let transform = bt2020_profile .create_transform_8bit( Layout::Rgba, &srgb_profile, Layout::Rgba, TransformOptions::default(), ) .unwrap(); let src = vec![random_point_x; 256 * 256 * 4]; let mut dst = vec![random_point_x; 256 * 256 * 4]; transform.transform(&src, &mut dst).unwrap(); } #[test] fn test_transform_gray_to_rgb8() { let gray_profile = ColorProfile::new_gray_with_gamma(2.2f32); let bt2020_profile = ColorProfile::new_bt2020(); let random_point_x = rand::rng().random_range(0..255); let transform = gray_profile .create_transform_8bit( Layout::Gray, &bt2020_profile, Layout::Rgb, TransformOptions::default(), ) .unwrap(); let src = vec![random_point_x; 256 * 256]; let mut dst = vec![random_point_x; 256 * 256 * 3]; transform.transform(&src, &mut dst).unwrap(); } #[test] fn test_transform_gray_to_rgba8() { let srgb_profile = ColorProfile::new_gray_with_gamma(2.2f32); let bt2020_profile = ColorProfile::new_bt2020(); let random_point_x = rand::rng().random_range(0..255); let transform = srgb_profile .create_transform_8bit( Layout::Gray, &bt2020_profile, Layout::Rgba, TransformOptions::default(), ) .unwrap(); let src = vec![random_point_x; 256 * 256]; let mut dst = vec![random_point_x; 256 * 256 * 4]; transform.transform(&src, &mut dst).unwrap(); } #[test] fn test_transform_gray_to_gray_alpha8() { let srgb_profile = ColorProfile::new_gray_with_gamma(2.2f32); let bt2020_profile = 
ColorProfile::new_bt2020(); let random_point_x = rand::rng().random_range(0..255); let transform = srgb_profile .create_transform_8bit( Layout::Gray, &bt2020_profile, Layout::GrayAlpha, TransformOptions::default(), ) .unwrap(); let src = vec![random_point_x; 256 * 256]; let mut dst = vec![random_point_x; 256 * 256 * 2]; transform.transform(&src, &mut dst).unwrap(); } #[test] fn test_transform_rgb10() { let srgb_profile = ColorProfile::new_srgb(); let bt2020_profile = ColorProfile::new_bt2020(); let random_point_x = rand::rng().random_range(0..((1 << 10) - 1)); let transform = bt2020_profile .create_transform_10bit( Layout::Rgb, &srgb_profile, Layout::Rgb, TransformOptions::default(), ) .unwrap(); let src = vec![random_point_x; 256 * 256 * 3]; let mut dst = vec![random_point_x; 256 * 256 * 3]; transform.transform(&src, &mut dst).unwrap(); } #[test] fn test_transform_rgb12() { let srgb_profile = ColorProfile::new_srgb(); let bt2020_profile = ColorProfile::new_bt2020(); let random_point_x = rand::rng().random_range(0..((1 << 12) - 1)); let transform = bt2020_profile .create_transform_12bit( Layout::Rgb, &srgb_profile, Layout::Rgb, TransformOptions::default(), ) .unwrap(); let src = vec![random_point_x; 256 * 256 * 3]; let mut dst = vec![random_point_x; 256 * 256 * 3]; transform.transform(&src, &mut dst).unwrap(); } #[test] fn test_transform_rgb16() { let srgb_profile = ColorProfile::new_srgb(); let bt2020_profile = ColorProfile::new_bt2020(); let random_point_x = rand::rng().random_range(0..((1u32 << 16u32) - 1u32)) as u16; let transform = bt2020_profile .create_transform_16bit( Layout::Rgb, &srgb_profile, Layout::Rgb, TransformOptions::default(), ) .unwrap(); let src = vec![random_point_x; 256 * 256 * 3]; let mut dst = vec![random_point_x; 256 * 256 * 3]; transform.transform(&src, &mut dst).unwrap(); } #[test] fn test_transform_round_trip_rgb8() { let srgb_profile = ColorProfile::new_srgb(); let bt2020_profile = ColorProfile::new_bt2020(); let transform = 
srgb_profile .create_transform_8bit( Layout::Rgb, &bt2020_profile, Layout::Rgb, TransformOptions::default(), ) .unwrap(); let mut src = vec![0u8; 256 * 256 * 3]; for dst in src.chunks_exact_mut(3) { dst[0] = 175; dst[1] = 75; dst[2] = 13; } let mut dst = vec![0u8; 256 * 256 * 3]; transform.transform(&src, &mut dst).unwrap(); let transform_inverse = bt2020_profile .create_transform_8bit( Layout::Rgb, &srgb_profile, Layout::Rgb, TransformOptions::default(), ) .unwrap(); transform_inverse.transform(&dst, &mut src).unwrap(); for src in src.chunks_exact_mut(3) { let diff0 = (src[0] as i32 - 175).abs(); let diff1 = (src[1] as i32 - 75).abs(); let diff2 = (src[2] as i32 - 13).abs(); assert!( diff0 < 3, "On channel 0 difference should be less than 3, but it was {diff0}" ); assert!( diff1 < 3, "On channel 1 difference should be less than 3, but it was {diff1}" ); assert!( diff2 < 3, "On channel 2 difference should be less than 3, but it was {diff2}" ); } } #[test] fn test_transform_round_trip_rgb10() { let srgb_profile = ColorProfile::new_srgb(); let bt2020_profile = ColorProfile::new_bt2020(); let transform = srgb_profile .create_transform_10bit( Layout::Rgb, &bt2020_profile, Layout::Rgb, TransformOptions::default(), ) .unwrap(); let mut src = vec![0u16; 256 * 256 * 3]; for dst in src.chunks_exact_mut(3) { dst[0] = 175; dst[1] = 256; dst[2] = 512; } let mut dst = vec![0u16; 256 * 256 * 3]; transform.transform(&src, &mut dst).unwrap(); let transform_inverse = bt2020_profile .create_transform_10bit( Layout::Rgb, &srgb_profile, Layout::Rgb, TransformOptions::default(), ) .unwrap(); transform_inverse.transform(&dst, &mut src).unwrap(); for src in src.chunks_exact_mut(3) { let diff0 = (src[0] as i32 - 175).abs(); let diff1 = (src[1] as i32 - 256).abs(); let diff2 = (src[2] as i32 - 512).abs(); assert!( diff0 < 15, "On channel 0 difference should be less than 15, but it was {diff0}" ); assert!( diff1 < 15, "On channel 1 difference should be less than 15, but it was {diff1}" ); 
assert!( diff2 < 15, "On channel 2 difference should be less than 15, but it was {diff2}" ); } } #[test] fn test_transform_round_trip_rgb12() { let srgb_profile = ColorProfile::new_srgb(); let bt2020_profile = ColorProfile::new_bt2020(); let transform = srgb_profile .create_transform_12bit( Layout::Rgb, &bt2020_profile, Layout::Rgb, TransformOptions::default(), ) .unwrap(); let mut src = vec![0u16; 256 * 256 * 3]; for dst in src.chunks_exact_mut(3) { dst[0] = 1750; dst[1] = 2560; dst[2] = 3143; } let mut dst = vec![0u16; 256 * 256 * 3]; transform.transform(&src, &mut dst).unwrap(); let transform_inverse = bt2020_profile .create_transform_12bit( Layout::Rgb, &srgb_profile, Layout::Rgb, TransformOptions::default(), ) .unwrap(); transform_inverse.transform(&dst, &mut src).unwrap(); for src in src.chunks_exact_mut(3) { let diff0 = (src[0] as i32 - 1750).abs(); let diff1 = (src[1] as i32 - 2560).abs(); let diff2 = (src[2] as i32 - 3143).abs(); assert!( diff0 < 25, "On channel 0 difference should be less than 25, but it was {diff0}" ); assert!( diff1 < 25, "On channel 1 difference should be less than 25, but it was {diff1}" ); assert!( diff2 < 25, "On channel 2 difference should be less than 25, but it was {diff2}" ); } } #[test] fn test_transform_round_trip_rgb16() { let srgb_profile = ColorProfile::new_srgb(); let bt2020_profile = ColorProfile::new_bt2020(); let transform = srgb_profile .create_transform_16bit( Layout::Rgb, &bt2020_profile, Layout::Rgb, TransformOptions::default(), ) .unwrap(); let mut src = vec![0u16; 256 * 256 * 3]; for dst in src.chunks_exact_mut(3) { dst[0] = 1760; dst[1] = 2560; dst[2] = 5120; } let mut dst = vec![0u16; 256 * 256 * 3]; transform.transform(&src, &mut dst).unwrap(); let transform_inverse = bt2020_profile .create_transform_16bit( Layout::Rgb, &srgb_profile, Layout::Rgb, TransformOptions::default(), ) .unwrap(); transform_inverse.transform(&dst, &mut src).unwrap(); for src in src.chunks_exact_mut(3) { let diff0 = (src[0] as i32 - 
/// Converts a pure-blue RGBA pixel to gray and back, with and without the
/// extended-range option, and checks every result against a reference value.
///
/// The comparisons use the absolute difference: a signed `(x - c) < eps` check
/// would also accept any result far below the reference.
#[test]
fn test_transform_rgb_to_gray_extended() {
    let srgb = ColorProfile::new_srgb();
    let mut gray_profile = ColorProfile::new_gray_with_gamma(1.0);
    gray_profile.color_space = DataColorSpace::Gray;
    gray_profile.gray_trc = srgb.red_trc.clone();

    // Single RGBA pixel with only the blue channel set.
    let mut test_profile = vec![0f32; 4];
    test_profile[2] = 1.;
    let mut dst = vec![0f32; 1];
    let mut inverse = vec![0f32; 4];

    // RGB -> Gray, extended range enabled.
    let cvt0 = srgb
        .create_transform_f32(
            Layout::Rgba,
            &gray_profile,
            Layout::Gray,
            TransformOptions {
                allow_extended_range_rgb_xyz: true,
                ..Default::default()
            },
        )
        .unwrap();
    cvt0.transform(&test_profile, &mut dst).unwrap();
    assert!(
        (dst[0] - 0.273046).abs() < 1e-4,
        "extended RGB -> Gray produced {}",
        dst[0]
    );

    // Gray -> RGB, extended range disabled.
    let cvt_inverse = gray_profile
        .create_transform_f32(
            Layout::Gray,
            &srgb,
            Layout::Rgba,
            TransformOptions {
                allow_extended_range_rgb_xyz: false,
                ..Default::default()
            },
        )
        .unwrap();
    cvt_inverse.transform(&dst, &mut inverse).unwrap();
    assert!(
        (inverse[0] - 0.273002833).abs() < 1e-4,
        "table-based Gray -> RGB produced {}",
        inverse[0]
    );

    // RGB -> Gray, extended range disabled.
    let cvt1 = srgb
        .create_transform_f32(
            Layout::Rgba,
            &gray_profile,
            Layout::Gray,
            TransformOptions {
                allow_extended_range_rgb_xyz: false,
                ..Default::default()
            },
        )
        .unwrap();
    cvt1.transform(&test_profile, &mut dst).unwrap();
    assert!(
        (dst[0] - 0.27307168).abs() < 1e-5,
        "table-based RGB -> Gray produced {}",
        dst[0]
    );

    // Gray -> RGB, extended range enabled; reset the buffer so stale data cannot pass.
    inverse.fill(0.);
    let cvt_inverse = gray_profile
        .create_transform_f32(
            Layout::Gray,
            &srgb,
            Layout::Rgba,
            TransformOptions {
                allow_extended_range_rgb_xyz: true,
                ..Default::default()
            },
        )
        .unwrap();
    cvt_inverse.transform(&dst, &mut inverse).unwrap();
    assert!(
        (inverse[0] - 0.273002833).abs() < 1e-4,
        "extended Gray -> RGB produced {}",
        inverse[0]
    );
}
All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ use crate::cicp::create_rec709_parametric; use crate::matan::is_curve_linear16; use crate::math::m_clamp; use crate::mlaf::{mlaf, neg_mlaf}; use crate::transform::PointeeSizeExpressible; use crate::writer::FloatToFixedU8Fixed8; use crate::{CmsError, ColorProfile, DataColorSpace, Rgb, TransferCharacteristics}; use num_traits::AsPrimitive; use pxfm::{dirty_powf, f_pow, f_powf}; #[derive(Clone, Debug)] pub enum ToneReprCurve { Lut(Vec), Parametric(Vec), } impl ToneReprCurve { pub fn inverse(&self) -> Result { match self { ToneReprCurve::Lut(lut) => { let inverse_length = lut.len().max(256); Ok(ToneReprCurve::Lut(invert_lut(lut, inverse_length))) } ToneReprCurve::Parametric(parametric) => ParametricCurve::new(parametric) .and_then(|x| x.invert()) .map(|x| ToneReprCurve::Parametric([x.g, x.a, x.b, x.c, x.d, x.e, x.f].to_vec())) .ok_or(CmsError::BuildTransferFunction), } } /// Creates tone curve evaluator pub fn make_linear_evaluator( &self, ) -> Result, CmsError> { match self { ToneReprCurve::Lut(lut) => { if lut.is_empty() { return Ok(Box::new(ToneCurveEvaluatorLinear {})); } if lut.len() == 1 { let gamma = u8_fixed_8number_to_float(lut[0]); return Ok(Box::new(ToneCurveEvaluatorPureGamma { gamma })); } let converted_curve = lut.iter().map(|&x| x as f32 / 65535.0).collect::>(); Ok(Box::new(ToneCurveLutEvaluator { lut: converted_curve, })) } ToneReprCurve::Parametric(parametric) => { let parametric_curve = ParametricCurve::new(parametric).ok_or(CmsError::BuildTransferFunction)?; Ok(Box::new(ToneCurveParametricEvaluator { parametric: parametric_curve, })) } } } /// Creates tone curve evaluator from transfer characteristics pub fn make_cicp_linear_evaluator( transfer_characteristics: TransferCharacteristics, ) -> Result, CmsError> { if !transfer_characteristics.has_transfer_curve() { return Err(CmsError::BuildTransferFunction); } Ok(Box::new(ToneCurveCicpLinearEvaluator { trc: transfer_characteristics, })) } /// Creates tone curve inverse evaluator pub fn 
make_gamma_evaluator( &self, ) -> Result, CmsError> { match self { ToneReprCurve::Lut(lut) => { if lut.is_empty() { return Ok(Box::new(ToneCurveEvaluatorLinear {})); } if lut.len() == 1 { let gamma = 1. / u8_fixed_8number_to_float(lut[0]); return Ok(Box::new(ToneCurveEvaluatorPureGamma { gamma })); } let inverted_lut = invert_lut(lut, 16384); let converted_curve = inverted_lut .iter() .map(|&x| x as f32 / 65535.0) .collect::>(); Ok(Box::new(ToneCurveLutEvaluator { lut: converted_curve, })) } ToneReprCurve::Parametric(parametric) => { let parametric_curve = ParametricCurve::new(parametric) .and_then(|x| x.invert()) .ok_or(CmsError::BuildTransferFunction)?; Ok(Box::new(ToneCurveParametricEvaluator { parametric: parametric_curve, })) } } } /// Creates tone curve inverse evaluator from transfer characteristics pub fn make_cicp_gamma_evaluator( transfer_characteristics: TransferCharacteristics, ) -> Result, CmsError> { if !transfer_characteristics.has_transfer_curve() { return Err(CmsError::BuildTransferFunction); } Ok(Box::new(ToneCurveCicpGammaEvaluator { trc: transfer_characteristics, })) } } struct ToneCurveCicpLinearEvaluator { trc: TransferCharacteristics, } struct ToneCurveCicpGammaEvaluator { trc: TransferCharacteristics, } impl ToneCurveEvaluator for ToneCurveCicpLinearEvaluator { fn evaluate_tristimulus(&self, rgb: Rgb) -> Rgb { Rgb::new( self.trc.linearize(rgb.r as f64) as f32, self.trc.linearize(rgb.g as f64) as f32, self.trc.linearize(rgb.b as f64) as f32, ) } fn evaluate_value(&self, value: f32) -> f32 { self.trc.linearize(value as f64) as f32 } } impl ToneCurveEvaluator for ToneCurveCicpGammaEvaluator { fn evaluate_tristimulus(&self, rgb: Rgb) -> Rgb { Rgb::new( self.trc.gamma(rgb.r as f64) as f32, self.trc.gamma(rgb.g as f64) as f32, self.trc.gamma(rgb.b as f64) as f32, ) } fn evaluate_value(&self, value: f32) -> f32 { self.trc.gamma(value as f64) as f32 } } struct ToneCurveLutEvaluator { lut: Vec, } impl ToneCurveEvaluator for ToneCurveLutEvaluator { fn 
/// Samples `eotf` at `num_entries` evenly spaced points of `[0, 1]` and
/// quantizes each result to 16 bits.
///
/// Each sample is scaled by 65535, rounded half up and saturated to
/// `0..=65535`; a NaN sample becomes 0.
///
/// Edge cases:
/// * `num_entries <= 0` yields an empty table instead of casting a negative
///   count to a huge `usize`.
/// * `num_entries == 1` samples `eotf(0.0)` instead of dividing `0 / 0`.
pub(crate) fn build_trc_table(num_entries: i32, eotf: impl Fn(f64) -> f64) -> Vec<u16> {
    let len = num_entries.max(0) as usize;
    // Index of the last sample; never 0 so the division below stays finite.
    let last_index = len.saturating_sub(1).max(1) as f64;

    (0..len)
        .map(|i| {
            let y = eotf(i as f64 / last_index);
            // Round half up, then saturate; the float -> u16 cast maps NaN to 0.
            (y * 65535.0 + 0.5).clamp(0.0, 65535.0).floor() as u16
        })
        .collect()
}
/// Decodes an unsigned 8.8 fixed-point number (`u8Fixed8Number`) into `f32`.
///
/// The high byte is the integer part and the low byte the fraction:
/// * `0x0000` -> 0.0
/// * `0x0100` -> 1.0
/// * `0xffff` -> 255 + 255/256
#[inline]
pub(crate) fn u8_fixed_8number_to_float(x: u16) -> f32 {
    // Widen first so the division is carried out in f64, then narrow the result.
    let raw = f64::from(x as i32);
    let decoded = raw / 256.0;
    decoded as f32
}
* (table.len() - 1) as f32; let upper: i32 = value.ceil() as i32; let lower: i32 = value.floor() as i32; let diff = upper as f32 - value; let tu = table[upper as usize]; mlaf(neg_mlaf(tu, tu, diff), table[lower as usize], diff) } /// Lut interpolation float where values is already clamped #[inline(always)] #[allow(dead_code)] pub(crate) fn lut_interp_linear_float_clamped(x: f32, table: &[f32]) -> f32 { let value = x * (table.len() - 1) as f32; let upper: i32 = value.ceil() as i32; let lower: i32 = value.floor() as i32; let diff = upper as f32 - value; let tu = table[upper as usize]; mlaf(neg_mlaf(tu, tu, diff), table[lower as usize], diff) } #[inline] pub(crate) fn lut_interp_linear(input_value: f64, table: &[u16]) -> f32 { let mut input_value = input_value; if table.is_empty() { return input_value as f32; } input_value *= (table.len() - 1) as f64; let upper: i32 = input_value.ceil() as i32; let lower: i32 = input_value.floor() as i32; let w0 = table[(upper as usize).min(table.len() - 1)] as f64; let w1 = 1. 
- (upper as f64 - input_value); let w2 = table[(lower as usize).min(table.len() - 1)] as f64; let w3 = upper as f64 - input_value; let value: f32 = mlaf(w2 * w3, w0, w1) as f32; value * (1.0 / 65535.0) } fn linear_lut_interpolate( table: &[u16], ) -> Box<[f32; N]> { let mut gamma_table = Box::new([0f32; N]); let max_value = if T::FINITE { (1 << BIT_DEPTH) - 1 } else { T::NOT_FINITE_LINEAR_TABLE_SIZE - 1 }; let cap_values = if T::FINITE { (1u32 << BIT_DEPTH) as usize } else { T::NOT_FINITE_LINEAR_TABLE_SIZE }; assert!(cap_values <= N, "Invalid lut table construction"); let scale_value = 1f64 / max_value as f64; for (i, g) in gamma_table.iter_mut().enumerate().take(cap_values) { *g = lut_interp_linear(i as f64 * scale_value, table); } gamma_table } fn linear_curve_parametric( params: &[f32], ) -> Option> { let params = ParametricCurve::new(params)?; let mut gamma_table = Box::new([0f32; N]); let max_value = if T::FINITE { (1 << BIT_DEPTH) - 1 } else { T::NOT_FINITE_LINEAR_TABLE_SIZE - 1 }; let cap_value = if T::FINITE { 1 << BIT_DEPTH } else { T::NOT_FINITE_LINEAR_TABLE_SIZE }; let scale_value = 1f32 / max_value as f32; for (i, g) in gamma_table.iter_mut().enumerate().take(cap_value) { let x = i as f32 * scale_value; *g = m_clamp(params.eval(x), 0.0, 1.0); } Some(gamma_table) } fn linear_curve_parametric_s(params: &[f32]) -> Option> { let params = ParametricCurve::new(params)?; let mut gamma_table = Box::new([0f32; N]); let scale_value = 1f32 / (N - 1) as f32; for (i, g) in gamma_table.iter_mut().enumerate().take(N) { let x = i as f32 * scale_value; *g = m_clamp(params.eval(x), 0.0, 1.0); } Some(gamma_table) } pub(crate) fn make_gamma_linear_table< T: Default + Copy + 'static + PointeeSizeExpressible, const BUCKET: usize, const N: usize, >( bit_depth: usize, ) -> Box<[T; BUCKET]> where f32: AsPrimitive, { let mut table = Box::new([T::default(); BUCKET]); let max_range = if T::FINITE { (1f64 / ((N - 1) as f64 / (1 << bit_depth) as f64)) as f32 } else { (1f64 / ((N - 
1) as f64)) as f32 }; for (v, output) in table.iter_mut().take(N).enumerate() { if T::FINITE { *output = (v as f32 * max_range).round().as_(); } else { *output = (v as f32 * max_range).as_(); } } table } #[inline] fn lut_interp_linear_gamma_impl< T: Default + Copy + 'static + PointeeSizeExpressible, const N: usize, const BIT_DEPTH: usize, >( input_value: u32, table: &[u16], ) -> T where u32: AsPrimitive, { // Start scaling input_value to the length of the array: GAMMA_CAP*(length-1). // We'll divide out the GAMMA_CAP next let mut value: u32 = input_value * (table.len() - 1) as u32; let cap_value = N - 1; // equivalent to ceil(value/GAMMA_CAP) let upper: u32 = value.div_ceil(cap_value as u32); // equivalent to floor(value/GAMMA_CAP) let lower: u32 = value / cap_value as u32; // interp is the distance from upper to value scaled to 0..GAMMA_CAP let interp: u32 = value % cap_value as u32; let lw_value = table[lower as usize]; let hw_value = table[upper as usize]; // the table values range from 0..65535 value = mlaf( hw_value as u32 * interp, lw_value as u32, (N - 1) as u32 - interp, ); // 0..(65535*GAMMA_CAP) // round and scale let max_colors = if T::FINITE { (1 << BIT_DEPTH) - 1 } else { 1 }; value += (cap_value * 65535 / max_colors / 2) as u32; // scale to 0...max_colors value /= (cap_value * 65535 / max_colors) as u32; value.as_() } #[inline] fn lut_interp_linear_gamma_impl_f32< T: Default + Copy + 'static + PointeeSizeExpressible, const N: usize, const BIT_DEPTH: usize, >( input_value: u32, table: &[u16], ) -> T where f32: AsPrimitive, { // Start scaling input_value to the length of the array: GAMMA_CAP*(length-1). 
// We'll divide out the GAMMA_CAP next let guess: u32 = input_value * (table.len() - 1) as u32; let cap_value = N - 1; // equivalent to ceil(value/GAMMA_CAP) let upper: u32 = guess.div_ceil(cap_value as u32); // equivalent to floor(value/GAMMA_CAP) let lower: u32 = guess / cap_value as u32; // interp is the distance from upper to value scaled to 0..GAMMA_CAP let interp: u32 = guess % cap_value as u32; let lw_value = table[lower as usize]; let hw_value = table[upper as usize]; // the table values range from 0..65535 let mut value = mlaf( hw_value as f32 * interp as f32, lw_value as f32, (N - 1) as f32 - interp as f32, ); // 0..(65535*GAMMA_CAP) // round and scale let max_colors = if T::FINITE { (1 << BIT_DEPTH) - 1 } else { 1 }; value /= (cap_value * 65535 / max_colors) as f32; value.as_() } #[doc(hidden)] pub trait GammaLutInterpolate { fn gamma_lut_interp< T: Default + Copy + 'static + PointeeSizeExpressible, const N: usize, const BIT_DEPTH: usize, >( input_value: u32, table: &[u16], ) -> T where u32: AsPrimitive, f32: AsPrimitive; } macro_rules! gamma_lut_interp_fixed { ($i_type: ident) => { impl GammaLutInterpolate for $i_type { #[inline] fn gamma_lut_interp< T: Default + Copy + 'static + PointeeSizeExpressible, const N: usize, const BIT_DEPTH: usize, >( input_value: u32, table: &[u16], ) -> T where u32: AsPrimitive, { lut_interp_linear_gamma_impl::(input_value, table) } } }; } gamma_lut_interp_fixed!(u8); gamma_lut_interp_fixed!(u16); macro_rules! 
/// Linearly interpolates a 16-bit lookup table at a 16-bit position.
///
/// `input_value` is mapped from `0..=65535` onto the index range
/// `0..=table.len() - 1`; the two neighbouring nodes are blended with
/// integer weights that sum to 65535.
///
/// An empty table is treated as the identity curve and returns
/// `input_value` unchanged (previously this underflowed `len - 1`).
#[inline]
pub(crate) fn lut_interp_linear16(input_value: u16, table: &[u16]) -> u16 {
    let Some(last_index) = table.len().checked_sub(1) else {
        return input_value;
    };
    // Position scaled by 65535; done in u64 so that tables longer than
    // 65536 entries can neither overflow the product nor truncate an index.
    let scaled = input_value as u64 * last_index as u64;
    // floor(scaled / 65535) and ceil(scaled / 65535); both are <= last_index
    // because scaled <= 65535 * last_index.
    let lower = (scaled / 65535) as usize;
    let upper = scaled.div_ceil(65535) as usize;
    // Weight of the upper node, in 0..65535.
    let interp = scaled % 65535;
    let blended =
        (table[upper] as u64 * interp + table[lower] as u64 * (65535 - interp)) / 65535;
    blended as u16
}
/// Returns `true` when two parameter lists are element-wise equal within an
/// absolute tolerance of `1e-4`.
///
/// Lists of different length never match, and a NaN difference counts as a
/// mismatch (previously `zip` compared only the common prefix and NaN slipped
/// through because `NaN > 1e-4` is false).
#[inline]
fn compare_parametric(src: &[f32], dst: &[f32]) -> bool {
    src.len() == dst.len()
        && src
            .iter()
            .zip(dst.iter())
            // `<=` rather than `!(> …)` so that NaN fails the comparison.
            .all(|(a, b)| (a - b).abs() <= 1e-4)
}
i32 = 1; // 'int' Give spacing for negative values let mut r: i32 = 0x10000; let mut x: i32 = 0; let mut res: i32; let length = lut_table.len() as i32; let mut num_zeroes: i32 = 0; for &item in lut_table.iter() { if item == 0 { num_zeroes += 1 } else { break; } } if num_zeroes == 0 && value as i32 == 0 { return 0u16; } let mut num_of_polys: i32 = 0; for &item in lut_table.iter().rev() { if item == 0xffff { num_of_polys += 1 } else { break; } } // Does the curve belong to this case? if num_zeroes > 1 || num_of_polys > 1 { let a_0: i32; let b_0: i32; // Identify if value fall downto 0 or FFFF zone if value as i32 == 0 { return 0u16; } // if (Value == 0xFFFF) return 0xFFFF; // else restrict to valid zone if num_zeroes > 1 { a_0 = (num_zeroes - 1) * 0xffff / (length - 1); l = a_0 - 1 } if num_of_polys > 1 { b_0 = (length - 1 - num_of_polys) * 0xffff / (length - 1); r = b_0 + 1 } } if r <= l { // If this happens LutTable is not invertible return 0u16; } while r > l { x = (l + r) / 2; res = lut_interp_linear16((x - 1) as u16, lut_table) as i32; if res == value as i32 { // Found exact match. return (x - 1) as u16; } if res > value as i32 { r = x - 1 } else { l = x + 1 } } // Not found, should we interpolate? 
// Get surrounding nodes debug_assert!(x >= 1); let val2: f64 = (length - 1) as f64 * ((x - 1) as f64 / 65535.0); let cell0: i32 = val2.floor() as i32; let cell1: i32 = val2.ceil() as i32; if cell0 == cell1 { return x as u16; } let y0: f64 = lut_table[cell0 as usize] as f64; let x0: f64 = 65535.0 * cell0 as f64 / (length - 1) as f64; let y1: f64 = lut_table[cell1 as usize] as f64; let x1: f64 = 65535.0 * cell1 as f64 / (length - 1) as f64; let a: f64 = (y1 - y0) / (x1 - x0); let b: f64 = mlaf(y0, -a, x0); if a.abs() < 0.01f64 { return x as u16; } let f: f64 = (value as i32 as f64 - b) / a; if f < 0.0 { return 0u16; } if f >= 65535.0 { return 0xffffu16; } (f + 0.5f64).floor() as u16 } fn lut_inverse_interp16_boxed(value: u16, lut_table: &[u16; N]) -> u16 { let mut l: i32 = 1; // 'int' Give spacing for negative values let mut r: i32 = 0x10000; let mut x: i32 = 0; let mut res: i32; let length = lut_table.len() as i32; let mut num_zeroes: i32 = 0; for &item in lut_table.iter() { if item == 0 { num_zeroes += 1 } else { break; } } if num_zeroes == 0 && value as i32 == 0 { return 0u16; } let mut num_of_polys: i32 = 0; for &item in lut_table.iter().rev() { if item == 0xffff { num_of_polys += 1 } else { break; } } // Does the curve belong to this case? if num_zeroes > 1 || num_of_polys > 1 { let a_0: i32; let b_0: i32; // Identify if value fall downto 0 or FFFF zone if value as i32 == 0 { return 0u16; } // if (Value == 0xFFFF) return 0xFFFF; // else restrict to valid zone if num_zeroes > 1 { a_0 = (num_zeroes - 1) * 0xffff / (length - 1); l = a_0 - 1 } if num_of_polys > 1 { b_0 = (length - 1 - num_of_polys) * 0xffff / (length - 1); r = b_0 + 1 } } if r <= l { // If this happens LutTable is not invertible return 0u16; } while r > l { x = (l + r) / 2; res = lut_interp_linear16_boxed((x - 1) as u16, lut_table) as i32; if res == value as i32 { // Found exact match. 
return (x - 1) as u16; } if res > value as i32 { r = x - 1 } else { l = x + 1 } } // Not found, should we interpolate? // Get surrounding nodes debug_assert!(x >= 1); let val2: f64 = (length - 1) as f64 * ((x - 1) as f64 / 65535.0); let cell0: i32 = val2.floor() as i32; let cell1: i32 = val2.ceil() as i32; if cell0 == cell1 { return x as u16; } let y0: f64 = lut_table[cell0 as usize] as f64; let x0: f64 = 65535.0 * cell0 as f64 / (length - 1) as f64; let y1: f64 = lut_table[cell1 as usize] as f64; let x1: f64 = 65535.0 * cell1 as f64 / (length - 1) as f64; let a: f64 = (y1 - y0) / (x1 - x0); let b: f64 = mlaf(y0, -a, x0); if a.abs() < 0.01f64 { return x as u16; } let f: f64 = (value as i32 as f64 - b) / a; if f < 0.0 { return 0u16; } if f >= 65535.0 { return 0xffffu16; } (f + 0.5f64).floor() as u16 } fn invert_lut(table: &[u16], out_length: usize) -> Vec { // For now, we invert the lut by creating a lut of size out_length // and attempting to look up a value for each entry using lut_inverse_interp16 let mut output = vec![0u16; out_length]; let scale_value = 65535f64 / (out_length - 1) as f64; for (i, out) in output.iter_mut().enumerate() { let x: f64 = i as f64 * scale_value; let input: u16 = (x + 0.5f64).floor() as u16; *out = lut_inverse_interp16(input, table); } output } fn invert_lut_boxed(table: &[u16; N], out_length: usize) -> Vec { // For now, we invert the lut by creating a lut of size out_length // and attempting to look up a value for each entry using lut_inverse_interp16 let mut output = vec![0u16; out_length]; let scale_value = 65535f64 / (out_length - 1) as f64; for (i, out) in output.iter_mut().enumerate() { let x: f64 = i as f64 * scale_value; let input: u16 = (x + 0.5f64).floor() as u16; *out = lut_inverse_interp16_boxed(input, table); } output } impl ToneReprCurve { pub(crate) fn to_clut(&self) -> Result, CmsError> { match self { ToneReprCurve::Lut(lut) => { if lut.is_empty() { let passthrough_table = passthrough_table::(); 
Ok(passthrough_table.to_vec()) } else { Ok(lut .iter() .map(|&x| x as f32 * (1. / 65535.)) .collect::>()) } } ToneReprCurve::Parametric(_) => { let curve = self .build_linearize_table::() .ok_or(CmsError::InvalidTrcCurve)?; let max_value = f32::NOT_FINITE_LINEAR_TABLE_SIZE - 1; let sliced = &curve[..max_value]; Ok(sliced.to_vec()) } } } pub(crate) fn build_linearize_table< T: PointeeSizeExpressible, const N: usize, const BIT_DEPTH: usize, >( &self, ) -> Option> { match self { ToneReprCurve::Parametric(params) => linear_curve_parametric::(params), ToneReprCurve::Lut(data) => match data.len() { 0 => Some(passthrough_table::()), 1 => Some(linear_forward_table::(data[0])), _ => Some(linear_lut_interpolate::(data)), }, } } pub(crate) fn build_gamma_table< T: Default + Copy + 'static + PointeeSizeExpressible + GammaLutInterpolate, const BUCKET: usize, const N: usize, const BIT_DEPTH: usize, >( &self, ) -> Option> where f32: AsPrimitive, u32: AsPrimitive, { match self { ToneReprCurve::Parametric(params) => { if params.len() == 5 { let srgb_params = vec![2.4, 1. / 1.055, 0.055 / 1.055, 1. / 12.92, 0.04045]; let rec709_params = create_rec709_parametric(); let mut lc_params: [f32; 5] = [0.; 5]; for (dst, src) in lc_params.iter_mut().zip(params.iter()) { *dst = *src; } if compare_parametric(lc_params.as_slice(), srgb_params.as_slice()) { return Some( TransferCharacteristics::Srgb .make_gamma_table::(BIT_DEPTH), ); } if compare_parametric(lc_params.as_slice(), rec709_params.as_slice()) { return Some( TransferCharacteristics::Bt709 .make_gamma_table::(BIT_DEPTH), ); } } let parametric_curve = ParametricCurve::new(params); if let Some(v) = parametric_curve? 
.invert() .map(|x| make_gamma_parametric_table::(x)) { return Some(v); } let mut gamma_table_uint = Box::new([0; N]); let inverted_size: usize = N; let gamma_table = linear_curve_parametric_s::(params)?; for (&src, dst) in gamma_table.iter().zip(gamma_table_uint.iter_mut()) { *dst = (src * 65535f32) as u16; } let inverted = invert_lut_boxed(&gamma_table_uint, inverted_size); Some(make_gamma_lut::(&inverted)) } ToneReprCurve::Lut(data) => match data.len() { 0 => Some(make_gamma_linear_table::(BIT_DEPTH)), 1 => Some(make_gamma_pow_table::( 1. / u8_fixed_8number_to_float(data[0]), BIT_DEPTH, )), _ => { let mut inverted_size = data.len(); if inverted_size < 256 { inverted_size = 256 } let inverted = invert_lut(data, inverted_size); Some(make_gamma_lut::(&inverted)) } }, } } } impl ColorProfile { /// Produces LUT for 8 bit tone linearization pub fn build_8bit_lin_table( &self, trc: &Option, ) -> Result, CmsError> { trc.as_ref() .and_then(|trc| trc.build_linearize_table::()) .ok_or(CmsError::BuildTransferFunction) } /// Produces LUT for Gray transfer curve with N depth pub fn build_gray_linearize_table< T: PointeeSizeExpressible, const N: usize, const BIT_DEPTH: usize, >( &self, ) -> Result, CmsError> { self.gray_trc .as_ref() .and_then(|trc| trc.build_linearize_table::()) .ok_or(CmsError::BuildTransferFunction) } /// Produces LUT for Red transfer curve with N depth pub fn build_r_linearize_table< T: PointeeSizeExpressible, const N: usize, const BIT_DEPTH: usize, >( &self, use_cicp: bool, ) -> Result, CmsError> { if use_cicp { if let Some(tc) = self.cicp.as_ref().map(|c| c.transfer_characteristics) { if tc.has_transfer_curve() { return Ok(tc.make_linear_table::()); } } } self.red_trc .as_ref() .and_then(|trc| trc.build_linearize_table::()) .ok_or(CmsError::BuildTransferFunction) } /// Produces LUT for Green transfer curve with N depth pub fn build_g_linearize_table< T: PointeeSizeExpressible, const N: usize, const BIT_DEPTH: usize, >( &self, use_cicp: bool, ) -> Result, 
CmsError> { if use_cicp { if let Some(tc) = self.cicp.as_ref().map(|c| c.transfer_characteristics) { if tc.has_transfer_curve() { return Ok(tc.make_linear_table::()); } } } self.green_trc .as_ref() .and_then(|trc| trc.build_linearize_table::()) .ok_or(CmsError::BuildTransferFunction) } /// Produces LUT for Blue transfer curve with N depth pub fn build_b_linearize_table< T: PointeeSizeExpressible, const N: usize, const BIT_DEPTH: usize, >( &self, use_cicp: bool, ) -> Result, CmsError> { if use_cicp { if let Some(tc) = self.cicp.as_ref().map(|c| c.transfer_characteristics) { if tc.has_transfer_curve() { return Ok(tc.make_linear_table::()); } } } self.blue_trc .as_ref() .and_then(|trc| trc.build_linearize_table::()) .ok_or(CmsError::BuildTransferFunction) } /// Build gamma table for 8 bit depth /// Only 4092 first bins are used and values scaled in 0..255 pub fn build_8bit_gamma_table( &self, trc: &Option, use_cicp: bool, ) -> Result, CmsError> { self.build_gamma_table::(trc, use_cicp) } /// Build gamma table for 10 bit depth /// Only 8192 first bins are used and values scaled in 0..1023 pub fn build_10bit_gamma_table( &self, trc: &Option, use_cicp: bool, ) -> Result, CmsError> { self.build_gamma_table::(trc, use_cicp) } /// Build gamma table for 12 bit depth /// Only 16384 first bins are used and values scaled in 0..4095 pub fn build_12bit_gamma_table( &self, trc: &Option, use_cicp: bool, ) -> Result, CmsError> { self.build_gamma_table::(trc, use_cicp) } /// Build gamma table for 16 bit depth /// Only 16384 first bins are used and values scaled in 0..65535 pub fn build_16bit_gamma_table( &self, trc: &Option, use_cicp: bool, ) -> Result, CmsError> { self.build_gamma_table::(trc, use_cicp) } /// Builds gamma table checking CICP for Transfer characteristics first. 
pub fn build_gamma_table< T: Default + Copy + 'static + PointeeSizeExpressible + GammaLutInterpolate, const BUCKET: usize, const N: usize, const BIT_DEPTH: usize, >( &self, trc: &Option, use_cicp: bool, ) -> Result, CmsError> where f32: AsPrimitive, u32: AsPrimitive, { if use_cicp { if let Some(tc) = self.cicp.as_ref().map(|c| c.transfer_characteristics) { if tc.has_transfer_curve() { return Ok(tc.make_gamma_table::(BIT_DEPTH)); } } } trc.as_ref() .and_then(|trc| trc.build_gamma_table::()) .ok_or(CmsError::BuildTransferFunction) } /// Checks if profile gamma can work in extended precision and we have implementation for this pub(crate) fn try_extended_gamma_evaluator( &self, ) -> Option> { if let Some(tc) = self.cicp.as_ref().map(|c| c.transfer_characteristics) { if tc.has_transfer_curve() { return Some(Box::new(ToneCurveCicpEvaluator { rgb_trc: tc.extended_gamma_tristimulus(), trc: tc.extended_gamma_single(), })); } } if !self.are_all_trc_the_same() { return None; } let reference_trc = if self.color_space == DataColorSpace::Gray { self.gray_trc.as_ref() } else { self.red_trc.as_ref() }; if let Some(red_trc) = reference_trc { return Self::make_gamma_evaluator_all_the_same(red_trc); } None } fn make_gamma_evaluator_all_the_same( red_trc: &ToneReprCurve, ) -> Option> { match red_trc { ToneReprCurve::Lut(lut) => { if lut.is_empty() { return Some(Box::new(ToneCurveEvaluatorLinear {})); } if lut.len() == 1 { let gamma = 1. / u8_fixed_8number_to_float(lut[0]); return Some(Box::new(ToneCurveEvaluatorPureGamma { gamma })); } None } ToneReprCurve::Parametric(params) => { if params.len() == 5 { let srgb_params = vec![2.4, 1. / 1.055, 0.055 / 1.055, 1. 
/ 12.92, 0.04045]; let rec709_params = create_rec709_parametric(); let mut lc_params: [f32; 5] = [0.; 5]; for (dst, src) in lc_params.iter_mut().zip(params.iter()) { *dst = *src; } if compare_parametric(lc_params.as_slice(), srgb_params.as_slice()) { return Some(Box::new(ToneCurveCicpEvaluator { rgb_trc: TransferCharacteristics::Srgb.extended_gamma_tristimulus(), trc: TransferCharacteristics::Srgb.extended_gamma_single(), })); } if compare_parametric(lc_params.as_slice(), rec709_params.as_slice()) { return Some(Box::new(ToneCurveCicpEvaluator { rgb_trc: TransferCharacteristics::Bt709.extended_gamma_tristimulus(), trc: TransferCharacteristics::Bt709.extended_gamma_single(), })); } } let parametric_curve = ParametricCurve::new(params); if let Some(v) = parametric_curve?.invert() { return Some(Box::new(ToneCurveParametricEvaluator { parametric: v })); } None } } } /// Check if all TRC are the same pub(crate) fn are_all_trc_the_same(&self) -> bool { if self.color_space == DataColorSpace::Gray { return true; } if let (Some(red_trc), Some(green_trc), Some(blue_trc)) = (&self.red_trc, &self.green_trc, &self.blue_trc) { if !matches!( (red_trc, green_trc, blue_trc), ( ToneReprCurve::Lut(_), ToneReprCurve::Lut(_), ToneReprCurve::Lut(_), ) | ( ToneReprCurve::Parametric(_), ToneReprCurve::Parametric(_), ToneReprCurve::Parametric(_) ) ) { return false; } if let (ToneReprCurve::Lut(lut0), ToneReprCurve::Lut(lut1), ToneReprCurve::Lut(lut2)) = (red_trc, green_trc, blue_trc) { if lut0 == lut1 || lut1 == lut2 { return true; } } if let ( ToneReprCurve::Parametric(lut0), ToneReprCurve::Parametric(lut1), ToneReprCurve::Parametric(lut2), ) = (red_trc, green_trc, blue_trc) { if lut0 == lut1 || lut1 == lut2 { return true; } } } false } /// Checks if profile is matrix shaper, have same TRC and TRC is linear. 
pub(crate) fn is_linear_matrix_shaper(&self) -> bool { if !self.is_matrix_shaper() { return false; } if !self.are_all_trc_the_same() { return false; } if let Some(red_trc) = &self.red_trc { return match red_trc { ToneReprCurve::Lut(lut) => { if lut.is_empty() { return true; } if is_curve_linear16(lut) { return true; } false } ToneReprCurve::Parametric(params) => { if let Some(curve) = ParametricCurve::new(params) { return curve.is_linear(); } false } }; } false } /// Checks if profile linearization can work in extended precision and we have implementation for this pub(crate) fn try_extended_linearizing_evaluator( &self, ) -> Option> { if let Some(tc) = self.cicp.as_ref().map(|c| c.transfer_characteristics) { if tc.has_transfer_curve() { return Some(Box::new(ToneCurveCicpEvaluator { rgb_trc: tc.extended_linear_tristimulus(), trc: tc.extended_linear_single(), })); } } if !self.are_all_trc_the_same() { return None; } let reference_trc = if self.color_space == DataColorSpace::Gray { self.gray_trc.as_ref() } else { self.red_trc.as_ref() }; if let Some(red_trc) = reference_trc { if let Some(value) = Self::make_linear_curve_evaluator_all_the_same(red_trc) { return value; } } None } fn make_linear_curve_evaluator_all_the_same( evaluator_curve: &ToneReprCurve, ) -> Option>> { match evaluator_curve { ToneReprCurve::Lut(lut) => { if lut.is_empty() { return Some(Some(Box::new(ToneCurveEvaluatorLinear {}))); } if lut.len() == 1 { let gamma = u8_fixed_8number_to_float(lut[0]); return Some(Some(Box::new(ToneCurveEvaluatorPureGamma { gamma }))); } } ToneReprCurve::Parametric(params) => { if params.len() == 5 { let srgb_params = vec![2.4, 1. / 1.055, 0.055 / 1.055, 1. 
/ 12.92, 0.04045]; let rec709_params = create_rec709_parametric(); let mut lc_params: [f32; 5] = [0.; 5]; for (dst, src) in lc_params.iter_mut().zip(params.iter()) { *dst = *src; } if compare_parametric(lc_params.as_slice(), srgb_params.as_slice()) { return Some(Some(Box::new(ToneCurveCicpEvaluator { rgb_trc: TransferCharacteristics::Srgb.extended_linear_tristimulus(), trc: TransferCharacteristics::Srgb.extended_linear_single(), }))); } if compare_parametric(lc_params.as_slice(), rec709_params.as_slice()) { return Some(Some(Box::new(ToneCurveCicpEvaluator { rgb_trc: TransferCharacteristics::Bt709.extended_linear_tristimulus(), trc: TransferCharacteristics::Bt709.extended_linear_single(), }))); } } let parametric_curve = ParametricCurve::new(params); if let Some(v) = parametric_curve { return Some(Some(Box::new(ToneCurveParametricEvaluator { parametric: v, }))); } } } None } } pub(crate) struct ToneCurveCicpEvaluator { rgb_trc: fn(Rgb) -> Rgb, trc: fn(f32) -> f32, } pub(crate) struct ToneCurveParametricEvaluator { parametric: ParametricCurve, } pub(crate) struct ToneCurveEvaluatorPureGamma { gamma: f32, } pub(crate) struct ToneCurveEvaluatorLinear {} impl ToneCurveEvaluator for ToneCurveCicpEvaluator { fn evaluate_tristimulus(&self, rgb: Rgb) -> Rgb { (self.rgb_trc)(rgb) } fn evaluate_value(&self, value: f32) -> f32 { (self.trc)(value) } } impl ToneCurveEvaluator for ToneCurveParametricEvaluator { fn evaluate_tristimulus(&self, rgb: Rgb) -> Rgb { Rgb::new( self.parametric.eval(rgb.r), self.parametric.eval(rgb.g), self.parametric.eval(rgb.b), ) } fn evaluate_value(&self, value: f32) -> f32 { self.parametric.eval(value) } } impl ToneCurveEvaluator for ToneCurveEvaluatorPureGamma { fn evaluate_tristimulus(&self, rgb: Rgb) -> Rgb { Rgb::new( dirty_powf(rgb.r, self.gamma), dirty_powf(rgb.g, self.gamma), dirty_powf(rgb.b, self.gamma), ) } fn evaluate_value(&self, value: f32) -> f32 { dirty_powf(value, self.gamma) } } impl ToneCurveEvaluator for ToneCurveEvaluatorLinear { 
fn evaluate_tristimulus(&self, rgb: Rgb) -> Rgb { rgb } fn evaluate_value(&self, value: f32) -> f32 { value } } pub trait ToneCurveEvaluator { fn evaluate_tristimulus(&self, rgb: Rgb) -> Rgb; fn evaluate_value(&self, value: f32) -> f32; } moxcms-0.7.7/src/writer.rs000064400000000000000000000716041046102023000136260ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
/// Appends `value` to `into` as 4 big-endian bytes.
#[inline]
fn write_u32_be(into: &mut Vec<u8>, value: u32) {
    into.extend_from_slice(&value.to_be_bytes());
}

/// Appends `value` to `into` as 2 big-endian bytes.
#[inline]
pub(crate) fn write_u16_be(into: &mut Vec<u8>, value: u16) {
    into.extend_from_slice(&value.to_be_bytes());
}

/// Appends `value` to `into` as 4 big-endian (two's complement) bytes.
#[inline]
fn write_i32_be(into: &mut Vec<u8>, value: i32) {
    into.extend_from_slice(&value.to_be_bytes());
}
vec.try_into().unwrap() } } /// Writes Multi Localized Unicode #[inline] fn write_mluc(into: &mut Vec, strings: &[LocalizableString]) -> usize { assert!(!strings.is_empty()); let start = into.len(); let tag_def: u32 = TagTypeDefinition::MultiLocalizedUnicode.into(); write_u32_be(into, tag_def); write_u32_be(into, 0); let number_of_records = strings.len(); write_u32_be(into, number_of_records as u32); write_u32_be(into, 12); // Record size, must be 12 let lang = first_two_ascii_bytes(&strings[0].language); into.extend_from_slice(&lang); let country = first_two_ascii_bytes(&strings[0].country); into.extend_from_slice(&country); let first_string_len = strings[0].value.len() * 2; write_u32_be(into, first_string_len as u32); let mut first_string_offset = 16 + 12 * strings.len(); write_u32_be(into, first_string_offset as u32); first_string_offset += first_string_len; for record in strings.iter().skip(1) { let lang = first_two_ascii_bytes(&record.language); into.extend_from_slice(&lang); let country = first_two_ascii_bytes(&record.country); into.extend_from_slice(&country); let first_string_len = record.value.len() * 2; write_u32_be(into, first_string_len as u32); write_u32_be(into, first_string_offset as u32); first_string_offset += first_string_len; } for record in strings.iter() { for chunk in record.value.encode_utf16() { write_u16_be(into, chunk); } } let end = into.len(); end - start } #[inline] fn write_string_value(into: &mut Vec, text: &ProfileText) -> usize { match text { ProfileText::PlainString(text) => { let vec = vec![LocalizableString { language: "en".to_string(), country: "US".to_string(), value: text.clone(), }]; write_mluc(into, &vec) } ProfileText::Localizable(localizable) => { if localizable.is_empty() { return 0; } write_mluc(into, localizable) } ProfileText::Description(description) => { let vec = vec![LocalizableString { language: "en".to_string(), country: "US".to_string(), value: description.unicode_string.clone(), }]; write_mluc(into, &vec) } } } 
#[inline] fn write_xyz_tag_value(into: &mut Vec, xyz: Xyzd) { let tag_definition: u32 = TagTypeDefinition::Xyz.into(); write_u32_be(into, tag_definition); write_u32_be(into, 0); let x_fixed = xyz.x.to_s15_fixed16(); write_i32_be(into, x_fixed); let y_fixed = xyz.y.to_s15_fixed16(); write_i32_be(into, y_fixed); let z_fixed = xyz.z.to_s15_fixed16(); write_i32_be(into, z_fixed); } #[inline] fn write_tag_entry(into: &mut Vec, tag: Tag, tag_entry: usize, tag_size: usize) { let tag_value: u32 = tag.into(); write_u32_be(into, tag_value); write_u32_be(into, tag_entry as u32); write_u32_be(into, tag_size as u32); } fn write_trc_entry(into: &mut Vec, trc: &ToneReprCurve) -> Result { match trc { ToneReprCurve::Lut(lut) => { let curv: u32 = TagTypeDefinition::LutToneCurve.into(); write_u32_be(into, curv); write_u32_be(into, 0); write_u32_be(into, lut.len() as u32); for item in lut.iter() { write_u16_be(into, *item); } Ok(12 + lut.len() * 2) } ToneReprCurve::Parametric(parametric_curve) => { if parametric_curve.len() > 7 || parametric_curve.len() == 6 || parametric_curve.len() == 2 { return Err(CmsError::InvalidProfile); } let para: u32 = TagTypeDefinition::ParametricToneCurve.into(); write_u32_be(into, para); write_u32_be(into, 0); if parametric_curve.len() == 1 { write_u16_be(into, 0); } else if parametric_curve.len() == 3 { write_u16_be(into, 1); } else if parametric_curve.len() == 4 { write_u16_be(into, 2); } else if parametric_curve.len() == 5 { write_u16_be(into, 3); } else if parametric_curve.len() == 7 { write_u16_be(into, 4); } write_u16_be(into, 0); for item in parametric_curve.iter() { write_i32_be(into, item.to_s15_fixed16()); } Ok(12 + 4 * parametric_curve.len()) } } } #[inline] fn write_cicp_entry(into: &mut Vec, cicp: &CicpProfile) { let cicp_tag: u32 = TagTypeDefinition::Cicp.into(); write_u32_be(into, cicp_tag); write_u32_be(into, 0); into.push(cicp.color_primaries as u8); into.push(cicp.transfer_characteristics as u8); into.push(cicp.matrix_coefficients as 
u8); into.push(if cicp.full_range { 1 } else { 0 }); } fn write_chad(into: &mut Vec, matrix: Matrix3d) { let arr_type: u32 = TagTypeDefinition::S15Fixed16Array.into(); write_u32_be(into, arr_type); write_u32_be(into, 0); write_matrix3d(into, matrix); } #[inline] fn write_matrix3d(into: &mut Vec, v: Matrix3d) { write_i32_be(into, v.v[0][0].to_s15_fixed16()); write_i32_be(into, v.v[0][1].to_s15_fixed16()); write_i32_be(into, v.v[0][2].to_s15_fixed16()); write_i32_be(into, v.v[1][0].to_s15_fixed16()); write_i32_be(into, v.v[1][1].to_s15_fixed16()); write_i32_be(into, v.v[1][2].to_s15_fixed16()); write_i32_be(into, v.v[2][0].to_s15_fixed16()); write_i32_be(into, v.v[2][1].to_s15_fixed16()); write_i32_be(into, v.v[2][2].to_s15_fixed16()); } #[inline] fn write_vector3d(into: &mut Vec, v: Vector3d) { write_i32_be(into, v.v[0].to_s15_fixed16()); write_i32_be(into, v.v[1].to_s15_fixed16()); write_i32_be(into, v.v[2].to_s15_fixed16()); } #[inline] fn write_lut_entry(into: &mut Vec, lut: &LutDataType) -> Result { if !lut.has_same_kind() { return Err(CmsError::InvalidProfile); } let start = into.len(); let lut16_tag: u32 = match &lut.input_table { LutStore::Store8(_) => LutType::Lut8.into(), LutStore::Store16(_) => LutType::Lut16.into(), }; write_u32_be(into, lut16_tag); write_u32_be(into, 0); into.push(lut.num_input_channels); into.push(lut.num_output_channels); into.push(lut.num_clut_grid_points); into.push(0); write_matrix3d(into, lut.matrix); write_u16_be(into, lut.num_input_table_entries); write_u16_be(into, lut.num_output_table_entries); match &lut.input_table { LutStore::Store8(input_table) => { for &item in input_table.iter() { into.push(item); } } LutStore::Store16(input_table) => { for &item in input_table.iter() { write_u16_be(into, item); } } } match &lut.clut_table { LutStore::Store8(input_table) => { for &item in input_table.iter() { into.push(item); } } LutStore::Store16(input_table) => { for &item in input_table.iter() { write_u16_be(into, item); } } } match 
&lut.output_table { LutStore::Store8(input_table) => { for &item in input_table.iter() { into.push(item); } } LutStore::Store16(input_table) => { for &item in input_table.iter() { write_u16_be(into, item); } } } let end = into.len(); Ok(end - start) } #[inline] fn write_mab_entry( into: &mut Vec, lut: &LutMultidimensionalType, is_a_to_b: bool, ) -> Result { let start = into.len(); let lut16_tag: u32 = if is_a_to_b { LutType::LutMab.into() } else { LutType::LutMba.into() }; write_u32_be(into, lut16_tag); write_u32_be(into, 0); into.push(lut.num_input_channels); into.push(lut.num_output_channels); write_u16_be(into, 0); let mut working_offset = 32usize; let mut data = Vec::new(); // Offset to "B curves" if !lut.b_curves.is_empty() { while working_offset % 4 != 0 { data.push(0); working_offset += 1; } write_u32_be(into, working_offset as u32); for trc in lut.b_curves.iter() { let curve_size = write_trc_entry(&mut data, trc)?; working_offset += curve_size; while working_offset % 4 != 0 { data.push(0); working_offset += 1; } } } else { write_u32_be(into, 0); } // Offset to matrix if !lut.m_curves.is_empty() { while working_offset % 4 != 0 { data.push(0); working_offset += 1; } write_u32_be(into, working_offset as u32); write_matrix3d(&mut data, lut.matrix); write_vector3d(&mut data, lut.bias); working_offset += 9 * 4 + 3 * 4; // Offset to "M curves" write_u32_be(into, working_offset as u32); for trc in lut.m_curves.iter() { let curve_size = write_trc_entry(&mut data, trc)?; working_offset += curve_size; while working_offset % 4 != 0 { data.push(0); working_offset += 1; } } } else { // Offset to matrix write_u32_be(into, 0); // Offset to "M curves" write_u32_be(into, 0); } let mut clut_start = data.len(); // Offset to CLUT if let Some(clut) = &lut.clut { while working_offset % 4 != 0 { data.push(0); working_offset += 1; } clut_start = data.len(); write_u32_be(into, working_offset as u32); // Writing CLUT for &pt in lut.grid_points.iter() { data.push(pt); } 
data.push(match clut { LutStore::Store8(_) => 1, LutStore::Store16(_) => 2, }); // Entry size data.push(0); data.push(0); data.push(0); match clut { LutStore::Store8(store) => { for &element in store.iter() { data.push(element) } } LutStore::Store16(store) => { for &element in store.iter() { write_u16_be(&mut data, element); } } } } else { write_u32_be(into, 0); } let clut_size = data.len() - clut_start; working_offset += clut_size; // Offset to "A curves" if !lut.a_curves.is_empty() { while working_offset % 4 != 0 { data.push(0); working_offset += 1; } write_u32_be(into, working_offset as u32); for trc in lut.a_curves.iter() { let curve_size = write_trc_entry(&mut data, trc)?; working_offset += curve_size; while working_offset % 4 != 0 { data.push(0); working_offset += 1; } } } else { write_u32_be(into, 0); } into.extend(data); let end = into.len(); Ok(end - start) } fn write_lut(into: &mut Vec, lut: &LutWarehouse, is_a_to_b: bool) -> Result { match lut { LutWarehouse::Lut(lut) => Ok(write_lut_entry(into, lut)?), LutWarehouse::Multidimensional(mab) => write_mab_entry(into, mab, is_a_to_b), } } impl ProfileHeader { fn encode(&self) -> Vec { let mut encoder: Vec = Vec::with_capacity(size_of::()); write_u32_be(&mut encoder, self.size); // Size write_u32_be(&mut encoder, 0); // CMM Type write_u32_be(&mut encoder, self.version.into()); // Version Number Type write_u32_be(&mut encoder, self.profile_class.into()); // Profile class write_u32_be(&mut encoder, self.data_color_space.into()); // Data color space write_u32_be(&mut encoder, self.pcs.into()); // PCS self.creation_date_time.encode(&mut encoder); // Date time write_u32_be(&mut encoder, self.signature.into()); // Profile signature write_u32_be(&mut encoder, self.platform); write_u32_be(&mut encoder, self.flags); write_u32_be(&mut encoder, self.device_manufacturer); write_u32_be(&mut encoder, self.device_model); for &i in self.device_attributes.iter() { encoder.push(i); } write_u32_be(&mut encoder, 
self.rendering_intent.into()); write_i32_be(&mut encoder, self.illuminant.x.to_s15_fixed16()); write_i32_be(&mut encoder, self.illuminant.y.to_s15_fixed16()); write_i32_be(&mut encoder, self.illuminant.z.to_s15_fixed16()); write_u32_be(&mut encoder, self.creator); for &i in self.profile_id.iter() { encoder.push(i); } for &i in self.reserved.iter() { encoder.push(i); } write_u32_be(&mut encoder, self.tag_count); encoder } } impl ColorProfile { fn writable_tags_count(&self) -> usize { let mut tags_count = 0usize; if self.red_colorant != Xyzd::default() { tags_count += 1; } if self.green_colorant != Xyzd::default() { tags_count += 1; } if self.blue_colorant != Xyzd::default() { tags_count += 1; } if self.red_trc.is_some() { tags_count += 1; } if self.green_trc.is_some() { tags_count += 1; } if self.blue_trc.is_some() { tags_count += 1; } if self.gray_trc.is_some() { tags_count += 1; } if self.cicp.is_some() { tags_count += 1; } if self.media_white_point.is_some() { tags_count += 1; } if self.gamut.is_some() { tags_count += 1; } if self.chromatic_adaptation.is_some() { tags_count += 1; } if self.lut_a_to_b_perceptual.is_some() { tags_count += 1; } if self.lut_a_to_b_colorimetric.is_some() { tags_count += 1; } if self.lut_a_to_b_saturation.is_some() { tags_count += 1; } if self.lut_b_to_a_perceptual.is_some() { tags_count += 1; } if self.lut_b_to_a_colorimetric.is_some() { tags_count += 1; } if self.lut_b_to_a_saturation.is_some() { tags_count += 1; } if self.luminance.is_some() { tags_count += 1; } if let Some(description) = &self.description { if description.has_values() { tags_count += 1; } } if let Some(copyright) = &self.copyright { if copyright.has_values() { tags_count += 1; } } if let Some(vd) = &self.viewing_conditions_description { if vd.has_values() { tags_count += 1; } } if let Some(vd) = &self.device_model { if vd.has_values() { tags_count += 1; } } if let Some(vd) = &self.device_manufacturer { if vd.has_values() { tags_count += 1; } } tags_count } /// 
Encodes profile pub fn encode(&self) -> Result, CmsError> { let mut entries = Vec::new(); let tags_count = self.writable_tags_count(); let mut tags = Vec::with_capacity(TAG_SIZE * tags_count); let mut base_offset = size_of::() + TAG_SIZE * tags_count; if self.red_colorant != Xyzd::default() { write_tag_entry(&mut tags, Tag::RedXyz, base_offset, 20); write_xyz_tag_value(&mut entries, self.red_colorant); base_offset += 20; } if self.green_colorant != Xyzd::default() { write_tag_entry(&mut tags, Tag::GreenXyz, base_offset, 20); write_xyz_tag_value(&mut entries, self.green_colorant); base_offset += 20; } if self.blue_colorant != Xyzd::default() { write_tag_entry(&mut tags, Tag::BlueXyz, base_offset, 20); write_xyz_tag_value(&mut entries, self.blue_colorant); base_offset += 20; } if let Some(chad) = self.chromatic_adaptation { write_tag_entry(&mut tags, Tag::ChromaticAdaptation, base_offset, 8 + 9 * 4); write_chad(&mut entries, chad); base_offset += 8 + 9 * 4; } if let Some(trc) = &self.red_trc { let entry_size = write_trc_entry(&mut entries, trc)?; write_tag_entry(&mut tags, Tag::RedToneReproduction, base_offset, entry_size); base_offset += entry_size; } if let Some(trc) = &self.green_trc { let entry_size = write_trc_entry(&mut entries, trc)?; write_tag_entry( &mut tags, Tag::GreenToneReproduction, base_offset, entry_size, ); base_offset += entry_size; } if let Some(trc) = &self.blue_trc { let entry_size = write_trc_entry(&mut entries, trc)?; write_tag_entry( &mut tags, Tag::BlueToneReproduction, base_offset, entry_size, ); base_offset += entry_size; } if let Some(trc) = &self.gray_trc { let entry_size = write_trc_entry(&mut entries, trc)?; write_tag_entry( &mut tags, Tag::GreyToneReproduction, base_offset, entry_size, ); base_offset += entry_size; } if self.white_point != Xyzd::default() { write_tag_entry(&mut tags, Tag::MediaWhitePoint, base_offset, 20); write_xyz_tag_value(&mut entries, self.white_point); base_offset += 20; } let has_cicp = self.cicp.is_some(); // 
This tag may be present when the data colour space in the profile header is RGB, YCbCr, or XYZ, and the // profile class in the profile header is Input or Display. The tag shall not be present for other data colour spaces // or profile classes indicated in the profile header. if let Some(cicp) = &self.cicp { if (self.profile_class == ProfileClass::InputDevice || self.profile_class == ProfileClass::DisplayDevice) && (self.color_space == DataColorSpace::Rgb || self.color_space == DataColorSpace::YCbr || self.color_space == DataColorSpace::Xyz) { write_tag_entry(&mut tags, Tag::CodeIndependentPoints, base_offset, 12); write_cicp_entry(&mut entries, cicp); base_offset += 12; } } if let Some(lut) = &self.lut_a_to_b_perceptual { let entry_size = write_lut(&mut entries, lut, true)?; write_tag_entry( &mut tags, Tag::DeviceToPcsLutPerceptual, base_offset, entry_size, ); base_offset += entry_size; } if let Some(lut) = &self.lut_a_to_b_colorimetric { let entry_size = write_lut(&mut entries, lut, true)?; write_tag_entry( &mut tags, Tag::DeviceToPcsLutColorimetric, base_offset, entry_size, ); base_offset += entry_size; } if let Some(lut) = &self.lut_a_to_b_saturation { let entry_size = write_lut(&mut entries, lut, true)?; write_tag_entry( &mut tags, Tag::DeviceToPcsLutSaturation, base_offset, entry_size, ); base_offset += entry_size; } if let Some(lut) = &self.lut_b_to_a_perceptual { let entry_size = write_lut(&mut entries, lut, false)?; write_tag_entry( &mut tags, Tag::PcsToDeviceLutPerceptual, base_offset, entry_size, ); base_offset += entry_size; } if let Some(lut) = &self.lut_b_to_a_colorimetric { let entry_size = write_lut(&mut entries, lut, false)?; write_tag_entry( &mut tags, Tag::PcsToDeviceLutColorimetric, base_offset, entry_size, ); base_offset += entry_size; } if let Some(lut) = &self.lut_b_to_a_saturation { let entry_size = write_lut(&mut entries, lut, false)?; write_tag_entry( &mut tags, Tag::PcsToDeviceLutSaturation, base_offset, entry_size, ); base_offset += 
entry_size; } if let Some(lut) = &self.gamut { let entry_size = write_lut(&mut entries, lut, false)?; write_tag_entry(&mut tags, Tag::Gamut, base_offset, entry_size); base_offset += entry_size; } if let Some(luminance) = self.luminance { write_tag_entry(&mut tags, Tag::Luminance, base_offset, 20); write_xyz_tag_value(&mut entries, luminance); base_offset += 20; } if let Some(description) = &self.description { if description.has_values() { let entry_size = write_string_value(&mut entries, description); write_tag_entry(&mut tags, Tag::ProfileDescription, base_offset, entry_size); base_offset += entry_size; } } if let Some(copyright) = &self.copyright { if copyright.has_values() { let entry_size = write_string_value(&mut entries, copyright); write_tag_entry(&mut tags, Tag::Copyright, base_offset, entry_size); base_offset += entry_size; } } if let Some(vd) = &self.viewing_conditions_description { if vd.has_values() { let entry_size = write_string_value(&mut entries, vd); write_tag_entry( &mut tags, Tag::ViewingConditionsDescription, base_offset, entry_size, ); base_offset += entry_size; } } if let Some(vd) = &self.device_model { if vd.has_values() { let entry_size = write_string_value(&mut entries, vd); write_tag_entry(&mut tags, Tag::DeviceModel, base_offset, entry_size); base_offset += entry_size; } } if let Some(vd) = &self.device_manufacturer { if vd.has_values() { let entry_size = write_string_value(&mut entries, vd); write_tag_entry(&mut tags, Tag::DeviceManufacturer, base_offset, entry_size); // base_offset += entry_size; } } tags.extend(entries); let profile_header = ProfileHeader { size: size_of::() as u32 + tags.len() as u32, pcs: self.pcs, profile_class: self.profile_class, rendering_intent: self.rendering_intent, cmm_type: 0, version: if has_cicp { ProfileVersion::V4_3 } else { ProfileVersion::V4_0 }, data_color_space: self.color_space, creation_date_time: ColorDateTime::now(), signature: ProfileSignature::Acsp, platform: 0u32, flags: 0u32, 
device_manufacturer: 0u32, device_model: 0u32, device_attributes: [0u8; 8], illuminant: self.white_point.to_xyz(), creator: 0u32, profile_id: [0u8; 16], reserved: [0u8; 28], tag_count: tags_count as u32, }; let mut header = profile_header.encode(); header.extend(tags); Ok(header) } } impl FloatToFixedU8Fixed8 for f32 { #[inline] fn to_u8_fixed8(self) -> u16 { if self > 255.0 + 255.0 / 256f32 { 0xffffu16 } else if self < 0.0 { 0u16 } else { (self * 256.0 + 0.5).floor() as u16 } } } #[cfg(test)] mod tests { use super::*; #[test] fn to_u8_fixed8() { assert_eq!(0, 0f32.to_u8_fixed8()); assert_eq!(0x0100, 1f32.to_u8_fixed8()); assert_eq!(u16::MAX, (255f32 + (255f32 / 256f32)).to_u8_fixed8()); } #[test] fn to_s15_fixed16() { assert_eq!(0x80000000u32 as i32, (-32768f32).to_s15_fixed16()); assert_eq!(0, 0f32.to_s15_fixed16()); assert_eq!(0x10000, 1.0f32.to_s15_fixed16()); assert_eq!( i32::MAX, (32767f32 + (65535f32 / 65536f32)).to_s15_fixed16() ); } } moxcms-0.7.7/src/xyy.rs000064400000000000000000000060321046102023000131340ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. 
* // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ use crate::{Xyz, Xyzd}; /// Holds CIE XyY representation #[derive(Clone, Debug, Copy, Default)] pub struct XyY { pub x: f64, pub y: f64, pub yb: f64, } pub trait XyYRepresentable { fn to_xyy(self) -> XyY; } impl XyYRepresentable for XyY { #[inline] fn to_xyy(self) -> XyY { self } } impl XyY { #[inline] pub const fn new(x: f64, y: f64, yb: f64) -> Self { Self { x, y, yb } } #[inline] pub const fn to_xyz(self) -> Xyz { let reciprocal = if self.y != 0. { 1. / self.y * self.yb } else { 0. }; Xyz { x: (self.x * reciprocal) as f32, y: self.yb as f32, z: ((1. - self.x - self.y) * reciprocal) as f32, } } #[inline] pub const fn to_xyzd(self) -> Xyzd { let reciprocal = if self.y != 0. { 1. / self.y * self.yb } else { 0. }; Xyzd { x: self.x * reciprocal, y: self.yb, z: (1. 
- self.x - self.y) * reciprocal, } } } #[cfg(test)] mod tests { use super::*; #[test] fn test_xyzd_xyy() { let xyy = XyY::new(0.2, 0.4, 0.5); let xyy = xyy.to_xyzd(); let r_xyy = xyy.to_xyzd(); assert!((r_xyy.x - xyy.x).abs() < 1e-5); assert!((r_xyy.y - xyy.y).abs() < 1e-5); assert!((r_xyy.z - xyy.z).abs() < 1e-5); } } moxcms-0.7.7/src/yrg.rs000064400000000000000000000132761046102023000131140ustar 00000000000000/* * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved. * // * // Redistribution and use in source and binary forms, with or without modification, * // are permitted provided that the following conditions are met: * // * // 1. Redistributions of source code must retain the above copyright notice, this * // list of conditions and the following disclaimer. * // * // 2. Redistributions in binary form must reproduce the above copyright notice, * // this list of conditions and the following disclaimer in the documentation * // and/or other materials provided with the distribution. * // * // 3. Neither the name of the copyright holder nor the names of its * // contributors may be used to endorse or promote products derived from * // this software without specific prior written permission. * // * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * // DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ use crate::mlaf::mlaf; use crate::{Matrix3f, Vector3f, Xyz}; use pxfm::{f_atan2f, f_hypotf, f_sincosf}; /// Structure for Yrg colorspace /// /// Kirk Yrg 2021. #[repr(C)] #[derive(Default, Debug, PartialOrd, PartialEq, Copy, Clone)] pub struct Yrg { pub y: f32, pub r: f32, pub g: f32, } /// Structure for cone form of Yrg colorspace #[repr(C)] #[derive(Default, Debug, PartialOrd, PartialEq, Copy, Clone)] pub struct Ych { pub y: f32, pub c: f32, pub h: f32, } const LMS_TO_XYZ: Matrix3f = Matrix3f { v: [ [1.8079466, -1.2997167, 0.34785876], [0.61783963, 0.39595452, -0.041046873], [-0.12546961, 0.20478038, 1.7427418], ], }; const XYZ_TO_LMS: Matrix3f = Matrix3f { v: [ [0.257085, 0.859943, -0.031061], [-0.394427, 1.175800, 0.106423], [0.064856, -0.076250, 0.559067], ], }; impl Yrg { #[inline] pub const fn new(y: f32, r: f32, g: f32) -> Yrg { Yrg { y, r, g } } /// Convert [Xyz] D65 to [Yrg] /// /// Yrg defined in D65 white point. Ensure Xyz values is adapted. /// Yrg use CIE XYZ 2006, adapt CIE XYZ 1931 by using [cie_y_1931_to_cie_y_2006] at first. #[inline] pub fn from_xyz(xyz: Xyz) -> Self { let lms = XYZ_TO_LMS.f_mul_vector(Vector3f { v: [xyz.x, xyz.y, xyz.z], }); let y = mlaf(0.68990272 * lms.v[0], 0.34832189, lms.v[1]); let a = lms.v[0] + lms.v[1] + lms.v[2]; let l = if a == 0. { 0. } else { lms.v[0] / a }; let m = if a == 0. { 0. 
} else { lms.v[1] / a }; let r = mlaf(mlaf(0.02062, -0.6873, m), 1.0671, l); let g = mlaf(mlaf(-0.05155, -0.0362, l), 1.7182, m); Yrg { y, r, g } } #[inline] pub fn to_xyz(&self) -> Xyz { let l = mlaf(0.95 * self.r, 0.38, self.g); let m = mlaf(mlaf(0.03, 0.59, self.g), 0.02, self.r); let den = mlaf(0.68990272 * l, 0.34832189, m); let a = if den == 0. { 0. } else { self.y / den }; let l0 = l * a; let m0 = m * a; let s0 = (1f32 - l - m) * a; let v = Vector3f { v: [l0, m0, s0] }; let x = LMS_TO_XYZ.f_mul_vector(v); Xyz { x: x.v[0], y: x.v[1], z: x.v[2], } } } impl Ych { #[inline] pub const fn new(y: f32, c: f32, h: f32) -> Self { Ych { y, c, h } } #[inline] pub fn from_yrg(yrg: Yrg) -> Self { let y = yrg.y; // Subtract white point. These are the r, g coordinates of // sRGB (D50 adapted) (1, 1, 1) taken through // XYZ D50 -> CAT16 D50->D65 adaptation -> LMS 2006 // -> grading RGB conversion. let r = yrg.r - 0.21902143; let g = yrg.g - 0.54371398; let c = f_hypotf(g, r); let h = f_atan2f(g, r); Self { y, c, h } } #[inline] pub fn to_yrg(&self) -> Yrg { let y = self.y; let c = self.c; let h = self.h; let sincos = f_sincosf(h); let r = mlaf(0.21902143, c, sincos.1); let g = mlaf(0.54371398, c, sincos.0); Yrg { y, r, g } } } // Pipeline and ICC luminance is CIE Y 1931 // Kirk Ych/Yrg uses CIE Y 2006 // 1 CIE Y 1931 = 1.05785528 CIE Y 2006, so we need to adjust that. // This also accounts for the CAT16 D50->D65 adaptation that has to be done // to go from RGB to CIE LMS 2006. // Warning: only applies to achromatic pixels. 
/// Scales a CIE Y 1931 luminance by `1.05785528` to obtain the CIE Y 2006
/// value expected by the Yrg/Ych conversions.
pub const fn cie_y_1931_to_cie_y_2006(x: f32) -> f32 {
    1.05785528 * (x)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_yrg() {
        let xyz = Xyz::new(0.95, 1.0, 1.08);
        let yrg = Yrg::from_xyz(xyz);
        let yrg_to_xyz = yrg.to_xyz();
        // Compare absolute errors: a signed difference would accept any
        // negative deviation. The inverse transform uses rounded
        // coefficients, so the round trip is only accurate to about 1e-4.
        assert!((xyz.x - yrg_to_xyz.x).abs() < 1e-3);
        assert!((xyz.y - yrg_to_xyz.y).abs() < 1e-3);
        assert!((xyz.z - yrg_to_xyz.z).abs() < 1e-3);
    }

    #[test]
    fn test_ych() {
        let yrg = Yrg::new(0.5, 0.4, 0.3);
        let ych = Ych::from_yrg(yrg);
        let round_trip = ych.to_yrg();
        assert!((yrg.y - round_trip.y).abs() < 1e-5);
        assert!((yrg.r - round_trip.r).abs() < 1e-5);
        assert!((yrg.g - round_trip.g).abs() < 1e-5);
    }

    #[test]
    fn test_cie_y_scale() {
        assert_eq!(cie_y_1931_to_cie_y_2006(0.0), 0.0);
        assert!((cie_y_1931_to_cie_y_2006(1.0) - 1.05785528).abs() < 1e-6);
    }
}