fdeflate-0.3.7/.cargo_vcs_info.json0000644000000001360000000000100126150ustar { "git": { "sha1": "c365c7e6ffa81feb2e1fb762eed7299f05c9b0ca" }, "path_in_vcs": "" }fdeflate-0.3.7/Cargo.lock0000644000000101450000000000100105710ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. version = 3 [[package]] name = "adler" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" [[package]] name = "byteorder" version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "fdeflate" version = "0.3.7" dependencies = [ "miniz_oxide", "rand", "simd-adler32", ] [[package]] name = "getrandom" version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" dependencies = [ "cfg-if", "libc", "wasi", ] [[package]] name = "libc" version = "0.2.161" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9489c2807c139ffd9c1794f4af0ebe86a828db53ecdc7fea2111d0fed085d1" [[package]] name = "miniz_oxide" version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b8a240ddb74feaf34a79a7add65a741f3167852fba007066dcac1ca548d89c08" dependencies = [ "adler", ] [[package]] name = "ppv-lite86" version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" dependencies = [ "zerocopy", ] [[package]] name = "proc-macro2" version = "1.0.89" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "f139b0662de085916d1fb67d2b4169d1addddda1919e696f3252b740b629986e" dependencies = [ "unicode-ident", ] [[package]] name = "quote" version = "1.0.37" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" dependencies = [ "proc-macro2", ] [[package]] name = "rand" version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", "rand_chacha", "rand_core", ] [[package]] name = "rand_chacha" version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", "rand_core", ] [[package]] name = "rand_core" version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ "getrandom", ] [[package]] name = "simd-adler32" version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" [[package]] name = "syn" version = "2.0.85" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5023162dfcd14ef8f32034d8bcd4cc5ddc61ef7a247c024a33e24e1f24d21b56" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] [[package]] name = "unicode-ident" version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "zerocopy" version = "0.7.35" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" dependencies = [ "byteorder", "zerocopy-derive", ] [[package]] name = "zerocopy-derive" version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", "syn", ] fdeflate-0.3.7/Cargo.toml0000644000000025060000000000100106160ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" rust-version = "1.67.0" name = "fdeflate" version = "0.3.7" authors = ["The image-rs Developers"] build = false include = [ "/src", "/tests", "README.md", "LICENSE-APACHE", "LICENSE-MIT", ] autolib = false autobins = false autoexamples = false autotests = false autobenches = false description = "Fast specialized deflate implementation" homepage = "https://github.com/image-rs/fdeflate" documentation = "https://docs.rs/fdeflate" readme = "README.md" categories = ["compression"] license = "MIT OR Apache-2.0" repository = "https://github.com/image-rs/fdeflate" [lib] name = "fdeflate" path = "src/lib.rs" [dependencies.simd-adler32] version = "0.3.4" [dev-dependencies.miniz_oxide] version = "0.7.1" [dev-dependencies.rand] version = "0.8.5" [lints.rust.unexpected_cfgs] level = "allow" priority = 0 check-cfg = ["cfg(fuzzing)"] fdeflate-0.3.7/Cargo.toml.orig000064400000000000000000000013441046102023000142760ustar 00000000000000[package] name = "fdeflate" version = "0.3.7" edition = "2021" # note: 
when changed, also update test runner in `.github/workflows/rust.yml` rust-version = "1.67.0" license = "MIT OR Apache-2.0" description = "Fast specialized deflate implementation" authors = ["The image-rs Developers"] include = ["/src", "/tests", "README.md", "LICENSE-APACHE", "LICENSE-MIT"] # crates.io metadata documentation = "https://docs.rs/fdeflate" repository = "https://github.com/image-rs/fdeflate" homepage = "https://github.com/image-rs/fdeflate" categories = ["compression"] [dependencies] simd-adler32 = "0.3.4" [dev-dependencies] miniz_oxide = "0.7.1" rand = "0.8.5" [lints.rust] unexpected_cfgs = { level = "allow", check-cfg = ['cfg(fuzzing)'] } fdeflate-0.3.7/LICENSE-APACHE000064400000000000000000000236761046102023000133470ustar 00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. 
"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. 
Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. 
END OF TERMS AND CONDITIONS fdeflate-0.3.7/LICENSE-MIT000064400000000000000000000020141046102023000130360ustar 00000000000000MIT License Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. fdeflate-0.3.7/README.md000064400000000000000000000024771046102023000126760ustar 00000000000000# fdeflate [![crates.io](https://img.shields.io/crates/v/fdeflate.svg)](https://crates.io/crates/fdeflate) [![Documentation](https://docs.rs/fdeflate/badge.svg)](https://docs.rs/fdeflate) [![Build Status](https://img.shields.io/github/actions/workflow/status/image-rs/fdeflate/rust.yml?label=Rust%20CI)](https://github.com/image-rs/fdeflate/actions) A fast and safe deflate implementation for PNG. This crate contains an optimized implementation of the [deflate algorithm](https://en.wikipedia.org/wiki/Deflate) tuned for PNG images. At least on PNG data, our decoder rivals the performance of `zlib-ng` and `zlib-rs` without using any `unsafe` code. 
When compressing it makes a bunch of simplifying assumptions that drastically improve encoding speed while still being compatible with zlib: - Exactly one block per deflate stream. - No distance codes except for run length encoding of zeros. - A single fixed huffman tree trained on a large corpus of PNG images. - All huffman codes are <= 12 bits. ### Inspiration The algorithms in this crate take inspiration from multiple sources: * [fpnge](https://github.com/veluca93/fpnge) * [zune-inflate](https://github.com/etemesi254/zune-image/tree/main/zune-inflate) * [RealTime Data Compression blog](https://fastcompression.blogspot.com/2015/10/huffman-revisited-part-4-multi-bytes.html) fdeflate-0.3.7/src/compress.rs000064400000000000000000000233631046102023000144040ustar 00000000000000use simd_adler32::Adler32; use std::io::{self, Seek, SeekFrom, Write}; use crate::tables::{ BITMASKS, HUFFMAN_CODES, HUFFMAN_LENGTHS, LENGTH_TO_LEN_EXTRA, LENGTH_TO_SYMBOL, }; /// Compressor that produces fdeflate compressed streams. 
pub struct Compressor { checksum: Adler32, buffer: u64, nbits: u8, writer: W, } impl Compressor { fn write_bits(&mut self, bits: u64, nbits: u8) -> io::Result<()> { debug_assert!(nbits <= 64); self.buffer |= bits << self.nbits; self.nbits += nbits; if self.nbits >= 64 { self.writer.write_all(&self.buffer.to_le_bytes())?; self.nbits -= 64; self.buffer = bits.checked_shr((nbits - self.nbits) as u32).unwrap_or(0); } debug_assert!(self.nbits < 64); Ok(()) } fn flush(&mut self) -> io::Result<()> { if self.nbits % 8 != 0 { self.write_bits(0, 8 - self.nbits % 8)?; } if self.nbits > 0 { self.writer .write_all(&self.buffer.to_le_bytes()[..self.nbits as usize / 8]) .unwrap(); self.buffer = 0; self.nbits = 0; } Ok(()) } fn write_run(&mut self, mut run: u32) -> io::Result<()> { self.write_bits(HUFFMAN_CODES[0] as u64, HUFFMAN_LENGTHS[0])?; run -= 1; while run >= 258 { self.write_bits(HUFFMAN_CODES[285] as u64, HUFFMAN_LENGTHS[285] + 1)?; run -= 258; } if run > 4 { let sym = LENGTH_TO_SYMBOL[run as usize - 3] as usize; self.write_bits(HUFFMAN_CODES[sym] as u64, HUFFMAN_LENGTHS[sym])?; let len_extra = LENGTH_TO_LEN_EXTRA[run as usize - 3]; let extra = ((run - 3) & BITMASKS[len_extra as usize]) as u64; self.write_bits(extra, len_extra + 1)?; } else { debug_assert_eq!(HUFFMAN_CODES[0], 0); self.write_bits(0, run as u8 * HUFFMAN_LENGTHS[0])?; } Ok(()) } /// Create a new Compressor. 
pub fn new(writer: W) -> io::Result { let mut compressor = Self { checksum: Adler32::new(), buffer: 0, nbits: 0, writer, }; compressor.write_headers()?; Ok(compressor) } fn write_headers(&mut self) -> io::Result<()> { const HEADER: [u8; 54] = [ 120, 1, 237, 192, 3, 160, 36, 89, 150, 198, 241, 255, 119, 238, 141, 200, 204, 167, 114, 75, 99, 174, 109, 219, 182, 109, 219, 182, 109, 219, 182, 109, 105, 140, 158, 150, 74, 175, 158, 50, 51, 34, 238, 249, 118, 183, 106, 122, 166, 135, 59, 107, 213, 15, ]; self.writer.write_all(&HEADER[..53]).unwrap(); self.write_bits(HEADER[53] as u64, 5)?; Ok(()) } /// Write data to the compressor. pub fn write_data(&mut self, data: &[u8]) -> io::Result<()> { self.checksum.write(data); let mut run = 0; let mut chunks = data.chunks_exact(8); for chunk in &mut chunks { let ichunk = u64::from_le_bytes(chunk.try_into().unwrap()); if ichunk == 0 { run += 8; continue; } else if run > 0 { let run_extra = ichunk.trailing_zeros() / 8; self.write_run(run + run_extra)?; run = 0; if run_extra > 0 { run = ichunk.leading_zeros() / 8; for &b in &chunk[run_extra as usize..8 - run as usize] { self.write_bits( HUFFMAN_CODES[b as usize] as u64, HUFFMAN_LENGTHS[b as usize], )?; } continue; } } let run_start = ichunk.leading_zeros() / 8; if run_start > 0 { for &b in &chunk[..8 - run_start as usize] { self.write_bits( HUFFMAN_CODES[b as usize] as u64, HUFFMAN_LENGTHS[b as usize], )?; } run = run_start; continue; } let n0 = HUFFMAN_LENGTHS[chunk[0] as usize]; let n1 = HUFFMAN_LENGTHS[chunk[1] as usize]; let n2 = HUFFMAN_LENGTHS[chunk[2] as usize]; let n3 = HUFFMAN_LENGTHS[chunk[3] as usize]; let bits = HUFFMAN_CODES[chunk[0] as usize] as u64 | ((HUFFMAN_CODES[chunk[1] as usize] as u64) << n0) | ((HUFFMAN_CODES[chunk[2] as usize] as u64) << (n0 + n1)) | ((HUFFMAN_CODES[chunk[3] as usize] as u64) << (n0 + n1 + n2)); self.write_bits(bits, n0 + n1 + n2 + n3)?; let n4 = HUFFMAN_LENGTHS[chunk[4] as usize]; let n5 = HUFFMAN_LENGTHS[chunk[5] as usize]; let n6 = 
HUFFMAN_LENGTHS[chunk[6] as usize]; let n7 = HUFFMAN_LENGTHS[chunk[7] as usize]; let bits2 = HUFFMAN_CODES[chunk[4] as usize] as u64 | ((HUFFMAN_CODES[chunk[5] as usize] as u64) << n4) | ((HUFFMAN_CODES[chunk[6] as usize] as u64) << (n4 + n5)) | ((HUFFMAN_CODES[chunk[7] as usize] as u64) << (n4 + n5 + n6)); self.write_bits(bits2, n4 + n5 + n6 + n7)?; } if run > 0 { self.write_run(run)?; } for &b in chunks.remainder() { self.write_bits( HUFFMAN_CODES[b as usize] as u64, HUFFMAN_LENGTHS[b as usize], )?; } Ok(()) } /// Write the remainder of the stream and return the inner writer. pub fn finish(mut self) -> io::Result { // Write end of block self.write_bits(HUFFMAN_CODES[256] as u64, HUFFMAN_LENGTHS[256])?; self.flush()?; // Write Adler32 checksum let checksum: u32 = self.checksum.finish(); self.writer .write_all(checksum.to_be_bytes().as_ref()) .unwrap(); Ok(self.writer) } } /// Compressor that only writes the stored blocks. /// /// This is useful for writing files that are not compressed, but still need to be wrapped in a /// zlib stream. pub struct StoredOnlyCompressor { writer: W, checksum: Adler32, block_bytes: u16, } impl StoredOnlyCompressor { /// Creates a new `StoredOnlyCompressor` that writes to the given writer. pub fn new(mut writer: W) -> io::Result { writer.write_all(&[0x78, 0x01])?; // zlib header writer.write_all(&[0; 5])?; // placeholder stored block header Ok(Self { writer, checksum: Adler32::new(), block_bytes: 0, }) } fn set_block_header(&mut self, size: u16, last: bool) -> io::Result<()> { self.writer.seek(SeekFrom::Current(-(size as i64 + 5)))?; self.writer.write_all(&[ last as u8, (size & 0xFF) as u8, ((size >> 8) & 0xFF) as u8, (!size & 0xFF) as u8, ((!size >> 8) & 0xFF) as u8, ])?; self.writer.seek(SeekFrom::Current(size as i64))?; Ok(()) } /// Writes the given data to the underlying writer. 
pub fn write_data(&mut self, mut data: &[u8]) -> io::Result<()> { self.checksum.write(data); while !data.is_empty() { if self.block_bytes == u16::MAX { self.set_block_header(u16::MAX, false)?; self.writer.write_all(&[0; 5])?; // placeholder stored block header self.block_bytes = 0; } let prefix_bytes = data.len().min((u16::MAX - self.block_bytes) as usize); self.writer.write_all(&data[..prefix_bytes])?; self.block_bytes += prefix_bytes as u16; data = &data[prefix_bytes..]; } Ok(()) } /// Finish writing the final block and return the underlying writer. pub fn finish(mut self) -> io::Result { self.set_block_header(self.block_bytes, true)?; // Write Adler32 checksum let checksum: u32 = self.checksum.finish(); self.writer .write_all(checksum.to_be_bytes().as_ref()) .unwrap(); Ok(self.writer) } } impl StoredOnlyCompressor { /// Return the number of bytes that will be written to the output stream /// for the given input size. Because this compressor only writes stored blocks, /// the output size is always slightly *larger* than the input size. pub fn compressed_size(raw_size: usize) -> usize { (raw_size.saturating_sub(1) / u16::MAX as usize) * (u16::MAX as usize + 5) + (raw_size % u16::MAX as usize + 5) + 6 } } /// Compresses the given data. 
pub fn compress_to_vec(input: &[u8]) -> Vec { let mut compressor = Compressor::new(Vec::with_capacity(input.len() / 4)).unwrap(); compressor.write_data(input).unwrap(); compressor.finish().unwrap() } #[cfg(test)] mod tests { use super::*; use rand::Rng; fn roundtrip(data: &[u8]) { let compressed = compress_to_vec(data); let decompressed = miniz_oxide::inflate::decompress_to_vec_zlib(&compressed).unwrap(); assert_eq!(&decompressed, data); } #[test] fn it_works() { roundtrip(b"Hello world!"); } #[test] fn constant() { roundtrip(&vec![0; 2048]); roundtrip(&vec![5; 2048]); roundtrip(&vec![128; 2048]); roundtrip(&vec![254; 2048]); } #[test] fn random() { let mut rng = rand::thread_rng(); let mut data = vec![0; 2048]; for _ in 0..10 { for byte in &mut data { *byte = rng.gen(); } roundtrip(&data); } } } fdeflate-0.3.7/src/decompress/tests/test_utils.rs000064400000000000000000000066511046102023000202570ustar 00000000000000//! Testing utilities for testing `fdeflate::Decompressor`. //! //! These utilities are used by: //! //! * Unit tests (e.g. `#[test]` tests in `src/decompress.rs`) //! * Fuzzers (e.g. `fuzz/fuzz_targets/inflate_bytewise3.rs`) #[cfg(test)] use crate as fdeflate; use fdeflate::{DecompressionError, Decompressor}; #[derive(Debug, PartialEq)] pub enum TestDecompressionError { ProdError(DecompressionError), TestError(TestErrorKind), } #[derive(Debug, Eq, PartialEq)] pub enum TestErrorKind { OutputTooLarge, TooManyIterations, } impl From for TestDecompressionError { fn from(e: DecompressionError) -> Self { Self::ProdError(e) } } impl From for TestDecompressionError { fn from(kind: TestErrorKind) -> Self { Self::TestError(kind) } } /// Decompresses `input` when feeding it into a `Decompressor::read` in `chunks`. /// /// `chunks` typically can be used to decode the whole input at once (setting `chunks` to /// `vec![input.len]`) or byte-by-byte (setting `chunks` to `std::iter::repeat(1)`). 
/// But `chunks` can also be used to replicate arbitrary chunking patterns (such as may be /// used by some fuzzing-based repros from the `png` crate). /// /// `early_eof` is used to the last `end_of_input` argument of `Decompressor::read` calls. /// When `early_eof` is `false`, then `end_of_input` is `false` until the whole input is /// consumed (and then is `Decompressor::is_done` is still false, then `Decompressor::read` /// is called one or more times with empty input slice and `end_of_input` set to true). /// When `early_eof` is `true` then `end_of_input` is set to `true` as soon as the slice /// fed to `Decompressor::read` "reaches" the end of the whole input. /// /// Unlike the `png` crate, this testing helper uses a big, fixed-size output buffer. /// (i.e. there is no simulation of `ZlibStream.compact_out_buffer_if_needed` from the `png` /// crate). pub fn decompress_by_chunks( input: &[u8], chunks: impl IntoIterator, early_eof: bool, ) -> Result, TestDecompressionError> { let mut chunks = chunks.into_iter(); // `iteration_counter` helps to prevent infinite loops (which may happen with `chunks` such // as `std::iter::repeat(0)`). let mut iteration_counter = 0; // Ignoring checksums so that we can work with inputs generated by fuzzing. (Fuzzing // typically ignores checksums to make it easier to explore the space of possible inputs.) 
let mut d = Decompressor::new(); d.ignore_adler32(); let mut out_buf = vec![0; 1_000_000]; let mut in_pos = 0; let mut out_pos = 0; while !d.is_done() { iteration_counter += 1; if iteration_counter > 5000 { return Err(TestErrorKind::TooManyIterations.into()); } let chunk_size = chunks.next().unwrap_or(0); let start = in_pos; let end = std::cmp::min(start + chunk_size, input.len()); let eof = if early_eof { end == input.len() } else { start == input.len() }; let (in_consumed, out_written) = d.read(&input[start..end], out_buf.as_mut_slice(), out_pos, eof)?; in_pos += in_consumed; out_pos += out_written; if out_pos == out_buf.len() && in_consumed == 0 && !d.is_done() { return Err(TestErrorKind::OutputTooLarge.into()); } } out_buf.resize(out_pos, 0xFF); Ok(out_buf) } fdeflate-0.3.7/src/decompress.rs000064400000000000000000001456011046102023000147150ustar 00000000000000use simd_adler32::Adler32; use crate::{ huffman::{self, build_table}, tables::{ self, CLCL_ORDER, DIST_SYM_TO_DIST_BASE, DIST_SYM_TO_DIST_EXTRA, FIXED_DIST_TABLE, FIXED_LITLEN_TABLE, LEN_SYM_TO_LEN_BASE, LEN_SYM_TO_LEN_EXTRA, LITLEN_TABLE_ENTRIES, }, }; /// An error encountered while decompressing a deflate stream. #[derive(Debug, PartialEq)] pub enum DecompressionError { /// The zlib header is corrupt. BadZlibHeader, /// All input was consumed, but the end of the stream hasn't been reached. InsufficientInput, /// A block header specifies an invalid block type. InvalidBlockType, /// An uncompressed block's NLEN value is invalid. InvalidUncompressedBlockLength, /// Too many literals were specified. InvalidHlit, /// Too many distance codes were specified. InvalidHdist, /// Attempted to repeat a previous code before reading any codes, or past the end of the code /// lengths. InvalidCodeLengthRepeat, /// The stream doesn't specify a valid huffman tree. BadCodeLengthHuffmanTree, /// The stream doesn't specify a valid huffman tree. 
BadLiteralLengthHuffmanTree, /// The stream doesn't specify a valid huffman tree. BadDistanceHuffmanTree, /// The stream contains a literal/length code that was not allowed by the header. InvalidLiteralLengthCode, /// The stream contains a distance code that was not allowed by the header. InvalidDistanceCode, /// The stream contains contains back-reference as the first symbol. InputStartsWithRun, /// The stream contains a back-reference that is too far back. DistanceTooFarBack, /// The deflate stream checksum is incorrect. WrongChecksum, /// Extra input data. ExtraInput, } struct BlockHeader { hlit: usize, hdist: usize, hclen: usize, num_lengths_read: usize, /// Low 3-bits are code length code length, high 5-bits are code length code. table: [u32; 128], code_lengths: [u8; 320], } pub const LITERAL_ENTRY: u32 = 0x8000; pub const EXCEPTIONAL_ENTRY: u32 = 0x4000; pub const SECONDARY_TABLE_ENTRY: u32 = 0x2000; /// The Decompressor state for a compressed block. #[derive(Eq, PartialEq, Debug)] struct CompressedBlock { litlen_table: Box<[u32; 4096]>, secondary_table: Vec, dist_table: Box<[u32; 512]>, dist_secondary_table: Vec, eof_code: u16, eof_mask: u16, eof_bits: u8, } #[derive(Debug, Copy, Clone, Eq, PartialEq)] enum State { ZlibHeader, BlockHeader, CodeLengthCodes, CodeLengths, CompressedData, UncompressedData, Checksum, Done, } /// Decompressor for arbitrary zlib streams. pub struct Decompressor { /// State for decoding a compressed block. compression: CompressedBlock, // State for decoding a block header. header: BlockHeader, // Number of bytes left for uncompressed block. uncompressed_bytes_left: u16, buffer: u64, nbits: u8, queued_rle: Option<(u8, usize)>, queued_backref: Option<(usize, usize)>, last_block: bool, fixed_table: bool, state: State, checksum: Adler32, ignore_adler32: bool, } impl Default for Decompressor { fn default() -> Self { Self::new() } } impl Decompressor { /// Create a new decompressor. 
pub fn new() -> Self {
    Self {
        buffer: 0,
        nbits: 0,
        compression: CompressedBlock {
            litlen_table: Box::new([0; 4096]),
            dist_table: Box::new([0; 512]),
            secondary_table: Vec::new(),
            dist_secondary_table: Vec::new(),
            eof_code: 0,
            eof_mask: 0,
            eof_bits: 0,
        },
        header: BlockHeader {
            hlit: 0,
            hdist: 0,
            hclen: 0,
            table: [0; 128],
            num_lengths_read: 0,
            code_lengths: [0; 320],
        },
        uncompressed_bytes_left: 0,
        queued_rle: None,
        queued_backref: None,
        checksum: Adler32::new(),
        state: State::ZlibHeader,
        last_block: false,
        ignore_adler32: false,
        fixed_table: false,
    }
}

/// Ignore the checksum at the end of the stream.
pub fn ignore_adler32(&mut self) {
    self.ignore_adler32 = true;
}

/// Tops up the bit buffer from `input` and advances `input` past the bytes absorbed.
///
/// With at least 8 input bytes available, 8 bytes are OR-ed in above the current bits, the
/// input advances by `(63 - nbits) / 8` bytes and `nbits` ends up in `56..=63`. Otherwise as
/// many whole bytes as are available (and fit) are added.
fn fill_buffer(&mut self, input: &mut &[u8]) {
    if input.len() >= 8 {
        self.buffer |= u64::from_le_bytes(input[..8].try_into().unwrap()) << self.nbits;
        *input = &input[(63 - self.nbits as usize) / 8..];
        self.nbits |= 56;
    } else {
        let nbytes = input.len().min((63 - self.nbits as usize) / 8);
        let mut input_data = [0; 8];
        input_data[..nbytes].copy_from_slice(&input[..nbytes]);
        self.buffer |= u64::from_le_bytes(input_data)
            .checked_shl(self.nbits as u32)
            .unwrap_or(0);
        self.nbits += nbytes as u8 * 8;
        *input = &input[nbytes..];
    }
}

/// Returns the low `nbits` bits of the bit buffer without consuming them.
fn peak_bits(&mut self, nbits: u8) -> u64 {
    debug_assert!(nbits <= 56 && nbits <= self.nbits);
    self.buffer & ((1u64 << nbits) - 1)
}

/// Drops the low `nbits` bits from the bit buffer.
fn consume_bits(&mut self, nbits: u8) {
    debug_assert!(self.nbits >= nbits);
    self.buffer >>= nbits;
    self.nbits -= nbits;
}

/// Parses a block header and moves to the state matching the block type.
///
/// Returns `Ok(())` with the state unchanged when too few bits are buffered to decide; the
/// caller retries once more input is available.
fn read_block_header(&mut self, remaining_input: &mut &[u8]) -> Result<(), DecompressionError> {
    self.fill_buffer(remaining_input);
    if self.nbits < 10 {
        return Ok(());
    }

    // Bit 0 is the "final block" flag, bits 1..=2 are the block type.
    let start = self.peak_bits(3);
    self.last_block = start & 1 != 0;
    match start >> 1 {
        0b00 => {
            // Stored block: skip to the next byte boundary, then read LEN and NLEN.
            let align_bits = (self.nbits - 3) % 8;
            let header_bits = 3 + 32 + align_bits;
            if self.nbits < header_bits {
                return Ok(());
            }

            let len = (self.peak_bits(align_bits + 19) >> (align_bits + 3)) as u16;
            let nlen = (self.peak_bits(header_bits) >> (align_bits + 19)) as u16;
            if nlen != !len {
                return Err(DecompressionError::InvalidUncompressedBlockLength);
            }

            self.state = State::UncompressedData;
            self.uncompressed_bytes_left = len;
            self.consume_bits(header_bits);
            Ok(())
        }
        0b01 => {
            self.consume_bits(3);

            // Check for an entirely empty block, which can happen if there are "partial
            // flushes" in the deflate stream. With fixed huffman codes, the EOF symbol is
            // 7-bits of zeros so we peek ahead and see if the next 7-bits are all zero.
            if self.peak_bits(7) == 0 {
                self.consume_bits(7);
                if self.last_block {
                    self.state = State::Checksum;
                    return Ok(());
                }

                // At this point we've consumed the entire block and need to read the next block
                // header. If tail call optimization were guaranteed, we could just recurse
                // here. But without it, a long sequence of empty fixed-blocks might cause a
                // stack overflow. Instead, we consume all empty blocks in a loop and then
                // recurse. This is the only recursive call in this function, and thus is safe.
                while self.nbits >= 10 && self.peak_bits(10) == 0b010 {
                    self.consume_bits(10);
                    self.fill_buffer(remaining_input);
                }
                return self.read_block_header(remaining_input);
            }

            // Build decoding tables if the previous block wasn't also a fixed block.
            if !self.fixed_table {
                self.fixed_table = true;
                for chunk in self.compression.litlen_table.chunks_exact_mut(512) {
                    chunk.copy_from_slice(&FIXED_LITLEN_TABLE);
                }
                for chunk in self.compression.dist_table.chunks_exact_mut(32) {
                    chunk.copy_from_slice(&FIXED_DIST_TABLE);
                }
                self.compression.eof_bits = 7;
                self.compression.eof_code = 0;
                self.compression.eof_mask = 0x7f;
            }

            self.state = State::CompressedData;
            Ok(())
        }
        0b10 => {
            // Dynamic block: 3 header bits + 5 (HLIT) + 5 (HDIST) + 4 (HCLEN).
            if self.nbits < 17 {
                return Ok(());
            }

            self.header.hlit = (self.peak_bits(8) >> 3) as usize + 257;
            self.header.hdist = (self.peak_bits(13) >> 8) as usize + 1;
            self.header.hclen = (self.peak_bits(17) >> 13) as usize + 4;
            if self.header.hlit > 286 {
                return Err(DecompressionError::InvalidHlit);
            }
            if self.header.hdist > 30 {
                return Err(DecompressionError::InvalidHdist);
            }

            self.consume_bits(17);
            self.state = State::CodeLengthCodes;
            self.fixed_table = false;
            Ok(())
        }
        0b11 => Err(DecompressionError::InvalidBlockType),
        _ => unreachable!(),
    }
}

/// Reads the `hclen` 3-bit code-length-code lengths and builds `header.table` from them.
///
/// Returns `Ok(())` with the state unchanged until the buffered bits plus the remaining input
/// hold all `3 * hclen` bits.
fn read_code_length_codes(
    &mut self,
    remaining_input: &mut &[u8],
) -> Result<(), DecompressionError> {
    self.fill_buffer(remaining_input);
    if self.nbits as usize + remaining_input.len() * 8 < 3 * self.header.hclen {
        return Ok(());
    }

    let mut code_length_lengths = [0; 19];
    for i in 0..self.header.hclen {
        code_length_lengths[CLCL_ORDER[i]] = self.peak_bits(3) as u8;
        self.consume_bits(3);

        // We need to refill the buffer after reading 3 * 18 = 54 bits since the buffer holds
        // between 56 and 63 bits total.
        if i == 17 {
            self.fill_buffer(remaining_input);
        }
    }

    let mut codes = [0; 19];
    if !build_table(
        &code_length_lengths,
        &[],
        &mut codes,
        &mut self.header.table,
        &mut Vec::new(),
        false,
        false,
    ) {
        return Err(DecompressionError::BadCodeLengthHuffmanTree);
    }

    self.state = State::CodeLengths;
    self.header.num_lengths_read = 0;
    Ok(())
}

/// Decodes the `hlit + hdist` code lengths of a dynamic block, then builds the decoding
/// tables and switches to `State::CompressedData`.
///
/// Returns `Ok(())` early (progress is kept in `header.num_lengths_read`) when more input is
/// needed.
fn read_code_lengths(&mut self, remaining_input: &mut &[u8]) -> Result<(), DecompressionError> {
    let total_lengths = self.header.hlit + self.header.hdist;
    while self.header.num_lengths_read < total_lengths {
        self.fill_buffer(remaining_input);
        if self.nbits < 7 {
            return Ok(());
        }

        let code = self.peak_bits(7);
        let entry = self.header.table[code as usize];
        let length = (entry & 0x7) as u8;
        let symbol = (entry >> 16) as u8;

        debug_assert!(length != 0);
        match symbol {
            // A literal code length.
            0..=15 => {
                self.header.code_lengths[self.header.num_lengths_read] = symbol;
                self.header.num_lengths_read += 1;
                self.consume_bits(length);
            }
            // Repeat codes: 16 repeats the previous length, 17 and 18 repeat zero.
            16..=18 => {
                let (base_repeat, extra_bits) = match symbol {
                    16 => (3, 2),
                    17 => (3, 3),
                    18 => (11, 7),
                    _ => unreachable!(),
                };

                if self.nbits < length + extra_bits {
                    return Ok(());
                }

                let value = match symbol {
                    16 => {
                        self.header.code_lengths[self
                            .header
                            .num_lengths_read
                            .checked_sub(1)
                            .ok_or(DecompressionError::InvalidCodeLengthRepeat)?]
                        // TODO: is this right?
                    }
                    17 => 0,
                    18 => 0,
                    _ => unreachable!(),
                };

                let repeat =
                    (self.peak_bits(length + extra_bits) >> length) as usize + base_repeat;
                if self.header.num_lengths_read + repeat > total_lengths {
                    return Err(DecompressionError::InvalidCodeLengthRepeat);
                }

                for i in 0..repeat {
                    self.header.code_lengths[self.header.num_lengths_read + i] = value;
                }
                self.header.num_lengths_read += repeat;
                self.consume_bits(length + extra_bits);
            }
            _ => unreachable!(),
        }
    }

    // Move the distance code lengths to their fixed offset (288) and zero the unused slots.
    self.header
        .code_lengths
        .copy_within(self.header.hlit..total_lengths, 288);
    for i in self.header.hlit..288 {
        self.header.code_lengths[i] = 0;
    }
    for i in 288 + self.header.hdist..320 {
        self.header.code_lengths[i] = 0;
    }

    Self::build_tables(
        self.header.hlit,
        &self.header.code_lengths,
        &mut self.compression,
    )?;
    self.state = State::CompressedData;
    Ok(())
}

/// Builds the literal/length and distance decoding tables in `compression`.
///
/// `code_lengths` holds the literal/length code lengths in `[..hlit]` and the distance code
/// lengths in `[288..320]`.
fn build_tables(
    hlit: usize,
    code_lengths: &[u8],
    compression: &mut CompressedBlock,
) -> Result<(), DecompressionError> {
    // If there is no code assigned for the EOF symbol then the bitstream is invalid.
    if code_lengths[256] == 0 {
        // TODO: Return a dedicated error in this case.
        return Err(DecompressionError::BadLiteralLengthHuffmanTree);
    }

    let mut codes = [0; 288];
    compression.secondary_table.clear();
    if !huffman::build_table(
        &code_lengths[..hlit],
        &LITLEN_TABLE_ENTRIES,
        &mut codes[..hlit],
        &mut *compression.litlen_table,
        &mut compression.secondary_table,
        false,
        true,
    ) {
        // NOTE(review): this is the literal/length table, yet the error reported is
        // `BadCodeLengthHuffmanTree`; `BadLiteralLengthHuffmanTree` looks like the intended
        // variant. Left unchanged because callers may match on the current value — confirm.
        return Err(DecompressionError::BadCodeLengthHuffmanTree);
    }

    compression.eof_code = codes[256];
    compression.eof_mask = (1 << code_lengths[256]) - 1;
    compression.eof_bits = code_lengths[256];

    // Build the distance code table.
    // NOTE(review): unlike `secondary_table` above, `dist_secondary_table` is not cleared
    // here — confirm that `huffman::build_table` resets it.
    let lengths = &code_lengths[288..320];
    if lengths == [0; 32] {
        compression.dist_table.fill(0);
    } else {
        let mut dist_codes = [0; 32];
        if !huffman::build_table(
            lengths,
            &tables::DISTANCE_TABLE_ENTRIES,
            &mut dist_codes,
            &mut *compression.dist_table,
            &mut compression.dist_secondary_table,
            true,
            false,
        ) {
            return Err(DecompressionError::BadDistanceHuffmanTree);
        }
    }

    Ok(())
}

/// Decodes symbols of a compressed block into `output`, starting at `output_index`.
///
/// Returns the new output index. Stops when the block ends (the state then changes), when
/// `output` is full (any unfinished run is parked in `queued_rle` / `queued_backref`), or
/// when the buffered bits do not hold a complete symbol.
fn read_compressed(
    &mut self,
    remaining_input: &mut &[u8],
    output: &mut [u8],
    mut output_index: usize,
) -> Result<usize, DecompressionError> {
    // Fast decoding loop.
    //
    // This loop is optimized for speed and is the main decoding loop for the decompressor,
    // which is used when there are at least 8 bytes of input and output data available. It
    // assumes that the bitbuffer is full (nbits >= 56) and that litlen_entry has been loaded.
    //
    // These assumptions enable a few optimizations:
    // - Nearly all checks for nbits are avoided.
    // - Checking the input size is optimized out in the refill function call.
    // - The litlen_entry for the next loop iteration can be loaded in parallel with refilling
    //   the bit buffer. This is because when the input is non-empty, the bit buffer actually
    //   has 64-bits of valid data (even though nbits will be in 56..=63).
    self.fill_buffer(remaining_input);
    let mut litlen_entry = self.compression.litlen_table[(self.buffer & 0xfff) as usize];
    while self.state == State::CompressedData
        && output_index + 8 <= output.len()
        && remaining_input.len() >= 8
    {
        // First check whether the next symbol is a literal. This code does up to 2 additional
        // table lookups to decode more literals.
        let mut bits;
        let mut litlen_code_bits = litlen_entry as u8;
        if litlen_entry & LITERAL_ENTRY != 0 {
            let litlen_entry2 = self.compression.litlen_table
                [(self.buffer >> litlen_code_bits & 0xfff) as usize];
            let litlen_code_bits2 = litlen_entry2 as u8;
            let litlen_entry3 = self.compression.litlen_table
                [(self.buffer >> (litlen_code_bits + litlen_code_bits2) & 0xfff) as usize];
            let litlen_code_bits3 = litlen_entry3 as u8;
            let litlen_entry4 = self.compression.litlen_table[(self.buffer
                >> (litlen_code_bits + litlen_code_bits2 + litlen_code_bits3)
                & 0xfff)
                as usize];

            // Two bytes are always stored; only `advance_output_bytes` of them count.
            let advance_output_bytes = ((litlen_entry & 0xf00) >> 8) as usize;
            output[output_index] = (litlen_entry >> 16) as u8;
            output[output_index + 1] = (litlen_entry >> 24) as u8;
            output_index += advance_output_bytes;

            if litlen_entry2 & LITERAL_ENTRY != 0 {
                let advance_output_bytes2 = ((litlen_entry2 & 0xf00) >> 8) as usize;
                output[output_index] = (litlen_entry2 >> 16) as u8;
                output[output_index + 1] = (litlen_entry2 >> 24) as u8;
                output_index += advance_output_bytes2;

                if litlen_entry3 & LITERAL_ENTRY != 0 {
                    let advance_output_bytes3 = ((litlen_entry3 & 0xf00) >> 8) as usize;
                    output[output_index] = (litlen_entry3 >> 16) as u8;
                    output[output_index + 1] = (litlen_entry3 >> 24) as u8;
                    output_index += advance_output_bytes3;

                    litlen_entry = litlen_entry4;
                    self.consume_bits(litlen_code_bits + litlen_code_bits2 + litlen_code_bits3);
                    self.fill_buffer(remaining_input);
                    continue;
                } else {
                    self.consume_bits(litlen_code_bits + litlen_code_bits2);
                    litlen_entry = litlen_entry3;
                    litlen_code_bits = litlen_code_bits3;
                    self.fill_buffer(remaining_input);
                    bits = self.buffer;
                }
            } else {
                self.consume_bits(litlen_code_bits);
                bits = self.buffer;
                litlen_entry = litlen_entry2;
                litlen_code_bits = litlen_code_bits2;
                if self.nbits < 48 {
                    self.fill_buffer(remaining_input);
                }
            }
        } else {
            bits = self.buffer;
        }

        // The next symbol is either a 13+ bit literal, back-reference, or an EOF symbol.
        let (length_base, length_extra_bits, litlen_code_bits) =
            if litlen_entry & EXCEPTIONAL_ENTRY == 0 {
                (
                    litlen_entry >> 16,
                    (litlen_entry >> 8) as u8,
                    litlen_code_bits,
                )
            } else if litlen_entry & SECONDARY_TABLE_ENTRY != 0 {
                let secondary_table_index =
                    (litlen_entry >> 16) + ((bits >> 12) as u32 & (litlen_entry & 0xff));
                let secondary_entry =
                    self.compression.secondary_table[secondary_table_index as usize];
                let litlen_symbol = secondary_entry >> 4;
                let litlen_code_bits = (secondary_entry & 0xf) as u8;

                match litlen_symbol {
                    0..=255 => {
                        self.consume_bits(litlen_code_bits);
                        litlen_entry =
                            self.compression.litlen_table[(self.buffer & 0xfff) as usize];
                        self.fill_buffer(remaining_input);
                        output[output_index] = litlen_symbol as u8;
                        output_index += 1;
                        continue;
                    }
                    256 => {
                        self.consume_bits(litlen_code_bits);
                        self.state = match self.last_block {
                            true => State::Checksum,
                            false => State::BlockHeader,
                        };
                        break;
                    }
                    _ => (
                        LEN_SYM_TO_LEN_BASE[litlen_symbol as usize - 257] as u32,
                        LEN_SYM_TO_LEN_EXTRA[litlen_symbol as usize - 257],
                        litlen_code_bits,
                    ),
                }
            } else if litlen_code_bits == 0 {
                return Err(DecompressionError::InvalidLiteralLengthCode);
            } else {
                // End-of-block symbol.
                self.consume_bits(litlen_code_bits);
                self.state = match self.last_block {
                    true => State::Checksum,
                    false => State::BlockHeader,
                };
                break;
            };
        bits >>= litlen_code_bits;

        let length_extra_mask = (1 << length_extra_bits) - 1;
        let length = length_base as usize + (bits & length_extra_mask) as usize;
        bits >>= length_extra_bits;

        let dist_entry = self.compression.dist_table[(bits & 0x1ff) as usize];
        let (dist_base, dist_extra_bits, dist_code_bits) = if dist_entry & LITERAL_ENTRY != 0 {
            (
                (dist_entry >> 16) as u16,
                (dist_entry >> 8) as u8 & 0xf,
                dist_entry as u8,
            )
        } else if dist_entry >> 8 == 0 {
            return Err(DecompressionError::InvalidDistanceCode);
        } else {
            let secondary_table_index =
                (dist_entry >> 16) + ((bits >> 9) as u32 & (dist_entry & 0xff));
            let secondary_entry =
                self.compression.dist_secondary_table[secondary_table_index as usize];
            let dist_symbol = (secondary_entry >> 4) as usize;
            if dist_symbol >= 30 {
                return Err(DecompressionError::InvalidDistanceCode);
            }

            (
                DIST_SYM_TO_DIST_BASE[dist_symbol],
                DIST_SYM_TO_DIST_EXTRA[dist_symbol],
                (secondary_entry & 0xf) as u8,
            )
        };
        bits >>= dist_code_bits;

        let dist = dist_base as usize + (bits & ((1 << dist_extra_bits) - 1)) as usize;
        if dist > output_index {
            return Err(DecompressionError::DistanceTooFarBack);
        }

        self.consume_bits(
            litlen_code_bits + length_extra_bits + dist_code_bits + dist_extra_bits,
        );
        self.fill_buffer(remaining_input);
        litlen_entry = self.compression.litlen_table[(self.buffer & 0xfff) as usize];

        let copy_length = length.min(output.len() - output_index);
        if dist == 1 {
            // Run of a single repeated byte.
            let last = output[output_index - 1];
            output[output_index..][..copy_length].fill(last);

            if copy_length < length {
                self.queued_rle = Some((last, length - copy_length));
                output_index = output.len();
                break;
            }
        } else if output_index + length + 15 <= output.len() {
            // Enough slack in `output` to copy in 16-byte steps, possibly past `length`.
            let start = output_index - dist;
            output.copy_within(start..start + 16, output_index);

            if length > 16 || dist < 16 {
                for i in (0..length).step_by(dist.min(16)).skip(1) {
                    output.copy_within(start + i..start + i + 16, output_index + i);
                }
            }
        } else {
            if dist < copy_length {
                // Overlapping source and destination: copy byte by byte.
                for i in 0..copy_length {
                    output[output_index + i] = output[output_index + i - dist];
                }
            } else {
                output.copy_within(
                    output_index - dist..output_index + copy_length - dist,
                    output_index,
                )
            }

            if copy_length < length {
                self.queued_backref = Some((dist, length - copy_length));
                output_index = output.len();
                break;
            }
        }
        output_index += copy_length;
    }

    // Careful decoding loop.
    //
    // This loop processes the remaining input when we're too close to the end of the input or
    // output to use the fast loop.
    while let State::CompressedData = self.state {
        self.fill_buffer(remaining_input);
        if output_index == output.len() {
            break;
        }

        let mut bits = self.buffer;
        let litlen_entry = self.compression.litlen_table[(bits & 0xfff) as usize];
        let litlen_code_bits = litlen_entry as u8;

        if litlen_entry & LITERAL_ENTRY != 0 {
            // Fast path: the next symbol is <= 12 bits and a literal, the table specifies the
            // output bytes and we can directly write them to the output buffer.
            let advance_output_bytes = ((litlen_entry & 0xf00) >> 8) as usize;

            if self.nbits < litlen_code_bits {
                break;
            } else if output_index + 1 < output.len() {
                output[output_index] = (litlen_entry >> 16) as u8;
                output[output_index + 1] = (litlen_entry >> 24) as u8;
                output_index += advance_output_bytes;
                self.consume_bits(litlen_code_bits);
                continue;
            } else if output_index + advance_output_bytes == output.len() {
                debug_assert_eq!(advance_output_bytes, 1);
                output[output_index] = (litlen_entry >> 16) as u8;
                output_index += 1;
                self.consume_bits(litlen_code_bits);
                break;
            } else {
                // Two literals but room for only one: park the second for the next call.
                debug_assert_eq!(advance_output_bytes, 2);
                output[output_index] = (litlen_entry >> 16) as u8;
                self.queued_rle = Some(((litlen_entry >> 24) as u8, 1));
                output_index += 1;
                self.consume_bits(litlen_code_bits);
                break;
            }
        }

        let (length_base, length_extra_bits, litlen_code_bits) =
            if litlen_entry & EXCEPTIONAL_ENTRY == 0 {
                (
                    litlen_entry >> 16,
                    (litlen_entry >> 8) as u8,
                    litlen_code_bits,
                )
            } else if litlen_entry & SECONDARY_TABLE_ENTRY != 0 {
                let secondary_table_index =
                    (litlen_entry >> 16) + ((bits >> 12) as u32 & (litlen_entry & 0xff));
                let secondary_entry =
                    self.compression.secondary_table[secondary_table_index as usize];
                let litlen_symbol = secondary_entry >> 4;
                let litlen_code_bits = (secondary_entry & 0xf) as u8;

                if self.nbits < litlen_code_bits {
                    break;
                } else if litlen_symbol < 256 {
                    self.consume_bits(litlen_code_bits);
                    output[output_index] = litlen_symbol as u8;
                    output_index += 1;
                    continue;
                } else if litlen_symbol == 256 {
                    self.consume_bits(litlen_code_bits);
                    self.state = match self.last_block {
                        true => State::Checksum,
                        false => State::BlockHeader,
                    };
                    break;
                }

                (
                    LEN_SYM_TO_LEN_BASE[litlen_symbol as usize - 257] as u32,
                    LEN_SYM_TO_LEN_EXTRA[litlen_symbol as usize - 257],
                    litlen_code_bits,
                )
            } else if litlen_code_bits == 0 {
                return Err(DecompressionError::InvalidLiteralLengthCode);
            } else {
                // End-of-block symbol.
                if self.nbits < litlen_code_bits {
                    break;
                }
                self.consume_bits(litlen_code_bits);
                self.state = match self.last_block {
                    true => State::Checksum,
                    false => State::BlockHeader,
                };
                break;
            };
        bits >>= litlen_code_bits;

        let length_extra_mask = (1 << length_extra_bits) - 1;
        let length = length_base as usize + (bits & length_extra_mask) as usize;
        bits >>= length_extra_bits;

        let dist_entry = self.compression.dist_table[(bits & 0x1ff) as usize];
        let (dist_base, dist_extra_bits, dist_code_bits) = if dist_entry & LITERAL_ENTRY != 0 {
            (
                (dist_entry >> 16) as u16,
                (dist_entry >> 8) as u8 & 0xf,
                dist_entry as u8,
            )
        } else if self.nbits > litlen_code_bits + length_extra_bits + 9 {
            // Only judge a non-primary entry once more than 9 distance bits are buffered.
            if dist_entry >> 8 == 0 {
                return Err(DecompressionError::InvalidDistanceCode);
            }

            let secondary_table_index =
                (dist_entry >> 16) + ((bits >> 9) as u32 & (dist_entry & 0xff));
            let secondary_entry =
                self.compression.dist_secondary_table[secondary_table_index as usize];
            let dist_symbol = (secondary_entry >> 4) as usize;
            if dist_symbol >= 30 {
                return Err(DecompressionError::InvalidDistanceCode);
            }

            (
                DIST_SYM_TO_DIST_BASE[dist_symbol],
                DIST_SYM_TO_DIST_EXTRA[dist_symbol],
                (secondary_entry & 0xf) as u8,
            )
        } else {
            break;
        };
        bits >>= dist_code_bits;

        let dist = dist_base as usize + (bits & ((1 << dist_extra_bits) - 1)) as usize;
        let total_bits =
            litlen_code_bits + length_extra_bits + dist_code_bits + dist_extra_bits;

        if self.nbits < total_bits {
            break;
        } else if dist > output_index {
            return Err(DecompressionError::DistanceTooFarBack);
        }

        self.consume_bits(total_bits);

        let copy_length = length.min(output.len() - output_index);
        if dist == 1 {
            let last = output[output_index - 1];
            output[output_index..][..copy_length].fill(last);

            if copy_length < length {
                self.queued_rle = Some((last, length - copy_length));
                output_index = output.len();
                break;
            }
        } else if output_index + length + 15 <= output.len() {
            let start = output_index - dist;
            output.copy_within(start..start + 16, output_index);

            if length > 16 || dist < 16 {
                for i in (0..length).step_by(dist.min(16)).skip(1) {
                    output.copy_within(start + i..start + i + 16, output_index + i);
                }
            }
        } else {
            if dist < copy_length {
                for i in 0..copy_length {
                    output[output_index + i] = output[output_index + i - dist];
                }
            } else {
                output.copy_within(
                    output_index - dist..output_index + copy_length - dist,
                    output_index,
                )
            }

            if copy_length < length {
                self.queued_backref = Some((dist, length - copy_length));
                output_index = output.len();
                break;
            }
        }
        output_index += copy_length;
    }

    // If the very next symbol is the end-of-block code, consume it now so the state moves on
    // even when the loops above stopped (for example because `output` is full).
    if self.state == State::CompressedData
        && self.queued_backref.is_none()
        && self.queued_rle.is_none()
        && self.nbits >= 15
        && self.peak_bits(15) as u16 & self.compression.eof_mask == self.compression.eof_code
    {
        self.consume_bits(self.compression.eof_bits);
        self.state = match self.last_block {
            true => State::Checksum,
            false => State::BlockHeader,
        };
    }

    Ok(output_index)
}

/// Decompresses a chunk of data.
///
/// Returns the number of bytes read from `input` and the number of bytes written to `output`,
/// or an error if the deflate stream is not valid. `input` is the compressed data. `output` is
/// the buffer to write the decompressed data to, starting at index `output_position`.
/// `end_of_input` should be `true` when no further input will be supplied after this call.
///
/// The contents of `output` after `output_position` are ignored. However, this function may
/// write additional data to `output` past what is indicated by the return value.
///
/// When this function returns `Ok`, at least one of the following is true:
/// - The input is fully consumed.
/// - The output is full but there are more bytes to output.
/// - The deflate stream is complete (and `is_done` will return true).
///
/// # Errors
///
/// Returns `DecompressionError::InsufficientInput` when `end_of_input` is `true` but the
/// stream is not complete and `output` is not full, and other `DecompressionError` variants
/// when the stream is invalid.
///
/// # Panics
///
/// This function will panic if `output_position` is out of bounds.
pub fn read(
    &mut self,
    input: &[u8],
    output: &mut [u8],
    output_position: usize,
    end_of_input: bool,
) -> Result<(usize, usize), DecompressionError> {
    if let State::Done = self.state {
        return Ok((0, 0));
    }

    assert!(output_position <= output.len());

    let mut remaining_input = input;
    let mut output_index = output_position;

    // First emit whatever did not fit into the output buffer on the previous call.
    if let Some((data, len)) = self.queued_rle.take() {
        let n = len.min(output.len() - output_index);
        output[output_index..][..n].fill(data);
        output_index += n;
        if n < len {
            self.queued_rle = Some((data, len - n));
            // This early return bypasses the checksum update at the end of the function, so
            // account for the bytes written here; otherwise they would never be checksummed.
            if !self.ignore_adler32 {
                self.checksum.write(&output[output_position..output_index]);
            }
            return Ok((0, n));
        }
    }
    if let Some((dist, len)) = self.queued_backref.take() {
        let n = len.min(output.len() - output_index);
        for i in 0..n {
            output[output_index + i] = output[output_index + i - dist];
        }
        output_index += n;
        if n < len {
            self.queued_backref = Some((dist, len - n));
            // Same as above: checksum the bytes written before returning early.
            if !self.ignore_adler32 {
                self.checksum.write(&output[output_position..output_index]);
            }
            return Ok((0, n));
        }
    }

    // Main decoding state machine. Runs until a step leaves the state unchanged, which
    // means it needs more input or more output space.
    let mut last_state = None;
    while last_state != Some(self.state) {
        last_state = Some(self.state);
        match self.state {
            State::ZlibHeader => {
                self.fill_buffer(&mut remaining_input);
                if self.nbits < 16 {
                    break;
                }

                let input0 = self.peak_bits(8);
                let input1 = self.peak_bits(16) >> 8 & 0xff;
                if input0 & 0x0f != 0x08
                    || (input0 & 0xf0) > 0x70
                    || input1 & 0x20 != 0
                    || (input0 << 8 | input1) % 31 != 0
                {
                    return Err(DecompressionError::BadZlibHeader);
                }

                self.consume_bits(16);
                self.state = State::BlockHeader;
            }
            State::BlockHeader => {
                self.read_block_header(&mut remaining_input)?;
            }
            State::CodeLengthCodes => {
                self.read_code_length_codes(&mut remaining_input)?;
            }
            State::CodeLengths => {
                self.read_code_lengths(&mut remaining_input)?;
            }
            State::CompressedData => {
                output_index =
                    self.read_compressed(&mut remaining_input, output, output_index)?
            }
            State::UncompressedData => {
                // Drain any bytes from our buffer.
                debug_assert_eq!(self.nbits % 8, 0);
                while self.nbits > 0
                    && self.uncompressed_bytes_left > 0
                    && output_index < output.len()
                {
                    output[output_index] = self.peak_bits(8) as u8;
                    self.consume_bits(8);
                    output_index += 1;
                    self.uncompressed_bytes_left -= 1;
                }
                // Buffer may contain one additional byte. Clear it to avoid confusion.
                if self.nbits == 0 {
                    self.buffer = 0;
                }

                // Copy subsequent bytes directly from the input.
                let copy_bytes = (self.uncompressed_bytes_left as usize)
                    .min(remaining_input.len())
                    .min(output.len() - output_index);
                output[output_index..][..copy_bytes]
                    .copy_from_slice(&remaining_input[..copy_bytes]);
                remaining_input = &remaining_input[copy_bytes..];
                output_index += copy_bytes;
                self.uncompressed_bytes_left -= copy_bytes as u16;

                if self.uncompressed_bytes_left == 0 {
                    self.state = if self.last_block {
                        State::Checksum
                    } else {
                        State::BlockHeader
                    };
                }
            }
            State::Checksum => {
                self.fill_buffer(&mut remaining_input);

                // The 32-bit checksum starts at the next byte boundary.
                let align_bits = self.nbits % 8;
                if self.nbits >= 32 + align_bits {
                    self.checksum.write(&output[output_position..output_index]);
                    if align_bits != 0 {
                        self.consume_bits(align_bits);
                    }
                    #[cfg(not(fuzzing))]
                    if !self.ignore_adler32
                        && (self.peak_bits(32) as u32).swap_bytes() != self.checksum.finish()
                    {
                        return Err(DecompressionError::WrongChecksum);
                    }
                    self.state = State::Done;
                    self.consume_bits(32);
                    break;
                }
            }
            State::Done => unreachable!(),
        }
    }

    // When the stream finished, the output of this call was already checksummed in the
    // `Checksum` state above.
    if !self.ignore_adler32 && self.state != State::Done {
        self.checksum.write(&output[output_position..output_index]);
    }

    if self.state == State::Done || !end_of_input || output_index == output.len() {
        let input_left = remaining_input.len();
        Ok((input.len() - input_left, output_index - output_position))
    } else {
        Err(DecompressionError::InsufficientInput)
    }
}

/// Returns true if the decompressor has finished decompressing the input.
pub fn is_done(&self) -> bool {
    self.state == State::Done
}
}

/// Decompress the given data.
pub fn decompress_to_vec(input: &[u8]) -> Result, DecompressionError> { match decompress_to_vec_bounded(input, usize::MAX) { Ok(output) => Ok(output), Err(BoundedDecompressionError::DecompressionError { inner }) => Err(inner), Err(BoundedDecompressionError::OutputTooLarge { .. }) => { unreachable!("Impossible to allocate more than isize::MAX bytes") } } } /// An error encountered while decompressing a deflate stream given a bounded maximum output. pub enum BoundedDecompressionError { /// The input is not a valid deflate stream. DecompressionError { /// The underlying error. inner: DecompressionError, }, /// The output is too large. OutputTooLarge { /// The output decoded so far. partial_output: Vec, }, } impl From for BoundedDecompressionError { fn from(inner: DecompressionError) -> Self { BoundedDecompressionError::DecompressionError { inner } } } /// Decompress the given data, returning an error if the output is larger than /// `maxlen` bytes. pub fn decompress_to_vec_bounded( input: &[u8], maxlen: usize, ) -> Result, BoundedDecompressionError> { let mut decoder = Decompressor::new(); let mut output = vec![0; 1024.min(maxlen)]; let mut input_index = 0; let mut output_index = 0; loop { let (consumed, produced) = decoder.read(&input[input_index..], &mut output, output_index, true)?; input_index += consumed; output_index += produced; if decoder.is_done() || output_index == maxlen { break; } output.resize((output_index + 32 * 1024).min(maxlen), 0); } output.resize(output_index, 0); if decoder.is_done() { Ok(output) } else { Err(BoundedDecompressionError::OutputTooLarge { partial_output: output, }) } } #[cfg(test)] mod tests { use crate::tables::{LENGTH_TO_LEN_EXTRA, LENGTH_TO_SYMBOL}; use super::*; use rand::Rng; fn roundtrip(data: &[u8]) { let compressed = crate::compress_to_vec(data); let decompressed = decompress_to_vec(&compressed).unwrap(); assert_eq!(&decompressed, data); } fn roundtrip_miniz_oxide(data: &[u8]) { let compressed = 
miniz_oxide::deflate::compress_to_vec_zlib(data, 3); let decompressed = decompress_to_vec(&compressed).unwrap(); assert_eq!(decompressed.len(), data.len()); for (i, (a, b)) in decompressed.chunks(1).zip(data.chunks(1)).enumerate() { assert_eq!(a, b, "chunk {}..{}", i, i + 1); } assert_eq!(&decompressed, data); } #[allow(unused)] fn compare_decompression(data: &[u8]) { // let decompressed0 = flate2::read::ZlibDecoder::new(std::io::Cursor::new(&data)) // .bytes() // .collect::, _>>() // .unwrap(); let decompressed = decompress_to_vec(data).unwrap(); let decompressed2 = miniz_oxide::inflate::decompress_to_vec_zlib(data).unwrap(); for i in 0..decompressed.len().min(decompressed2.len()) { if decompressed[i] != decompressed2[i] { panic!( "mismatch at index {} {:?} {:?}", i, &decompressed[i.saturating_sub(1)..(i + 16).min(decompressed.len())], &decompressed2[i.saturating_sub(1)..(i + 16).min(decompressed2.len())] ); } } if decompressed != decompressed2 { panic!( "length mismatch {} {} {:x?}", decompressed.len(), decompressed2.len(), &decompressed2[decompressed.len()..][..16] ); } //assert_eq!(decompressed, decompressed2); } #[test] fn tables() { for (i, &bits) in LEN_SYM_TO_LEN_EXTRA.iter().enumerate() { let len_base = LEN_SYM_TO_LEN_BASE[i]; for j in 0..(1 << bits) { if i == 27 && j == 31 { continue; } assert_eq!(LENGTH_TO_LEN_EXTRA[len_base + j - 3], bits, "{} {}", i, j); assert_eq!( LENGTH_TO_SYMBOL[len_base + j - 3], i as u16 + 257, "{} {}", i, j ); } } } #[test] fn fixed_tables() { let mut compression = CompressedBlock { litlen_table: Box::new([0; 4096]), dist_table: Box::new([0; 512]), secondary_table: Vec::new(), dist_secondary_table: Vec::new(), eof_code: 0, eof_mask: 0, eof_bits: 0, }; Decompressor::build_tables(288, &FIXED_CODE_LENGTHS, &mut compression).unwrap(); assert_eq!(compression.litlen_table[..512], FIXED_LITLEN_TABLE); assert_eq!(compression.dist_table[..32], FIXED_DIST_TABLE); } #[test] fn it_works() { roundtrip(b"Hello world!"); } #[test] fn 
/// Round-trips several buffers of low-entropy random bytes (values `0..5`) through
/// `miniz_oxide` compression and this crate's decompressor.
#[test]
fn random() {
    const ROUNDS: usize = 10;
    const BUFFER_LEN: usize = 50000;

    let mut rng = rand::thread_rng();
    let mut buffer = vec![0u8; BUFFER_LEN];
    for _round in 0..ROUNDS {
        // Refill the whole buffer in place, one RNG draw per byte.
        buffer
            .iter_mut()
            .for_each(|slot| *slot = rng.gen::<u8>() % 5);
        // Printed so the generated input is part of the test's output.
        println!("Random data: {:?}", buffer);
        roundtrip_miniz_oxide(&buffer);
    }
}
for _ in 0..10 { println!("compressed len: {}", compressed.len()); compressed.splice(2..2, [0u8, 0, 0, 0xff, 0xff].into_iter()); } // Ensure that the full input is decompressed, regardless of whether // `end_of_input` is set. for end_of_input in [true, false] { let mut decompressor = Decompressor::new(); let (input_consumed, output_written) = decompressor .read(&compressed, &mut [], 0, end_of_input) .unwrap(); assert!(decompressor.is_done()); assert_eq!(input_consumed, compressed.len()); assert_eq!(output_written, 0); } } mod test_utils; use tables::FIXED_CODE_LENGTHS; use test_utils::{decompress_by_chunks, TestDecompressionError}; fn verify_no_sensitivity_to_input_chunking( input: &[u8], ) -> Result, TestDecompressionError> { let r_whole = decompress_by_chunks(input, vec![input.len()], false); let r_bytewise = decompress_by_chunks(input, std::iter::repeat(1), false); assert_eq!(r_whole, r_bytewise); r_whole // Returning an arbitrary result, since this is equal to `r_bytewise`. } /// This is a regression test found by the `buf_independent` fuzzer from the `png` crate. When /// this test case was found, the results were unexpectedly different when 1) decompressing the /// whole input (successful result) vs 2) decompressing byte-by-byte /// (`Err(InvalidDistanceCode)`). #[test] fn test_input_chunking_sensitivity_when_handling_distance_codes() { let result = verify_no_sensitivity_to_input_chunking(include_bytes!( "../tests/input-chunking-sensitivity-example1.zz" )) .unwrap(); assert_eq!(result.len(), 281); assert_eq!(simd_adler32::adler32(&result.as_slice()), 751299); } /// This is a regression test found by the `inflate_bytewise3` fuzzer from the `fdeflate` /// crate. When this test case was found, the results were unexpectedly different when 1) /// decompressing the whole input (`Err(DistanceTooFarBack)`) vs 2) decompressing byte-by-byte /// (successful result)`). 
#[test] fn test_input_chunking_sensitivity_when_no_end_of_block_symbol_example1() { let err = verify_no_sensitivity_to_input_chunking(include_bytes!( "../tests/input-chunking-sensitivity-example2.zz" )) .unwrap_err(); assert_eq!( err, TestDecompressionError::ProdError(DecompressionError::BadLiteralLengthHuffmanTree) ); } /// This is a regression test found by the `inflate_bytewise3` fuzzer from the `fdeflate` /// crate. When this test case was found, the results were unexpectedly different when 1) /// decompressing the whole input (`Err(InvalidDistanceCode)`) vs 2) decompressing byte-by-byte /// (successful result)`). #[test] fn test_input_chunking_sensitivity_when_no_end_of_block_symbol_example2() { let err = verify_no_sensitivity_to_input_chunking(include_bytes!( "../tests/input-chunking-sensitivity-example3.zz" )) .unwrap_err(); assert_eq!( err, TestDecompressionError::ProdError(DecompressionError::BadLiteralLengthHuffmanTree) ); } } fdeflate-0.3.7/src/huffman.rs000064400000000000000000000153121046102023000141700ustar 00000000000000use crate::decompress::{EXCEPTIONAL_ENTRY, LITERAL_ENTRY, SECONDARY_TABLE_ENTRY}; /// Return the next code, or if the codeword is already all ones (which is the final code), return /// the same code again. fn next_codeword(mut codeword: u16, table_size: u16) -> u16 { if codeword == table_size - 1 { return codeword; } let adv = (u16::BITS - 1) - (codeword ^ (table_size - 1)).leading_zeros(); let bit = 1 << adv; codeword &= bit - 1; codeword |= bit; codeword } #[allow(clippy::needless_range_loop)] pub fn build_table( lengths: &[u8], entries: &[u32], codes: &mut [u16], primary_table: &mut [u32], secondary_table: &mut Vec, is_distance_table: bool, double_literal: bool, ) -> bool { // Count the number of symbols with each code length. let mut histogram = [0; 16]; for &length in lengths { histogram[length as usize] += 1; } // Determine the maximum code length. 
let mut max_length = 15; while max_length > 1 && histogram[max_length] == 0 { max_length -= 1; } // Handle zero and one symbol huffman codes (which are only allowed for distance codes). if is_distance_table { if max_length == 0 { primary_table.fill(0); secondary_table.clear(); return true; } else if max_length == 1 && histogram[1] == 1 { let symbol = lengths.iter().position(|&l| l == 1).unwrap(); codes[symbol] = 0; let entry = entries .get(symbol) .cloned() .unwrap_or((symbol as u32) << 16) | 1; for chunk in primary_table.chunks_mut(2) { chunk[0] = entry; chunk[1] = 0; } return true; } } // Sort symbols by code length. Given the histogram, we can determine the starting offset // for each code length. let mut offsets = [0; 16]; let mut codespace_used = 0; offsets[1] = histogram[0]; for i in 1..max_length { offsets[i + 1] = offsets[i] + histogram[i]; codespace_used = (codespace_used << 1) + histogram[i]; } codespace_used = (codespace_used << 1) + histogram[max_length]; // Check that the provided lengths form a valid Huffman tree. if codespace_used != (1 << max_length) { return false; } // Sort the symbols by code length. let mut next_index = offsets; let mut sorted_symbols = [0; 288]; for symbol in 0..lengths.len() { let length = lengths[symbol]; sorted_symbols[next_index[length as usize]] = symbol; next_index[length as usize] += 1; } let mut codeword = 0u16; let mut i = histogram[0]; // Populate the primary decoding table let primary_table_bits = primary_table.len().ilog2() as usize; let primary_table_mask = (1 << primary_table_bits) - 1; for length in 1..=primary_table_bits { let current_table_end = 1 << length; // Loop over all symbols with the current code length and set their table entries. 
for _ in 0..histogram[length] { let symbol = sorted_symbols[i]; i += 1; primary_table[codeword as usize] = entries .get(symbol) .cloned() .unwrap_or((symbol as u32) << 16) | length as u32; codes[symbol] = codeword; codeword = next_codeword(codeword, current_table_end as u16); } if double_literal { for len1 in 1..(length - 1) { let len2 = length - len1; for sym1_index in offsets[len1]..next_index[len1] { for sym2_index in offsets[len2]..next_index[len2] { let sym1 = sorted_symbols[sym1_index]; let sym2 = sorted_symbols[sym2_index]; if sym1 < 256 && sym2 < 256 { let codeword1 = codes[sym1]; let codeword2 = codes[sym2]; let codeword = codeword1 | (codeword2 << len1); let entry = (sym1 as u32) << 16 | (sym2 as u32) << 24 | LITERAL_ENTRY | (2 << 8); primary_table[codeword as usize] = entry | (length as u32); } } } } } // If we aren't at the maximum table size, double the size of the table. if length < primary_table_bits { primary_table.copy_within(0..current_table_end, current_table_end); } } // Populate the secondary decoding table. secondary_table.clear(); if max_length > primary_table_bits { let mut subtable_start = 0; let mut subtable_prefix = !0; for length in (primary_table_bits + 1)..=max_length { let subtable_size = 1 << (length - primary_table_bits); for _ in 0..histogram[length] { // If the codeword's prefix doesn't match the current subtable, create a new // subtable. if codeword & primary_table_mask != subtable_prefix { subtable_prefix = codeword & primary_table_mask; subtable_start = secondary_table.len(); primary_table[subtable_prefix as usize] = ((subtable_start as u32) << 16) | EXCEPTIONAL_ENTRY | SECONDARY_TABLE_ENTRY | (subtable_size as u32 - 1); secondary_table.resize(subtable_start + subtable_size, 0); } // Lookup the symbol. let symbol = sorted_symbols[i]; i += 1; // Insert the symbol into the secondary table and advance to the next codeword. 
codes[symbol] = codeword; secondary_table[subtable_start + (codeword >> primary_table_bits) as usize] = ((symbol as u16) << 4) | (length as u16); codeword = next_codeword(codeword, 1 << length); } // If there are more codes with the same subtable prefix, extend the subtable. if length < max_length && codeword & primary_table_mask == subtable_prefix { secondary_table.extend_from_within(subtable_start..); let subtable_size = secondary_table.len() - subtable_start; primary_table[subtable_prefix as usize] = ((subtable_start as u32) << 16) | EXCEPTIONAL_ENTRY | SECONDARY_TABLE_ENTRY | (subtable_size as u32 - 1); } } } true } fdeflate-0.3.7/src/lib.rs000064400000000000000000000074541046102023000133220ustar 00000000000000//! A fast deflate implementation. //! //! This crate contains an optimized implementation of the deflate algorithm tuned to compress PNG //! images. It is compatible with standard zlib, but make a bunch of simplifying assumptions that //! drastically improve encoding performance: //! //! - Exactly one block per deflate stream. //! - No distance codes except for run length encoding of zeros. //! - A single fixed huffman tree trained on a large corpus of PNG images. //! - All huffman codes are 12 bits or less. //! //! It also contains a fast decompressor that supports arbitrary zlib streams but does especially //! well on streams that meet the above assumptions. //! //! # Inspiration //! //! The algorithms in this crate take inspiration from multiple sources: //! * [fpnge](https://github.com/veluca93/fpnge) //! * [zune-inflate](https://github.com/etemesi254/zune-image/tree/main/zune-inflate) //! 
/// Build a length limited huffman tree.
///
/// Dynamic programming algorithm from fpnge.
///
/// For every symbol `i` a code length in `min_limit[i]..=max_limit[i]` is chosen such
/// that the lengths use the code space exactly and the total cost
/// `sum(freqs[i] * length[i])` is minimal. The result is written to `calculated_nbits`.
///
/// All four slices must have the same length and every `min_limit[i]` must be at least 1
/// and no larger than `max_limit[i]` (checked with debug assertions). An empty alphabet
/// is accepted and leaves `calculated_nbits` untouched.
///
/// # Panics
///
/// Panics if the backtracking step runs out of code space, which happens when the
/// limits admit no complete code.
#[doc(hidden)]
pub fn compute_code_lengths(
    freqs: &[u64],
    min_limit: &[u8],
    max_limit: &[u8],
    calculated_nbits: &mut [u8],
) {
    debug_assert_eq!(freqs.len(), min_limit.len());
    debug_assert_eq!(freqs.len(), max_limit.len());
    debug_assert_eq!(freqs.len(), calculated_nbits.len());
    let len = freqs.len();

    for i in 0..len {
        debug_assert!(min_limit[i] >= 1);
        debug_assert!(min_limit[i] <= max_limit[i]);
    }

    // Nothing to assign; also avoids taking the maximum of an empty slice below.
    if len == 0 {
        return;
    }

    // Code space is measured in units of 2^-precision: a code of `bits` bits occupies
    // `1 << (precision - bits)` units and a complete code occupies all `num_patterns`.
    let precision = *max_limit
        .iter()
        .max()
        .expect("max_limit holds one entry per symbol");
    let num_patterns: usize = 1 << precision;

    // Cost of coding `sym` with `bits` bits; saturating, like the additions below, so
    // that very large frequencies cannot overflow.
    let cost = |sym: usize, bits: u8| freqs[sym].saturating_mul(u64::from(bits));

    // dynp[index(sym, off)] is the cheapest way to give lengths to the first `sym`
    // symbols while occupying exactly `off` units of code space (u64::MAX = impossible).
    let mut dynp = vec![u64::MAX; (num_patterns + 1) * (len + 1)];
    let index = |sym: usize, off: usize| sym * (num_patterns + 1) + off;

    dynp[index(0, 0)] = 0;
    for sym in 0..len {
        for bits in min_limit[sym]..=max_limit[sym] {
            let off_delta: usize = 1 << (precision - bits);
            for off in 0..=num_patterns.saturating_sub(off_delta) {
                dynp[index(sym + 1, off + off_delta)] = dynp[index(sym, off)]
                    .saturating_add(cost(sym, bits))
                    .min(dynp[index(sym + 1, off + off_delta)]);
            }
        }
    }

    // Walk backwards from the "everything assigned, code space full" state and recover
    // the first length choice that reproduces each optimal cost.
    let mut sym = len;
    let mut off = num_patterns;

    while sym > 0 {
        sym -= 1;
        assert!(off > 0);

        for bits in min_limit[sym]..=max_limit[sym] {
            let off_delta: usize = 1 << (precision - bits);
            if off_delta <= off
                && dynp[index(sym + 1, off)]
                    == dynp[index(sym, off - off_delta)].saturating_add(cost(sym, bits))
            {
                off -= off_delta;
                calculated_nbits[sym] = bits;
                break;
            }
        }
    }

    for i in 0..len {
        debug_assert!(calculated_nbits[i] >= min_limit[i]);
        debug_assert!(calculated_nbits[i] <= max_limit[i]);
    }
}

/// Assign canonical Huffman codewords to the given code lengths.
///
/// Codes are handed out in order of increasing length and, within one length, in
/// symbol order. Each codeword is stored bit-reversed within its own length. Symbols
/// with length `0` keep the codeword `0`.
///
/// Returns `None` unless the lengths (all at most 16) fill the code space exactly.
const fn compute_codes<const NSYMS: usize>(lengths: &[u8; NSYMS]) -> Option<[u16; NSYMS]> {
    let mut codes = [0u16; NSYMS];

    let mut code = 0u32;

    let mut len = 1;
    while len <= 16 {
        let mut i = 0;
        while i < lengths.len() {
            if lengths[i] == len {
                codes[i] = (code as u16).reverse_bits() >> (16 - len);
                code += 1;
            }
            i += 1;
        }
        code <<= 1;
        len += 1;
    }

    // A complete code ends with `code == 1 << 16` after the 16-bit level, which the
    // final shift above turns into `2 << 16`.
    if code == 2 << 16 {
        Some(codes)
    } else {
        None
    }
}
panic!("HUFFMAN_LENGTHS is invalid"), }; /// Length code for length values (derived from deflate spec). pub(crate) const LENGTH_TO_SYMBOL: [u16; 256] = [ 257, 258, 259, 260, 261, 262, 263, 264, 265, 265, 266, 266, 267, 267, 268, 268, 269, 269, 269, 269, 270, 270, 270, 270, 271, 271, 271, 271, 272, 272, 272, 272, 273, 273, 273, 273, 273, 273, 273, 273, 274, 274, 274, 274, 274, 274, 274, 274, 275, 275, 275, 275, 275, 275, 275, 275, 276, 276, 276, 276, 276, 276, 276, 276, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 285, ]; /// Number of extra bits for length values (derived from deflate spec). 
pub(crate) const LENGTH_TO_LEN_EXTRA: [u8; 256] = [ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, ]; pub(crate) const BITMASKS: [u32; 17] = [ 0x0000, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F, 0x003F, 0x007F, 0x00FF, 0x01FF, 0x03FF, 0x07FF, 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, ]; /// Order of the length code length alphabet (derived from deflate spec). pub(crate) const CLCL_ORDER: [usize; 19] = [ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15, ]; /// Number of extra bits for each length code (derived from deflate spec). pub(crate) const LEN_SYM_TO_LEN_EXTRA: [u8; 29] = [ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, ]; /// The base length for each length code (derived from deflate spec). pub(crate) const LEN_SYM_TO_LEN_BASE: [usize; 29] = [ 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, ]; /// Number of extra bits for each distance code (derived from deflate spec.) pub(crate) const DIST_SYM_TO_DIST_EXTRA: [u8; 30] = [ 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, ]; /// The base distance for each distance code (derived from deflate spec). 
/// Per-symbol entry templates for the literal/length decoding table.
///
/// The main litlen_table uses a 12-bit input to lookup the meaning of the symbol. Each
/// table entry takes one of the following forms:
///
///   aaaaaaaa_bbbbbbbb_1000yyyy_0000xxxx  x = input_advance_bits, y = output_advance_bytes  (literal)
///   0000000z_zzzzzzzz_00000yyy_0000xxxx  x = input_advance_bits, y = extra_bits, z = length_base (length)
///   00000000_00000000_01000000_0000xxxx  x = input_advance_bits (EOF)
///   0000xxxx_xxxxxxxx_01100000_00000000  x = secondary_table_index
///   00000000_00000000_01000000_00000000  invalid code
///
/// This constant only provides the symbol-dependent part of the first three forms; the
/// `x` field is left zero here:
///
/// * symbols `0..256` (literals): the byte value in bits 16.., `LITERAL_ENTRY`, and an
///   output advance of one byte;
/// * symbols `257..286` (lengths): the base length in bits 16.. and the number of extra
///   bits in bits 8..;
/// * symbols 256, 286 and 287: plain `EXCEPTIONAL_ENTRY`.
///
/// NOTE(review): `huffman::build_table` stores `subtable_size - 1` in the low bits of
/// secondary-table entries, which the fourth layout line above shows as zero — confirm
/// against the decoder and update the diagram if needed.
pub(crate) const LITLEN_TABLE_ENTRIES: [u32; 288] = {
    let mut entries = [EXCEPTIONAL_ENTRY; 288];
    // Literal symbols.
    let mut i = 0;
    while i < 256 {
        entries[i] = (i as u32) << 16 | LITERAL_ENTRY | (1 << 8);
        i += 1;
    }

    // Length symbols; 256 is deliberately skipped and keeps EXCEPTIONAL_ENTRY.
    let mut i = 257;
    while i < 286 {
        entries[i] = (LEN_SYM_TO_LEN_BASE[i - 257] as u32) << 16
            | (LEN_SYM_TO_LEN_EXTRA[i - 257] as u32) << 8;
        i += 1;
    }
    entries
};
/// /// 00000000_00000000_00000000_00000000 symbol is more than 9 bits /// zzzzzzzz_zzzzzzzz_0000yyyy_0000xxxx x = input_advance_bits, y = extra_bits, z = distance_base pub(crate) const DISTANCE_TABLE_ENTRIES: [u32; 32] = { let mut entries = [0; 32]; let mut i = 0; while i < 30 { entries[i] = (DIST_SYM_TO_DIST_BASE[i] as u32) << 16 | (DIST_SYM_TO_DIST_EXTRA[i] as u32) << 8 | LITERAL_ENTRY; i += 1; } entries }; pub(crate) const FIXED_LITLEN_TABLE: [u32; 512] = [ 16391, 5275912, 1081608, 7537672, 2032135, 7373064, 3178760, 12615945, 655367, 6324488, 2130184, 10518793, 33032, 8421640, 4227336, 14713097, 393223, 5800200, 1605896, 9470217, 3867399, 7897352, 3703048, 13664521, 1114375, 6848776, 2654472, 11567369, 557320, 8945928, 4751624, 15761673, 262151, 5538056, 1343752, 14877960, 2818823, 7635208, 3440904, 13140233, 852231, 6586632, 2392328, 11043081, 295176, 8683784, 4489480, 15237385, 524295, 6062344, 1868040, 9994505, 5440519, 8159496, 3965192, 14188809, 1507847, 7110920, 2916616, 12091657, 819464, 9208072, 5013768, 16285961, 196615, 5406984, 1212680, 10683656, 2294535, 7504136, 3309832, 12878089, 721159, 6455560, 2261256, 10780937, 164104, 8552712, 4358408, 14975241, 458759, 5931272, 1736968, 9732361, 4391943, 8028424, 3834120, 13926665, 1245703, 6979848, 2785544, 11829513, 688392, 9077000, 4882696, 16023817, 327687, 5669128, 1474824, 16392, 3343111, 7766280, 3571976, 13402377, 983303, 6717704, 2523400, 11305225, 426248, 8814856, 4620552, 15499529, 589831, 6193416, 1999112, 10256649, 6489095, 8290568, 4096264, 14450953, 1769991, 7241992, 3047688, 12353801, 950536, 9339144, 5144840, 16548105, 16391, 5341448, 1147144, 8586504, 2032135, 7438600, 3244296, 12747017, 655367, 6390024, 2195720, 10649865, 98568, 8487176, 4292872, 14844169, 393223, 5865736, 1671432, 9601289, 3867399, 7962888, 3768584, 13795593, 1114375, 6914312, 2720008, 11698441, 622856, 9011464, 4817160, 15892745, 262151, 5603592, 1409288, 16908296, 2818823, 7700744, 3506440, 13271305, 852231, 6652168, 
2457864, 11174153, 360712, 8749320, 4555016, 15368457, 524295, 6127880, 1933576, 10125577, 5440519, 8225032, 4030728, 14319881, 1507847, 7176456, 2982152, 12222729, 885000, 9273608, 5079304, 16417033, 196615, 5472520, 1278216, 12780808, 2294535, 7569672, 3375368, 13009161, 721159, 6521096, 2326792, 10912009, 229640, 8618248, 4423944, 15106313, 458759, 5996808, 1802504, 9863433, 4391943, 8093960, 3899656, 14057737, 1245703, 7045384, 2851080, 11960585, 753928, 9142536, 4948232, 16154889, 327687, 5734664, 1540360, 16392, 3343111, 7831816, 3637512, 13533449, 983303, 6783240, 2588936, 11436297, 491784, 8880392, 4686088, 15630601, 589831, 6258952, 2064648, 10387721, 6489095, 8356104, 4161800, 14582025, 1769991, 7307528, 3113224, 12484873, 1016072, 9404680, 5210376, 16679177, 16391, 5275912, 1081608, 7537672, 2032135, 7373064, 3178760, 12681481, 655367, 6324488, 2130184, 10584329, 33032, 8421640, 4227336, 14778633, 393223, 5800200, 1605896, 9535753, 3867399, 7897352, 3703048, 13730057, 1114375, 6848776, 2654472, 11632905, 557320, 8945928, 4751624, 15827209, 262151, 5538056, 1343752, 14877960, 2818823, 7635208, 3440904, 13205769, 852231, 6586632, 2392328, 11108617, 295176, 8683784, 4489480, 15302921, 524295, 6062344, 1868040, 10060041, 5440519, 8159496, 3965192, 14254345, 1507847, 7110920, 2916616, 12157193, 819464, 9208072, 5013768, 16351497, 196615, 5406984, 1212680, 10683656, 2294535, 7504136, 3309832, 12943625, 721159, 6455560, 2261256, 10846473, 164104, 8552712, 4358408, 15040777, 458759, 5931272, 1736968, 9797897, 4391943, 8028424, 3834120, 13992201, 1245703, 6979848, 2785544, 11895049, 688392, 9077000, 4882696, 16089353, 327687, 5669128, 1474824, 16392, 3343111, 7766280, 3571976, 13467913, 983303, 6717704, 2523400, 11370761, 426248, 8814856, 4620552, 15565065, 589831, 6193416, 1999112, 10322185, 6489095, 8290568, 4096264, 14516489, 1769991, 7241992, 3047688, 12419337, 950536, 9339144, 5144840, 16613641, 16391, 5341448, 1147144, 8586504, 2032135, 7438600, 3244296, 
/// Builds the code-length table checked against the fixed decoding tables in tests:
/// 288 literal/length code lengths followed by 32 distance code lengths.
#[cfg(test)]
const fn make_fixed_code_lengths() -> [u8; 320] {
    // (exclusive end index, code length) for each run of equal lengths, in order.
    const RUNS: [(usize, u8); 5] = [(144, 8), (256, 9), (280, 7), (288, 8), (320, 5)];

    let mut table = [0u8; 320];
    let mut symbol = 0;
    let mut run = 0;
    while run < RUNS.len() {
        let (end, bits) = RUNS[run];
        while symbol < end {
            table[symbol] = bits;
            symbol += 1;
        }
        run += 1;
    }
    table
}
00000000000000xwU;ts9$I$#EQPŀx0(" ((J s tt{s>w ߨs7MsɒArEA "TE% D934448~nx:>5y #㾵ޙj\\\9fdeflate-0.3.7/tests/input-chunking-sensitivity-example2.zz000064400000000000000000000000511046102023000221760ustar 00000000000000X ! z0[fdeflate-0.3.7/tests/input-chunking-sensitivity-example3.zz000064400000000000000000000000641046102023000222030ustar 00000000000000X @!