bytelines-2.5.0/.cargo_vcs_info.json0000644000000001360000000000100130360ustar { "git": { "sha1": "62be7095491ec84d5a4e72bf4d0a241d49b7c907" }, "path_in_vcs": "" }bytelines-2.5.0/.github/workflows/ci.yml000064400000000000000000000016651046102023000163510ustar 00000000000000name: CI on: push: branches: [ main ] pull_request: branches: [ main ] jobs: build: runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: os: - macos-latest - ubuntu-latest - windows-latest rust: - stable - beta - nightly steps: - uses: actions/checkout@v2 - uses: actions-rs/toolchain@v1 with: profile: minimal toolchain: ${{ matrix.rust }} override: true components: rustfmt, clippy - uses: actions-rs/cargo@v1 with: command: build - uses: actions-rs/cargo@v1 with: command: test - uses: actions-rs/cargo@v1 with: command: fmt args: --all -- --check - uses: actions-rs/cargo@v1 with: command: clippy args: --all --all-features --profile test bytelines-2.5.0/.gitignore000064400000000000000000000000361046102023000136150ustar 00000000000000/target **/*.rs.bk Cargo.lock bytelines-2.5.0/Cargo.toml0000644000000022030000000000100110310ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. 
[package] edition = "2018" name = "bytelines" version = "2.5.0" authors = ["Isaac Whitfield "] description = "Read input lines as byte slices for high efficiency" readme = "README.md" keywords = [ "lines", "bytes", ] categories = [ "parsing", "text-processing", ] license = "MIT" repository = "https://github.com/whitfin/bytelines" [dependencies.futures-util] version = "0.3" optional = true default-features = false [dependencies.tokio] version = "1.14" features = [ "fs", "io-util", ] optional = true [dev-dependencies.tokio] version = "1.14" features = ["full"] [features] default = ["tokio"] tokio = [ "dep:tokio", "futures-util", ] bytelines-2.5.0/Cargo.toml.orig000064400000000000000000000012541046102023000145170ustar 00000000000000[package] name = "bytelines" version = "2.5.0" # remember to update html_root_url authors = ["Isaac Whitfield "] description = "Read input lines as byte slices for high efficiency" repository = "https://github.com/whitfin/bytelines" keywords = ["lines", "bytes"] categories = ["parsing", "text-processing"] readme = "README.md" edition = "2018" license = "MIT" [dependencies] futures-util = {version = "0.3", optional = true, default-features = false } tokio = { version = "1.14", features = ["fs", "io-util"], optional = true} [dev-dependencies] tokio = { version = "1.14", features = ["full"] } [features] default = ["tokio"] tokio = ["dep:tokio", "futures-util"] bytelines-2.5.0/LICENSE000064400000000000000000000020601046102023000126310ustar 00000000000000MIT License Copyright (c) 2022 Isaac Whitfield Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright 
notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. bytelines-2.5.0/README.md000064400000000000000000000063201046102023000131060ustar 00000000000000# bytelines [![Build Status](https://img.shields.io/github/actions/workflow/status/whitfin/bytelines/ci.yml)](https://github.com/whitfin/bytelines/actions) [![Crates.io](https://img.shields.io/crates/v/bytelines.svg)](https://crates.io/crates/bytelines) This library provides an easy way to read in input lines as byte slices for high efficiency. It's basically [lines](https://doc.rust-lang.org/std/io/trait.BufRead.html#method.lines) from the standard library, but it reads each line as a byte slice (`&[u8]`). This performs significantly faster than `lines()` in the case you don't particularly care about unicode, and basically as fast as writing the loops out by hand. Although the code itself is somewhat trivial, I've had to roll this in at least 4 tools I've written recently and so I figured it was time to have a convenience crate for it. ### Installation This tool will be available via [Crates.io](https://crates.io/crates/bytelines), so you can add it as a dependency in your `Cargo.toml`: ```toml [dependencies] bytelines = "2.5" ``` ### Usage It's quite simple; in the place you would typically call `lines` on a `BufRead` implementor, you can now use `bytelines` to retrieve a structure used to walk over lines as `&[u8]` (and thus avoid allocations). 
There are two ways to use the API, and both are shown below:

```rust
// our input file we're going to walk over lines of, and our reader
let file = File::open("./my-input.txt").expect("able to open file");
let reader = BufReader::new(file);
let mut lines = ByteLines::new(reader);

// Option 1: Walk using a `while` loop.
//
// This is the most performant option, as it avoids an allocation by
// simply referencing bytes inside the reading structure. This means
// that there's no copying at all, until the developer chooses to.
while let Some(line) = lines.next() {
    // do something with the line
}

// Option 2: Use the `Iterator` trait.
//
// This is more idiomatic, but requires allocating each line into
// an owned `Vec` to avoid potential memory safety issues. Although
// there is an allocation here, the overhead should be negligible
// except in cases where performance is paramount.
for line in lines.into_iter() {
    // do something with the line
}
```

As of v2.3 this crate includes fairly minimal support for Tokio, namely the `AsyncBufRead` trait. This looks fairly similar to the base APIs, and can be used in much the same way.

```rust
// configure our inputs again, using `AsyncByteLines`.
let file = File::open("./my-input.txt").await?;
let reader = BufReader::new(file);
let mut lines = AsyncByteLines::new(reader);

// walk through all lines using a `while` loop
while let Some(line) = lines.next().await? {
    // do something with the line
}

// walk through all lines using `Stream` APIs
lines.into_stream().for_each(|line| {

});
```

The main difference is that the Tokio implementations yield `Result<Option<&[u8]>, _>` instead of `Option<Result<&[u8], _>>` for consistency with the existing Tokio APIs.

If you don't want Tokio support, please disable default features:

```toml
[dependencies]
bytelines = { version = "2.5", default-features = false }
```

This will be removed as a default feature in the next major bump (v3.0), but for now you can exclude it this way.
bytelines-2.5.0/res/empty.txt000064400000000000000000000000011046102023000143050ustar 00000000000000 bytelines-2.5.0/res/numbers.txt000064400000000000000000000000241046102023000146270ustar 000000000000000 1 2 3 4 5 6 7 8 9 bytelines-2.5.0/src/lib.rs000064400000000000000000000026161046102023000135360ustar 00000000000000//! `Bytelines` is a simple library crate which offers line iteration for //! `BufRead` via `&[u8]` rather than `String`. //! //! Due to the removal of checking for `String` validity, this is typically //! much faster for reading in raw data and much more flexible. The APIs //! offered in this crate are intended to function exactly the same as the //! `lines` function inside the `BufRead` trait, except that the bytes which //! precede the line delimiter are not validated. //! //! Performance of [ByteLines](enum.ByteLines.html) is practically identical //! to that of writing a `loop` manually, due to the avoidance of allocations. #![doc(html_root_url = "https://docs.rs/bytelines/2.5.0")] use ::std::io::BufRead; #[cfg(feature = "tokio")] use ::tokio::io::AsyncBufRead; // mods mod std; mod util; #[cfg(feature = "tokio")] mod tokio; // expose all public APIs to keep the v2.x interface the same pub use crate::std::{ByteLines, ByteLinesIter, ByteLinesReader}; #[cfg(feature = "tokio")] pub use crate::tokio::AsyncByteLines; /// Creates a new line reader from a stdlib `BufRead`. #[inline] pub fn from_std(reader: B) -> ByteLines where B: BufRead, { ByteLines::new(reader) } /// Creates a new line reader from a Tokio `AsyncBufRead`. #[cfg(feature = "tokio")] #[inline] pub fn from_tokio(reader: B) -> AsyncByteLines where B: AsyncBufRead + Unpin, { AsyncByteLines::new(reader) } bytelines-2.5.0/src/std.rs000064400000000000000000000130611046102023000135560ustar 00000000000000//! Module exposing APIs based around `BufRead` from stdlib. use std::io::{BufRead, Error}; /// Provides iteration over bytes of input, split by line. 
/// /// Unlike the implementation in the standard library, this requires /// no allocations and simply references the input lines from the /// internal buffer. In order to do this safely, we must sacrifice /// the `Iterator` API, and operate using `while` syntax: /// /// ```rust /// use bytelines::*; /// use std::fs::File; /// use std::io::BufReader; /// /// // construct our iterator from our file input /// let file = File::open("./res/numbers.txt").unwrap(); /// let reader = BufReader::new(file); /// let mut lines = ByteLines::new(reader); /// /// // walk our lines using `while` syntax /// while let Some(line) = lines.next() { /// // do something with the line, which is Result<&[u8], _> /// } /// ``` /// /// For those who prefer the `Iterator` API, this structure implements /// the `IntoIterator` trait to provide it. This comes at the cost of /// an allocation of a `Vec` for each line in the `Iterator`. This is /// negligible in many cases, so often it comes down to which syntax /// is preferred: /// /// ```rust /// use bytelines::*; /// use std::fs::File; /// use std::io::BufReader; /// /// // construct our iterator from our file input /// let file = File::open("./res/numbers.txt").unwrap(); /// let reader = BufReader::new(file); /// let mut lines = ByteLines::new(reader); /// /// // walk our lines using `for` syntax /// for line in lines.into_iter() { /// // do something with the line, which is Result, _> /// } /// ``` pub struct ByteLines where B: BufRead, { buffer: Vec, reader: B, } impl ByteLines where B: BufRead, { /// Constructs a new `ByteLines` from an input `BufRead`. pub fn new(buf: B) -> Self { Self { buffer: Vec::new(), reader: buf, } } /// Retrieves a reference to the next line of bytes in the reader (if any). pub fn next(&mut self) -> Option> { self.buffer.clear(); crate::util::handle_line( self.reader.read_until(b'\n', &mut self.buffer), &mut self.buffer, ) } } /// `IntoIterator` conversion for `ByteLines` to provide `Iterator` APIs. 
impl IntoIterator for ByteLines where B: BufRead, { type Item = Result, Error>; type IntoIter = ByteLinesIter; /// Constructs a `ByteLinesIter` to provide an `Iterator` API. #[inline] fn into_iter(self) -> ByteLinesIter { ByteLinesIter { inner: self } } } /// `Iterator` implementation of `ByteLines` to provide `Iterator` APIs. /// /// This structure enables developers the use of the `Iterator` API in /// their code, at the cost of an allocation per input line: /// /// ```rust /// use bytelines::*; /// use std::fs::File; /// use std::io::BufReader; /// /// // construct our iterator from our file input /// let file = File::open("./res/numbers.txt").unwrap(); /// let lines = BufReader::new(file); /// let lines = bytelines::from_std(lines); /// /// // walk our lines using `for` syntax /// for line in lines.into_iter() { /// // do something with the line, which is Result, _> /// } /// ``` pub struct ByteLinesIter where B: BufRead, { inner: ByteLines, } impl Iterator for ByteLinesIter where B: BufRead, { type Item = Result, Error>; /// Retrieves the next line in the iterator (if any). #[inline] fn next(&mut self) -> Option, Error>> { self.inner.next().map(|r| r.map(|s| s.to_vec())) } } /// Represents anything which can provide iterators of byte lines. pub trait ByteLinesReader where B: BufRead, { /// Returns a structure used to iterate the lines of this reader as `Result<&[u8], _>`. fn byte_lines(self) -> ByteLines; } /// Blanket implementation for all `BufRead`. impl ByteLinesReader for B where B: BufRead, { /// Returns a structure used to iterate the lines of this reader as Result<&[u8], _>. 
#[inline] fn byte_lines(self) -> ByteLines { super::from_std(self) } } #[cfg(test)] #[allow(clippy::needless_range_loop)] mod tests { use super::*; use std::fs::File; use std::io::BufReader; #[test] fn test_basic_loop() { let file = File::open("./res/numbers.txt").unwrap(); let mut brdr = BufReader::new(file).byte_lines(); let mut lines = Vec::new(); while let Some(line) = brdr.next() { let line = line.unwrap().to_vec(); let line = String::from_utf8(line).unwrap(); lines.push(line); } for i in 0..9 { assert_eq!(lines[i], format!("{}", i)); } } #[test] fn test_basic_iterator() { let file = File::open("./res/numbers.txt").unwrap(); let mut lines = Vec::new(); for line in BufReader::new(file).byte_lines().into_iter() { let line = line.unwrap(); let line = String::from_utf8(line).unwrap(); lines.push(line); } for i in 0..9 { assert_eq!(lines[i], format!("{}", i)); } } #[test] fn test_empty_line() { let file = File::open("./res/empty.txt").unwrap(); let mut lines = Vec::new(); for line in BufReader::new(file).byte_lines().into_iter() { let line = line.unwrap(); let line = String::from_utf8(line).unwrap(); lines.push(line); } assert_eq!(lines.len(), 1); assert_eq!(lines[0], ""); } } bytelines-2.5.0/src/tokio.rs000064400000000000000000000072131046102023000141130ustar 00000000000000//! Module exposing APIs based around `AsyncBufRead` from Tokio. use futures_util::stream::{self, Stream}; use tokio::io::{AsyncBufRead, AsyncBufReadExt}; use std::io::Error; /// Provides async iteration over bytes of input, split by line. /// /// ```rust ignore /// use bytelines::*; /// use std::fs::File; /// use std::io::BufReader; /// /// // construct our iterator from our file input /// let file = File::open("./res/numbers.txt").await?; /// let reader = BufReader::new(file); /// let mut lines = AsyncByteLines::new(reader); /// /// // walk our lines using `while` syntax /// while let Some(line) = lines.next().await? 
{ /// // do something with the line, which is &[u8] /// } /// /// This differs from the `stdlib` version of the API as it fits /// more closely with the Tokio API for types. /// /// For those who prefer the `Stream` API, this structure can be /// converted using `into_stream`. This comes at the cost of an /// allocation of a `Vec` for each line in the `Stream`. This is /// negligible in many cases, so often it comes down to which /// syntax is preferred: /// /// ```rust ignore /// use bytelines::*; /// use std::fs::File; /// use std::io::BufReader; /// /// // construct our iterator from our file input /// let file = File::open("./res/numbers.txt").await?; /// let reader = BufReader::new(file); /// let mut lines = AsyncByteLines::new(reader); /// /// // walk our lines using `Stream` syntax /// lines.into_stream().for_each(|line| { /// /// }); /// ``` pub struct AsyncByteLines where B: AsyncBufRead + Unpin, { buffer: Vec, reader: B, } impl AsyncByteLines where B: AsyncBufRead + Unpin, { /// Constructs a new `ByteLines` from an input `AsyncBufRead`. pub fn new(buf: B) -> Self { Self { buffer: Vec::new(), reader: buf, } } /// Retrieves a reference to the next line of bytes in the reader (if any). pub async fn next(&mut self) -> Result, Error> { self.buffer.clear(); let handled = crate::util::handle_line( self.reader.read_until(b'\n', &mut self.buffer).await, &mut self.buffer, ); handled.transpose() } /// Converts this wrapper to provide a `Stream` API. pub fn into_stream(self) -> impl Stream, Error>> { stream::try_unfold(self, |mut lines| async { Ok(lines .next() .await? 
.map(|line| line.to_vec()) .map(|line| (line, lines))) }) } } #[cfg(test)] #[allow(clippy::needless_range_loop)] mod tests { use tokio::fs::File; use tokio::io::BufReader; #[tokio::test] async fn test_basic_loop() { let file = File::open("./res/numbers.txt").await.unwrap(); let brdr = BufReader::new(file); let mut brdr = crate::from_tokio(brdr); let mut lines = Vec::new(); while let Some(line) = brdr.next().await.unwrap() { let line = line.to_vec(); let line = String::from_utf8(line).unwrap(); lines.push(line); } for i in 0..9 { assert_eq!(lines[i], format!("{}", i)); } } #[tokio::test] async fn test_basic_stream() { use futures_util::StreamExt; let file = File::open("./res/numbers.txt").await.unwrap(); let brdr = BufReader::new(file); let lines = crate::from_tokio(brdr) .into_stream() .map(|line| String::from_utf8(line.unwrap()).unwrap()) .collect::>() .await; for i in 0..9 { assert_eq!(lines[i], format!("{}", i)); } } } bytelines-2.5.0/src/util.rs000064400000000000000000000014311046102023000137370ustar 00000000000000//! Module exposing utility handlers across read types. use std::io::Result; /// Handles a line of input and maps into the provided buffer and returns a reference. pub fn handle_line(input: Result, buffer: &mut Vec) -> Option> { match input { // short circuit on error Err(e) => Some(Err(e)), // no input, done Ok(0) => None, // bytes! Ok(mut n) => { // always "pop" the delim if buffer[n - 1] == b'\n' { n -= 1; // also "pop" a potential leading \r if n > 0 && buffer[n - 1] == b'\r' { n -= 1; } } // pass back the byte slice Some(Ok(&buffer[..n])) } } }