shell-quote-0.7.2/.cargo_vcs_info.json0000644000000001360000000000100133040ustar { "git": { "sha1": "b5365b3a8b17c23c09f168f0c15b0c3214facfdc" }, "path_in_vcs": "" }shell-quote-0.7.2/Cargo.toml0000644000000030200000000000100112750ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" name = "shell-quote" version = "0.7.2" authors = ["Gavin Panella "] build = false include = [ "LICENSE", "README.md", "src/**/*.rs", ] autolib = false autobins = false autoexamples = false autotests = false autobenches = false description = "A Rust library for shell-quoting strings, e.g. for interpolating into a Bash script." homepage = "https://github.com/allenap/shell-quote" readme = "README.md" keywords = [ "bash", "dash", "fish", "zsh", "escape", ] categories = [ "encoding", "filesystem", ] license = "Apache-2.0" repository = "https://github.com/allenap/shell-quote" [lib] name = "shell_quote" path = "src/lib.rs" [dependencies.bstr] version = "1" optional = true [dev-dependencies.criterion] version = "^0.5.1" features = ["html_reports"] [dev-dependencies.lenient_semver] version = "0.4.2" [dev-dependencies.semver] version = "1.0.23" [dev-dependencies.test-case] version = "3.3.1" [features] bash = [] default = [ "bstr", "bash", "sh", "fish", ] fish = [] sh = [] shell-quote-0.7.2/Cargo.toml.orig000064400000000000000000000017471046102023000147740ustar 00000000000000[package] authors = ["Gavin Panella "] categories = ["encoding", "filesystem"] description = "A Rust library for shell-quoting strings, e.g. for interpolating into a Bash script." 
edition = "2021" homepage = "https://github.com/allenap/shell-quote" keywords = ["bash", "dash", "fish", "zsh", "escape"] license = "Apache-2.0" name = "shell-quote" readme = "README.md" repository = "https://github.com/allenap/shell-quote" version = "0.7.2" include = ["LICENSE", "README.md", "src/**/*.rs"] [features] default = ["bstr", "bash", "sh", "fish"] bash = [] fish = [] sh = [] [dependencies] bstr = { version = "1", optional = true } [dev-dependencies] criterion = { version = "^0.5.1", features = ["html_reports"] } lenient_semver = "0.4.2" semver = "1.0.23" test-case = "3.3.1" [[bench]] name = "bash" harness = false required-features = ["bash"] [[bench]] name = "sh" harness = false required-features = ["sh"] [[bench]] name = "fish" harness = false required-features = ["fish"] shell-quote-0.7.2/LICENSE000064400000000000000000000261361046102023000131110ustar 00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. 
"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." 
"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. shell-quote-0.7.2/README.md000064400000000000000000000156171046102023000133650ustar 00000000000000 [`&str`]: https://doc.rust-lang.org/stable/std/primitive.str.html [`String`]: https://doc.rust-lang.org/stable/alloc/string/struct.String.html [`bstr::BStr`]: https://docs.rs/bstr/latest/bstr/struct.BStr.html [`bstr::BString`]: https://docs.rs/bstr/latest/bstr/struct.BString.html [`slice`]: https://doc.rust-lang.org/stable/std/primitive.slice.html [`Vec`]: https://doc.rust-lang.org/stable/std/vec/struct.Vec.html [`OsStr`]: https://doc.rust-lang.org/stable/std/ffi/struct.OsStr.html [`OsString`]: https://doc.rust-lang.org/stable/std/ffi/struct.OsString.html [`Path`]: https://doc.rust-lang.org/stable/std/path/struct.Path.html [`PathBuf`]: https://doc.rust-lang.org/stable/std/path/struct.PathBuf.html [`Sh`]: https://docs.rs/shell-quote/latest/shell_quote/struct.Sh.html [`Dash`]: https://docs.rs/shell-quote/latest/shell_quote/struct.Dash.html [`Bash`]: https://docs.rs/shell-quote/latest/shell_quote/struct.Bash.html [`Fish`]: https://docs.rs/shell-quote/latest/shell_quote/struct.Fish.html [`Zsh`]: https://docs.rs/shell-quote/latest/shell_quote/struct.Zsh.html [`QuoteRefExt`]: https://docs.rs/shell-quote/latest/shell_quote/trait.QuoteRefExt.html [`QuoteRefExt::quoted`]: https://docs.rs/shell-quote/latest/shell_quote/trait.QuoteRefExt.html#tymethod.quoted [`QuoteExt`]: 
https://docs.rs/shell-quote/latest/shell_quote/trait.QuoteExt.html
# shell-quote
**shell-quote** escapes strings in a way that they can be inserted into shell scripts without the risk that they're interpreted as, say, multiple arguments (like with Bash's _word splitting_), paths (Bash's _pathname expansion_), shell metacharacters, function calls, or other syntax. This is frequently not as simple as wrapping a string in quotes. This package implements escaping for [GNU Bash][gnu-bash], [Z Shell][z-shell], [fish][], and `/bin/sh`-like shells including [Dash][dash]. [dash]: https://en.wikipedia.org/wiki/Almquist_shell#dash [gnu-bash]: https://www.gnu.org/software/bash/ [z-shell]: https://zsh.sourceforge.io/ [fish]: https://fishshell.com/ It can take as input many different string and byte string types: - [`&str`] and [`String`] - [`&bstr::BStr`][`bstr::BStr`] and [`bstr::BString`] - [`&[u8]`][`slice`] and [`Vec`] - [`&OsStr`][`OsStr`] and [`OsString`] (on UNIX) - [`&Path`][`Path`] and [`PathBuf`] and produce output as (or push into) the following types: - [`String`] (for shells that support it, i.e. not [`Sh`]/[`Dash`]) - [`bstr::BString`] - [`Vec`] - [`OsString`] (on UNIX) Inspired by the Haskell [shell-escape][] package. [shell-escape]: https://github.com/solidsnack/shell-escape ## Examples When quoting using raw bytes it can be convenient to call [`Sh`]'s, [`Dash`]'s, [`Bash`]'s, [`Fish`]'s, and [`Zsh`]'s associated functions directly: ```rust use shell_quote::{Bash, Dash, Fish, Sh, Zsh}; // No quoting is necessary for simple strings. assert_eq!(Sh::quote_vec("foobar"), b"foobar"); assert_eq!(Dash::quote_vec("foobar"), b"foobar"); // `Dash` is an alias for `Sh` assert_eq!(Bash::quote_vec("foobar"), b"foobar"); assert_eq!(Zsh::quote_vec("foobar"), b"foobar"); // `Zsh` is an alias for `Bash` assert_eq!(Fish::quote_vec("foobar"), b"foobar"); // In all shells, quoting is necessary for strings with spaces. 
assert_eq!(Sh::quote_vec("foo bar"), b"foo' bar'"); assert_eq!(Dash::quote_vec("foo bar"), b"foo' bar'"); assert_eq!(Bash::quote_vec("foo bar"), b"$'foo bar'"); assert_eq!(Zsh::quote_vec("foo bar"), b"$'foo bar'"); assert_eq!(Fish::quote_vec("foo bar"), b"foo' bar'"); ``` It's also possible to use the extension trait [`QuoteRefExt`] which provides a [`quoted`][`QuoteRefExt::quoted`] function: ```rust use shell_quote::{Bash, Sh, Fish, QuoteRefExt}; let quoted: String = "foo bar".quoted(Bash); assert_eq!(quoted, "$'foo bar'"); let quoted: Vec<u8> = "foo bar".quoted(Sh); assert_eq!(quoted, b"foo' bar'"); let quoted: String = "foo bar".quoted(Fish); assert_eq!(quoted, "foo' bar'"); ``` Or the extension trait [`QuoteExt`] for pushing quoted strings into a buffer: ```rust use shell_quote::{Bash, QuoteExt}; let mut script: bstr::BString = "echo ".into(); script.push_quoted(Bash, "foo bar"); script.extend(b" > "); script.push_quoted(Bash, "/path/(to)/[output]"); assert_eq!(script, "echo $'foo bar' > $'/path/(to)/[output]'"); ``` ## Notes on string encoding
Here we will use [`Bash`] for the example, but other shells may have similar _or different_ behaviours; check their documentation.
When we use [`&str`] or [`String`] as an input type, UTF-8 code points of U+0080 and above are written into the quoted form just as they are encoded in UTF-8, i.e. the bytes are the same and there are no escape sequences. Compare this to using a different input type: ```rust # use shell_quote::{Bash, QuoteRefExt}; let data: &str = "café"; let data_utf8_quoted_from_string_type: Vec<u8> = data.quoted(Bash); assert_eq!(&data_utf8_quoted_from_string_type, b"$'caf\xC3\xA9'"); // UTF-8, verbatim. let data_utf8_quoted_from_bytes: Vec<u8> = data.as_bytes().quoted(Bash); assert_eq!(&data_utf8_quoted_from_bytes, b"$'caf\\xC3\\xA9'"); // Now hex escaped! ``` It follows then, supposing you need to use a text encoding that is not UTF-8, that string types must be encoded _before_ passing to the functions from this crate. For example, the character 'é' (U+00E9): - In ISO-8859-1, it is represented by the single byte `0xE9`. - In UTF-8, it is represented by the two bytes `0xC3 0xA9`. Using a hypothetical `encode_iso_8859_1` function: ```rust # use shell_quote::{Bash, QuoteRefExt}; # fn encode_iso_8859_1(_s: &str) -> &[u8] { # &[99, 97, 102, 233] # } let data = "café"; let data_utf8_quoted: Vec<u8> = data.quoted(Bash); assert_eq!(&data_utf8_quoted, b"$'caf\xC3\xA9'"); // UTF-8: 2 bytes for é. let data_iso_8859_1: &[u8] = encode_iso_8859_1(data); let data_iso_8859_1_quoted: Vec<u8> = data_iso_8859_1.quoted(Bash); assert_eq!(&data_iso_8859_1_quoted, b"$'caf\\xE9'"); // ISO-8859-1: 1 byte, hex escaped. ``` ## Compatibility [`Sh`] can serve as a lowest common denominator for Bash, Z Shell, and `/bin/sh`-like shells like Dash. However, fish's quoting rules are different enough that you must use [`Fish`] for fish scripts. Note that using [`Sh`] as a lowest common denominator brings with it other issues; read its documentation carefully to understand the limitations. ## Feature flags The following are all enabled by default: - `bstr`: Support [`bstr::BStr`] and [`bstr::BString`]. 
- `bash`: Support [Bash][gnu-bash] and [Z Shell][z-shell]. - `fish`: Support [fish][]. - `sh`: Support `/bin/sh`-like shells including [Dash][dash]. To limit support to specific shells, you must disable this crate's default features in `Cargo.toml` and re-enable those you want. For example: ```toml [dependencies] shell-quote = { version = "*", default-features = false, features = ["bash"] } ``` shell-quote-0.7.2/src/ascii.rs000064400000000000000000000065161046102023000143310ustar 00000000000000#![cfg(any(feature = "bash", feature = "fish", feature = "sh"))] //! Scanner for ASCII control codes, shell metacharacters, printable characters, //! and extended codes, i.e. classify each byte in a stream according to where //! it appears in extended ASCII. use std::borrow::Borrow; #[derive(PartialEq)] pub(crate) enum Char { Bell, Backspace, Escape, FormFeed, NewLine, CarriageReturn, HorizontalTab, VerticalTab, Control(u8), Backslash, SingleQuote, DoubleQuote, Delete, PrintableInert(u8), Printable(u8), Extended(u8), } impl Char { pub fn from>(ch: T) -> Self { let ch = *ch.borrow(); use Char::*; match ch { // ASCII control characters that frequently have dedicated backslash // sequences when quoted. BEL => Bell, BS => Backspace, ESC => Escape, FF => FormFeed, LF => NewLine, CR => CarriageReturn, TAB => HorizontalTab, VT => VerticalTab, // ASCII control characters, the rest. 0x00..=0x06 | 0x0E..=0x1A | 0x1C..=0x1F => Control(ch), // ASCII printable characters that can have dedicated backslash // sequences when quoted or otherwise need some special treatment. b'\\' => Backslash, b'\'' => SingleQuote, b'\"' => DoubleQuote, DEL => Delete, // ASCII printable letters, numbers, and "safe" punctuation. b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' => PrintableInert(ch), b',' | b'.' | b'/' | b'_' | b'-' => PrintableInert(ch), // ASCII punctuation which can have significance in the shell. b'|' | b'&' | b';' | b'(' | b')' | b'<' | b'>' => Printable(ch), b' ' | b'?' 
| b'[' | b']' | b'{' | b'}' | b'`' => Printable(ch), b'~' | b'!' | b'$' | b'@' | b'+' | b'=' | b'*' => Printable(ch), b'%' | b'#' | b':' | b'^' => Printable(ch), // ASCII extended characters, or high bytes. 0x80..=0xff => Extended(ch), } } #[inline] pub fn is_inert(&self) -> bool { matches!(self, Char::PrintableInert(_)) } #[inline] #[cfg(feature = "sh")] pub fn code(&self) -> u8 { use Char::*; match *self { Bell => BEL, Backspace => BS, Escape => ESC, FormFeed => FF, NewLine => LF, CarriageReturn => CR, HorizontalTab => TAB, VerticalTab => VT, Control(ch) => ch, Backslash => b'\\', SingleQuote => b'\'', DoubleQuote => b'"', Delete => DEL, PrintableInert(ch) => ch, Printable(ch) => ch, Extended(ch) => ch, } } } const BEL: u8 = 0x07; // -> \a const BS: u8 = 0x08; // -> \b const TAB: u8 = 0x09; // -> \t const LF: u8 = 0x0A; // -> \n const VT: u8 = 0x0B; // -> \v const FF: u8 = 0x0C; // -> \f const CR: u8 = 0x0D; // -> \r const ESC: u8 = 0x1B; // -> \e const DEL: u8 = 0x7F; #[cfg(test)] mod tests { #[test] #[cfg(feature = "sh")] fn test_code() { for ch in u8::MIN..=u8::MAX { let char = super::Char::from(ch); assert_eq!(ch, char.code()); } } } shell-quote-0.7.2/src/bash.rs000064400000000000000000000263711046102023000141570ustar 00000000000000#![cfg(feature = "bash")] use crate::{Quotable, QuoteInto}; /// Quote byte strings for use with Bash, the GNU Bourne-Again Shell. /// /// # Compatibility /// /// Quoted/escaped strings produced by [`Bash`] work in both Bash and Z Shell. /// /// # ⚠️ Warning /// /// It is _possible_ to encode NUL in a Bash string, but Bash appears to then /// truncate the rest of the string after that point **or** sometimes it filters /// the NUL out. It's not yet clear to me when/why each behaviour is chosen. /// /// If you're quoting UTF-8 content this may not be a problem since there is /// only one code point – the null character itself – that will ever produce a /// NUL byte. 
To avoid this problem entirely, consider using [Modified /// UTF-8][modified-utf-8] so that the NUL byte can never appear in a valid byte /// stream. /// /// [modified-utf-8]: https://en.wikipedia.org/wiki/UTF-8#Modified_UTF-8 /// /// # Notes /// /// From bash(1): /// /// Words of the form $'string' are treated specially. The word expands to /// string, with backslash-escaped characters replaced as specified by the /// ANSI C standard. Backslash escape sequences, if present, are decoded as /// follows: /// /// ```text /// \a alert (bell) /// \b backspace /// \e an escape character /// \f form feed /// \n new line /// \r carriage return /// \t horizontal tab /// \v vertical tab /// \\ backslash /// \' single quote /// \nnn the eight-bit character whose value is the /// octal value nnn (one to three digits) /// \xHH the eight-bit character whose value is the /// hexadecimal value HH (one or two hex digits) /// \cx a control-x character /// ``` /// /// Bash allows, in newer versions, for non-ASCII Unicode characters with /// `\uHHHH` and `\UXXXXXXXX` syntax inside these [ANSI C quoted /// strings][ansi-c-quoting], but we avoid this and work only with bytes. Part /// of the problem is that it's not clear how Bash then works with these /// strings. Does it encode these characters into bytes according to the user's /// current locale? Are strings in Bash now natively Unicode? /// /// For now it's up to the caller to figure out encoding. A significant use case /// for this code is to quote filenames into scripts, and on *nix variants I /// understand that filenames are essentially arrays of bytes, even if the OS /// adds some normalisation and case-insensitivity on top. 
/// /// [ansi-c-quoting]: /// https://www.gnu.org/software/bash/manual/html_node/ANSI_002dC-Quoting.html /// #[derive(Debug, Clone, Copy)] pub struct Bash; // ---------------------------------------------------------------------------- impl QuoteInto> for Bash { fn quote_into<'q, S: Into>>(s: S, out: &mut Vec) { Self::quote_into_vec(s, out); } } impl QuoteInto for Bash { fn quote_into<'q, S: Into>>(s: S, out: &mut String) { Self::quote_into_vec(s, unsafe { out.as_mut_vec() }) } } #[cfg(unix)] impl QuoteInto for Bash { fn quote_into<'q, S: Into>>(s: S, out: &mut std::ffi::OsString) { use std::os::unix::ffi::OsStringExt; let s = Self::quote_vec(s); let s = std::ffi::OsString::from_vec(s); out.push(s); } } #[cfg(feature = "bstr")] impl QuoteInto for Bash { fn quote_into<'q, S: Into>>(s: S, out: &mut bstr::BString) { let s = Self::quote_vec(s); out.extend(s); } } // ---------------------------------------------------------------------------- impl Bash { /// Quote a string of bytes into a new `Vec`. /// /// This will return one of the following: /// - The string as-is, if no escaping is necessary. /// - An [ANSI-C escaped string][ansi-c-quoting], like `$'foo\nbar'`. /// /// See [`quote_into_vec`][`Self::quote_into_vec`] for a variant that /// extends an existing `Vec` instead of allocating a new one. /// /// # Examples /// /// ``` /// # use shell_quote::Bash; /// assert_eq!(Bash::quote_vec("foobar"), b"foobar"); /// assert_eq!(Bash::quote_vec("foo bar"), b"$'foo bar'"); /// ``` /// /// [ansi-c-quoting]: /// https://www.gnu.org/software/bash/manual/html_node/ANSI_002dC-Quoting.html /// pub fn quote_vec<'a, S: Into>>(s: S) -> Vec { // Here, previously, in the `Escape` cases, an optimisation // precalculated the required capacity of the output `Vec` to avoid // reallocations later on, but benchmarks showed that it was slower. It // _may_ have lowered maximum RAM required, but that was not measured. 
match s.into() { Quotable::Bytes(bytes) => match bytes::escape_prepare(bytes) { bytes::Prepared::Empty => vec![b'\'', b'\''], bytes::Prepared::Inert => bytes.into(), bytes::Prepared::Escape(esc) => { let mut sout = Vec::new(); bytes::escape_chars(esc, &mut sout); sout } }, Quotable::Text(text) => match text::escape_prepare(text) { text::Prepared::Empty => vec![b'\'', b'\''], text::Prepared::Inert => text.into(), text::Prepared::Escape(esc) => { let mut sout = Vec::new(); text::escape_chars(esc, &mut sout); sout } }, } } /// Quote a string of bytes into an existing `Vec`. /// /// See [`quote_vec`][`Self::quote_vec`] for more details. /// /// # Examples /// /// ``` /// # use shell_quote::Bash; /// let mut buf = Vec::with_capacity(128); /// Bash::quote_into_vec("foobar", &mut buf); /// buf.push(b' '); // Add a space. /// Bash::quote_into_vec("foo bar", &mut buf); /// assert_eq!(buf, b"foobar $'foo bar'"); /// ``` /// pub fn quote_into_vec<'a, S: Into>>(s: S, sout: &mut Vec) { // Here, previously, in the `Escape` cases, an optimisation // precalculated the required capacity of the output `Vec` to avoid // reallocations later on, but benchmarks showed that it was slower. It // _may_ have lowered maximum RAM required, but that was not measured. 
match s.into() { Quotable::Bytes(bytes) => match bytes::escape_prepare(bytes) { bytes::Prepared::Empty => sout.extend(b"''"), bytes::Prepared::Inert => sout.extend(bytes), bytes::Prepared::Escape(esc) => bytes::escape_chars(esc, sout), }, Quotable::Text(text) => match text::escape_prepare(text) { text::Prepared::Empty => sout.extend(b"''"), text::Prepared::Inert => sout.extend(text.as_bytes()), text::Prepared::Escape(esc) => text::escape_chars(esc, sout), }, } } } // ---------------------------------------------------------------------------- mod bytes { use super::u8_to_hex_escape; use crate::ascii::Char; pub enum Prepared { Empty, Inert, Escape(Vec), } pub fn escape_prepare(sin: &[u8]) -> Prepared { let esc: Vec<_> = sin.iter().map(Char::from).collect(); // An optimisation: if the string is not empty and contains only "safe" // characters we can avoid further work. if esc.is_empty() { Prepared::Empty } else if esc.iter().all(Char::is_inert) { Prepared::Inert } else { Prepared::Escape(esc) } } pub fn escape_chars(esc: Vec, sout: &mut Vec) { // Push a Bash-style $'...' quoted string into `sout`. 
sout.extend(b"$'"); for mode in esc { use Char::*; match mode { Bell => sout.extend(b"\\a"), Backspace => sout.extend(b"\\b"), Escape => sout.extend(b"\\e"), FormFeed => sout.extend(b"\\f"), NewLine => sout.extend(b"\\n"), CarriageReturn => sout.extend(b"\\r"), HorizontalTab => sout.extend(b"\\t"), VerticalTab => sout.extend(b"\\v"), Control(ch) => sout.extend(&u8_to_hex_escape(ch)), Backslash => sout.extend(b"\\\\"), SingleQuote => sout.extend(b"\\'"), DoubleQuote => sout.extend(b"\""), Delete => sout.extend(b"\\x7F"), PrintableInert(ch) => sout.push(ch), Printable(ch) => sout.push(ch), Extended(ch) => sout.extend(&u8_to_hex_escape(ch)), } } sout.push(b'\''); } } // ---------------------------------------------------------------------------- mod text { use super::u8_to_hex_escape; use crate::utf8::Char; pub enum Prepared { Empty, Inert, Escape(Vec), } pub fn escape_prepare(sin: &str) -> Prepared { let esc: Vec<_> = sin.chars().map(Char::from).collect(); // An optimisation: if the string is not empty and contains only "safe" // characters we can avoid further work. if esc.is_empty() { Prepared::Empty } else if esc.iter().all(Char::is_inert) { Prepared::Inert } else { Prepared::Escape(esc) } } pub fn escape_chars(esc: Vec, sout: &mut Vec) { // Push a Bash-style $'...' quoted string into `sout`. 
sout.extend(b"$'"); let buf = &mut [0u8; 4]; for mode in esc { use Char::*; match mode { Bell => sout.extend(b"\\a"), Backspace => sout.extend(b"\\b"), Escape => sout.extend(b"\\e"), FormFeed => sout.extend(b"\\f"), NewLine => sout.extend(b"\\n"), CarriageReturn => sout.extend(b"\\r"), HorizontalTab => sout.extend(b"\\t"), VerticalTab => sout.extend(b"\\v"), Control(ch) => sout.extend(&u8_to_hex_escape(ch)), Backslash => sout.extend(b"\\\\"), SingleQuote => sout.extend(b"\\'"), DoubleQuote => sout.extend(b"\""), Delete => sout.extend(b"\\x7F"), PrintableInert(ch) => sout.push(ch), Printable(ch) => sout.push(ch), Utf8(ch) => sout.extend(ch.encode_utf8(buf).as_bytes()), } } sout.push(b'\''); } } // ---------------------------------------------------------------------------- /// Escape a byte as a 4-byte hex escape sequence. /// /// The `\\xHH` format (backslash, a literal "x", two hex characters) is /// understood by many shells. #[inline] fn u8_to_hex_escape(ch: u8) -> [u8; 4] { const HEX_DIGITS: &[u8] = b"0123456789ABCDEF"; [ b'\\', b'x', HEX_DIGITS[(ch >> 4) as usize], HEX_DIGITS[(ch & 0xF) as usize], ] } #[cfg(test)] #[test] fn test_u8_to_hex_escape() { for ch in u8::MIN..=u8::MAX { let expected = format!("\\x{ch:02X}"); let observed = u8_to_hex_escape(ch); let observed = std::str::from_utf8(&observed).unwrap(); assert_eq!(observed, &expected); } } shell-quote-0.7.2/src/fish.rs000064400000000000000000000266761046102023000142030ustar 00000000000000#![cfg(feature = "fish")] use crate::{Quotable, QuoteInto}; /// Quote byte strings for use with fish. /// /// # ⚠️ Warning /// /// Prior to version 3.6.2, fish did not correctly handle some Unicode code /// points encoded as UTF-8. From the [version 3.6.2 release notes][]: /// /// > fish uses certain Unicode non-characters internally for marking wildcards /// > and expansions. 
It incorrectly allowed these markers to be read on command /// > substitution output, rather than transforming them into a safe internal /// > representation. /// /// [version 3.6.2 release notes]: /// https://github.com/fish-shell/fish-shell/releases/tag/3.6.2 /// /// At present this crate has **no workaround** for this issue. Please use fish /// 3.6.2 or later. /// /// # Notes /// /// The documentation on [quoting][] and [escaping characters][] in fish is /// confusing at first, especially when coming from a Bourne-like shell, but /// essentially we have to be able to move and and out of a quoted string /// context. For example, the escape sequence `\t` for a tab _must_ be outside /// of quotes, single or double, to be recognised as a tab character by fish: /// /// ```fish /// echo 'foo'\t'bar' /// ``` /// /// This emphasises the importance of using the correct quoting module for the /// target shell. /// /// [quoting]: https://fishshell.com/docs/current/language.html#quotes /// [escaping characters]: /// https://fishshell.com/docs/current/language.html#escaping-characters #[derive(Debug, Clone, Copy)] pub struct Fish; impl QuoteInto> for Fish { fn quote_into<'q, S: Into>>(s: S, out: &mut Vec) { Self::quote_into_vec(s, out); } } impl QuoteInto for Fish { fn quote_into<'q, S: Into>>(s: S, out: &mut String) { Self::quote_into_vec(s, unsafe { out.as_mut_vec() }) } } #[cfg(unix)] impl QuoteInto for Fish { fn quote_into<'q, S: Into>>(s: S, out: &mut std::ffi::OsString) { use std::os::unix::ffi::OsStringExt; let s = Self::quote_vec(s); let s = std::ffi::OsString::from_vec(s); out.push(s); } } #[cfg(feature = "bstr")] impl QuoteInto for Fish { fn quote_into<'q, S: Into>>(s: S, out: &mut bstr::BString) { let s = Self::quote_vec(s); out.extend(s); } } impl Fish { /// Quote a string of bytes into a new `Vec`. /// /// This will return one of the following: /// - The string as-is, if no escaping is necessary. 
/// - An escaped string, like `'foo \'bar'`, `\a'ABC'` /// /// See [`quote_into_vec`][`Self::quote_into_vec`] for a variant that /// extends an existing `Vec` instead of allocating a new one. /// /// # Examples /// /// ``` /// # use shell_quote::Fish; /// assert_eq!(Fish::quote_vec("foobar"), b"foobar"); /// assert_eq!(Fish::quote_vec("foo 'bar"), b"foo' \\'bar'"); /// ``` pub fn quote_vec<'a, S: Into>>(s: S) -> Vec { match s.into() { Quotable::Bytes(bytes) => match bytes::escape_prepare(bytes) { bytes::Prepared::Empty => vec![b'\'', b'\''], bytes::Prepared::Inert => bytes.into(), bytes::Prepared::Escape(esc) => { let mut sout = Vec::new(); bytes::escape_chars(esc, &mut sout); sout } }, Quotable::Text(text) => match text::escape_prepare(text) { text::Prepared::Empty => vec![b'\'', b'\''], text::Prepared::Inert => text.into(), text::Prepared::Escape(esc) => { let mut sout = Vec::new(); text::escape_chars(esc, &mut sout); sout } }, } } /// Quote a string of bytes into an existing `Vec`. /// /// See [`quote_vec`][`Self::quote_vec`] for more details. /// /// # Examples /// /// ``` /// # use shell_quote::Fish; /// let mut buf = Vec::with_capacity(128); /// Fish::quote_into_vec("foobar", &mut buf); /// buf.push(b' '); // Add a space. 
/// Fish::quote_into_vec("foo 'bar", &mut buf); /// assert_eq!(buf, b"foobar foo' \\'bar'"); /// ``` /// pub fn quote_into_vec<'a, S: Into>>(s: S, sout: &mut Vec) { match s.into() { Quotable::Bytes(bytes) => match bytes::escape_prepare(bytes) { bytes::Prepared::Empty => sout.extend(b"''"), bytes::Prepared::Inert => sout.extend(bytes), bytes::Prepared::Escape(esc) => bytes::escape_chars(esc, sout), }, Quotable::Text(text) => match text::escape_prepare(text) { text::Prepared::Empty => sout.extend(b"''"), text::Prepared::Inert => sout.extend(text.as_bytes()), text::Prepared::Escape(esc) => text::escape_chars(esc, sout), }, } } } // ---------------------------------------------------------------------------- mod bytes { use super::u8_to_hex_escape_uppercase_x; use crate::ascii::Char; pub enum Prepared { Empty, Inert, Escape(Vec), } pub fn escape_prepare(sin: &[u8]) -> Prepared { let esc: Vec<_> = sin.iter().map(Char::from).collect(); // An optimisation: if the string is not empty and contains only "safe" // characters we can avoid further work. 
if esc.is_empty() { Prepared::Empty } else if esc.iter().all(Char::is_inert) { Prepared::Inert } else { Prepared::Escape(esc) } } pub fn escape_chars(esc: Vec, sout: &mut Vec) { #[derive(PartialEq)] enum QuoteStyle { Inside, Outside, Whatever, } use QuoteStyle::*; let mut inside_quotes_now = false; let mut push_literal = |style: QuoteStyle, literal: &[u8]| { match (inside_quotes_now, style) { (true, Outside) => { sout.push(b'\''); inside_quotes_now = false; } (false, Inside) => { sout.push(b'\''); inside_quotes_now = true; } _ => (), } sout.extend(literal); }; for mode in esc { use Char::*; match mode { Bell => push_literal(Outside, b"\\a"), Backspace => push_literal(Outside, b"\\b"), Escape => push_literal(Outside, b"\\e"), FormFeed => push_literal(Outside, b"\\f"), NewLine => push_literal(Outside, b"\\n"), CarriageReturn => push_literal(Outside, b"\\r"), HorizontalTab => push_literal(Outside, b"\\t"), VerticalTab => push_literal(Outside, b"\\v"), Control(ch) => push_literal(Outside, &u8_to_hex_escape_uppercase_x(ch)), Backslash => push_literal(Whatever, b"\\\\"), SingleQuote => push_literal(Whatever, b"\\'"), DoubleQuote => push_literal(Inside, b"\""), Delete => push_literal(Outside, b"\\X7F"), PrintableInert(ch) => push_literal(Whatever, &ch.to_le_bytes()), Printable(ch) => push_literal(Inside, &ch.to_le_bytes()), Extended(ch) => push_literal(Outside, &u8_to_hex_escape_uppercase_x(ch)), } } if inside_quotes_now { sout.push(b'\''); } } } // ---------------------------------------------------------------------------- mod text { use super::u8_to_hex_escape_uppercase_x; use crate::utf8::Char; pub enum Prepared { Empty, Inert, Escape(Vec), } pub fn escape_prepare(sin: &str) -> Prepared { let esc: Vec<_> = sin.chars().map(Char::from).collect(); // An optimisation: if the string is not empty and contains only "safe" // characters we can avoid further work. 
/// Render a single byte as the four ASCII bytes `\XHH` – note the
/// _uppercase_ "X".
///
/// fish understands both `\XHH` and `\xHH`, but before fish 3.6.0 the
/// lowercase form was restricted. From the [release notes][]:
///
/// > The `\x` and `\X` escape syntax is now equivalent. `\xAB` previously
/// > behaved the same as `\XAB`, except that it would error if the value “AB”
/// > was larger than “7f” (127 in decimal, the highest ASCII value).
///
/// [release notes]: https://github.com/fish-shell/fish-shell/releases/tag/3.6.0
///
#[inline]
fn u8_to_hex_escape_uppercase_x(ch: u8) -> [u8; 4] {
    // Map a nibble (0..=15) to its uppercase ASCII hexadecimal digit.
    let digit = |nibble: u8| -> u8 {
        match nibble {
            0..=9 => b'0' + nibble,
            _ => b'A' + (nibble - 10),
        }
    };
    [b'\\', b'X', digit(ch >> 4), digit(ch & 0x0F)]
}
#[cfg(feature = "sh")] pub type Dash = sh::Sh; /// Zsh accepts the same quoted/escaped strings as Bash, hence this is an alias /// for [`Bash`]. #[cfg(feature = "bash")] pub type Zsh = bash::Bash; // ---------------------------------------------------------------------------- /// Quoting/escaping a string of bytes into a shell-safe form. pub trait QuoteInto { /// Quote/escape a string of bytes into an existing container. fn quote_into<'q, S: Into>>(s: S, out: &mut OUT); } /// Quoting/escaping a string of bytes into a shell-safe form. pub trait Quote: QuoteInto { /// Quote/escape a string of bytes into a new container. fn quote<'q, S: Into>>(s: S) -> OUT { let mut out = OUT::default(); Self::quote_into(s, &mut out); out } } /// Blanket [`Quote`] impl for anything that has a [`QuoteInto`] impl. impl, OUT: Default> Quote for T {} // ---------------------------------------------------------------------------- /// Extension trait for pushing shell quoted byte slices, e.g. `&[u8]`, [`&str`] /// – anything that's [`Quotable`] – into container types like [`Vec`], /// [`String`], [`OsString`] on Unix, and [`bstr::BString`] if it's enabled. pub trait QuoteExt { fn push_quoted<'q, Q, S>(&mut self, _q: Q, s: S) where Q: QuoteInto, S: Into>; } impl QuoteExt for T { fn push_quoted<'q, Q, S>(&mut self, _q: Q, s: S) where Q: QuoteInto, S: Into>, { Q::quote_into(s, self); } } // ---------------------------------------------------------------------------- /// Extension trait for shell quoting many different owned and reference types, /// e.g. `&[u8]`, [`&str`] – anything that's [`Quotable`] – into owned container /// types like [`Vec`], [`String`], [`OsString`] on Unix, and /// [`bstr::BString`] if it's enabled. 
/// A borrowed string of bytes that can be quoted/escaped.
///
/// Many methods in this crate accept `Into<Quotable>` as their input bound so
/// that byte slices, strings, and (on Unix) OS strings and paths can all be
/// passed directly. Accepting `AsRef<[u8]>` instead was tried, but its
/// ergonomics were not so good: quoting [`OsString`]/[`OsStr`] and
/// [`PathBuf`]/[`Path`] didn't work in a natural way.
///
/// Both variants only hold a shared reference, so the type is cheap to copy;
/// `Debug` is derived so that values can be inspected in logs and assertions.
#[derive(Debug, Clone, Copy)]
pub enum Quotable<'a> {
    /// Arbitrary bytes; no particular encoding is implied.
    #[cfg_attr(
        not(any(feature = "bash", feature = "fish", feature = "sh")),
        allow(unused)
    )]
    Bytes(&'a [u8]),
    /// Text that is known to be valid UTF-8.
    #[cfg_attr(
        not(any(feature = "bash", feature = "fish", feature = "sh")),
        allow(unused)
    )]
    Text(&'a str),
}
From<&'a bstr::BString> for Quotable<'a> { fn from(source: &'a bstr::BString) -> Quotable<'a> { let bytes: &[u8] = source.as_ref(); bytes.into() } } #[cfg(unix)] impl<'a> From<&'a Path> for Quotable<'a> { fn from(source: &'a Path) -> Quotable<'a> { source.as_os_str().into() } } #[cfg(unix)] impl<'a> From<&'a PathBuf> for Quotable<'a> { fn from(source: &'a PathBuf) -> Quotable<'a> { source.as_os_str().into() } } shell-quote-0.7.2/src/sh.rs000064400000000000000000000216521046102023000136510ustar 00000000000000#![cfg(feature = "sh")] use crate::{ascii::Char, Quotable, QuoteInto}; /// Quote byte strings for use with `/bin/sh`. /// /// # ⚠️ Warning /// /// There is no escape sequence for bytes between 0x80 and 0xFF – these must be /// reproduced exactly in the quoted output – hence **it is not possible to /// safely create or quote into an existing [`String`]** with [`Sh`] because /// these bytes would be misinterpreted as a second or subsequent byte of a /// [multi-byte UTF-8 code point representation][utf-8-encoding]. /// /// [utf-8-encoding]: https://en.wikipedia.org/wiki/UTF-8#Encoding /// /// If you're not using bytes between 0x80 and 0xFF, a workaround is to instead /// quote into a [`Vec`] and convert that into a string using /// [`String::from_utf8`]. The key difference is that `from_utf8` returns a /// [`Result`] which the caller must deal with. /// /// # Compatibility /// /// Quoted/escaped strings produced by [`Sh`] also work in Bash, Dash, and Z /// Shell. /// /// The quoted/escaped strings it produces are different to those coming from /// [`Bash`][`crate::Bash`] or its alias [`Zsh`][`crate::Zsh`]. Those strings /// won't work in a pure `/bin/sh` shell like Dash, but they are better for /// humans to read, to copy and paste. For example, [`Sh`] does not (and cannot) /// escape control characters, but characters like `BEL` and `TAB` (and others) /// are represented by `\\a` and `\\t` respectively by [`Bash`][`crate::Bash`]. 
/// /// # Notes /// /// I wasn't able to find any definitive statement of exactly how Bourne Shell /// strings should be quoted, mainly because "Bourne Shell" or `/bin/sh` can /// refer to many different pieces of software: Bash has a Bourne Shell mode, /// `/bin/sh` on Ubuntu is actually Dash, and on macOS 12.3 (and later, and /// possibly earlier) all bets are off: /// /// > `sh` is a POSIX-compliant command interpreter (shell). It is implemented /// > by re-execing as either `bash(1)`, `dash(1)`, or `zsh(1)` as determined by /// > the symbolic link located at `/private/var/select/sh`. If /// > `/private/var/select/sh` does not exist or does not point to a valid /// > shell, `sh` will use one of the supported shells. /// /// However, [dash](https://en.wikipedia.org/wiki/Almquist_shell#dash) appears /// to be the de facto `/bin/sh` these days, having been formally adopted in /// Ubuntu and Debian, and also available as `/bin/dash` on macOS. /// /// From dash(1): /// /// > ## Quoting /// > /// > Quoting is used to remove the special meaning of certain characters or /// > words to the shell, such as operators, whitespace, or keywords. There /// > are three types of quoting: matched single quotes, matched double /// > quotes, and backslash. /// > /// > ## Backslash /// > /// > A backslash preserves the literal meaning of the following character, /// > with the exception of ⟨newline⟩. A backslash preceding a ⟨newline⟩ is /// > treated as a line continuation. /// > /// > ## Single Quotes /// > /// > Enclosing characters in single quotes preserves the literal meaning of /// > all the characters (except single quotes, making it impossible to put /// > single-quotes in a single-quoted string). /// > /// > ## Double Quotes /// > /// > Enclosing characters within double quotes preserves the literal meaning /// > of all characters except dollarsign ($), backquote (`), and backslash /// > (\). 
The backslash inside double quotes is historically weird, and
/// > serves to quote only the following characters:
/// >
/// > ```text
/// > $ ` " \ ⟨newline⟩.
/// > ```
/// >
/// > Otherwise it remains literal.
/// /// # Examples /// /// ``` /// # use shell_quote::Sh; /// assert_eq!(Sh::quote_vec("foobar"), b"foobar"); /// assert_eq!(Sh::quote_vec("foo bar"), b"foo' bar'"); /// ``` /// pub fn quote_vec<'a, S: Into>>(s: S) -> Vec { let bytes = match s.into() { Quotable::Bytes(bytes) => bytes, Quotable::Text(s) => s.as_bytes(), }; match escape_prepare(bytes) { Prepared::Empty => vec![b'\'', b'\''], Prepared::Inert => bytes.into(), Prepared::Escape(esc) => { // Here, previously, an optimisation precalculated the required // capacity of the output `Vec` to avoid reallocations later on, // but benchmarks showed that it was slower. It _may_ have // lowered maximum RAM required, but that was not measured. let mut sout = Vec::new(); escape_chars(esc, &mut sout); sout } } } /// Quote a string of bytes into an existing `Vec`. /// /// See [`quote_vec`][`Self::quote_vec`] for more details. /// /// # Examples /// /// ``` /// # use shell_quote::Sh; /// let mut buf = Vec::with_capacity(128); /// Sh::quote_into_vec("foobar", &mut buf); /// buf.push(b' '); // Add a space. /// Sh::quote_into_vec("foo bar", &mut buf); /// assert_eq!(buf, b"foobar foo' bar'"); /// ``` /// pub fn quote_into_vec<'a, S: Into>>(s: S, sout: &mut Vec) { let bytes = match s.into() { Quotable::Bytes(bytes) => bytes, Quotable::Text(s) => s.as_bytes(), }; match escape_prepare(bytes) { Prepared::Empty => sout.extend(b"''"), Prepared::Inert => sout.extend(bytes), Prepared::Escape(esc) => { // Here, previously, an optimisation precalculated the required // capacity of the output `Vec` to avoid reallocations later on, // but benchmarks showed that it was slower. It _may_ have // lowered maximum RAM required, but that was not measured. 
/// How a single `char` has to be treated when it is quoted for a shell.
///
/// The first group of variants are control characters that commonly have a
/// dedicated backslash sequence. `Control` carries any other ASCII control
/// byte. `PrintableInert` carries ASCII that never needs quoting, while
/// `Printable` carries ASCII punctuation (and space) that may be significant
/// to a shell. `Utf8` carries every code point at or above U+0080.
#[derive(PartialEq)]
pub(crate) enum Char {
    Bell,
    Backspace,
    Escape,
    FormFeed,
    NewLine,
    CarriageReturn,
    HorizontalTab,
    VerticalTab,
    Control(u8),
    Backslash,
    SingleQuote,
    DoubleQuote,
    Delete,
    PrintableInert(u8),
    Printable(u8),
    Utf8(char),
}

impl Char {
    /// Classify one `char`.
    pub fn from(ch: char) -> Self {
        use Char::*;

        // Code points above U+00FF do not fit in a byte, so they cannot be
        // ASCII and are passed through as UTF-8.
        let byte = match u8::try_from(ch) {
            Ok(byte) => byte,
            Err(_) => return Utf8(ch),
        };

        match byte {
            // U+0080..=U+00FF fit in a byte but are still not ASCII.
            0x80..=0xff => Utf8(ch),

            // Control characters with a well-known backslash sequence.
            BEL => Bell,
            BS => Backspace,
            TAB => HorizontalTab,
            LF => NewLine,
            VT => VerticalTab,
            FF => FormFeed,
            CR => CarriageReturn,
            ESC => Escape,
            DEL => Delete,

            // Every remaining ASCII control character.
            0x00..=0x06 | 0x0E..=0x1A | 0x1C..=0x1F => Control(byte),

            // Characters that are part of quoting syntax themselves.
            b'\\' => Backslash,
            b'\'' => SingleQuote,
            b'\"' => DoubleQuote,

            // Letters, digits and "safe" punctuation.
            b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' => PrintableInert(byte),
            b',' | b'.' | b'/' | b'_' | b'-' => PrintableInert(byte),

            // Space and punctuation that can be significant to a shell.
            b'|' | b'&' | b';' | b'(' | b')' | b'<' | b'>' => Printable(byte),
            b' ' | b'?' | b'[' | b']' | b'{' | b'}' | b'`' => Printable(byte),
            b'~' | b'!' | b'$' | b'@' | b'+' | b'=' | b'*' => Printable(byte),
            b'%' | b'#' | b':' | b'^' => Printable(byte),
        }
    }

    /// Report whether this character can be emitted as-is, i.e. whether it
    /// was classified as `PrintableInert`.
    #[inline]
    pub fn is_inert(&self) -> bool {
        match self {
            Char::PrintableInert(_) => true,
            _ => false,
        }
    }
}

const BEL: u8 = 0x07; // -> \a
const BS: u8 = 0x08; // -> \b
const TAB: u8 = 0x09; // -> \t
const LF: u8 = 0x0A; // -> \n
const VT: u8 = 0x0B; // -> \v
const FF: u8 = 0x0C; // -> \f
const CR: u8 = 0x0D; // -> \r
const ESC: u8 = 0x1B; // -> \e
const DEL: u8 = 0x7F;