protobuf-support-3.7.2/.cargo_vcs_info.json
{
  "git": {
    "sha1": "4cb84f305c05f0376ff51b555a2740c5251c1280"
  },
  "path_in_vcs": "protobuf-support"
}

protobuf-support-3.7.2/Cargo.toml
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.

[package]
edition = "2021"
name = "protobuf-support"
version = "3.7.2"
authors = ["Stepan Koltsov <stepan.koltsov@gmail.com>"]
description = """
Code supporting the protobuf implementation. None of the code in this crate
is public API.
"""
homepage = "https://github.com/stepancheg/rust-protobuf/"
documentation = "https://github.com/stepancheg/rust-protobuf/blob/master/README.md"
readme = "README.md"
license = "MIT"
repository = "https://github.com/stepancheg/rust-protobuf/"

[package.metadata.docs.rs]
all-features = true

[lib]
bench = false

[dependencies.thiserror]
version = "1.0.30"

[features]

protobuf-support-3.7.2/Cargo.toml.orig
[package]
name = "protobuf-support"
version = "3.7.2"
authors = ["Stepan Koltsov <stepan.koltsov@gmail.com>"]
edition = "2021"
license = "MIT"
homepage = "https://github.com/stepancheg/rust-protobuf/"
repository = "https://github.com/stepancheg/rust-protobuf/"
documentation = "https://github.com/stepancheg/rust-protobuf/blob/master/README.md"
description = """
Code supporting the protobuf implementation. None of the code in this crate
is public API.
"""

[lib]
bench = false

[features]

[dependencies]
thiserror = "1.0.30"

[package.metadata.docs.rs]
all-features = true

protobuf-support-3.7.2/LICENSE.txt
Copyright (c) 2019 Stepan Koltsov <stepan.koltsov@gmail.com>

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

protobuf-support-3.7.2/README.md
# Supporting code for protobuf crates

Code in this crate is used in protobuf crates like `protobuf` or `protobuf-parse`.
None of the code in this crate has a public API.
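## Example

A small illustration (sketch only; downstream crates such as `protobuf-parse` normally call this for you): the `json_name` helper mirrors protobuf's `ToJsonName()` and turns snake_case field names into JSON camelCase.

```rust
use protobuf_support::json_name::json_name;

assert_eq!("fooBarBaz", json_name("foo_bar_baz"));
```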
protobuf-support-3.7.2/src/json_name.rs
/// Implementation must exactly match the
/// `ToJsonName()` function in C++ `descriptor.cc`.
pub fn json_name(input: &str) -> String {
    let mut capitalize_next = false;
    let mut result = String::with_capacity(input.len());

    for c in input.chars() {
        if c == '_' {
            capitalize_next = true;
        } else if capitalize_next {
            result.extend(c.to_uppercase());
            capitalize_next = false;
        } else {
            result.push(c);
        }
    }

    result
}

protobuf-support-3.7.2/src/lexer/float.rs
#[derive(Debug)]
pub enum ProtobufFloatParseError {
    EmptyString,
    CannotParseFloat,
}

pub type ProtobufFloatParseResult = Result<f64, ProtobufFloatParseError>;

pub const PROTOBUF_NAN: &str = "nan";
pub const PROTOBUF_INF: &str = "inf";

/// Format float as in protobuf `.proto` files
pub fn format_protobuf_float(f: f64) -> String {
    if f.is_nan() {
        PROTOBUF_NAN.to_owned()
    } else if f.is_infinite() {
        if f > 0.0 {
            format!("{}", PROTOBUF_INF)
        } else {
            format!("-{}", PROTOBUF_INF)
        }
    } else {
        // TODO: make sure doesn't lose precision
        format!("{}", f)
    }
}

/// Parse float from `.proto` format
pub fn parse_protobuf_float(s: &str) -> ProtobufFloatParseResult {
    if s.is_empty() {
        return Err(ProtobufFloatParseError::EmptyString);
    }
    if s == PROTOBUF_NAN {
        return Ok(f64::NAN);
    }
    if s == PROTOBUF_INF || s == format!("+{}", PROTOBUF_INF) {
        return Ok(f64::INFINITY);
    }
    if s == format!("-{}", PROTOBUF_INF) {
        return Ok(f64::NEG_INFINITY);
    }
    match s.parse() {
        Ok(f) => Ok(f),
        Err(_) => Err(ProtobufFloatParseError::CannotParseFloat),
    }
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn test_format_protobuf_float() {
        assert_eq!("10", format_protobuf_float(10.0));
    }
}
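// Illustrative sketch: extra round-trip checks that follow directly from
// the special-value handling above.
#[cfg(test)]
mod sketch {
    use super::*;

    #[test]
    fn parse_special_values() {
        assert!(parse_protobuf_float("nan").unwrap().is_nan());
        assert_eq!(f64::INFINITY, parse_protobuf_float("inf").unwrap());
        assert_eq!(f64::NEG_INFINITY, parse_protobuf_float("-inf").unwrap());
        assert!(matches!(
            parse_protobuf_float(""),
            Err(ProtobufFloatParseError::EmptyString)
        ));
    }

    #[test]
    fn format_special_values() {
        assert_eq!("nan", format_protobuf_float(f64::NAN));
        assert_eq!("-inf", format_protobuf_float(f64::NEG_INFINITY));
    }
}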
protobuf-support-3.7.2/src/lexer/int.rs
pub struct Overflow;

/// Negate `u64` checking for overflow.
pub fn neg(value: u64) -> Result<i64, Overflow> {
    if value <= 0x7fff_ffff_ffff_ffff {
        Ok(-(value as i64))
    } else if value == 0x8000_0000_0000_0000 {
        Ok(-0x8000_0000_0000_0000)
    } else {
        Err(Overflow)
    }
}
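// Illustrative sketch: `i64::MIN` has no positive counterpart in `i64`,
// which is why the magnitude 0x8000_0000_0000_0000 is special-cased above.
#[cfg(test)]
mod sketch {
    use super::*;

    #[test]
    fn neg_boundaries() {
        assert!(matches!(neg(10), Ok(-10)));
        assert!(matches!(neg(0x8000_0000_0000_0000), Ok(i64::MIN)));
        assert!(matches!(neg(u64::MAX), Err(Overflow)));
    }
}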
protobuf-support-3.7.2/src/lexer/json_number_lit.rs
use std::fmt;

#[derive(Clone, Debug, Eq, PartialEq)]
pub struct JsonNumberLit(pub String);

impl fmt::Display for JsonNumberLit {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        fmt::Display::fmt(&self.0, f)
    }
}

protobuf-support-3.7.2/src/lexer/lexer_impl.rs
use std::char;
use std::num::ParseFloatError;
use std::num::ParseIntError;

use crate::lexer::float;
use crate::lexer::float::ProtobufFloatParseError;
use crate::lexer::json_number_lit::JsonNumberLit;
use crate::lexer::loc::Loc;
use crate::lexer::loc::FIRST_COL;
use crate::lexer::parser_language::ParserLanguage;
use crate::lexer::str_lit::StrLit;
use crate::lexer::str_lit::StrLitDecodeError;
use crate::lexer::token::Token;
use crate::lexer::token::TokenWithLocation;

#[derive(Debug, thiserror::Error)]
pub enum LexerError {
    // TODO: something better than this
    #[error("Incorrect input")]
    IncorrectInput,
    #[error("Unexpected EOF")]
    UnexpectedEof,
    #[error("Expecting char: {:?}", .0)]
    ExpectChar(char),
    #[error("Parse int error")]
    ParseIntError,
    #[error("Parse float error")]
    ParseFloatError,
    // TODO: how is it different from ParseFloatError?
    #[error("Incorrect float literal")]
    IncorrectFloatLit,
    #[error("Incorrect JSON escape")]
    IncorrectJsonEscape,
    #[error("Incorrect JSON number")]
    IncorrectJsonNumber,
    #[error("Incorrect Unicode character")]
    IncorrectUnicodeChar,
    #[error("Expecting hex digit")]
    ExpectHexDigit,
    #[error("Expecting oct digit")]
    ExpectOctDigit,
    #[error("Expecting dec digit")]
    ExpectDecDigit,
    #[error(transparent)]
    StrLitDecodeError(#[from] StrLitDecodeError),
    #[error("Expecting identifier")]
    ExpectedIdent,
}

pub type LexerResult<T> = Result<T, LexerError>;

impl From<ParseIntError> for LexerError {
    fn from(_: ParseIntError) -> Self {
        LexerError::ParseIntError
    }
}

impl From<ParseFloatError> for LexerError {
    fn from(_: ParseFloatError) -> Self {
        LexerError::ParseFloatError
    }
}

impl From<ProtobufFloatParseError> for LexerError {
    fn from(_: ProtobufFloatParseError) -> Self {
        LexerError::IncorrectFloatLit
    }
}

/// The raw bytes for a single char or escape sequence in a string literal
///
/// The raw bytes are available via the `bytes()` accessor.
pub(crate) struct DecodedBytes {
    // a single char can be up to 4 bytes when encoded in UTF-8
    buf: [u8; 4],
    len: usize,
}

impl DecodedBytes {
    fn byte(b: u8) -> DecodedBytes {
        DecodedBytes {
            buf: [b, 0, 0, 0],
            len: 1,
        }
    }

    fn char(value: char) -> Self {
        let mut buf = [0; 4];
        let len = value.encode_utf8(&mut buf).len();
        DecodedBytes { buf, len }
    }

    pub(crate) fn bytes(&self) -> &[u8] {
        &self.buf[..self.len]
    }
}

#[derive(Copy, Clone)]
pub struct Lexer<'a> {
    language: ParserLanguage,
    input: &'a str,
    pos: usize,
    pub loc: Loc,
}

fn is_letter(c: char) -> bool {
    c.is_alphabetic() || c == '_'
}

impl<'a> Lexer<'a> {
    pub fn new(input: &'a str, language: ParserLanguage) -> Lexer<'a> {
        Lexer {
            language,
            input,
            pos: 0,
            loc: Loc::start(),
        }
    }

    /// No more chars
    pub fn eof(&self) -> bool {
        self.pos == self.input.len()
    }

    /// Remaining chars
    fn rem_chars(&self) -> &'a str {
        &self.input[self.pos..]
    }

    pub fn lookahead_char_is<P: FnOnce(char) -> bool>(&self, p: P) -> bool {
        self.lookahead_char().map_or(false, p)
    }

    fn lookahead_char_is_in(&self, alphabet: &str) -> bool {
        self.lookahead_char_is(|c| alphabet.contains(c))
    }

    fn next_char_opt(&mut self) -> Option<char> {
        let rem = self.rem_chars();
        if rem.is_empty() {
            None
        } else {
            let mut char_indices = rem.char_indices();
            let (_, c) = char_indices.next().unwrap();
            let c_len = char_indices.next().map(|(len, _)| len).unwrap_or(rem.len());
            self.pos += c_len;
            if c == '\n' {
                self.loc.line += 1;
                self.loc.col = FIRST_COL;
            } else {
                self.loc.col += 1;
            }
            Some(c)
        }
    }

    fn next_char(&mut self) -> LexerResult<char> {
        self.next_char_opt().ok_or(LexerError::UnexpectedEof)
    }

    /// Skip whitespaces
    fn skip_whitespaces(&mut self) {
        self.take_while(|c| c.is_whitespace());
    }

    fn skip_c_comment(&mut self) -> LexerResult<()> {
        if self.skip_if_lookahead_is_str("/*") {
            let end = "*/";
            match self.rem_chars().find(end) {
                None => Err(LexerError::UnexpectedEof),
                Some(len) => {
                    let new_pos = self.pos + len + end.len();
                    self.skip_to_pos(new_pos);
                    Ok(())
                }
            }
        } else {
            Ok(())
        }
    }

    fn skip_cpp_comment(&mut self) {
        if self.skip_if_lookahead_is_str("//") {
            loop {
                match self.next_char_opt() {
                    Some('\n') | None => break,
                    _ => {}
                }
            }
        }
    }

    fn skip_sh_comment(&mut self) {
        if self.skip_if_lookahead_is_str("#") {
            loop {
                match self.next_char_opt() {
                    Some('\n') | None => break,
                    _ => {}
                }
            }
        }
    }

    fn skip_comment(&mut self) -> LexerResult<()> {
        match self.language {
            ParserLanguage::Proto => {
                self.skip_c_comment()?;
                self.skip_cpp_comment();
            }
            ParserLanguage::TextFormat => {
                self.skip_sh_comment();
            }
            ParserLanguage::Json => {}
        }
        Ok(())
    }

    pub fn skip_ws(&mut self) -> LexerResult<()> {
        loop {
            let pos = self.pos;
            self.skip_whitespaces();
            self.skip_comment()?;
            if pos == self.pos {
                // Did not advance
                return Ok(());
            }
        }
    }

    pub fn take_while<F>(&mut self, f: F) -> &'a str
    where
        F: Fn(char) -> bool,
    {
        let start = self.pos;
        while self.lookahead_char().map(&f) == Some(true) {
            self.next_char_opt().unwrap();
        }
        let end = self.pos;
        &self.input[start..end]
    }

    fn lookahead_char(&self) -> Option<char> {
        self.clone().next_char_opt()
    }

    fn lookahead_is_str(&self, s: &str) -> bool {
        self.rem_chars().starts_with(s)
    }

    fn skip_if_lookahead_is_str(&mut self, s: &str) -> bool {
        if self.lookahead_is_str(s) {
            let new_pos = self.pos + s.len();
            self.skip_to_pos(new_pos);
            true
        } else {
            false
        }
    }

    fn next_char_if<P>(&mut self, p: P) -> Option<char>
    where
        P: FnOnce(char) -> bool,
    {
        let mut clone = self.clone();
        match clone.next_char_opt() {
            Some(c) if p(c) => {
                *self = clone;
                Some(c)
            }
            _ => None,
        }
    }

    pub fn next_char_if_eq(&mut self, expect: char) -> bool {
        self.next_char_if(|c| c == expect) != None
    }

    fn next_char_if_in(&mut self, alphabet: &str) -> Option<char> {
        for c in alphabet.chars() {
            if self.next_char_if_eq(c) {
                return Some(c);
            }
        }
        None
    }

    fn next_char_expect_eq(&mut self, expect: char) -> LexerResult<()> {
        if self.next_char_if_eq(expect) {
            Ok(())
        } else {
            Err(LexerError::ExpectChar(expect))
        }
    }

    fn next_char_expect<P>(&mut self, expect: P, err: LexerError) -> LexerResult<char>
    where
        P: FnOnce(char) -> bool,
    {
        self.next_char_if(expect).ok_or(err)
    }

    // str functions

    /// properly update line and column
    fn skip_to_pos(&mut self, new_pos: usize) -> &'a str {
        assert!(new_pos >= self.pos);
        assert!(new_pos <= self.input.len());
        let pos = self.pos;
        while self.pos != new_pos {
            self.next_char_opt().unwrap();
        }
        &self.input[pos..new_pos]
    }

    // Protobuf grammar

    // char functions

    // letter = "A" … "Z" | "a" … "z"
    // https://github.com/google/protobuf/issues/4565
    fn next_letter_opt(&mut self) -> Option<char> {
        self.next_char_if(is_letter)
    }

    // capitalLetter = "A" … "Z"
    fn _next_capital_letter_opt(&mut self) -> Option<char> {
        self.next_char_if(|c| c >= 'A' && c <= 'Z')
    }

    fn next_ident_part(&mut self) -> Option<char> {
        self.next_char_if(|c| c.is_ascii_alphanumeric() || c == '_')
    }

    // Identifiers

    // ident = letter { letter | decimalDigit | "_" }
    fn next_ident_opt(&mut self) -> LexerResult<Option<String>> {
        if let Some(c) = self.next_letter_opt() {
            let mut ident = String::new();
            ident.push(c);
            while let Some(c) = self.next_ident_part() {
                ident.push(c);
            }
            Ok(Some(ident))
        } else {
            Ok(None)
        }
    }

    // Integer literals

    // hexLit = "0" ( "x" | "X" ) hexDigit { hexDigit }
    fn next_hex_lit_opt(&mut self) -> LexerResult<Option<u64>> {
        Ok(
            if self.skip_if_lookahead_is_str("0x") || self.skip_if_lookahead_is_str("0X") {
                let s = self.take_while(|c| c.is_ascii_hexdigit());
                Some(u64::from_str_radix(s, 16)? as u64)
            } else {
                None
            },
        )
    }

    // decimalLit = ( "1" … "9" ) { decimalDigit }
    // octalLit = "0" { octalDigit }
    fn next_decimal_octal_lit_opt(&mut self) -> LexerResult<Option<u64>> {
        // do not advance on number parse error
        let mut clone = self.clone();
        let pos = clone.pos;
        Ok(if clone.next_char_if(|c| c.is_ascii_digit()) != None {
            clone.take_while(|c| c.is_ascii_digit());
            let value = clone.input[pos..clone.pos].parse()?;
            *self = clone;
            Some(value)
        } else {
            None
        })
    }

    // hexDigit = "0" … "9" | "A" … "F" | "a" … "f"
    fn next_hex_digit(&mut self) -> LexerResult<u32> {
        let mut clone = self.clone();
        let r = match clone.next_char()? {
            c if c >= '0' && c <= '9' => c as u32 - b'0' as u32,
            c if c >= 'A' && c <= 'F' => c as u32 - b'A' as u32 + 10,
            c if c >= 'a' && c <= 'f' => c as u32 - b'a' as u32 + 10,
            _ => return Err(LexerError::ExpectHexDigit),
        };
        *self = clone;
        Ok(r)
    }

    // octalDigit = "0" … "7"
    fn next_octal_digit(&mut self) -> LexerResult<u32> {
        self.next_char_expect(|c| c >= '0' && c <= '9', LexerError::ExpectOctDigit)
            .map(|c| c as u32 - '0' as u32)
    }

    // decimalDigit = "0" … "9"
    fn next_decimal_digit(&mut self) -> LexerResult<u32> {
        self.next_char_expect(|c| c >= '0' && c <= '9', LexerError::ExpectDecDigit)
            .map(|c| c as u32 - '0' as u32)
    }

    // decimals = decimalDigit { decimalDigit }
    fn next_decimal_digits(&mut self) -> LexerResult<()> {
        self.next_decimal_digit()?;
        self.take_while(|c| c >= '0' && c <= '9');
        Ok(())
    }

    // intLit = decimalLit | octalLit | hexLit
    pub fn next_int_lit_opt(&mut self) -> LexerResult<Option<u64>> {
        assert_ne!(ParserLanguage::Json, self.language);
        self.skip_ws()?;
        if let Some(i) = self.next_hex_lit_opt()? {
            return Ok(Some(i));
        }
        if let Some(i) = self.next_decimal_octal_lit_opt()? {
            return Ok(Some(i));
        }
        Ok(None)
    }

    // Floating-point literals

    // exponent = ( "e" | "E" ) [ "+" | "-" ] decimals
    fn next_exponent_opt(&mut self) -> LexerResult<Option<()>> {
        if self.next_char_if_in("eE") != None {
            self.next_char_if_in("+-");
            self.next_decimal_digits()?;
            Ok(Some(()))
        } else {
            Ok(None)
        }
    }
    // floatLit = ( decimals "." [ decimals ] [ exponent ]
    //            | decimals exponent
    //            | "." decimals [ exponent ] )
    //            | "inf" | "nan"
    fn next_float_lit(&mut self) -> LexerResult<()> {
        assert_ne!(ParserLanguage::Json, self.language);
        // "inf" and "nan" are handled as part of ident
        if self.next_char_if_eq('.') {
            self.next_decimal_digits()?;
            self.next_exponent_opt()?;
        } else {
            self.next_decimal_digits()?;
            if self.next_char_if_eq('.') {
                self.next_decimal_digits()?;
                self.next_exponent_opt()?;
            } else {
                if self.next_exponent_opt()? == None {
                    return Err(LexerError::IncorrectFloatLit);
                }
            }
        }
        Ok(())
    }

    // String literals

    // charValue = hexEscape | octEscape | charEscape | /[^\0\n\\]/
    // hexEscape = '\' ( "x" | "X" ) hexDigit hexDigit
    // https://github.com/google/protobuf/issues/4560
    // octEscape = '\' octalDigit octalDigit octalDigit
    // charEscape = '\' ( "a" | "b" | "f" | "n" | "r" | "t" | "v" | '\' | "'" | '"' )
    // quote = "'" | '"'
    pub(crate) fn next_str_lit_bytes(&mut self) -> LexerResult<DecodedBytes> {
        match self.next_char()? {
            '\\' => {
                match self.next_char()? {
                    '\'' => Ok(DecodedBytes::byte(b'\'')),
                    '"' => Ok(DecodedBytes::byte(b'"')),
                    '\\' => Ok(DecodedBytes::byte(b'\\')),
                    'a' => Ok(DecodedBytes::byte(b'\x07')),
                    'b' => Ok(DecodedBytes::byte(b'\x08')),
                    'f' => Ok(DecodedBytes::byte(b'\x0c')),
                    'n' => Ok(DecodedBytes::byte(b'\n')),
                    'r' => Ok(DecodedBytes::byte(b'\r')),
                    't' => Ok(DecodedBytes::byte(b'\t')),
                    'v' => Ok(DecodedBytes::byte(b'\x0b')),
                    'x' => {
                        let d1 = self.next_hex_digit()? as u8;
                        let d2 = self.next_hex_digit()? as u8;
                        Ok(DecodedBytes::byte((d1 << 4) | d2))
                    }
                    d if d >= '0' && d <= '7' => {
                        let mut r = d as u8 - b'0';
                        for _ in 0..2 {
                            match self.next_octal_digit() {
                                Err(_) => break,
                                Ok(d) => r = (r << 3) + d as u8,
                            }
                        }
                        Ok(DecodedBytes::byte(r))
                    }
                    // https://github.com/google/protobuf/issues/4562
                    c => Ok(DecodedBytes::char(c)),
                }
            }
            '\n' | '\0' => Err(LexerError::IncorrectInput),
            c => Ok(DecodedBytes::char(c)),
        }
    }
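    // Worked example (added note): in the literal `\x41\101`, the `\x41`
    // branch reads hex digits 4 and 1, yielding byte 0x41 (`A`); `\101`
    // takes the octal branch: r = 1, then r = 1 * 8 + 0 = 8,
    // then r = 8 * 8 + 1 = 65, which is `A` again.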
    fn char_try_from(i: u32) -> LexerResult<char> {
        char::try_from(i).map_err(|_| LexerError::IncorrectUnicodeChar)
    }

    pub fn next_json_char_value(&mut self) -> LexerResult<char> {
        match self.next_char()? {
            '\\' => match self.next_char()? {
                '"' => Ok('"'),
                '\'' => Ok('\''),
                '\\' => Ok('\\'),
                '/' => Ok('/'),
                'b' => Ok('\x08'),
                'f' => Ok('\x0c'),
                'n' => Ok('\n'),
                'r' => Ok('\r'),
                't' => Ok('\t'),
                'u' => {
                    let mut v = 0;
                    for _ in 0..4 {
                        let digit = self.next_hex_digit()?;
                        v = v * 16 + digit;
                    }
                    Self::char_try_from(v)
                }
                _ => Err(LexerError::IncorrectJsonEscape),
            },
            c => Ok(c),
        }
    }

    // https://github.com/google/protobuf/issues/4564
    // strLit = ( "'" { charValue } "'" ) | ( '"' { charValue } '"' )
    fn next_str_lit_raw(&mut self) -> LexerResult<String> {
        let mut raw = String::new();

        let mut first = true;
        loop {
            if !first {
                self.skip_ws()?;
            }

            let start = self.pos;

            let q = match self.next_char_if_in("'\"") {
                Some(q) => q,
                None if !first => break,
                None => return Err(LexerError::IncorrectInput),
            };
            first = false;
            while self.lookahead_char() != Some(q) {
                self.next_str_lit_bytes()?;
            }
            self.next_char_expect_eq(q)?;

            raw.push_str(&self.input[start + 1..self.pos - 1]);
        }
        Ok(raw)
    }

    fn next_str_lit_raw_opt(&mut self) -> LexerResult<Option<String>> {
        if self.lookahead_char_is_in("'\"") {
            Ok(Some(self.next_str_lit_raw()?))
        } else {
            Ok(None)
        }
    }

    /// Parse next token as JSON number
    fn next_json_number_opt(&mut self) -> LexerResult<Option<JsonNumberLit>> {
        assert_eq!(ParserLanguage::Json, self.language);

        fn is_digit(c: char) -> bool {
            c >= '0' && c <= '9'
        }

        fn is_digit_1_9(c: char) -> bool {
            c >= '1' && c <= '9'
        }

        if !self.lookahead_char_is_in("-0123456789") {
            return Ok(None);
        }

        let mut s = String::new();
        if self.next_char_if_eq('-') {
            s.push('-');
        }
        if self.next_char_if_eq('0') {
            s.push('0');
        } else {
            s.push(self.next_char_expect(is_digit_1_9, LexerError::IncorrectJsonNumber)?);
            while let Some(c) = self.next_char_if(is_digit) {
                s.push(c);
            }
        }
        if self.next_char_if_eq('.') {
            s.push('.');
            s.push(self.next_char_expect(is_digit, LexerError::IncorrectJsonNumber)?);
            while let Some(c) = self.next_char_if(is_digit) {
                s.push(c);
            }
        }
        if let Some(c) = self.next_char_if_in("eE") {
            s.push(c);
            if let Some(c) = self.next_char_if_in("+-") {
                s.push(c);
            }
            s.push(self.next_char_expect(is_digit, LexerError::IncorrectJsonNumber)?);
            while let Some(c) = self.next_char_if(is_digit) {
                s.push(c);
            }
        }
        Ok(Some(JsonNumberLit(s)))
    }

    fn next_token_inner(&mut self) -> LexerResult<Token> {
        if self.language == ParserLanguage::Json {
            if let Some(v) = self.next_json_number_opt()? {
                return Ok(Token::JsonNumber(v));
            }
        }

        if let Some(ident) = self.next_ident_opt()? {
            let token = if self.language != ParserLanguage::Json && ident == float::PROTOBUF_NAN {
                Token::FloatLit(f64::NAN)
            } else if self.language != ParserLanguage::Json && ident == float::PROTOBUF_INF {
                Token::FloatLit(f64::INFINITY)
            } else {
                Token::Ident(ident.to_owned())
            };
            return Ok(token);
        }

        if self.language != ParserLanguage::Json {
            let mut clone = self.clone();
            let pos = clone.pos;
            if let Ok(_) = clone.next_float_lit() {
                let f = float::parse_protobuf_float(&self.input[pos..clone.pos])?;
                *self = clone;
                return Ok(Token::FloatLit(f));
            }

            if let Some(lit) = self.next_int_lit_opt()? {
                return Ok(Token::IntLit(lit));
            }
        }

        if let Some(escaped) = self.next_str_lit_raw_opt()? {
            return Ok(Token::StrLit(StrLit { escaped }));
        }

        // This branch must be after str lit
        if let Some(c) = self.next_char_if(|c| c.is_ascii_punctuation()) {
            return Ok(Token::Symbol(c));
        }

        if let Some(ident) = self.next_ident_opt()? {
            return Ok(Token::Ident(ident));
        }

        Err(LexerError::IncorrectInput)
    }

    pub fn next_token(&mut self) -> LexerResult<Option<TokenWithLocation>> {
        self.skip_ws()?;
        let loc = self.loc;

        Ok(if self.eof() {
            None
        } else {
            let token = self.next_token_inner()?;
            // Skip whitespace here to update location
            // to the beginning of the next token
            self.skip_ws()?;
            Some(TokenWithLocation { token, loc })
        })
    }
}

#[cfg(test)]
mod test {
    use super::*;

    fn lex<P, R>(input: &str, parse_what: P) -> R
    where
        P: FnOnce(&mut Lexer) -> LexerResult<R>,
    {
        let mut lexer = Lexer::new(input, ParserLanguage::Proto);
        let r = parse_what(&mut lexer).expect(&format!("lexer failed at {}", lexer.loc));
        assert!(lexer.eof(), "check eof failed at {}", lexer.loc);
        r
    }

    fn lex_opt<P, R>(input: &str, parse_what: P) -> R
    where
        P: FnOnce(&mut Lexer) -> LexerResult<Option<R>>,
    {
        let mut lexer = Lexer::new(input, ParserLanguage::Proto);
        let o = parse_what(&mut lexer).expect(&format!("lexer failed at {}", lexer.loc));
        let r = o.expect(&format!("lexer returned none at {}", lexer.loc));
        assert!(lexer.eof(), "check eof failed at {}", lexer.loc);
        r
    }

    #[test]
    fn test_lexer_int_lit() {
        let msg = r#"10"#;
        let mess = lex_opt(msg, |p| p.next_int_lit_opt());
        assert_eq!(10, mess);
    }

    #[test]
    fn test_lexer_float_lit() {
        let msg = r#"12.3"#;
        let mess = lex(msg, |p| p.next_token_inner());
        assert_eq!(Token::FloatLit(12.3), mess);
    }

    #[test]
    fn test_lexer_float_lit_leading_zeros_in_exp() {
        let msg = r#"1e00009"#;
        let mess = lex(msg, |p| p.next_token_inner());
        assert_eq!(Token::FloatLit(1_000_000_000.0), mess);
    }
}
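// Illustrative sketch: end-to-end checks implied by the code above
// (hex integer literals, and `inf` lexed as a float token).
#[cfg(test)]
mod sketch {
    use super::*;

    #[test]
    fn hex_int_lit() {
        let mut lexer = Lexer::new("0x2A", ParserLanguage::Proto);
        assert_eq!(Some(42), lexer.next_int_lit_opt().unwrap());
        assert!(lexer.eof());
    }

    #[test]
    fn inf_is_float_token() {
        let mut lexer = Lexer::new("inf", ParserLanguage::Proto);
        let token = lexer.next_token().unwrap().unwrap().token;
        assert_eq!(Token::FloatLit(f64::INFINITY), token);
    }
}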
protobuf-support-3.7.2/src/lexer/loc.rs
use std::fmt;

pub const FIRST_LINE: u32 = 1;
pub const FIRST_COL: u32 = 1;

/// Location in file
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
pub struct Loc {
    /// 1-based
    pub line: u32,
    /// 1-based
    pub col: u32,
}

impl fmt::Display for Loc {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{}:{}", self.line, self.col)
    }
}

impl Loc {
    pub fn start() -> Loc {
        Loc {
            line: FIRST_LINE,
            col: FIRST_COL,
        }
    }
}

protobuf-support-3.7.2/src/lexer/mod.rs
//! Implementation of the lexer used by both the protobuf parser and the text format parser.

pub mod float;
pub mod int;
pub mod json_number_lit;
pub mod lexer_impl;
pub mod loc;
pub mod num_lit;
pub mod parser_language;
pub mod str_lit;
pub mod token;
pub mod tokenizer;

protobuf-support-3.7.2/src/lexer/num_lit.rs
#[derive(Copy, Clone)]
pub enum NumLit {
    U64(u64),
    F64(f64),
}

protobuf-support-3.7.2/src/lexer/parser_language.rs
/// We use the same lexer/tokenizer for all parsers for simplicity
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum ParserLanguage {
    // `.proto` files
    Proto,
    // Protobuf text format
    TextFormat,
    // JSON
    Json,
}

protobuf-support-3.7.2/src/lexer/str_lit.rs
use std::fmt;
use std::string::FromUtf8Error;

use crate::lexer::lexer_impl::Lexer;
use crate::lexer::parser_language::ParserLanguage;

#[derive(Debug, thiserror::Error)]
pub enum StrLitDecodeError {
    #[error(transparent)]
    FromUtf8Error(#[from] FromUtf8Error),
    #[error("String literal decode error")]
    OtherError,
}

pub type StrLitDecodeResult<T> = Result<T, StrLitDecodeError>;

/// String literal, both `string` and `bytes`.
#[derive(Clone, Eq, PartialEq, Debug)]
pub struct StrLit {
    pub escaped: String,
}

impl fmt::Display for StrLit {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "\"{}\"", &self.escaped)
    }
}

impl StrLit {
    /// May fail if not valid UTF8
    pub fn decode_utf8(&self) -> StrLitDecodeResult<String> {
        let mut lexer = Lexer::new(&self.escaped, ParserLanguage::Json);
        let mut r = Vec::new();
        while !lexer.eof() {
            r.extend(
                lexer
                    .next_str_lit_bytes()
                    .map_err(|_| StrLitDecodeError::OtherError)?
                    .bytes(),
            );
        }
        Ok(String::from_utf8(r)?)
    }

    pub fn decode_bytes(&self) -> StrLitDecodeResult<Vec<u8>> {
        let mut lexer = Lexer::new(&self.escaped, ParserLanguage::Json);
        let mut r = Vec::new();
        while !lexer.eof() {
            r.extend(
                lexer
                    .next_str_lit_bytes()
                    .map_err(|_| StrLitDecodeError::OtherError)?
                    .bytes(),
            );
        }
        Ok(r)
    }

    pub fn quoted(&self) -> String {
        format!("\"{}\"", self.escaped)
    }
}

#[cfg(test)]
mod test {
    use crate::lexer::str_lit::StrLit;

    #[test]
    fn decode_utf8() {
        assert_eq!(
            "\u{1234}".to_owned(),
            StrLit {
                escaped: "\\341\\210\\264".to_owned()
            }
            .decode_utf8()
            .unwrap()
        )
    }
}
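// Illustrative sketch: `decode_bytes` resolves C-style escapes in the raw
// text, while `quoted` simply wraps the still-escaped form in double quotes.
#[cfg(test)]
mod sketch {
    use crate::lexer::str_lit::StrLit;

    #[test]
    fn decode_and_quote() {
        let lit = StrLit {
            escaped: "a\\nb".to_owned(),
        };
        assert_eq!(b"a\nb".to_vec(), lit.decode_bytes().unwrap());
        assert_eq!("\"a\\nb\"", lit.quoted());
    }
}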
protobuf-support-3.7.2/src/lexer/token.rs
use crate::lexer::json_number_lit::JsonNumberLit;
use crate::lexer::lexer_impl::LexerError;
use crate::lexer::lexer_impl::LexerResult;
use crate::lexer::loc::Loc;
use crate::lexer::num_lit::NumLit;
use crate::lexer::str_lit::StrLit;

#[derive(Clone, Debug, PartialEq)]
pub enum Token {
    Ident(String),
    Symbol(char),
    // Protobuf tokenizer has separate tokens for int and float.
    // Tokens do not include sign.
    IntLit(u64),
    FloatLit(f64),
    JsonNumber(JsonNumberLit),
    // including quotes
    StrLit(StrLit),
}

impl Token {
    /// Back to original
    pub fn format(&self) -> String {
        match self {
            &Token::Ident(ref s) => s.clone(),
            &Token::Symbol(c) => c.to_string(),
            &Token::IntLit(ref i) => i.to_string(),
            &Token::StrLit(ref s) => s.quoted(),
            &Token::FloatLit(ref f) => f.to_string(),
            &Token::JsonNumber(ref f) => f.to_string(),
        }
    }

    pub fn to_num_lit(&self) -> LexerResult<NumLit> {
        match self {
            &Token::IntLit(i) => Ok(NumLit::U64(i)),
            &Token::FloatLit(f) => Ok(NumLit::F64(f)),
            _ => Err(LexerError::IncorrectInput),
        }
    }
}

#[derive(Clone)]
pub struct TokenWithLocation {
    pub token: Token,
    pub loc: Loc,
}
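// Illustrative sketch: `format` renders a token back to its source form.
#[cfg(test)]
mod sketch {
    use super::*;

    #[test]
    fn format_round_trip() {
        assert_eq!("foo", Token::Ident("foo".to_owned()).format());
        assert_eq!("{", Token::Symbol('{').format());
        assert_eq!("10", Token::IntLit(10).format());
    }
}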
protobuf-support-3.7.2/src/lexer/tokenizer.rs
use crate::lexer::lexer_impl::Lexer;
use crate::lexer::lexer_impl::LexerError;
use crate::lexer::loc::Loc;
use crate::lexer::parser_language::ParserLanguage;
use crate::lexer::str_lit::StrLit;
use crate::lexer::str_lit::StrLitDecodeError;
use crate::lexer::token::Token;
use crate::lexer::token::TokenWithLocation;

#[derive(Debug, thiserror::Error)]
pub enum TokenizerError {
    #[error(transparent)]
    LexerError(#[from] LexerError),
    #[error(transparent)]
    StrLitDecodeError(#[from] StrLitDecodeError),
    #[error("Internal tokenizer error")]
    InternalError,
    // TODO: too broad
    #[error("Incorrect input")]
    IncorrectInput,
    #[error("Not allowed in this context: {0}")]
    NotAllowedInThisContext(&'static str),
    #[error("Unexpected end of input")]
    UnexpectedEof,
    #[error("Expecting string literal")]
    ExpectStrLit,
    #[error("Expecting int literal")]
    ExpectIntLit,
    #[error("Expecting float literal")]
    ExpectFloatLit,
    #[error("Expecting identifier")]
    ExpectIdent,
    #[error("Expecting identifier `{}`", .0)]
    ExpectNamedIdent(String),
    #[error("While parsing {}, expecting char `{}`", .1, .0)]
    ExpectChar(char, &'static str),
    #[error("Expecting any char of: {}", .0.iter().map(|c| format!("`{}`", c)).collect::<Vec<_>>().join(", "))]
    ExpectAnyChar(Vec<char>),
}

pub type TokenizerResult<R> = Result<R, TokenizerError>;

#[derive(Clone)]
pub struct Tokenizer<'a> {
    lexer: Lexer<'a>,
    next_token: Option<TokenWithLocation>,
    last_token_loc: Option<Loc>,
}

impl<'a> Tokenizer<'a> {
    pub fn new(input: &'a str, comment_style: ParserLanguage) -> Tokenizer<'a> {
        Tokenizer {
            lexer: Lexer::new(input, comment_style),
            next_token: None,
            last_token_loc: None,
        }
    }

    pub fn loc(&self) -> Loc {
        // After lookahead return the location of the next token
        self.next_token
            .as_ref()
            .map(|t| t.loc.clone())
            // After token consumed return the location of that token
            .or(self.last_token_loc.clone())
            // Otherwise return the position of lexer
            .unwrap_or(self.lexer.loc)
    }

    pub fn lookahead_loc(&mut self) -> Loc {
        drop(self.lookahead());
        // TODO: does not handle EOF properly
        self.loc()
    }

    fn lookahead(&mut self) -> TokenizerResult<Option<&Token>> {
        Ok(match self.next_token {
            Some(ref token) => Some(&token.token),
            None => {
                self.next_token = self.lexer.next_token()?;
                self.last_token_loc = self.next_token.as_ref().map(|t| t.loc.clone());
                match self.next_token {
                    Some(ref token) => Some(&token.token),
                    None => None,
                }
            }
        })
    }

    pub fn lookahead_some(&mut self) -> TokenizerResult<&Token> {
        match self.lookahead()? {
            Some(token) => Ok(token),
            None => Err(TokenizerError::UnexpectedEof),
        }
    }

    fn next(&mut self) -> TokenizerResult<Option<Token>> {
        self.lookahead()?;
        Ok(self
            .next_token
            .take()
            .map(|TokenWithLocation { token, .. }| token))
    }

    pub fn next_some(&mut self) -> TokenizerResult<Token> {
        match self.next()? {
            Some(token) => Ok(token),
            None => Err(TokenizerError::UnexpectedEof),
        }
    }

    /// Can be called only after lookahead, otherwise it's error
    pub fn advance(&mut self) -> TokenizerResult<Token> {
        self.next_token
            .take()
            .map(|TokenWithLocation { token, .. }| token)
            .ok_or(TokenizerError::InternalError)
    }

    /// No more tokens
    pub fn syntax_eof(&mut self) -> TokenizerResult<bool> {
        Ok(self.lookahead()?.is_none())
    }

    pub fn next_token_if_map<P, R>(&mut self, p: P) -> TokenizerResult<Option<R>>
    where
        P: FnOnce(&Token) -> Option<R>,
    {
        self.lookahead()?;
        let v = match self.next_token {
            Some(ref token) => match p(&token.token) {
                Some(v) => v,
                None => return Ok(None),
            },
            _ => return Ok(None),
        };
        self.next_token = None;
        Ok(Some(v))
    }

    pub fn next_token_check_map<P, R, E>(&mut self, p: P) -> Result<R, E>
    where
        P: FnOnce(&Token) -> Result<R, E>,
        E: From<TokenizerError>,
    {
        self.lookahead()?;
        let r = match self.next_token {
            Some(ref token) => p(&token.token)?,
            None => return Err(TokenizerError::UnexpectedEof.into()),
        };
        self.next_token = None;
        Ok(r)
    }

    fn next_token_if<P>(&mut self, p: P) -> TokenizerResult<Option<Token>>
    where
        P: FnOnce(&Token) -> bool,
    {
        self.next_token_if_map(|token| if p(token) { Some(token.clone()) } else { None })
    }

    pub fn next_ident_if_in(&mut self, idents: &[&str]) -> TokenizerResult<Option<String>> {
        let v = match self.lookahead()? {
            Some(&Token::Ident(ref next)) => {
                if idents.into_iter().find(|&i| i == next).is_some() {
                    next.clone()
                } else {
                    return Ok(None);
                }
            }
            _ => return Ok(None),
        };
        self.advance()?;
        Ok(Some(v))
    }

    pub fn next_ident_if_eq(&mut self, word: &str) -> TokenizerResult<bool> {
        Ok(self.next_ident_if_in(&[word])? != None)
    }

    pub fn next_ident_expect_eq(&mut self, word: &str) -> TokenizerResult<()> {
        if self.next_ident_if_eq(word)? {
            Ok(())
        } else {
            Err(TokenizerError::ExpectNamedIdent(word.to_owned()))
        }
    }

    pub fn next_ident_if_eq_error(&mut self, word: &'static str) -> TokenizerResult<()> {
        if self.clone().next_ident_if_eq(word)? {
            // TODO: which context?
            return Err(TokenizerError::NotAllowedInThisContext(word));
        }
        Ok(())
    }

    pub fn next_symbol_if_in(&mut self, symbols: &[char]) -> TokenizerResult<bool> {
        self.next_token_if(|token| match token {
            Token::Symbol(c) if symbols.contains(c) => true,
            _ => false,
        })
        .map(|token| token.is_some())
    }

    pub fn next_symbol_if_eq(&mut self, symbol: char) -> TokenizerResult<bool> {
        self.next_symbol_if_in(&[symbol])
    }

    pub fn next_symbol_expect_eq(
        &mut self,
        symbol: char,
        desc: &'static str,
    ) -> TokenizerResult<()> {
        if self.lookahead_is_symbol(symbol)? {
            self.advance()?;
            Ok(())
        } else {
            Err(TokenizerError::ExpectChar(symbol, desc))
        }
    }

    pub fn next_symbol_expect_eq_oneof(&mut self, symbols: &[char]) -> TokenizerResult<char> {
        for symbol in symbols {
            if let Ok(()) = self.next_symbol_expect_eq(*symbol, "ignored") {
                return Ok(*symbol);
            }
        }
        Err(TokenizerError::ExpectAnyChar(symbols.to_owned()))
    }

    pub fn lookahead_is_str_lit(&mut self) -> TokenizerResult<bool> {
        Ok(match self.lookahead()? {
            Some(&Token::StrLit(..)) => true,
            _ => false,
        })
    }

    pub fn lookahead_is_int_lit(&mut self) -> TokenizerResult<bool> {
        Ok(match self.lookahead()? {
            Some(&Token::IntLit(..)) => true,
            _ => false,
        })
    }

    pub fn lookahead_is_json_number(&mut self) -> TokenizerResult<bool> {
        Ok(match self.lookahead()? {
            Some(&Token::JsonNumber(..)) => true,
            _ => false,
        })
    }

    pub fn lookahead_if_symbol(&mut self) -> TokenizerResult<Option<char>> {
        Ok(match self.lookahead()? {
            Some(&Token::Symbol(c)) => Some(c),
            _ => None,
        })
    }

    pub fn lookahead_is_symbol(&mut self, symbol: char) -> TokenizerResult<bool> {
        Ok(self.lookahead_if_symbol()? == Some(symbol))
    }

    pub fn lookahead_is_ident(&mut self, ident: &str) -> TokenizerResult<bool> {
        Ok(match self.lookahead()? {
            Some(Token::Ident(i)) => i == ident,
            _ => false,
        })
    }
    pub fn next_ident(&mut self) -> TokenizerResult<String> {
        self.next_token_check_map(|token| match token {
            &Token::Ident(ref ident) => Ok(ident.clone()),
            _ => Err(TokenizerError::ExpectIdent),
        })
    }

    pub fn next_str_lit(&mut self) -> TokenizerResult<StrLit> {
        self.next_token_check_map(|token| match token {
            &Token::StrLit(ref str_lit) => Ok(str_lit.clone()),
            _ => Err(TokenizerError::ExpectStrLit),
        })
    }

    pub fn next_int_lit(&mut self) -> TokenizerResult<u64> {
        self.next_token_check_map(|token| match token {
            &Token::IntLit(v) => Ok(v),
            _ => Err(TokenizerError::ExpectIntLit),
        })
    }

    pub fn next_float_lit(&mut self) -> TokenizerResult<f64> {
        self.next_token_check_map(|token| match token {
            &Token::FloatLit(v) => Ok(v),
            _ => Err(TokenizerError::ExpectFloatLit),
        })
    }
}

#[cfg(test)]
mod test {
    use super::*;

    fn tokenize<P, R>(input: &str, what: P) -> R
    where
        P: FnOnce(&mut Tokenizer) -> TokenizerResult<R>,
    {
        let mut tokenizer = Tokenizer::new(input, ParserLanguage::Proto);
        let r = what(&mut tokenizer).expect(&format!("parse failed at {}", tokenizer.loc()));
        let eof = tokenizer
            .syntax_eof()
            .expect(&format!("check eof failed at {}", tokenizer.loc()));
        assert!(eof, "{}", tokenizer.loc());
        r
    }

    #[test]
    fn test_ident() {
        let msg = r#" aabb_c "#;
        let mess = tokenize(msg, |p| p.next_ident().map(|s| s.to_owned()));
        assert_eq!("aabb_c", mess);
    }

    #[test]
    fn test_str_lit() {
        let msg = r#" "a\nb" "#;
        let mess = tokenize(msg, |p| p.next_str_lit());
        assert_eq!(
            StrLit {
                escaped: r#"a\nb"#.to_owned()
            },
            mess
        );
    }
}
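// Illustrative sketch: driving the tokenizer over a tiny `.proto`-like
// fragment (the input `foo = 10;` is made up for the example).
#[cfg(test)]
mod sketch {
    use super::*;

    #[test]
    fn walk_tokens() {
        let mut t = Tokenizer::new("foo = 10;", ParserLanguage::Proto);
        assert_eq!("foo", t.next_ident().unwrap());
        t.next_symbol_expect_eq('=', "sketch").unwrap();
        assert_eq!(10, t.next_int_lit().unwrap());
        t.next_symbol_expect_eq(';', "sketch").unwrap();
        assert!(t.syntax_eof().unwrap());
    }
}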
protobuf-support-3.7.2/src/lib.rs
//! # Supporting code for protobuf crates
//!
//! Code in this crate is used in protobuf crates like `protobuf` or `protobuf-parse`.
//! None of the code in this crate has a public API.

pub mod json_name;
pub mod lexer;
pub mod text_format;
pub mod toposort;

protobuf-support-3.7.2/src/text_format.rs
pub fn escape_bytes_to(bytes: &[u8], buf: &mut String) {
    for &c in bytes {
        match c {
            b'\n' => buf.push_str(r"\n"),
            b'\r' => buf.push_str(r"\r"),
            b'\t' => buf.push_str(r"\t"),
            b'\'' => buf.push_str("\\\'"),
            b'"' => buf.push_str("\\\""),
            b'\\' => buf.push_str(r"\\"),
            b'\x20'..=b'\x7e' => buf.push(c as char),
            _ => {
                buf.push('\\');
                buf.push((b'0' + (c >> 6)) as char);
                buf.push((b'0' + ((c >> 3) & 7)) as char);
                buf.push((b'0' + (c & 7)) as char);
            }
        }
    }
}

pub fn quote_bytes_to(bytes: &[u8], buf: &mut String) {
    buf.push('"');
    escape_bytes_to(bytes, buf);
    buf.push('"');
}

#[cfg(test)]
mod test {
    use crate::lexer::str_lit::StrLit;
    use crate::text_format::escape_bytes_to;

    fn escape(data: &[u8]) -> String {
        let mut s = String::with_capacity(data.len() * 4);
        escape_bytes_to(data, &mut s);
        s
    }

    fn unescape_string(escaped: &str) -> Vec<u8> {
        StrLit {
            escaped: escaped.to_owned(),
        }
        .decode_bytes()
        .expect("decode_bytes")
    }

    fn test_escape_unescape(text: &str, escaped: &str) {
        assert_eq!(text.as_bytes(), &unescape_string(escaped)[..]);
        assert_eq!(escaped, &escape(text.as_bytes())[..]);
    }

    #[test]
    fn test_print_to_bytes() {
        assert_eq!("ab", escape(b"ab"));
        assert_eq!("a\\\\023", escape(b"a\\023"));
        assert_eq!("a\\r\\n\\t \\'\\\"\\\\", escape(b"a\r\n\t '\"\\"));
        assert_eq!("\\344\\275\\240\\345\\245\\275", escape("你好".as_bytes()));
    }

    #[test]
    fn test_unescape_string() {
        test_escape_unescape("", "");
        test_escape_unescape("aa", "aa");
        test_escape_unescape("\n", "\\n");
        test_escape_unescape("\r", "\\r");
        test_escape_unescape("\t", "\\t");
        test_escape_unescape("你好", "\\344\\275\\240\\345\\245\\275");
        // hex
        assert_eq!(b"aaa\x01bbb", &unescape_string("aaa\\x01bbb")[..]);
        assert_eq!(b"aaa\xcdbbb", &unescape_string("aaa\\xCDbbb")[..]);
        assert_eq!(b"aaa\xcdbbb", &unescape_string("aaa\\xCDbbb")[..]);
        // quotes
        assert_eq!(b"aaa\"bbb", &unescape_string("aaa\\\"bbb")[..]);
        assert_eq!(b"aaa\'bbb", &unescape_string("aaa\\\'bbb")[..]);
    }
}
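// Illustrative sketch: bytes outside the printable ASCII range come out as
// three-digit octal escapes, so the quoted result is always printable ASCII.
#[cfg(test)]
mod sketch {
    use crate::text_format::quote_bytes_to;

    #[test]
    fn quote() {
        let mut s = String::new();
        quote_bytes_to(b"a\x01b", &mut s);
        assert_eq!("\"a\\001b\"", s);
    }
}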
protobuf-support-3.7.2/src/toposort.rs
use std::collections::HashSet;
use std::hash::Hash;

#[derive(Debug, thiserror::Error)]
#[error("Cycle detected")]
pub struct TopoSortCycle;

pub fn toposort<K, I>(
    input: impl IntoIterator<Item = K>,
    deps: impl Fn(&K) -> I,
) -> Result<Vec<K>, TopoSortCycle>
where
    K: Eq + Hash + Clone,
    I: Iterator<Item = K>,
{
    struct Ts<K, I, D>
    where
        K: Eq + Hash + Clone,
        I: Iterator<Item = K>,
        D: Fn(&K) -> I,
    {
        result_set: HashSet<K>,
        result: Vec<K>,
        deps: D,
        stack: HashSet<K>,
    }

    impl<K, I, D> Ts<K, I, D>
    where
        K: Eq + Hash + Clone,
        I: Iterator<Item = K>,
        D: Fn(&K) -> I,
    {
        fn visit(&mut self, i: &K) -> Result<(), TopoSortCycle> {
            if self.result_set.contains(i) {
                return Ok(());
            }
            if !self.stack.insert(i.clone()) {
                return Err(TopoSortCycle);
            }
            for dep in (self.deps)(i) {
                self.visit(&dep)?;
            }
            let removed = self.stack.remove(i);
            assert!(removed);
            self.result.push(i.clone());
            self.result_set.insert(i.clone());
            Ok(())
        }
    }

    let mut ts = Ts {
        result: Vec::new(),
        result_set: HashSet::new(),
        deps,
        stack: HashSet::new(),
    };
    for i in input {
        ts.visit(&i)?;
    }
    Ok(ts.result)
}

#[cfg(test)]
mod tests {
    use std::collections::HashMap;

    use crate::toposort::toposort;
    use crate::toposort::TopoSortCycle;

    fn test_toposort(input: &str) -> Result<Vec<&str>, TopoSortCycle> {
        let mut keys: Vec<&str> = Vec::new();
        let mut edges: HashMap<&str, Vec<&str>> = HashMap::new();
        for part in input.split(" ") {
            match part.split_once("->") {
                Some((k, vs)) => {
                    keys.push(k);
                    edges.insert(k, vs.split(",").collect());
                }
                None => keys.push(part),
            };
        }
        toposort(keys, |k| {
            edges
                .get(k)
                .map(|v| v.as_slice())
                .unwrap_or_default()
                .into_iter()
                .copied()
        })
    }

    fn test_toposort_check(input: &str, expected: &str) {
        let sorted = test_toposort(input).unwrap();
        let expected = expected.split(" ").collect::<Vec<_>>();
        assert_eq!(expected, sorted);
    }

    #[test]
    fn test() {
        test_toposort_check("1 2 3", "1 2 3");
        test_toposort_check("1->2 2->3 3", "3 2 1");
        test_toposort_check("1 2->1 3->2", "1 2 3");
        test_toposort_check("1->2,3 2->3 3", "3 2 1");
    }

    #[test]
    fn cycle() {
        assert!(test_toposort("1->1").is_err());
        assert!(test_toposort("1->2 2->1").is_err());
    }
}
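// Illustrative sketch: dependencies always come out before the keys that
// depend on them (the names "main" and "util" are made up for the example).
#[cfg(test)]
mod sketch {
    use crate::toposort::toposort;

    #[test]
    fn deps_first() {
        // "main" depends on "util", so "util" is emitted first.
        let sorted = toposort(vec!["main", "util"], |k: &&str| {
            if *k == "main" {
                vec!["util"].into_iter()
            } else {
                vec![].into_iter()
            }
        })
        .unwrap();
        assert_eq!(vec!["util", "main"], sorted);
    }
}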