biblatex-0.10.0/.cargo_vcs_info.json0000644000000001360000000000100127040ustar { "git": { "sha1": "190576a2de80f156f7c30ec105735820a5a168e4" }, "path_in_vcs": "" }biblatex-0.10.0/.github/workflows/ci.yml000064400000000000000000000013121046102023000162040ustar 00000000000000name: Continuous integration on: [push, pull_request] env: RUSTFLAGS: "-Dwarnings" RUSTDOCFLAGS: "-Dwarnings" jobs: test: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@1.81.0 - run: cargo build - run: cargo test checks: name: Check clippy, formatting, and documentation runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@1.81.0 with: components: clippy, rustfmt - uses: Swatinem/rust-cache@v2 - run: cargo clippy --workspace --all-targets --all-features - run: cargo fmt --check --all - run: cargo doc --workspace --no-deps biblatex-0.10.0/.gitignore000064400000000000000000000000521046102023000134610ustar 00000000000000/target Cargo.lock bench/target .DS_Store biblatex-0.10.0/Cargo.toml0000644000000025270000000000100107100ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" name = "biblatex" version = "0.10.0" authors = ["Martin Haug "] build = false autobins = false autoexamples = false autotests = false autobenches = false description = "Parsing, writing, and evaluating BibTeX and BibLaTeX files" readme = "README.md" keywords = [ "bibtex", "biblatex", "biber", "bibliography", "latex", ] categories = [ "database", "parser-implementations", "text-processing", ] license = "MIT OR Apache-2.0" repository = "https://github.com/typst/biblatex" [lib] name = "biblatex" path = "src/lib.rs" [dependencies.numerals] version = "0.1" [dependencies.paste] version = "1" [dependencies.serde] version = "1" features = ["derive"] optional = true [dependencies.strum] version = "0.26" features = ["derive"] [dependencies.unicode-normalization] version = "0.1" [dependencies.unscanny] version = "0.1" biblatex-0.10.0/Cargo.toml.orig000064400000000000000000000012271046102023000143650ustar 00000000000000[package] name = "biblatex" version = "0.10.0" authors = ["Martin Haug "] license = "MIT OR Apache-2.0" description = "Parsing, writing, and evaluating BibTeX and BibLaTeX files" repository = "https://github.com/typst/biblatex" readme = "README.md" categories = ["database", "parser-implementations", "text-processing"] keywords = ["bibtex", "biblatex", "biber", "bibliography", "latex"] edition = "2021" [workspace] members = ["bench"] [dependencies] numerals = "0.1" paste = "1" strum = { version = "0.26", features = ["derive"] } unicode-normalization = "0.1" serde = { version = "1", features = ["derive"], optional = true } unscanny = "0.1" biblatex-0.10.0/LICENSE-APACHE000064400000000000000000000251231046102023000134230ustar 00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. 
"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. 
This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [2020] [Martin Haug] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
biblatex-0.10.0/LICENSE-MIT000064400000000000000000000017771046102023000131440ustar 00000000000000Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. biblatex-0.10.0/README.md000064400000000000000000000066161046102023000127640ustar 00000000000000# BibLaTeX [![Build status](https://github.com/typst/biblatex/workflows/Continuous%20integration/badge.svg)](https://github.com/typst/biblatex/actions) [![Current crates.io release](https://img.shields.io/crates/v/biblatex)](https://crates.io/crates/biblatex) [![Documentation](https://img.shields.io/badge/docs.rs-biblatex-66c2a5?labelColor=555555&logoColor=white&logo=data:image/svg+xml;base64,PHN2ZyByb2xlPSJpbWciIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyIgdmlld0JveD0iMCAwIDUxMiA1MTIiPjxwYXRoIGZpbGw9IiNmNWY1ZjUiIGQ9Ik00ODguNiAyNTAuMkwzOTIgMjE0VjEwNS41YzAtMTUtOS4zLTI4LjQtMjMuNC0zMy43bC0xMDAtMzcuNWMtOC4xLTMuMS0xNy4xLTMuMS0yNS4zIDBsLTEwMCAzNy41Yy0xNC4xIDUuMy0yMy40IDE4LjctMjMuNCAzMy43VjIxNGwtOTYuNiAzNi4yQzkuMyAyNTUuNSAwIDI2OC45IDAgMjgzLjlWMzk0YzAgMTMuNiA3LjcgMjYuMSAxOS45IDMyLjJsMTAwIDUwYzEwLjEgNS4xIDIyLjEgNS4xIDMyLjIgMGwxMDMuOS01MiAxMDMuOSA1MmMxMC4xIDUuMSAyMi4xIDUuMSAzMi4yIDBsMTAwLTUwYzEyLjItNi4xIDE5LjktMTguNiAxOS45LTMyLjJWMjgzLjljMC0xNS05LjMtMjguNC0yMy40LTMzLjd6TTM1OCAyMTQuOGwtODUgMzEuOXYtNjguMmw4NS0zN3Y3My4zek0xNTQgMTA0LjFsMTAyLTM4LjIgMTAyIDM4LjJ2LjZsLTEwMiA0MS40LTEwMi00MS40di0uNnptODQgMjkxLjFsLTg1IDQyLjV2LTc5LjFsODUtMzguOHY3NS40em0wLTExMmwtMTAyIDQxLjQtMTAyLTQxLjR2LS42bDEwMi0zOC4yIDEwMiAzOC4ydi42em0yNDAgMTEybC04NSA0Mi41di03OS4xbDg1LTM4Ljh2NzUuNHptMC0xMTJsLTEwMiA0MS40LTEwMi00MS40di0uNmwxMDItMzguMiAxMDIgMzguMnYuNnoiPjwvcGF0aD48L3N2Zz4K)](https://docs.rs/biblatex/) A Rust crate for parsing and writing BibTeX and BibLaTeX files. BibLaTeX can help you to parse `.bib` bibliography files. As opposed to other available crates, this crate attempts to parse the data within the fields into easily usable structs and enums like `Person` and `Date` for downstream consumption. ## Usage Add this to your `Cargo.toml`: ```toml [dependencies] biblatex = "0.10" ``` Parsing a bibliography and getting the author of an item is as simple as: ```rust let src = "@book{tolkien1937, author = {J. R. R. Tolkien}}"; let bibliography = Bibliography::parse(src).unwrap(); let entry = bibliography.get("tolkien1937").unwrap(); let author = entry.author().unwrap(); assert_eq!(author[0].name, "Tolkien"); ``` This library operates on a `Bibliography` struct, which is a collection of _entries_ (the items in your `.bib` file that start with an `@` and are wrapped in curly braces). The entries may hold multiple fields. 
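For instance, here is a small illustrative sketch (not taken from the crate's documentation) that iterates over a parsed bibliography and reads fields both through the typed getters described below and through raw chunk access; the `note` field here is only an example:

```rust
use biblatex::{Bibliography, ChunksExt};

let src = "@book{tolkien1937, author = {J. R. R. Tolkien}, title = {The Hobbit}}";
let bibliography = Bibliography::parse(src).unwrap();

for entry in bibliography.iter() {
    // Typed getter: `title()` returns the field's chunks, which can then be
    // formatted for display.
    if let Ok(title) = entry.title() {
        println!("{}: {}", entry.key, title.format_verbatim());
    }

    // Raw access for fields without a dedicated getter.
    if let Some(chunks) = entry.get("note") {
        println!("note: {}", chunks.format_verbatim());
    }
}
```

Getters such as `title()` return the field's chunk slice, so formatting helpers like `format_verbatim()` from the `ChunksExt` trait (imported above) are needed to turn them into plain strings.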
Entries have getter methods for each of the possible fields in a Bib(La)TeX file which handle possible field aliases, composition and type conversion automatically. Refer to the [WikiBook section on LaTeX bibliography management](https://en.wikibooks.org/wiki/LaTeX/Bibliography_Management) and the [BibLaTeX package manual](http://ctan.ebinger.cc/tex-archive/macros/latex/contrib/biblatex/doc/biblatex.pdf) to learn more about the intended meaning of each of the fields. The generated documentation more specifically describes the selection and behavior of the getters but generally, they follow the convention of being the snake-case name of the corresponding field (such that the getter for `booktitleaddon` is named `book_title_addon`). ## Limitations This library attempts to provide fairly comprehensive coverage of the BibLaTeX spec with which most of the `.bib` files in circulation can be processed. However, the crate currently has some limitations: - There is no explicit support for entry sets, although it is easy to account for them by manually getting the `entryset` field and calling `parse::>()` on it biblatex-0.10.0/rustfmt.toml000064400000000000000000000002111046102023000140670ustar 00000000000000use_small_heuristics = "Max" max_width = 90 chain_width = 70 struct_lit_width = 50 use_field_init_shorthand = true merge_derives = false biblatex-0.10.0/src/chunk.rs000064400000000000000000000370211046102023000137440ustar 00000000000000use crate::resolve::is_escapable; use crate::types::Type; use crate::{Span, Spanned, TypeError}; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; /// A vector of chunks. pub type Chunks = Vec>; /// A slice of chunks. pub type ChunksRef<'a> = &'a [Spanned]; /// Represents one part of a field value. #[derive(Debug, Clone, Eq, PartialEq)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub enum Chunk { /// Normal values within quotes or single braces subject to /// capitalization formatting. Normal(String), /// Values nested in braces that are to be printed like specified /// in the file. Escapes keywords. /// /// Example: `"Inside {NASA}"` or `{Memes are {gReAT}}`. Verbatim(String), /// Values nested in dollar signs. Math(String), } impl Chunk { /// Get the string contained in the chunk. pub fn get(&self) -> &str { match self { Chunk::Normal(s) => s, Chunk::Verbatim(s) => s, Chunk::Math(s) => s, } } /// Get the string contained in the chunk and whether it is verbatim. fn get_and_verb(&self) -> (&str, bool) { match self { Chunk::Normal(s) => (s, false), Chunk::Verbatim(s) => (s, true), Chunk::Math(s) => (s, false), } } /// Mutably get the string contained in the chunk. pub fn get_mut(&mut self) -> &mut String { match self { Chunk::Normal(s) => s, Chunk::Verbatim(s) => s, Chunk::Math(s) => s, } } /// Get the string contained in the chunk with the characters escaped. /// /// There is no difference for BibTeX and BibLaTeX here, so there is only one function applicable to both. /// /// The `is_verbatim` argument indicates whether this string is intended for /// a verbatim field like `file` with limited escapes. pub fn to_biblatex_string(&self, is_verbatim: bool) -> String { let mut s = String::new(); for c in self.get().chars() { if is_escapable(c, is_verbatim, false) { s.push('\\'); } s.push(c); } s } } /// Additional methods for chunk slices. pub trait ChunksExt { /// Parse the chunks into a type. fn parse(&self) -> Result; /// Format the chunks in sentence case. fn format_sentence(&self) -> String; /// Format the chunks verbatim. 
fn format_verbatim(&self) -> String; /// Output a span for all chunks in the collection. fn span(&self) -> Span; /// Serialize the chunks into a BibLaTeX string. /// /// There is no difference for BibTeX and BibLaTeX here, so there is only one function applicable to both. fn to_biblatex_string(&self, is_verbatim: bool) -> String; } impl ChunksExt for [Spanned] { fn parse(&self) -> Result { T::from_chunks(self) } fn format_sentence(&self) -> String { let mut out = String::new(); let mut first = true; let mut prev_was_whitespace = false; for val in self { match &val.v { Chunk::Normal(s) => { for mut c in s.chars() { if c == '\n' || c == '\r' { if prev_was_whitespace { continue; } else { c = ' '; } } if first { out.extend(c.to_uppercase()); } else { out.extend(c.to_lowercase()); } first = false; prev_was_whitespace = c.is_whitespace(); } } Chunk::Verbatim(s) => { out.push_str(s); prev_was_whitespace = s.chars().last().map(char::is_whitespace).unwrap_or(false); } Chunk::Math(s) => { out.push('$'); out += s; out.push('$'); } } first = false; } out } fn format_verbatim(&self) -> String { let mut out = String::new(); let mut prev_was_whitespace = false; for val in self { match &val.v { Chunk::Normal(s) => { for mut c in s.chars() { if c == '\n' || c == '\r' { if prev_was_whitespace { continue; } else { c = ' '; } } out.push(c); prev_was_whitespace = c.is_whitespace(); } } Chunk::Verbatim(s) => { out += s; prev_was_whitespace = s.chars().last().map(char::is_whitespace).unwrap_or(false); } Chunk::Math(s) => { out.push('$'); out += s; out.push('$'); } } } out } fn span(&self) -> Span { let start = self.first().map(|c| c.span.start).unwrap_or(0); let end = self.last().map(|c| c.span.end).unwrap_or(start); start..end } fn to_biblatex_string(&self, is_verbatim: bool) -> String { let mut res = String::new(); res.push('{'); let mut extra_brace = false; for chunk in self.iter() { match &chunk.v { Chunk::Verbatim(_) if !extra_brace => { res.push('{'); extra_brace = true; } Chunk::Normal(_) if extra_brace => { res.push('}'); extra_brace = false; } Chunk::Math(_) => { res.push('$'); } _ => {} } res.push_str(&chunk.v.to_biblatex_string(is_verbatim)); if let Chunk::Math(_) = &chunk.v { res.push('$'); } } for _ in 0..if extra_brace { 2 } else { 1 } { res.push('}'); } res } } /// An iterator over the characters in each chunk, indicating whether they are /// verbatim or not. Chunk types other than `Normal` or `Verbatim` are omitted. pub(crate) fn chunk_chars(chunks: ChunksRef) -> impl Iterator + '_ { chunks.iter().flat_map(|chunk| { let (s, verbatim) = chunk.v.get_and_verb(); s.chars().map(move |c| (c, verbatim)) }) } /// Combines the chunks, interlacing with the separator. pub(crate) fn join_chunk_list(chunks: ChunksRef, sep: &str) -> Chunks { let mut res = vec![]; let mut first = true; for chunk in chunks { if first { first = false; } else { res.push(Spanned::new( Chunk::Normal(sep.to_string()), chunk.span.start..chunk.span.start, )); } res.push(chunk.clone()); } res } /// Splits chunk vectors that are a token lists as defined per the /// [BibLaTeX Manual][manual] p. 16 along occurrences of the keyword. 
/// /// [manual]: http://ctan.ebinger.cc/tex-archive/macros/latex/contrib/biblatex/doc/biblatex.pdf pub(crate) fn split_token_lists(vals: ChunksRef, keyword: &str) -> Vec { let mut out = vec![]; let mut latest = vec![]; for val in vals { if let Chunk::Normal(s) = &val.v { let mut target = s.as_str(); let mut start = val.span.start; while let Some(pos) = target.find(keyword) { let first = target[..pos].trim_end(); latest.push(Spanned::new( Chunk::Normal(first.to_string()), start..start + pos, )); out.push(std::mem::take(&mut latest)); target = target[pos + keyword.len()..].trim_start(); start += pos + keyword.len(); } latest.push(Spanned::new( Chunk::Normal(target.to_string()), start..val.span.end, )); } else { latest.push(val.clone()); } } out.push(latest); out } /// Split the token list based on a keyword surrounded by whitespace /// /// For Normal Chunks, /// - The leading/trailing keyword is not considered as a valid split /// (regardless of whether the keyword is preceded/followed by some /// whitespace). /// - If there are consecutive keywords, the characters between two consecutive /// keywords (whether only whitespace or not) will be considered as a valid /// split. pub(crate) fn split_token_lists_with_kw(vals: ChunksRef, keyword: &str) -> Vec { let mut out = vec![]; let mut latest = vec![]; // Trim the beginning and the end of the parsed field let sanitize_latest = |latest: &mut Vec>| { if latest.is_empty() { return; } let mut diff = 0; if let Chunk::Normal(s) = &mut latest[0].v { diff = s.len() - s.trim_start().len(); s.drain(0..diff); } if !latest[0].is_detached() { latest[0].span.start += diff; } let mut new_len = 0; let end = latest.len() - 1; if let Chunk::Normal(s) = &mut latest[end].v { new_len = s.trim_end().len(); s.truncate(new_len); } if !latest[end].is_detached() { latest[end].span.end = latest[end].span.start + new_len; } }; for (chunk_idx, chunk) in vals.iter().enumerate() { if let Chunk::Normal(s) = &chunk.v { let mut start = chunk.span.start; // If the first chunk is normal -> leading keyword let s = if chunk_idx == 0 { let new_s = s.trim_start(); if !chunk.is_detached() { // Offset the span start by the number of characters trimmed start = chunk.span.start + s.len() - new_s.len(); } new_s } else { s }; // If the last chunk is normal -> trailing keyword let s = if chunk_idx == vals.len() - 1 { s.trim_end() } else { s }; let mut splits = s.split(keyword); // guaranteed to have a value let mut prev = splits.next().unwrap(); let mut cur = String::new(); for split in splits { if prev.ends_with(char::is_whitespace) && split.starts_with(char::is_whitespace) { cur += prev; let end = if chunk.is_detached() { usize::MAX } else { start + cur.len() }; latest.push(Spanned::new( Chunk::Normal(std::mem::take(&mut cur)), start..end, )); sanitize_latest(&mut latest); out.push(std::mem::take(&mut latest)); start = end; prev = split; continue; } cur += prev; cur += keyword; prev = split; } cur += prev; let end = if chunk.is_detached() { usize::MAX } else { start + cur.len() }; latest .push(Spanned::new(Chunk::Normal(std::mem::take(&mut cur)), start..end)); } else { latest.push(chunk.clone()); } } sanitize_latest(&mut latest); out.push(latest); out } /// Splits a chunk vector into two at the first occurrence of the character `c`. /// `omit` controls whether the output will contain `c`. 
pub(crate) fn split_at_normal_char( src: ChunksRef, c: char, omit: bool, ) -> (Chunks, Chunks) { let mut search_result = None; for (chunk_idx, val) in src.iter().enumerate() { if let Chunk::Normal(s) = &val.v { if let Some(str_idx) = s.find(c) { search_result = Some((chunk_idx, str_idx)); break; } } } if let Some((chunk_idx, str_idx)) = search_result { let (v1, mut v2) = split_values(src, chunk_idx, str_idx); if omit { if let Chunk::Normal(s) = &mut v2[0].v { s.remove(0); *s = s.trim_start().to_string(); } v2[0].span.start = v2[0].span.end - v2[0].v.get().len(); } (v1, v2) } else { (src.to_vec(), vec![]) } } /// Returns two chunk vectors with `src` split at some chunk index and /// the string byte index `str_idx` within that chunk. pub(crate) fn split_values( src: ChunksRef, chunk_idx: usize, str_idx: usize, ) -> (Chunks, Chunks) { let mut src = src.to_vec(); let mut new = vec![]; if chunk_idx >= src.len() { return (src, new); } if chunk_idx + 1 < src.len() { new.extend(src.drain(chunk_idx + 1..)); } let item = src.last_mut().unwrap(); let content = item.v.get_mut(); let (s1, s2) = content.split_at(str_idx); let boundary = item.span.start.saturating_add(str_idx); item.span = item.span.start..boundary; let new_span = boundary..boundary.saturating_add(s2.len()); let s1 = s1.trim_end().to_string(); let s2 = s2.trim_start().to_string(); *content = s1; match &item.v { Chunk::Normal(_) => { new.insert(0, Spanned::new(Chunk::Normal(s2), new_span)); } Chunk::Verbatim(_) => { new.insert(0, Spanned::new(Chunk::Verbatim(s2), new_span)); } Chunk::Math(_) => { new.insert(0, Spanned::new(Chunk::Math(s2), new_span)); } } (src, new) } /// Returns the number of characters in the chunks. pub(crate) fn count_num_char(chunks: ChunksRef, c: char) -> usize { chunks .iter() .map(|val| if let Chunk::Normal(s) = &val.v { s.matches(c).count() } else { 0 }) .sum() } #[cfg(test)] #[allow(non_snake_case)] pub(crate) mod tests { use crate::Span; use super::*; pub fn N(s: &str) -> Chunk { Chunk::Normal(s.to_string()) } pub fn V(s: &str) -> Chunk { Chunk::Verbatim(s.to_string()) } pub fn s(v: T, span: Span) -> Spanned { Spanned::new(v, span) } pub fn d(v: T) -> Spanned { Spanned::detached(v) } #[test] fn test_split() { let vls = &[s(N("split "), 1..7), s(V("exac^tly"), 9..17), s(N("here"), 19..23)]; let ref1 = &[s(N("split "), 1..7), s(V("exac^"), 9..14)]; let ref2 = &[s(V("tly"), 14..17), s(N("here"), 19..23)]; let split = split_values(vls, 1, 5); assert_eq!(split.0, ref1); assert_eq!(split.1, ref2); } #[test] fn test_split_at_normal_char() { let vls = &[ s(N("split "), 1..7), s(V("not, "), 9..14), s(N("but rather, here"), 16..32), ]; let ref1 = &[s(N("split "), 1..7), s(V("not, "), 9..14), s(N("but rather"), 16..26)]; let ref2 = &[s(N("here"), 28..32)]; let split = split_at_normal_char(vls, ',', true); assert_eq!(split.0, ref1); assert_eq!(split.1, ref2); } } biblatex-0.10.0/src/lib.rs000064400000000000000000001270001046102023000133770ustar 00000000000000/*! A crate for parsing Bib(La)TeX files. The main API entrypoint is the [`Bibliography`] struct. # Example Finding out the author of a work. ``` # use biblatex::Bibliography; # fn main() -> std::io::Result<()> { let src = "@book{tolkien1937, author = {J. R. R. 
Tolkien}}"; let bibliography = Bibliography::parse(src).unwrap(); let entry = bibliography.get("tolkien1937").unwrap(); let author = entry.author().unwrap(); assert_eq!(author[0].name, "Tolkien"); # Ok(()) # } ``` */ #![deny(missing_docs)] mod chunk; mod macros; mod mechanics; mod raw; mod resolve; mod types; pub use chunk::{Chunk, Chunks, ChunksExt, ChunksRef}; pub use mechanics::EntryType; pub use raw::{ Field, Pair, ParseError, ParseErrorKind, RawBibliography, RawChunk, RawEntry, Token, }; pub use types::*; use std::collections::BTreeMap; use std::fmt; use std::fmt::{Debug, Display, Formatter, Write}; use macros::*; use mechanics::{is_verbatim_field, AuthorMode, PagesChapterMode}; use paste::paste; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; /// A fully parsed bibliography. #[derive(Debug, Clone, Default, Eq, PartialEq)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub struct Bibliography { /// The bibliography entries. entries: Vec, /// Maps from citation keys to indices in `items`. keys: BTreeMap, } /// A bibliography entry containing chunk fields, which can be parsed into more /// specific types on demand. #[derive(Debug, Clone, Eq, PartialEq)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub struct Entry { /// The citation key. pub key: String, /// Denotes the type of bibliography item (e.g., `Article`). pub entry_type: EntryType, /// Maps from field names to their associated chunk vectors. pub fields: BTreeMap, } /// Errors that can occur when retrieving a field of an [`Entry`]. #[derive(Debug, Clone, PartialEq)] pub enum RetrievalError { /// The entry has no field with this name. Missing(String), /// The field contains malformed data. TypeError(TypeError), } impl Display for RetrievalError { fn fmt(&self, f: &mut Formatter) -> fmt::Result { match self { Self::Missing(s) => write!(f, "field {} is missing", s), Self::TypeError(err) => write!(f, "{}", err), } } } impl From for RetrievalError { fn from(err: TypeError) -> Self { Self::TypeError(err) } } fn convert_result(err: Result) -> Result, TypeError> { match err { Ok(val) => Ok(Some(val)), Err(RetrievalError::Missing(_)) => Ok(None), Err(RetrievalError::TypeError(err)) => Err(err), } } impl Bibliography { /// Create a new, empty bibliography. pub fn new() -> Self { Self::default() } /// Parse a bibliography from a source string. pub fn parse(src: &str) -> Result { Self::from_raw(RawBibliography::parse(src)?) } /// Construct a bibliography from a raw bibliography, with the `xdata` and /// `crossref` links resolved. pub fn from_raw(raw: RawBibliography) -> Result { let mut res = Self::new(); let abbr = &raw.abbreviations; for entry in raw.entries { // Check that the key is not repeated if res.get(entry.v.key.v).is_some() { return Err(ParseError::new( entry.span, ParseErrorKind::DuplicateKey(entry.v.key.v.to_string()), )); } let mut fields: BTreeMap>> = BTreeMap::new(); for spanned_field in entry.v.fields.into_iter() { let field_key = spanned_field.key.v.to_string().to_ascii_lowercase(); let parsed = resolve::parse_field(&field_key, &spanned_field.value.v, abbr)?; fields.insert(field_key, parsed); } res.insert(Entry { key: entry.v.key.v.to_string(), entry_type: EntryType::new(entry.v.kind.v), fields, }); } let mut entries = res.entries.clone(); for entry in &mut entries { entry.resolve_crossrefs(&res).map_err(|e| { ParseError::new(e.span, ParseErrorKind::ResolutionError(e.kind)) })?; } res.entries = entries; Ok(res) } /// The number of bibliography entries. 
pub fn len(&self) -> usize { self.entries.len() } /// Whether the bibliography is empty. pub fn is_empty(&self) -> bool { self.entries.is_empty() } /// Returns the entry with the given cite key. pub fn get(&self, key: &str) -> Option<&Entry> { let index = *self.keys.get(key)?; self.entries.get(index) } /// Returns a mutable reference to the entry with the given cite key. pub fn get_mut(&mut self, key: &str) -> Option<&mut Entry> { let index = *self.keys.get(key)?; self.entries.get_mut(index) } /// Insert an entry into the bibliography. /// /// If an entry with the same cite key is already present, the entry is /// updated and the old entry is returned. pub fn insert(&mut self, entry: Entry) -> Option { if let Some(prev) = self.get_mut(&entry.key) { Some(std::mem::replace(prev, entry)) } else { let index = self.entries.len(); self.keys.insert(entry.key.clone(), index); if let Some(ids) = convert_result(entry.get_as::>("ids")).unwrap() { for alias in ids { self.keys.insert(alias, index); } } self.entries.push(entry); None } } /// Remove the entry with the given cite key. pub fn remove(&mut self, key: &str) -> Option { let index = *self.keys.get(key)?; let entry = self.entries.remove(index); // Remove equal indices and update later indices. self.keys.retain(|_, v| { if *v > index { *v -= 1; true } else { *v != index } }); Some(entry) } /// Add an alias for a cite key. /// /// Does nothing if no entry with the given cite key exists. pub fn alias(&mut self, key: &str, alias: impl Into) { if let Some(&index) = self.keys.get(key) { self.keys.insert(alias.into(), index); } } /// An iterator over the bibliography's entries. pub fn iter(&self) -> std::slice::Iter { self.entries.iter() } /// A mutable iterator over the bibliography's entries. pub fn iter_mut(&mut self) -> std::slice::IterMut { self.entries.iter_mut() } /// An iterator over the bibliography's entry keys. pub fn keys(&self) -> impl Iterator { self.entries.iter().map(|entry| &*entry.key) } /// Consume this struct and return a vector of the bibliography's entries. pub fn into_vec(self) -> Vec { self.entries } /// Write the entry into a writer in the BibLaTeX format. pub fn write_biblatex(&self, mut sink: impl Write) -> fmt::Result { let mut first = true; for entry in &self.entries { if !first { writeln!(sink)?; } writeln!(sink, "{}", entry.to_biblatex_string())?; first = false; } Ok(()) } /// Serialize this bibliography into a BibLaTeX string. pub fn to_biblatex_string(&self) -> String { let mut biblatex = String::new(); self.write_biblatex(&mut biblatex).unwrap(); biblatex } /// Write the entry into a writer in the BibTeX format. pub fn write_bibtex(&self, mut sink: impl Write) -> fmt::Result { let mut first = true; for entry in &self.entries { if !first { writeln!(sink)?; } writeln!(sink, "{}", entry.to_bibtex_string().map_err(|_| fmt::Error)?)?; first = false; } Ok(()) } /// Serialize this bibliography into a BibTeX string. pub fn to_bibtex_string(&self) -> String { let mut bibtex = String::new(); self.write_bibtex(&mut bibtex).unwrap(); bibtex } } impl IntoIterator for Bibliography { type Item = Entry; type IntoIter = std::vec::IntoIter; fn into_iter(self) -> Self::IntoIter { self.entries.into_iter() } } impl Entry { /// Construct new, empty entry. pub fn new(key: String, entry_type: EntryType) -> Self { Self { key, entry_type, fields: BTreeMap::new() } } /// Get the chunk slice of a field. /// /// The field key must be lowercase. 
pub fn get(&self, key: &str) -> Option { self.fields.get(key).map(AsRef::as_ref) } /// Parse the value of a field into a specific type. /// /// The field key must be lowercase. pub fn get_as(&self, key: &str) -> Result { self.get(key) .ok_or_else(|| RetrievalError::Missing(key.to_string()))? .parse::() .map_err(Into::into) } /// Set the chunk slice for a field. /// /// The field key is lowercase before insertion. pub fn set(&mut self, key: &str, chunks: Chunks) { self.fields.insert(key.to_lowercase(), chunks); } /// Set the value of a field as a specific type. /// /// The field key is lowercase before insertion. pub fn set_as(&mut self, key: &str, value: &T) { self.set(key, value.to_chunks()); } /// Remove a field from the entry. pub fn remove(&mut self, key: &str) -> Option { self.fields.remove(key) } /// The parents of an entry in a semantic sense (`crossref` and `xref`). pub fn parents(&self) -> Result, TypeError> { let mut parents = vec![]; if let Some(crossref) = convert_result(self.get_as::("crossref"))? { parents.push(crossref); } if let Some(xrefs) = convert_result(self.get_as::>("xref"))? { parents.extend(xrefs); } Ok(parents) } /// Verify if the entry has the appropriate fields for its [`EntryType`]. pub fn verify(&self) -> Report { let reqs = self.entry_type.requirements(); let mut missing = vec![]; let mut superfluous = vec![]; for field in reqs.required { match field { "journaltitle" => { if self .get_non_empty(field) .or_else(|| self.get_non_empty("journal")) .is_none() { missing.push(field); } } "location" => { if self .get_non_empty(field) .or_else(|| self.get_non_empty("address")) .is_none() { missing.push(field); } } "school" if self.entry_type == EntryType::Thesis || self.entry_type == EntryType::MastersThesis || self.entry_type == EntryType::PhdThesis => { if self .get_non_empty(field) .or_else(|| self.get_non_empty("institution")) .is_none() { missing.push(field); } } _ => { if self.get_non_empty(field).is_none() { missing.push(field); } } } } for field in reqs.forbidden { if self.get_non_empty(field).is_some() { superfluous.push(field); } } match reqs.author_eds_field { AuthorMode::OneRequired => { if self.author().is_err() && self.editors().unwrap_or_default().is_empty() { missing.push("author"); } } AuthorMode::BothRequired => { if self.editors().unwrap_or_default().is_empty() { missing.push("editor"); } if self.author().is_err() { missing.push("author"); } } AuthorMode::AuthorRequired | AuthorMode::AuthorRequiredEditorOptional => { if self.author().is_err() { missing.push("author"); } } AuthorMode::EditorRequiredAuthorForbidden => { if self.editors().unwrap_or_default().is_empty() { missing.push("editor"); } if self.author().is_ok() { superfluous.push("author"); } } _ => {} } match reqs.page_chapter_field { PagesChapterMode::OneRequired => { if self.pages().is_err() && self.chapter().is_err() { missing.push("pages"); } } PagesChapterMode::BothForbidden => { if self.pages().is_ok() { superfluous.push("pages"); } if self.chapter().is_ok() { superfluous.push("chapter"); } } PagesChapterMode::PagesRequired => { if self.pages().is_err() { missing.push("pages"); } } _ => {} } let mut malformed = vec![]; for (key, chunks) in &self.fields { let error = match key.as_str() { "edition" => chunks.parse::>().err(), "organization" => chunks.parse::>().err(), "pages" => chunks.parse::>>().err(), "publisher" => chunks.parse::>().err(), "volume" => chunks.parse::().err(), "bookpagination" => chunks.parse::().err(), "pagination" => chunks.parse::().err(), "volumes" => 
chunks.parse::().err(), "gender" => chunks.parse::().err(), "editortype" => chunks.parse::().err(), "editoratype" => chunks.parse::().err(), "editorbtype" => chunks.parse::().err(), "editorctype" => chunks.parse::().err(), "xref" => chunks.parse::>().err(), "xdata" => chunks.parse::>().err(), "ids" => chunks.parse::>().err(), _ => continue, }; if let Some(err) = error { malformed.push((key.clone(), err)) } } for (key, err) in [ ("date", self.date().err()), ("urldate", self.url_date().err()), ("origdate", self.orig_date().err()), ("eventdate", self.event_date().err()), ] { if let Some(RetrievalError::TypeError(t)) = err { malformed.push((key.to_string(), t)); } } if reqs.needs_date { if let Err(RetrievalError::Missing(_)) = self.date() { missing.push("year"); } } Report { missing, superfluous, malformed } } /// Serialize this entry into a BibLaTeX string. pub fn to_biblatex_string(&self) -> String { let mut biblatex = String::new(); let ty = self.entry_type.to_biblatex(); writeln!(biblatex, "@{}{{{},", ty, self.key).unwrap(); for (key, value) in &self.fields { let key = match key.as_ref() { "journal" => "journaltitle", "address" => "location", "school" => "institution", k => k, }; writeln!( biblatex, "{} = {},", key, value.to_biblatex_string(is_verbatim_field(key)) ) .unwrap(); } biblatex.push('}'); biblatex } /// Serialize this entry into a BibTeX string. /// /// This function can return an error if there is a malformed date field. pub fn to_bibtex_string(&self) -> Result { let mut bibtex = String::new(); let ty = self.entry_type.to_bibtex(); let thesis = matches!(ty, EntryType::PhdThesis | EntryType::MastersThesis); writeln!(bibtex, "@{}{{{},", ty, self.key).unwrap(); for (key, value) in &self.fields { if key == "date" { if let Some(date) = convert_result(self.date())? { if let PermissiveType::Typed(date) = date { for (key, value) in date.to_fieldset() { let v = [Spanned::zero(Chunk::Normal(value))] .to_biblatex_string(false); writeln!(bibtex, "{} = {},", key, v).unwrap(); } continue; } } else { continue; } } let key = match key.as_ref() { "journaltitle" => "journal", "location" => "address", "institution" if thesis => "school", k => k, }; writeln!( bibtex, "{} = {},", key, value.to_biblatex_string(is_verbatim_field(key)) ) .unwrap(); } bibtex.push('}'); Ok(bibtex) } /// Get an entry but return None for empty chunk slices. fn get_non_empty(&self, key: &str) -> Option { let entry = self.get(key)?; if !entry.is_empty() { Some(entry) } else { None } } /// Resolves all data dependencies defined by `crossref` and `xdata` fields. fn resolve_crossrefs(&mut self, bib: &Bibliography) -> Result<(), TypeError> { let mut refs = vec![]; if let Some(crossref) = convert_result(self.get_as::("crossref"))? { refs.extend(bib.get(&crossref).cloned()); } if let Some(keys) = convert_result(self.get_as::>("xdata"))? { for key in keys { refs.extend(bib.get(&key).cloned()); } } for mut crossref in refs { crossref.resolve_crossrefs(bib)?; self.resolve_single_crossref(crossref)?; } self.remove("xdata"); Ok(()) } /// Resolve data dependencies using another entry. 
fn resolve_single_crossref(&mut self, crossref: Entry) -> Result<(), TypeError> { let req = self.entry_type.requirements(); let mut relevant = req.required; relevant.extend(req.optional); relevant.extend(req.page_chapter_field.possible()); relevant.extend(req.author_eds_field.possible()); if self.entry_type == EntryType::XData { for f in crossref.fields.keys() { relevant.push(f); } } for f in relevant { if self.get(f).is_some() { continue; } match f { "journaltitle" | "journalsubtitle" if crossref.entry_type == EntryType::Periodical => { let key = if f.contains('s') { "subtitle" } else { "title" }; if let Some(item) = crossref.get(key) { self.set(f, item.to_vec()) } } "booktitle" | "booksubtitle" | "booktitleaddon" if crossref.entry_type.is_collection() => { let key = if f.contains('s') { "subtitle" } else if f.contains('a') { "titleaddon" } else { "title" }; if let Some(item) = crossref.get(key) { self.set(f, item.to_vec()) } } "maintitle" | "mainsubtitle" | "maintitleaddon" if crossref.entry_type.is_multi_volume() => { let key = if f.contains('s') { "subtitle" } else if f.contains('a') { "titleaddon" } else { "title" }; if let Some(item) = crossref.get(key) { self.set(f, item.to_vec()) } } "address" => { if let Some(item) = crossref.get(f).or_else(|| crossref.get("location")) { self.set(f, item.to_vec()) } } "institution" => { if let Some(item) = crossref.get(f).or_else(|| crossref.get("school")) { self.set(f, item.to_vec()) } } "school" => { if let Some(item) = crossref.get(f).or_else(|| crossref.get("institution")) { self.set(f, item.to_vec()) } } "journaltitle" => { if let Some(item) = crossref.get(f).or_else(|| crossref.get("journal")) { self.set(f, item.to_vec()) } } "title" | "addendum" | "note" => {} _ => { if let Some(item) = crossref.get(f) { self.set(f, item.to_vec()) } } } } if self.entry_type == EntryType::XData { return Ok(()); } if req.needs_date { if let Some(date) = convert_result(crossref.date())? { self.set_date(date); } } Ok(()) } } /// A report of the validity of an `Entry`. Can be obtained by calling [`Entry::verify`]. pub struct Report { /// These fields were missing, although they are required for the entry type. pub missing: Vec<&'static str>, /// These fields were present but are not allowed for the entry type. pub superfluous: Vec<&'static str>, /// These fields were present but contained malformed data. pub malformed: Vec<(String, TypeError)>, } impl Report { /// Whether the report is empty and contains no errors. pub fn is_ok(&self) -> bool { self.missing.is_empty() && self.superfluous.is_empty() && self.malformed.is_empty() } } impl Entry { // BibTeX fields. fields! { // Fields without a specified return type simply return `ChunksRef`. author: "author" => Vec, book_title: "booktitle", chapter: "chapter", edition: "edition" => PermissiveType, how_published: "howpublished", note: "note", number: "number", organization: "organization" => Vec, pages: "pages" => PermissiveType>>, publisher: "publisher" => Vec, series: "series", title: "title", type_: "type" => String, volume: "volume" => PermissiveType, } alias_fields! { address: "address" | "location", location: "location" | "address", annotation: "annotation" | "annote", eprint_type: "eprinttype" | "archiveprefix", journal: "journal" | "journaltitle", journal_title: "journaltitle" | "journal", sort_key: "key" | "sortkey" => String, file: "file" | "pdf" => String, school: "school" | "institution", institution: "institution" | "school", } date_fields! 
{ date: "", event_date: "event", orig_date: "orig", url_date: "url", } /// Get the `editor` and `editora` through `editorc` fields and their /// respective `editortype` annotation fields, returning a vector with zero /// to four entries, one for each editorial role. /// /// The default `EditorType::Editor` is assumed if the type field is empty. pub fn editors(&self) -> Result, EditorType)>, TypeError> { let mut editors = vec![]; let mut parse = |name_field: &str, editor_field: &str| -> Result<(), TypeError> { if let Some(persons) = convert_result(self.get_as::>(name_field))? { let editor_type = self .get(editor_field) .map(|chunks| chunks.parse::()) .transpose()? .unwrap_or(EditorType::Editor); editors.push((persons, editor_type)); } Ok(()) }; parse("editor", "editortype")?; parse("editora", "editoratype")?; parse("editorb", "editorbtype")?; parse("editorc", "editorctype")?; Ok(editors) } // BibLaTeX supplemental fields. fields! { abstract_: "abstract", addendum: "addendum", afterword: "afterword" => Vec, annotator: "annotator" => Vec, author_type: "authortype" => String, book_author: "bookauthor" => Vec, book_pagination: "bookpagination" => Pagination, book_subtitle: "booksubtitle", book_title_addon: "booktitleaddon", commentator: "commentator" => Vec, doi: "doi" => String, eid: "eid", entry_subtype: "entrysubtype", eprint: "eprint" => String, eprint_class: "eprintclass", eventtitle: "eventtitle", eventtitle_addon: "eventtitleaddon", foreword: "foreword" => Vec, holder: "holder" => Vec, index_title: "indextitle", introduction: "introduction" => Vec, isan: "isan", isbn: "isbn", ismn: "ismn", isrn: "isrn", issn: "issn", issue: "issue", issue_subtitle: "issuesubtitle", issue_title: "issuetitle", issue_title_addon: "issuetitleaddon", iswc: "iswc", journal_subtitle: "journalsubtitle", journal_title_addon: "journaltitleaddon", keywords: "keywords", label: "label", language: "language" => String, library: "library", main_subtitle: "mainsubtitle", main_title: "maintitle", main_title_addon: "maintitleaddon", name_addon: "nameaddon", options: "options", orig_language: "origlanguage" => String, orig_location: "origlocation", page_total: "pagetotal", pagination: "pagination" => Pagination, part: "part", pubstate: "pubstate", reprint_title: "reprinttitle", short_author: "shortauthor" => Vec, short_editor: "shorteditor" => Vec, shorthand: "shorthand", shorthand_intro: "shorthandintro", short_journal: "shortjournal", short_series: "shortseries", short_title: "shorttitle", subtitle: "subtitle", title_addon: "titleaddon", translator: "translator" => Vec, url: "url" => String, venue: "venue", version: "version", volumes: "volumes" => i64, gender: "gender" => Gender, } } type Span = std::ops::Range; /// A value with the span it corresponds to in the source code. /// /// Spans can be _detached,_ this means that they deliberately do not point /// into the source code. Such spans are created when manually setting fields /// with an empty bibliography or after parsing a file. Detached spans do not /// indicate valid index ranges in the source files and must not be used as /// such. A spanned item can be checked for detachment by calling /// [`Self::is_detached`]. #[derive(Clone, Eq, PartialEq)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub struct Spanned { /// The spanned value. pub v: T, /// The location in source code of the value. pub span: Span, } impl Spanned { /// Create a new instance from a value and its span. 
pub fn new(v: T, span: Span) -> Self { Self { v, span } } /// Create a new instance with a value and a zero-length span. pub fn zero(v: T) -> Self { Self { v, span: 0..0 } } /// Create a new instance with a detached span. pub fn detached(v: T) -> Self { Self { v, span: usize::MAX..usize::MAX } } /// Whether the span is detached. pub fn is_detached(&self) -> bool { self.span.start == usize::MAX } /// Convert from `&Spanned` to `Spanned<&T>` pub fn as_ref(&self) -> Spanned<&T> { Spanned { v: &self.v, span: self.span.clone() } } /// Map the value using a function keeping the span. pub fn map(self, f: F) -> Spanned where F: FnOnce(T) -> U, { Spanned { v: f(self.v), span: self.span } } } impl Debug for Spanned { fn fmt(&self, f: &mut Formatter) -> fmt::Result { self.v.fmt(f)?; if f.alternate() { f.write_str(" <")?; self.span.fmt(f)?; f.write_str(">")?; } Ok(()) } } #[cfg(test)] mod tests { use std::fs; use super::*; use crate::raw::Token; #[test] fn test_correct_bib() { let contents = fs::read_to_string("tests/gral.bib").unwrap(); let bibliography = Bibliography::parse(&contents).unwrap(); assert_eq!(bibliography.entries.len(), 83) } #[test] fn test_repeated_key() { let contents = fs::read_to_string("tests/gral_rep_key.bib").unwrap(); let bibliography = Bibliography::parse(&contents); match bibliography { Ok(_) => panic!("Should return Err"), Err(s) => { assert_eq!(s.kind, ParseErrorKind::DuplicateKey("ishihara2012".into())); } }; } #[test] fn test_parse_incorrect_result() { let contents = fs::read_to_string("tests/incorrect_syntax.bib").unwrap(); let bibliography = Bibliography::parse(&contents); match bibliography { Ok(_) => { panic!("Should return Err") } Err(s) => { assert_eq!( s, ParseError::new(369..369, ParseErrorKind::Expected(Token::Equals)) ); } }; } #[test] fn test_parse_incorrect_types() { let contents = fs::read_to_string("tests/incorrect_data.bib").unwrap(); let bibliography = Bibliography::parse(&contents).unwrap(); let rashid = bibliography.get("rashid2016").unwrap(); match rashid.pagination() { Err(RetrievalError::TypeError(s)) => { assert_eq!(s, TypeError::new(352..359, TypeErrorKind::UnknownPagination)); } _ => { panic!() } }; } #[test] fn test_keys() { let contents = fs::read_to_string("tests/editortypes.bib").unwrap(); let bibliography = Bibliography::parse(&contents).unwrap(); assert_eq!( bibliography.keys().collect::>(), &["acerolaThisDifferenceGaussians2022", "mozart_KV183_1773", "Smith2018"] ); } #[test] fn test_gral_paper() { dump_debug("tests/gral.bib"); } #[test] fn test_ds_report() { dump_debug("tests/ds.bib"); } #[test] fn test_libra_paper() { dump_author_title("tests/libra.bib"); } #[test] fn test_rass_report() { dump_author_title("tests/rass.bib"); } #[test] fn test_polar_report() { dump_author_title("tests/polaritons.bib"); } #[test] fn test_extended_name_format() { dump_author_title("tests/extended_name_format.bib"); } #[test] fn test_alias() { let contents = fs::read_to_string("tests/cross.bib").unwrap(); let mut bibliography = Bibliography::parse(&contents).unwrap(); assert_eq!(bibliography.get("issue201"), bibliography.get("github")); bibliography.alias("issue201", "crap"); assert_eq!(bibliography.get("crap"), bibliography.get("unstable")); bibliography.remove("crap").unwrap(); let entry = bibliography.get("cannonfodder").unwrap(); assert_eq!(entry.key, "cannonfodder"); assert_eq!(entry.entry_type, EntryType::Misc); } #[test] fn test_bibtex_conversion() { let contents = fs::read_to_string("tests/cross.bib").unwrap(); let mut bibliography = 
Bibliography::parse(&contents).unwrap(); let biblatex = bibliography.get_mut("haug2019").unwrap().to_biblatex_string(); assert!(biblatex.contains("institution = {Technische Universität Berlin},")); let bibtex = bibliography.get_mut("haug2019").unwrap().to_bibtex_string().unwrap(); assert!(bibtex.contains("school = {Technische Universität Berlin},")); assert!(bibtex.contains("year = {2019},")); assert!(bibtex.contains("month = {10},")); assert!(!bibtex.contains("institution")); assert!(!bibtex.contains("date")); } #[test] fn test_verify() { let contents = fs::read_to_string("tests/cross.bib").unwrap(); let mut bibliography = Bibliography::parse(&contents).unwrap(); assert!(bibliography.get_mut("haug2019").unwrap().verify().is_ok()); assert!(bibliography.get_mut("cannonfodder").unwrap().verify().is_ok()); let ill = bibliography.get("ill-defined").unwrap(); let report = ill.verify(); assert_eq!(report.missing.len(), 3); assert_eq!(report.superfluous.len(), 3); assert_eq!(report.malformed.len(), 1); assert!(report.missing.contains(&"title")); assert!(report.missing.contains(&"year")); assert!(report.missing.contains(&"editor")); assert!(report.superfluous.contains(&"maintitle")); assert!(report.superfluous.contains(&"author")); assert!(report.superfluous.contains(&"chapter")); assert_eq!(report.malformed[0].0.as_str(), "gender"); } #[test] fn test_crossref() { let contents = fs::read_to_string("tests/cross.bib").unwrap(); let bibliography = Bibliography::parse(&contents).unwrap(); let e = bibliography.get("macmillan").unwrap(); assert_eq!(e.publisher().unwrap()[0].format_verbatim(), "Macmillan"); assert_eq!(e.location().unwrap().format_verbatim(), "New York and London"); let book = bibliography.get("recursive").unwrap(); assert_eq!(book.publisher().unwrap()[0].format_verbatim(), "Macmillan"); assert_eq!(book.location().unwrap().format_verbatim(), "New York and London"); assert_eq!( book.title().unwrap().format_verbatim(), "Recursive shennenigans and other important stuff" ); assert_eq!( bibliography.get("arrgh").unwrap().parents().unwrap(), vec!["polecon".to_string()] ); let arrgh = bibliography.get("arrgh").unwrap(); assert_eq!(arrgh.entry_type, EntryType::Article); assert_eq!(arrgh.volume().unwrap(), PermissiveType::Typed(115)); assert_eq!(arrgh.editors().unwrap()[0].0[0].name, "Uhlig"); assert_eq!(arrgh.number().unwrap().format_verbatim(), "6"); assert_eq!( arrgh.journal().unwrap().format_verbatim(), "Journal of Political Economy" ); assert_eq!( arrgh.title().unwrap().format_verbatim(), "An‐arrgh‐chy: The Law and Economics of Pirate Organization" ); } fn dump_debug(file: &str) { let contents = fs::read_to_string(file).unwrap(); let bibliography = Bibliography::parse(&contents).unwrap(); println!("{:#?}", bibliography); } fn dump_author_title(file: &str) { let contents = fs::read_to_string(file).unwrap(); let bibliography = Bibliography::parse(&contents).unwrap(); println!("{}", bibliography.to_biblatex_string()); for x in bibliography { let authors = x.author().unwrap_or_default(); for a in authors { print!("{}, ", a); } println!("\"{}\".", x.title().unwrap().format_sentence()); } } #[test] fn linebreak_field() { let contents = r#"@book{key, title = {Hello Martin}}"#; let bibliography = Bibliography::parse(contents).unwrap(); let entry = bibliography.get("key").unwrap(); assert_eq!(entry.title().unwrap().format_verbatim(), "Hello Martin"); } #[test] fn test_verbatim_fields() { let contents = fs::read_to_string("tests/libra.bib").unwrap(); let bibliography = 
Bibliography::parse(&contents).unwrap(); // Import an entry/field with escaped colons let e = bibliography.get("dierksmeierJustHODLMoral2018").unwrap(); assert_eq!(e.doi().unwrap(), "10.1007/s41463-018-0036-z"); assert_eq!( e.file().unwrap(), "C:\\Users\\mhaug\\Zotero\\storage\\DTPR7TES\\Dierksmeier - 2018 - Just HODL On the Moral Claims of Bitcoin and Ripp.pdf" ); // Import an entry/field with unescaped colons let e = bibliography.get("LibraAssociationIndependent").unwrap(); assert_eq!(e.url().unwrap(), "https://libra.org/association/"); // Test export of entry (not escaping colons) let e = bibliography.get("finextraFedGovernorChallenges2019").unwrap(); assert_eq!( e.to_biblatex_string(), "@online{finextraFedGovernorChallenges2019,\nauthor = {FinExtra},\ndate = {2019-12-18},\nfile = {C:\\\\Users\\\\mhaug\\\\Zotero\\\\storage\\\\VY9LAKFE\\\\fed-governor-challenges-facebooks-libra-project.html},\ntitle = {Fed {Governor} Challenges {Facebook}'s {Libra} Project},\nurl = {https://www.finextra.com/newsarticle/34986/fed-governor-challenges-facebooks-libra-project},\nurldate = {2020-08-22},\n}" ); // Test URLs with math and backslashes let e = bibliography.get("weirdUrl2023").unwrap(); assert_eq!(e.url().unwrap(), r#"example.com?A=$B\%\{}"#); assert_eq!(e.doi().unwrap(), r#"example.com?A=$B\%\{}"#); } #[test] fn test_synthesized_entry() { let mut e = Entry::new("Test123".to_owned(), EntryType::Article); let brian = vec![Person { name: "Monroe".to_string(), given_name: "Brian Albert".to_string(), prefix: "".to_string(), suffix: "".to_string(), }]; e.set_author(brian.clone()); assert_eq!(Ok(brian), e.author()); } #[test] fn test_case_sensitivity() { let contents = fs::read_to_string("tests/case.bib").unwrap(); let bibliography = Bibliography::parse(&contents).unwrap(); let entry = bibliography.get("biblatex2023").unwrap(); let author = entry.author(); match author { Ok(a) => assert_eq!(a[0].name, "Kime"), Err(RetrievalError::Missing(_)) => { panic!("Tags should be case insensitive."); } _ => panic!(), } } #[test] fn test_whitespace_collapse() { let raw = r#"@article{aksin, title = {Effect of immobilization on catalytic characteristics of saturated {Pd-N}-heterocyclic carbenes in {Mizoroki-Heck} reactions}, }"#; let bibliography = Bibliography::parse(raw).unwrap(); let entry = bibliography.get("aksin").unwrap(); assert_eq!( entry.title().unwrap().first().map(|s| s.as_ref().v), Some(Chunk::Normal( "Effect of immobilization on catalytic characteristics of saturated " .to_string() )) .as_ref() ); } #[test] fn test_empty_date_fields() { let raw = r#"@article{test, year = 2000, day = {}, month = {}, }"#; let bibliography = Bibliography::parse(raw).unwrap(); assert_eq!( bibliography.get("test").unwrap().date(), Err(TypeError::new(74..74, TypeErrorKind::MissingNumber).into()) ); } #[test] #[allow(clippy::single_range_in_vec_init)] fn test_page_ranges() { let raw = r#"@article{test, pages = {1---2}, } @article{test1, pages = {2--3}, } @article{test2, pages = {1}, }"#; let bibliography = Bibliography::parse(raw).unwrap(); assert_eq!( bibliography.get("test").unwrap().pages(), Ok(PermissiveType::Typed(vec![1..2])) ); assert_eq!( bibliography.get("test1").unwrap().pages(), Ok(PermissiveType::Typed(vec![2..3])) ); assert_eq!( bibliography.get("test2").unwrap().pages(), Ok(PermissiveType::Typed(vec![1..1])) ); } #[test] fn test_editor_types() { let contents = fs::read_to_string("tests/editortypes.bib").unwrap(); let bibliography = Bibliography::parse(&contents).unwrap(); let video = 
bibliography.get("acerolaThisDifferenceGaussians2022").unwrap(); assert_eq!( video.editors(), Ok(vec![( vec![Person { name: "Acerola".into(), given_name: "".into(), prefix: "".into(), suffix: "".into() }], EditorType::Director )]) ); let music = bibliography.get("mozart_KV183_1773").unwrap(); assert_eq!( music.editors(), Ok(vec![( vec![Person { name: "Mozart".into(), given_name: "Wolfgang Amadeus".into(), prefix: "".into(), suffix: "".into() }], EditorType::Unknown("pianist".into()), )]) ); let audio = bibliography.get("Smith2018").unwrap(); assert_eq!( audio.editors(), Ok(vec![ ( vec![Person { name: "Smith".into(), given_name: "Stacey Vanek".into(), prefix: "".into(), suffix: "".into() }], EditorType::Unknown("host".into()), ), ( vec![Person { name: "Plotkin".into(), given_name: "Stanley".into(), prefix: "".into(), suffix: "".into() }], EditorType::Unknown("participant".into()), ) ]) ); } } biblatex-0.10.0/src/macros.rs000064400000000000000000000066361046102023000141300ustar 00000000000000macro_rules! fields { ($($name:ident: $field:expr $(=> $ret:ty)?),* $(,)*) => { $(paste! { #[doc = "Get the `" $field "` field."] pub fn $name(&self) -> Result { self .get($field) .ok_or_else(|| RetrievalError::Missing($field.to_string())) $(?.parse::<$ret>().map_err(Into::into))? } fields!(@set $name => $field, $($ret)?); })* }; (@ret) => {ChunksRef}; (@ret $ret:ty) => {$ret}; (@set $name:ident => $field:literal, ) => { paste! { #[doc = "Set the value of the `" $field "` field."] pub fn [](&mut self, item: Chunks) { self.set($field, item); } } }; (@set $name:ident => $field:literal, $ty:ty) => { paste! { #[doc = "Set the value of the `" $field "` field."] pub fn [](&mut self, item: $ty) { self.set($field, item.to_chunks()); } } }; } pub(crate) use fields; macro_rules! alias_fields { ($($name:ident: $field:literal | $alias:literal $(=> $ret:ty)?),* $(,)*) => { $(paste! { #[doc = "Get the `" $field "` field, falling back on `" $alias "` if `" $field "` is empty."] pub fn $name(&self) -> Result { self.get($field) .or_else(|| self.get($alias)) .ok_or_else(|| RetrievalError::Missing($field.to_string())) $(?.parse::<$ret>().map_err(Into::into))? } fields!(@set $name => $field, $($ret)?); })* }; } pub(crate) use alias_fields; macro_rules! date_fields { ($($name:ident: $prefix:literal),* $(,)*) => { $(paste! { #[doc = "Get the `" $prefix "date` field, falling back on the `" $prefix "year`, `" $prefix "month`, and `" $prefix "day` fields if it is not present."] pub fn $name(&self) -> Result, RetrievalError> { if let Some(chunks) = self.get(concat!($prefix, "date")) { chunks.parse::() .map(|d| PermissiveType::Typed(d)) .or_else(|_| Ok::<_, RetrievalError>(PermissiveType::Chunks(chunks.to_vec()))) } else { Ok(PermissiveType::Typed(Date::parse_three_fields( self.get(concat!($prefix, "year")).ok_or_else(|| RetrievalError::Missing("year".to_string()))?, self.get(concat!($prefix, "month")), self.get(concat!($prefix, "day")), )?)) }.map_err(Into::into) } #[doc = "Set the value of the `" $prefix "date` field, removing the `" $prefix "year`, `" $prefix "month`, and `" $prefix "day` fields if present."] pub fn [](&mut self, item: PermissiveType) { self.set(concat!($prefix, "date"), item.to_chunks()); self.remove(concat!($prefix, "year")); self.remove(concat!($prefix, "month")); self.remove(concat!($prefix, "day")); } })* }; } pub(crate) use date_fields; biblatex-0.10.0/src/mechanics.rs000064400000000000000000000572231046102023000145740ustar 00000000000000//! 
Defines the different bibliographical items and which fields should be //! attached to each of them. use std::str::FromStr; use strum::{Display, EnumString}; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; /// Describes the type of a bibliographical entry. /// /// Each type comes with a different set of required and allowable fields that /// are taken into consideration in [`Entry::verify`](crate::Entry::verify). #[derive(Debug, Clone, Eq, PartialEq, Display, EnumString)] #[allow(missing_docs)] #[strum(serialize_all = "lowercase")] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub enum EntryType { // BibTeX Article, Book, Booklet, InBook, InCollection, InProceedings, Manual, MastersThesis, PhdThesis, Misc, Proceedings, TechReport, Unpublished, // BibLaTeX MvBook, BookInBook, SuppBook, Periodical, SuppPeriodical, Collection, MvCollection, SuppCollection, Reference, MvReference, InReference, MvProceedings, Report, Patent, Thesis, Online, Software, Dataset, Set, XData, Unknown(String), } /// Describes the optionality mode of the `author` and `editor` fields. #[derive(Clone, Debug)] pub enum AuthorMode { /// Neither of the fields are required to be set. NoneRequired, /// At least one of the fields must be present. OneRequired, /// Both fields must be set. BothRequired, /// The `author` field must be present. AuthorRequired, /// The `author` field must be present, the `editor` field is optional. AuthorRequiredEditorOptional, /// The `editor` field must be set while the `author` field must not be set. EditorRequiredAuthorForbidden, } impl Default for AuthorMode { fn default() -> Self { Self::AuthorRequired } } impl AuthorMode { pub(crate) fn possible(&self) -> &'static [&'static str] { match self { Self::OneRequired | Self::BothRequired | Self::AuthorRequiredEditorOptional => &["author", "editor"], Self::AuthorRequired => &["author"], Self::EditorRequiredAuthorForbidden => &["editor"], _ => &[], } } } /// Describes the optionality mode of the `pages` and `chapter` field #[derive(Clone, Debug)] pub enum PagesChapterMode { /// No specification for the `page` and `chapter` field is given. None, /// At least one of the fields must be present. OneRequired, /// Both fields are optional. BothOptional, /// Neither field may appear. BothForbidden, /// The `pages` field might be present, there is no specification for the /// `chapter` field. PagesOptional, /// The `pages` field must be present. #[allow(dead_code)] PagesRequired, } impl Default for PagesChapterMode { fn default() -> Self { Self::None } } impl PagesChapterMode { pub(crate) fn possible(&self) -> &'static [&'static str] { match self { Self::OneRequired | Self::BothOptional => &["pages", "chapter"], Self::PagesOptional | Self::PagesRequired => &["pages"], _ => &[], } } } /// Specifies what kinds of fields an entry might have to hold. #[derive(Debug, Default, Clone)] pub struct Requirements { /// Fields that have to be present for the entry to be valid. pub required: Vec<&'static str>, /// Fields that might be present and are often used by bibliography styles. /// /// These fields, together with the required fields, will be taken into /// consideration for `crossref` and `xdata` transfers. pub optional: Vec<&'static str>, /// These fields must not appear for the entry to be valid. pub forbidden: Vec<&'static str>, /// Specifies the relation of author and editor field compulsiveness. pub author_eds_field: AuthorMode, /// Specifies the relation of page and chapter field compulsiveness. 
pub page_chapter_field: PagesChapterMode, /// Shows whether a `date` or `year` field has to be present. pub needs_date: bool, } impl EntryType { /// Parse from a string. /// /// Use this instead of the basic `from_str` when constructing from `.bib` /// files because case and aliases are considered here. pub fn new(name: &str) -> Self { let name = name.to_lowercase(); if let Ok(ty) = EntryType::from_str(&name) { return ty; } match name.as_str() { "conference" => EntryType::InProceedings, "electronic" => EntryType::Online, "www" => EntryType::Online, _ => EntryType::Unknown(name), } } /// Is this a multi-volume work? pub fn is_multi_volume(&self) -> bool { matches!( self, Self::MvBook | Self::MvCollection | Self::MvReference | Self::MvProceedings ) } /// Is this a single-volume composite work? pub fn is_collection(&self) -> bool { matches!( self, Self::Book | Self::Collection | Self::Periodical | Self::Reference | Self::Proceedings ) } /// Convert into a type native to BibLaTeX. pub fn to_biblatex(&self) -> Self { match self { Self::MastersThesis => Self::Thesis, Self::PhdThesis => Self::Thesis, Self::TechReport => Self::Report, Self::Unknown(_) => Self::Misc, _ => self.clone(), } } /// Convert into a type supported by BibTeX. pub fn to_bibtex(&self) -> Self { match self { Self::MvBook => Self::Book, Self::BookInBook => Self::InBook, Self::SuppBook => Self::InBook, Self::Periodical => Self::Misc, Self::SuppPeriodical => Self::Article, Self::Collection => Self::Proceedings, Self::MvCollection => Self::Proceedings, Self::SuppCollection => Self::InCollection, Self::Reference => Self::Misc, Self::MvReference => Self::Misc, Self::InReference => Self::InCollection, Self::MvProceedings => Self::Proceedings, Self::Report => Self::TechReport, Self::Patent => Self::Misc, Self::Thesis => Self::PhdThesis, Self::Online => Self::Misc, Self::Software => Self::Misc, Self::Dataset => Self::Misc, Self::Set => Self::Misc, Self::XData => Self::Misc, Self::Unknown(_) => Self::Misc, _ => self.clone(), } } /// Get the required fields for the `EntryType`. 
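    /// Every type starts from a shared base: `title` is required, a common set
    /// of optional fields (`note`, `location`, `url`, `doi`, `eprint`, ...) is
    /// assumed, and a date is expected. The match below then tightens or
    /// relaxes these defaults for each entry type.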
pub(crate) fn requirements(&self) -> Requirements { let mut reqs = Requirements { needs_date: true, ..Default::default() }; reqs.required.push("title"); reqs.optional.push("note"); reqs.optional.push("location"); reqs.optional.push("titleadddon"); reqs.optional.push("subtitle"); reqs.optional.push("url"); reqs.optional.push("urldate"); reqs.optional.push("doi"); reqs.optional.push("eprint"); reqs.optional.push("eprintclass"); reqs.optional.push("eprinttype"); reqs.optional.push("pubstate"); reqs.optional.push("language"); reqs.optional.push("addendum"); if self.is_multi_volume() { reqs.forbidden.push("maintitle"); reqs.forbidden.push("mainsubtitle"); reqs.forbidden.push("maintitleaddon"); reqs.forbidden.push("part"); reqs.forbidden.push("volume"); } match self { Self::Article => { reqs.required.push("journaltitle"); reqs.optional.remove(1); reqs.optional.push("number"); reqs.optional.push("series"); reqs.optional.push("version"); reqs.optional.push("volume"); reqs.optional.push("annotator"); reqs.optional.push("commentator"); reqs.optional.push("translator"); reqs.optional.push("origlanguage"); reqs.optional.push("journalsubtitle"); reqs.optional.push("issue"); reqs.optional.push("issuetitle"); reqs.optional.push("issuesubtitle"); reqs.optional.push("eid"); reqs.optional.push("issn"); reqs.page_chapter_field = PagesChapterMode::PagesOptional; reqs.author_eds_field = AuthorMode::AuthorRequiredEditorOptional; } Self::Book => { reqs.required.push("publisher"); reqs.optional.push("edition"); reqs.optional.push("number"); reqs.optional.push("series"); reqs.optional.push("part"); reqs.optional.push("volume"); reqs.optional.push("volumes"); reqs.optional.push("annotator"); reqs.optional.push("commentator"); reqs.optional.push("translator"); reqs.optional.push("origlanguage"); reqs.optional.push("afterword"); reqs.optional.push("foreword"); reqs.optional.push("introduction"); reqs.optional.push("maintitle"); reqs.optional.push("mainsubtitle"); reqs.optional.push("maintitleaddon"); reqs.optional.push("isbn"); reqs.optional.push("pagetotal"); reqs.author_eds_field = AuthorMode::OneRequired; reqs.page_chapter_field = PagesChapterMode::BothOptional; } Self::Booklet => { reqs.optional.push("howpublished"); reqs.optional.push("type"); reqs.optional.push("pagetotal"); reqs.author_eds_field = AuthorMode::OneRequired; reqs.page_chapter_field = PagesChapterMode::BothOptional; reqs.needs_date = false; } Self::InBook => { reqs.required.push("publisher"); reqs.required.push("booktitle"); reqs.optional.push("bookauthor"); reqs.optional.push("volume"); reqs.optional.push("volumes"); reqs.optional.push("part"); reqs.optional.push("type"); reqs.optional.push("series"); reqs.optional.push("number"); reqs.optional.push("edition"); reqs.optional.push("annotator"); reqs.optional.push("commentator"); reqs.optional.push("translator"); reqs.optional.push("origlanguage"); reqs.optional.push("afterword"); reqs.optional.push("foreword"); reqs.optional.push("introduction"); reqs.optional.push("maintitle"); reqs.optional.push("mainsubtitle"); reqs.optional.push("maintitleaddon"); reqs.optional.push("booksubtitle"); reqs.optional.push("booktitleaddon"); reqs.optional.push("isbn"); reqs.forbidden.push("pagetotal"); reqs.author_eds_field = AuthorMode::OneRequired; reqs.page_chapter_field = PagesChapterMode::OneRequired; } Self::InCollection => { reqs.required.push("publisher"); reqs.required.push("booktitle"); reqs.optional.push("volume"); reqs.optional.push("type"); reqs.optional.push("series"); reqs.optional.push("number"); 
reqs.optional.push("edition"); reqs.optional.push("annotator"); reqs.optional.push("commentator"); reqs.optional.push("translator"); reqs.optional.push("origlanguage"); reqs.optional.push("afterword"); reqs.optional.push("foreword"); reqs.optional.push("introduction"); reqs.optional.push("maintitle"); reqs.optional.push("mainsubtitle"); reqs.optional.push("maintitleaddon"); reqs.optional.push("booksubtitle"); reqs.optional.push("booktitleaddon"); reqs.optional.push("part"); reqs.optional.push("volumes"); reqs.optional.push("isbn"); reqs.forbidden.push("pagetotal"); reqs.author_eds_field = AuthorMode::BothRequired; reqs.page_chapter_field = PagesChapterMode::BothOptional; } Self::InProceedings => { reqs.required.push("booktitle"); reqs.optional.push("number"); reqs.optional.push("organization"); reqs.optional.push("series"); reqs.optional.push("volume"); reqs.optional.push("volumes"); reqs.optional.push("part"); reqs.optional.push("publisher"); reqs.optional.push("maintitle"); reqs.optional.push("mainsubtitle"); reqs.optional.push("maintitleaddon"); reqs.optional.push("booksubtitle"); reqs.optional.push("booktitleaddon"); reqs.optional.push("eventtitle"); reqs.optional.push("eventsubtitle"); reqs.optional.push("eventtitleaddon"); reqs.optional.push("venue"); reqs.optional.push("isbn"); reqs.optional.push("publisher"); reqs.forbidden.push("pagetotal"); reqs.page_chapter_field = PagesChapterMode::BothOptional; reqs.author_eds_field = AuthorMode::BothRequired; } Self::Manual => { reqs.optional.push("edition"); reqs.optional.push("organization"); reqs.optional.push("series"); reqs.optional.push("version"); reqs.optional.push("isbn"); reqs.optional.push("publisher"); reqs.optional.push("type"); reqs.optional.push("pagetotal"); reqs.author_eds_field = AuthorMode::OneRequired; reqs.page_chapter_field = PagesChapterMode::BothOptional; reqs.needs_date = false; } Self::MastersThesis => { reqs.required.push("school"); reqs.optional.push("type"); reqs.author_eds_field = AuthorMode::AuthorRequired; } Self::Misc => { reqs.optional.remove(1); reqs.optional.push("howpublished"); reqs.optional.push("organization"); reqs.optional.push("type"); reqs.author_eds_field = AuthorMode::OneRequired; // reqs.page_chapter_field = PagesChapterMode::BothOptional; reqs.needs_date = false; } Self::Proceedings => { reqs.optional.push("number"); reqs.optional.push("organization"); reqs.optional.push("series"); reqs.optional.push("volume"); reqs.optional.push("volumes"); reqs.optional.push("part"); reqs.optional.push("publisher"); reqs.optional.push("maintitle"); reqs.optional.push("mainsubtitle"); reqs.optional.push("maintitleaddon"); reqs.optional.push("isbn"); reqs.optional.push("publisher"); reqs.optional.push("pagetotal"); reqs.author_eds_field = AuthorMode::EditorRequiredAuthorForbidden; reqs.page_chapter_field = PagesChapterMode::BothOptional; } Self::TechReport => { reqs.required.push("institution"); reqs.optional.push("number"); reqs.optional.push("type"); } Self::Unpublished => { reqs.required.push("note"); reqs.optional.remove(1); reqs.optional.remove(0); reqs.optional.push("isbn"); reqs.optional.push("howpublished"); reqs.needs_date = false; } Self::MvBook => { reqs.optional.push("annotator"); reqs.optional.push("commentator"); reqs.optional.push("translator"); reqs.optional.push("origlanguage"); reqs.optional.push("afterword"); reqs.optional.push("foreword"); reqs.optional.push("introduction"); reqs.optional.push("edition"); reqs.optional.push("number"); reqs.optional.push("series"); 
reqs.optional.push("volumes"); reqs.optional.push("isbn"); reqs.optional.push("publisher"); reqs.optional.push("pagetotal"); reqs.page_chapter_field = PagesChapterMode::BothOptional; reqs.author_eds_field = AuthorMode::AuthorRequiredEditorOptional; } Self::Periodical => { reqs.optional.push("issue"); reqs.optional.push("issuetitle"); reqs.optional.push("issuesubtitle"); reqs.optional.push("number"); reqs.optional.push("series"); reqs.optional.push("volume"); reqs.optional.push("issn"); reqs.author_eds_field = AuthorMode::EditorRequiredAuthorForbidden; } Self::Collection => { reqs.optional.push("annotator"); reqs.optional.push("commentator"); reqs.optional.push("translator"); reqs.optional.push("origlanguage"); reqs.optional.push("afterword"); reqs.optional.push("foreword"); reqs.optional.push("introduction"); reqs.optional.push("maintitle"); reqs.optional.push("mainsubtitle"); reqs.optional.push("maintitleaddon"); reqs.optional.push("edition"); reqs.optional.push("number"); reqs.optional.push("series"); reqs.optional.push("part"); reqs.optional.push("volume"); reqs.optional.push("volumes"); reqs.optional.push("isbn"); reqs.optional.push("publisher"); reqs.optional.push("pagetotal"); reqs.author_eds_field = AuthorMode::EditorRequiredAuthorForbidden; reqs.page_chapter_field = PagesChapterMode::BothOptional; } Self::MvCollection => { reqs.optional.push("annotator"); reqs.optional.push("commentator"); reqs.optional.push("translator"); reqs.optional.push("origlanguage"); reqs.optional.push("afterword"); reqs.optional.push("foreword"); reqs.optional.push("introduction"); reqs.optional.push("edition"); reqs.optional.push("number"); reqs.optional.push("series"); reqs.optional.push("volumes"); reqs.optional.push("isbn"); reqs.optional.push("publisher"); reqs.optional.push("pagetotal"); reqs.author_eds_field = AuthorMode::EditorRequiredAuthorForbidden; reqs.page_chapter_field = PagesChapterMode::BothForbidden; } Self::MvProceedings => { reqs.optional.push("number"); reqs.optional.push("series"); reqs.optional.push("volumes"); reqs.optional.push("publisher"); reqs.optional.push("organization"); reqs.optional.push("pagetotal"); reqs.author_eds_field = AuthorMode::EditorRequiredAuthorForbidden; reqs.page_chapter_field = PagesChapterMode::BothForbidden; } Self::Report => { reqs.required.push("institution"); reqs.required.push("type"); reqs.optional.push("number"); reqs.optional.push("version"); reqs.optional.push("isrn"); reqs.optional.push("pagetotal"); reqs.page_chapter_field = PagesChapterMode::BothOptional; } Self::Patent => { reqs.required.push("number"); reqs.optional.push("holder"); reqs.optional.push("type"); } Self::Thesis => { reqs.optional.push("isbn"); reqs.required.push("institution"); reqs.required.push("type"); reqs.optional.push("pagetotal"); reqs.page_chapter_field = PagesChapterMode::BothOptional; } Self::Online => { reqs.required.push("url"); reqs.optional.remove(9); reqs.optional.remove(8); reqs.optional.remove(7); reqs.optional.remove(6); reqs.optional.remove(4); reqs.optional.remove(1); reqs.optional.push("organization"); reqs.author_eds_field = AuthorMode::OneRequired; } Self::Dataset => { reqs.optional.push("edition"); reqs.optional.push("type"); reqs.optional.push("series"); reqs.optional.push("number"); reqs.optional.push("version"); reqs.optional.push("organization"); reqs.optional.push("publisher"); reqs.author_eds_field = AuthorMode::OneRequired; } Self::PhdThesis => { reqs = Self::MastersThesis.requirements(); } Self::SuppPeriodical => { reqs = Self::Article.requirements(); 
} Self::BookInBook => { reqs = Self::InBook.requirements(); } Self::SuppBook => { reqs = Self::InBook.requirements(); } Self::SuppCollection => { reqs = Self::InCollection.requirements(); } Self::Reference => { reqs = Self::Collection.requirements(); } Self::MvReference => { reqs = Self::MvCollection.requirements(); } Self::InReference => { reqs = Self::InCollection.requirements(); } Self::Software => { reqs = Self::Misc.requirements(); } Self::Set => { reqs.optional.clear(); reqs.required = vec!["entryset"]; reqs.author_eds_field = AuthorMode::NoneRequired; reqs.needs_date = false; } Self::XData => { reqs.optional.clear(); reqs.required.clear(); reqs.author_eds_field = AuthorMode::NoneRequired; reqs.needs_date = false; } Self::Unknown(_) => { reqs = Self::MvCollection.requirements(); } } reqs } } /// Whether a field with this key should be parsed with commands and most /// escapes turned off. pub fn is_verbatim_field(key: &str) -> bool { matches!( key, "file" | "doi" | "uri" | "eprint" | "verba" | "verbb" | "verbc" | "pdf" | "url" | "urlraw" ) } biblatex-0.10.0/src/raw.rs000064400000000000000000000441011046102023000134220ustar 00000000000000//! Low-level representation of a bibliography file. use std::fmt; use crate::{Span, Spanned, TypeErrorKind}; use unscanny::Scanner; /// The content of a field or abbreviation. pub type Field<'s> = Vec>>; /// A literal representation of a bibliography file, with abbreviations not yet /// resolved. #[derive(Debug, Clone)] pub struct RawBibliography<'s> { /// TeX commands to be prepended to the document, only supported by BibTeX. pub preamble: String, /// The collection of citation keys and bibliography entries. pub entries: Vec>>, /// A map of reusable abbreviations, only supported by BibTeX. pub abbreviations: Vec>, } /// A raw extracted entry, with abbreviations not yet resolved. #[derive(Debug, Clone)] pub struct RawEntry<'s> { /// The citation key. pub key: Spanned<&'s str>, /// Denotes the type of bibliographic item (e.g., `article`). pub kind: Spanned<&'s str>, /// Maps from field names to their values. pub fields: Vec>, } /// A literal representation of a bibliography entry field. #[derive(Debug, Clone, PartialEq)] pub enum RawChunk<'s> { /// A normal field value. Normal(&'s str), /// A field with strings and abbreviations. Abbreviation(&'s str), } impl<'s> RawBibliography<'s> { /// Parse a raw bibliography from a source string. pub fn parse(src: &'s str) -> Result { BiblatexParser::new(src).parse() } } /// Backing struct for parsing a Bib(La)TeX file into a [`RawBibliography`]. struct BiblatexParser<'s> { s: Scanner<'s>, res: RawBibliography<'s>, } /// An error that might occur during initial parsing of the bibliography. #[derive(Debug, Clone, PartialEq)] pub struct ParseError { /// Where in the source the error occurred. pub span: std::ops::Range, /// What kind of error occurred. pub kind: ParseErrorKind, } impl ParseError { pub(crate) fn new(span: std::ops::Range, kind: ParseErrorKind) -> Self { Self { span, kind } } } impl fmt::Display for ParseError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{}: {}-{}", self.kind, self.span.start, self.span.end) } } /// Error conditions that might occur during initial parsing of the /// bibliography. /// /// Also see [`ParseError`]. #[derive(Debug, Clone, PartialEq)] #[non_exhaustive] pub enum ParseErrorKind { /// The file ended prematurely. UnexpectedEof, /// An unexpected token was encountered. Unexpected(Token), /// A token was expected, but not found. 
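    /// For example, a missing `=` between a field key and its value is
    /// reported as `Expected(Token::Equals)`.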
Expected(Token), /// A field contained an abbreviation that was not defined. UnknownAbbreviation(String), /// A TeX command was malformed. MalformedCommand, /// A duplicate citation key was found. DuplicateKey(String), /// A type error occurred while trying to resolve cross-references. ResolutionError(TypeErrorKind), } /// A token that can be encountered during parsing. #[derive(Debug, Copy, Clone, PartialEq)] pub enum Token { /// An identifier for a field key, citation type, abbreviation, or citation /// key. Identifier, /// An opening brace: `{`. OpeningBrace, /// A closing brace: `}`. ClosingBrace, /// A comma: `,`. Comma, /// A quotation mark: `"`. QuotationMark, /// An equals sign: `=`. Equals, /// A decimal point: `.`. DecimalPoint, } impl fmt::Display for ParseErrorKind { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Self::UnexpectedEof => write!(f, "unexpected end of file"), Self::Expected(token) => write!(f, "expected {}", token), Self::Unexpected(token) => write!(f, "unexpected {}", token), Self::UnknownAbbreviation(s) => write!(f, "unknown abbreviation {:?}", s), Self::MalformedCommand => write!(f, "malformed command"), Self::DuplicateKey(s) => write!(f, "duplicate key {:?}", s), Self::ResolutionError(e) => { write!(f, "type error occurred during crossref resolution: {}", e) } } } } impl fmt::Display for Token { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.write_str(match self { Self::Identifier => "identifier", Self::OpeningBrace => "opening brace", Self::ClosingBrace => "closing brace", Self::Comma => "comma", Self::QuotationMark => "double quote", Self::Equals => "equals", Self::DecimalPoint => "decimal point", }) } } impl<'s> BiblatexParser<'s> { /// Constructs a new parser. pub fn new(src: &'s str) -> Self { Self { s: Scanner::new(src), res: RawBibliography { preamble: String::new(), entries: Vec::new(), abbreviations: Vec::new(), }, } } /// Parses the file, consuming the parser in the process. pub fn parse(mut self) -> Result, ParseError> { while !self.s.done() { self.s.eat_whitespace(); match self.s.peek() { Some('@') => self.entry()?, Some(_) => { self.s.eat(); } None => break, } } Ok(self.res) } /// Eat a comma. fn comma(&mut self) -> Result<(), ParseError> { if !self.s.eat_if(',') { return Err(ParseError::new( self.here(), ParseErrorKind::Expected(Token::Comma), )); } Ok(()) } /// Eat a delimiter. fn brace(&mut self, open: bool) -> Result<(), ParseError> { let (brace, token) = if open { ('{', Token::OpeningBrace) } else { ('}', Token::ClosingBrace) }; let peeked = self.s.peek(); if peeked == Some(brace) || peeked == Some('\"') { self.s.eat(); Ok(()) } else { Err(ParseError::new(self.here(), ParseErrorKind::Expected(token))) } } /// Eat a quote. fn quote(&mut self) -> Result<(), ParseError> { if !self.s.eat_if('"') { Err(ParseError::new( self.here(), ParseErrorKind::Expected(Token::QuotationMark), )) } else { Ok(()) } } /// Eat an equals sign. fn equals(&mut self) -> Result<(), ParseError> { if !self.s.eat_if('=') { Err(ParseError::new(self.here(), ParseErrorKind::Expected(Token::Equals))) } else { Ok(()) } } /// Eat a string. fn string(&mut self) -> Result, ParseError> { self.quote()?; let idx = self.s.cursor(); while let Some(c) = self.s.peek() { match c { '"' => { let res = self.s.from(idx); let span = idx..self.s.cursor(); self.quote()?; return Ok(Spanned::new(res, span)); } '\\' => { self.s.eat(); self.s.eat(); } _ => { self.s.eat(); } } } Err(ParseError::new(self.here(), ParseErrorKind::UnexpectedEof)) } /// Eat a number. 
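    /// Accepts a run of ASCII digits with at most one decimal point (e.g.
    /// `2002` or `1.5`); a second decimal point is reported as unexpected.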
fn number(&mut self) -> Result<&'s str, ParseError> { let idx = self.s.cursor(); let mut has_dot = false; while let Some(c) = self.s.peek() { let start = self.s.cursor(); match c { '0'..='9' => { self.s.eat(); } '.' => { if !has_dot { self.s.eat(); has_dot = true; } else { return Err(ParseError::new( start..self.s.cursor(), ParseErrorKind::Unexpected(Token::DecimalPoint), )); } } _ => { return Ok(self.s.from(idx)); } } } Err(ParseError::new(self.here(), ParseErrorKind::UnexpectedEof)) } /// Eat a braced value. fn braced(&mut self) -> Result>, ParseError> { self.brace(true)?; let idx = self.s.cursor(); let mut braces = 0; while let Some(c) = self.s.peek() { match c { '{' => { self.brace(true)?; braces += 1; } '}' => { let res = self.s.from(idx); let span = idx..self.s.cursor(); self.brace(false)?; if braces == 0 { return Ok(Spanned::new(RawChunk::Normal(res), span)); } braces -= 1; } '\\' => { self.s.eat(); self.s.eat(); } _ => { self.s.eat(); } } } Err(ParseError::new(self.here(), ParseErrorKind::UnexpectedEof)) } /// Eat an element of an abbreviation. fn abbr_element(&mut self) -> Result>, ParseError> { let start = self.s.cursor(); let res = match self.s.peek() { Some(c) if c.is_ascii_digit() => self.number().map(RawChunk::Normal), Some(c) if is_id_start(c) => { self.ident().map(|s| RawChunk::Abbreviation(s.v)) } _ => { return self.single_field(); } }; res.map(|v| Spanned::new(v, start..self.s.cursor())) } /// Eat an abbreviation field. fn abbr_field(&mut self) -> Result>, ParseError> { let start = self.s.cursor(); let mut elems = vec![]; loop { elems.push(self.abbr_element()?); self.s.eat_whitespace(); if !self.s.eat_if('#') { break; } self.s.eat_whitespace(); } Ok(Spanned::new(elems, start..self.s.cursor())) } /// Eat a field. fn field(&mut self) -> Result<(Spanned<&'s str>, Spanned>), ParseError> { let key = self.ident()?; self.s.eat_whitespace(); self.equals()?; self.s.eat_whitespace(); let value = self.abbr_field()?; self.s.eat_whitespace(); Ok((key, value)) } fn single_field(&mut self) -> Result>, ParseError> { match self.s.peek() { Some('{') => self.braced(), Some('"') => { self.string().map(|s| Spanned::new(RawChunk::Normal(s.v), s.span)) } _ => Err(ParseError::new(self.here(), ParseErrorKind::UnexpectedEof)), } } /// Eat fields. fn fields(&mut self) -> Result>, ParseError> { let mut fields = Vec::new(); while !self.s.done() { self.s.eat_whitespace(); if self.s.peek() == Some('}') { return Ok(fields); } let (key, value) = self.field()?; self.s.eat_whitespace(); fields.push(Pair::new(key, value)); match self.s.peek() { Some(',') => self.comma()?, Some('}') => { return Ok(fields); } _ => { return Err(ParseError::new( self.here(), ParseErrorKind::Expected(Token::Comma), )); } } } Err(ParseError::new(self.here(), ParseErrorKind::UnexpectedEof)) } /// Eat an entry key. fn key(&mut self) -> Result, ParseError> { let idx = self.s.cursor(); self.s.eat_while(is_key); Ok(Spanned::new(self.s.from(idx), idx..self.s.cursor())) } /// Eat an identifier. fn ident(&mut self) -> Result, ParseError> { let idx = self.s.cursor(); let is_start = self.s.peek().map(is_id_start).unwrap_or_default(); if is_start { self.s.eat(); self.s.eat_while(is_id_continue); Ok(Spanned::new(self.s.from(idx), idx..self.s.cursor())) } else { Err(ParseError::new(self.here(), ParseErrorKind::Expected(Token::Identifier))) } } /// Eat an entry. 
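    /// Dispatches on the entry type: `@string` bodies become abbreviations,
    /// `@preamble` is appended to the preamble, `@comment` is skipped, and
    /// everything else is parsed as a regular entry body.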
fn entry(&mut self) -> Result<(), ParseError> { let start = self.s.cursor(); if self.s.eat() != Some('@') { panic!("must not call entry when not at an '@'"); } let entry_type = self.ident()?; self.s.eat_whitespace(); self.brace(true)?; self.s.eat_whitespace(); match entry_type.v.to_ascii_lowercase().as_str() { "string" => self.strings()?, "preamble" => self.preamble()?, "comment" => { self.s.eat_until('}'); } _ => self.body(entry_type, start)?, } self.s.eat_whitespace(); self.brace(false)?; Ok(()) } /// Eat the body of a strings entry. fn strings(&mut self) -> Result<(), ParseError> { let fields = self.fields()?; self.res.abbreviations.extend(fields); Ok(()) } /// Eat the body of a preamble entry. fn preamble(&mut self) -> Result<(), ParseError> { let idx = self.s.cursor(); self.string()?; let string = self.s.from(idx); if !self.res.preamble.is_empty() { self.res.preamble.push_str(" # "); } self.res.preamble.push_str(string); Ok(()) } /// Eat the body of an entry. fn body(&mut self, kind: Spanned<&'s str>, start: usize) -> Result<(), ParseError> { let key = self.key()?; self.s.eat_whitespace(); self.comma()?; self.s.eat_whitespace(); let fields = self.fields()?; self.res .entries .push(Spanned::new(RawEntry { key, kind, fields }, start..self.s.cursor())); Ok(()) } fn here(&self) -> Span { self.s.cursor()..self.s.cursor() } } /// The keys for fields and their values. #[derive(Debug, Clone)] pub struct Pair<'s> { /// The key. pub key: Spanned<&'s str>, /// The value. pub value: Spanned>, } impl<'s> Pair<'s> { /// Constructs a new key-value pair. pub fn new(key: Spanned<&'s str>, value: Spanned>) -> Self { Self { key, value } } } /// Whether a character is allowed in an entry key #[inline] pub fn is_key(c: char) -> bool { !matches!(c, ',' | '}') && !c.is_control() && !c.is_whitespace() } /// Whether a character can start an identifier. #[inline] pub fn is_id_start(c: char) -> bool { !matches!(c, ':' | '<' | '-' | '>') && is_id_continue(c) } /// Whether a character can continue an identifier. 
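/// Identifiers exclude BibTeX syntax characters (`@`, braces, quotes, `#`,
/// `'`, parentheses, `,`, `=`, `%`, `\`, `~`) as well as whitespace and
/// control characters.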
#[inline] pub fn is_id_continue(c: char) -> bool { !matches!( c, '@' | '{' | '}' | '"' | '#' | '\'' | '(' | ')' | ',' | '=' | '%' | '\\' | '~' ) && !c.is_control() && !c.is_whitespace() } #[cfg(test)] #[rustfmt::skip] mod tests { use super::*; fn format(field: &Field<'_>) -> String { if field.len() == 1 { if let Some(RawChunk::Normal(s)) = field.first().map(|s| &s.v) { return format!("{{{}}}", s); } } let mut res = String::new(); let mut first = true; for field in field { if !first { res.push_str(" # "); } else { first = false; } match field.v { RawChunk::Normal(s) => { res.push('"'); res.push_str(s); res.push('"'); }, RawChunk::Abbreviation(s) => res.push_str(s), } } res } #[track_caller] fn test_prop(key: &str, value: &str) -> String { let test = format!("@article{{test, {}={}}}", key, value); let bt = RawBibliography::parse(&test).unwrap(); let article = &bt.entries[0]; format(&article.v.fields[0].value.v) } #[test] fn test_entry_key() { let file = "@article{!\"#$%&'()*+-./123:;<=>?@ABC[\\]^_`abc{|~,}"; let bt = RawBibliography::parse(file).unwrap(); let article = &bt.entries[0]; assert_eq!(article.v.key.v, "!\"#$%&'()*+-./123:;<=>?@ABC[\\]^_`abc{|~"); } #[test] fn test_empty_entry_key() { let file = "@article{,}"; let bt = RawBibliography::parse(file).unwrap(); let article = &bt.entries[0]; assert_eq!(article.v.key.v, ""); } #[test] fn test_parse_article() { let file = "@article{haug2020, title = \"Great proceedings\\{\", year=2002, author={Haug, {Martin} and Haug, Gregor}}"; let bt = RawBibliography::parse(file).unwrap(); let article = &bt.entries[0]; assert_eq!(article.v.kind.v, "article"); assert_eq!(article.v.fields[0].key.v, "title"); assert_eq!(article.v.fields[1].key.v, "year"); assert_eq!(article.v.fields[2].key.v, "author"); assert_eq!(format(&article.v.fields[0].value.v), "{Great proceedings\\{}"); assert_eq!(format(&article.v.fields[1].value.v), "{2002}"); assert_eq!(format(&article.v.fields[2].value.v), "{Haug, {Martin} and Haug, Gregor}"); } #[test] fn test_resolve_string() { let bt = RawBibliography::parse("@string{BT = \"bibtex\"}").unwrap(); assert_eq!(bt.abbreviations[0].key.v, "BT"); assert_eq!(&bt.abbreviations[0].value.v, &vec![Spanned::new(RawChunk::Normal("bibtex"), 14..20)]); } #[test] fn test_escape() { assert_eq!(test_prop("author", "{Mister A\\}\"B\"}"), "{Mister A\\}\"B\"}"); } #[test] fn test_abbr() { assert_eq!(test_prop("author", "dec # {~12}"), "dec # \"~12\""); } } biblatex-0.10.0/src/resolve.rs000064400000000000000000000452141046102023000143160ustar 00000000000000use unicode_normalization::char; use crate::chunk::{Chunk, Chunks}; use crate::mechanics::is_verbatim_field; use crate::raw::{ is_id_continue, Field, Pair, ParseError, ParseErrorKind, RawChunk, Token, }; use crate::types::get_month_for_abbr; use crate::{ChunksExt, Span, Spanned}; use unscanny::Scanner; /// Fully parse a field, resolving abbreviations and LaTeX commands. 
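///
/// A minimal usage sketch (hypothetical chunks and abbreviation map; see
/// `test_process` at the bottom of this file for a concrete case):
///
/// ```ignore
/// // Abbreviation chunks are looked up in `abbreviations` (falling back to
/// // month names), normal chunks are parsed for commands and braces, and
/// // adjacent chunks of the same kind are flattened into one.
/// let chunks = parse_field("title", &field, &abbreviations)?;
/// ```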
pub fn parse_field( key: &str, field: &Field, abbreviations: &Vec>, ) -> Result { let mut chunks = vec![]; for e in field { match e.v { RawChunk::Abbreviation(s) => { chunks.extend(resolve_abbreviation( key, s, e.span.clone(), abbreviations, )?); } RawChunk::Normal(s) => { chunks.extend(ContentParser::new(key, s, e.span.start).parse()?); } } } flatten(&mut chunks); Ok(chunks) } #[derive(Clone)] struct ContentParser<'s> { s: Scanner<'s>, verb_field: bool, current_chunk: Chunk, result: Chunks, start: usize, offset: usize, } impl<'s> ContentParser<'s> { fn new(key: &'s str, field: &'s str, offset: usize) -> Self { Self { s: Scanner::new(field), verb_field: is_verbatim_field(key), current_chunk: Self::default_chunk(0), result: vec![], start: 0, offset, } } fn parse(self) -> Result { let offset = self.offset; self.parse_impl() .map_err(|mut e| { e.span.start += offset; e.span.end += offset; e }) .map(|mut chunks| { for chunk in &mut chunks { chunk.span.start += offset; chunk.span.end += offset; } chunks }) } fn parse_impl(mut self) -> Result { let mut depth = 0; self.current_chunk = Self::default_chunk(depth); while let Some(c) = self.s.peek() { match c { '\\' => { let sequence = self.backslash()?; self.current_chunk.get_mut().push_str(&sequence) } '$' if !self.verb_field => { self.turnaround(depth); let math = self.math()?; self.result.push(math); } '{' => { depth += 1; self.turnaround(depth); self.s.eat(); self.start += 1; } '}' => { if depth == 0 { let idx = self.s.cursor(); self.s.eat(); return Err(ParseError::new( idx..self.s.cursor(), ParseErrorKind::Unexpected(Token::ClosingBrace), )); } depth -= 1; self.turnaround(depth); self.start += 1; self.s.eat(); } '-' => { let mut count = 0; let hyphens = self.s.eat_while(|c| { let res = c == '-' && count < 3; if res { count += 1; } res }); match count { 1 => self.current_chunk.get_mut().push('-'), 2 => self.current_chunk.get_mut().push('–'), 3 => self.current_chunk.get_mut().push('—'), _ => self.current_chunk.get_mut().push_str(hyphens), } } _ if c.is_whitespace() => { self.current_chunk.get_mut().push(' '); self.s.eat_whitespace(); } _ => self.current_chunk.get_mut().push(self.s.eat().unwrap()), } } if !self.current_chunk.get().is_empty() || self.result.is_empty() { self.turnaround(depth); } Ok(self.result) } fn turnaround(&mut self, depth: usize) { self.result.push(Spanned::new( std::mem::replace(&mut self.current_chunk, Self::default_chunk(depth)), self.start..self.s.cursor(), )); self.start = self.s.cursor(); } fn backslash(&mut self) -> Result { self.eat_assert('\\'); match self.s.peek() { Some(c) if c != '^' && c != '~' && is_escapable(c, self.verb_field, true) => { self.s.eat(); Ok(c.to_string()) } _ if self.verb_field => Ok("\\".to_string()), Some(c) if !c.is_whitespace() && !c.is_control() => self.command(), Some(c) => Ok(format!("\\{}", c)), None => Err(ParseError::new(self.here(), ParseErrorKind::UnexpectedEof)), } } fn command(&mut self) -> Result { let pos = self.s.cursor(); let valid_start = self .s .peek() .map(|c| !c.is_whitespace() && !c.is_control()) .unwrap_or_default(); if !valid_start { return Err(ParseError::new( pos..self.s.cursor(), ParseErrorKind::MalformedCommand, )); } if !is_single_char_func(self.s.eat().unwrap()) { self.s.eat_while(is_id_continue); } let command = self.s.from(pos); let ws = !self.s.eat_whitespace().is_empty(); let first_char = command.chars().next().unwrap(); let arg = if self.s.peek() != Some('{') && command.chars().count() == 1 && first_char != '-' && is_single_char_func(first_char) { let idx = 
self.s.cursor(); self.s.eat(); Some(self.s.from(idx).into()) } else if !ws && self.s.eat_if('{') { let mut depth = 1; let idx = self.s.cursor(); loop { self.s.eat_until(['{', '}']); match self.s.eat() { Some('{') => { depth += 1; } Some('}') => { depth -= 1; if depth == 0 { break; } } Some(_) => unreachable!(), None => { return Err(ParseError::new( self.here(), ParseErrorKind::UnexpectedEof, )); } } } let brace = '}'.len_utf8(); let arg = self.s.from(idx); let arg = ContentParser::new("", &arg[..arg.len() - brace], idx) .parse()? .format_verbatim(); Some(arg) } else { None }; Ok(execute_command(command, arg.as_deref())) } fn math(&mut self) -> Result, ParseError> { self.eat_assert('$'); let idx = self.s.cursor(); let res = self.s.eat_until(|c| c == '$'); let span = idx..self.s.cursor(); if self.s.done() { return Err(ParseError::new(self.here(), ParseErrorKind::UnexpectedEof)); } self.s.eat(); self.start = self.s.cursor(); Ok(Spanned::new(Chunk::Math(res.into()), span)) } #[track_caller] fn eat_assert(&mut self, c: char) { if self.s.eat() != Some(c) { panic!("assertion failed: expected '{}'", c); } } fn here(&self) -> Span { self.s.cursor()..self.s.cursor() } fn default_chunk(depth: usize) -> Chunk { if depth > 0 { Chunk::Verbatim(String::new()) } else { Chunk::Normal(String::new()) } } } /// Resolves `Chunk::Abbreviation` items to their respective string values. fn resolve_abbreviation( key: &str, abbr: &str, span: Span, map: &Vec>, ) -> Result { let fields = map.iter() .find(|e| e.key.v == abbr) .map(|e| &e.value.v) .ok_or(ParseError::new( span.clone(), ParseErrorKind::UnknownAbbreviation(abbr.into()), )); if fields.is_err() { if let Some(month) = get_month_for_abbr(abbr) { return Ok(vec![Spanned::new(Chunk::Normal(month.0.to_string()), span)]); } } parse_field(key, fields?, map) } /// Best-effort evaluation of LaTeX commands with a focus on diacritics. /// Will dump the command arguments if evaluation is not possible. /// Nested commands are not supported. fn execute_command(command: &str, arg: Option<&str>) -> String { fn last_char_combine(v: Option<&str>, combine: char) -> String { if let Some(v) = v { if v.is_empty() { match combine { '\u{302}' => '^'.into(), '\u{303}' => '~'.into(), _ => combine.into(), } } else { let mut chars = v.chars(); // Account for legacy TeX behavior of requiring an uncapped i or // j to add another diacritic. 
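                // (`\i` and `\j` yield dotless glyphs precisely so that a
                // combining accent can sit on top; map them back before
                // composing.)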
let last = match chars.next_back().unwrap() { 'ı' => 'i', 'ȷ' => 'j', c => c, }; let combined = char::compose(last, combine).unwrap_or(last); let mut res = chars.as_str().to_string(); res.push(combined); res } } else { combine.into() } } match command { "LaTeX" => "LaTeX".to_string(), "TeX" => "TeX".to_string(), "textendash" => "–".to_string(), "textemdash" => "—".to_string(), "textquotesingle" => "'".to_string(), "textquotedblleft" => "“".to_string(), "textquotedblright" => "”".to_string(), "textquoteleft" => "‘".to_string(), "textquoteright" => "’".to_string(), "textquotestraightdblbase" | "quotedblbase" => "„".to_string(), "textquotestraightbase" | "quotesinglbase" => "‚".to_string(), "textquotedbl" => "\"".to_string(), "textasciicircum" => "^".to_string(), "textasciigrave" => "`".to_string(), "textasciitilde" => "~".to_string(), "textasteriskcentered" => "⁎".to_string(), "textbackslash" => "\\".to_string(), "textbar" => "|".to_string(), "textbraceleft" => "{".to_string(), "textbraceright" => "}".to_string(), "textbullet" => "•".to_string(), "textdagger" => "†".to_string(), "textdaggerdbl" => "‡".to_string(), "textdollar" => "$".to_string(), "textless" => "<".to_string(), "textgreater" => ">".to_string(), "textexclamdown" => "¡".to_string(), "textquestiondown" => "¿".to_string(), "textordfeminine" => "ª".to_string(), "textordmasculine" => "º".to_string(), "textperiodcentered" => "·".to_string(), "textregistered" => "®".to_string(), "texttrademark" => "™".to_string(), "textsection" => "§".to_string(), "textunderscore" => "_".to_string(), "textvisiblespace" => "␣".to_string(), "guillemotleft" => "«".to_string(), "guillemotright" => "»".to_string(), "guilsinglleft" => "‹".to_string(), "guilsinglright" => "›".to_string(), "aa" => "å".to_string(), "AA" => "Å".to_string(), "ae" => "æ".to_string(), "AE" => "Æ".to_string(), "dh" => "ð".to_string(), "DH" => "Ð".to_string(), "dj" => "đ".to_string(), "DJ" => "Đ".to_string(), "ng" => "ŋ".to_string(), "NG" => "Ŋ".to_string(), "l" => "ł".to_string(), "L" => "Ł".to_string(), "i" => "ı".to_string(), "oe" => "œ".to_string(), "OE" => "Œ".to_string(), "o" if arg.is_none() => "ø".to_string(), "O" => "Ø".to_string(), "ss" => "ß".to_string(), "SS" => "ẞ".to_string(), "th" => "þ".to_string(), "TH" => "Þ".to_string(), "P" | "textparagraph" => "¶".to_string(), "S" => "§".to_string(), "copyright" => { if let Some(arg) = arg { format!("©{}", arg) } else { "©".to_string() } } "ddag" => "‡".to_string(), "dots" | "textellipsis" => "…".to_string(), "pounds" => "£".to_string(), "`" => last_char_combine(arg, '\u{300}'), "´" => last_char_combine(arg, '\u{301}'), "'" => last_char_combine(arg, '\u{301}'), "^" => last_char_combine(arg, '\u{302}'), "~" => last_char_combine(arg, '\u{303}'), "=" => last_char_combine(arg, '\u{304}'), "u" => last_char_combine(arg, '\u{306}'), "." => last_char_combine(arg, '\u{307}'), "\"" => last_char_combine(arg, '\u{308}'), "r" => last_char_combine(arg, '\u{30A}'), "H" => last_char_combine(arg, '\u{30B}'), "v" => last_char_combine(arg, '\u{30C}'), "d" => last_char_combine(arg, '\u{323}'), "c" => last_char_combine(arg, '\u{327}'), "k" => last_char_combine(arg, '\u{328}'), "b" => last_char_combine(arg, '\u{332}'), "o" => last_char_combine(arg, '\u{338}'), "-" => String::new(), _ => { if let Some(arg) = arg { format!("\\{}{{{}}}", command, arg) } else { format!("\\{} ", command) } } } } /// Simplifies a chunk vector by collapsing neighboring Normal or Verbatim chunks. 
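/// For example, two adjacent `Chunk::Normal` chunks are merged into a single
/// chunk whose span covers both; a `Normal` chunk followed by a `Verbatim`
/// chunk is left untouched.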
fn flatten(chunks: &mut Chunks) { let mut i = 1; loop { if i >= chunks.len() { break; } let merge = matches!( (&chunks[i - 1].v, &chunks[i].v), (Chunk::Normal(_), Chunk::Normal(_)) | (Chunk::Verbatim(_), Chunk::Verbatim(_)) ); if merge { let redundant = std::mem::replace( &mut chunks[i], Spanned::new(Chunk::Normal("".to_string()), 0..0), ); chunks[i - 1].v.get_mut().push_str(redundant.v.get()); chunks[i - 1].span.end = redundant.span.end; chunks.remove(i); } else { i += 1; } } } /// Characters that can be escaped. /// /// In read mode (`read_char = true`), colons are also converted to an unescaped /// string to keep compatibility with Zotero. Zotero escapes colons when /// exporting verbatim fields. This crate doesn't escape colons when exporting. /// /// List of reserved characters here /// http://latexref.xyz/Reserved-characters.html pub fn is_escapable(c: char, verb: bool, read_char: bool) -> bool { match c { '{' | '}' | '\\' => true, '~' | '^' | '#' | '&' | '%' | '$' | '_' if !verb => true, ':' if read_char => true, _ => false, } } /// Characters that are the name of a single-char command /// that automatically terminates. fn is_single_char_func(c: char) -> bool { matches!(c, '"' | '´' | '`' | '\'' | '^' | '~' | '=' | '.' | '\\' | '-') } #[cfg(test)] #[allow(non_snake_case)] mod tests { use crate::raw::Pair; use super::{parse_field, Chunk, RawChunk, Spanned}; fn N(s: &str) -> Chunk { Chunk::Normal(s.to_string()) } fn V(s: &str) -> Chunk { Chunk::Verbatim(s.to_string()) } fn M(s: &str) -> Chunk { Chunk::Math(s.to_string()) } fn z(c: RawChunk) -> Spanned { Spanned::new(c, 0..0) } #[test] fn test_process() { let map: Vec<_> = [("abc", "ABC"), ("hi", "hello"), ("you", "person")] .into_iter() .map(|(k, v)| { Pair::new( Spanned::detached(k), Spanned::detached(vec![z(RawChunk::Normal(v))]), ) }) .collect(); let field = vec![ z(RawChunk::Abbreviation("abc")), z(RawChunk::Normal("good {TIMES}")), z(RawChunk::Abbreviation("hi")), z(RawChunk::Abbreviation("you")), z(RawChunk::Normal("last")), ]; let res = parse_field("", &field, &map).unwrap(); assert_eq!(res[0].v, N("ABCgood ")); assert_eq!(res[1].v, V("TIMES")); assert_eq!(res[2].v, N("hellopersonlast")); assert_eq!(res.len(), 3); } #[test] fn test_resolve_commands_and_escape() { let field = vec![z(RawChunk::Normal( "\\\"{A}ther und {\"\\LaTeX \"} {\\relax for you\\}}", ))]; let res = parse_field("", &field, &Vec::new()).unwrap(); assert_eq!(res[0].v, N("Äther und ")); assert_eq!(res[1].v, V("\"LaTeX\"")); assert_eq!(res[2].v, N(" ")); assert_eq!(res[3].v, V("\\relax for you}")); assert_eq!(res.len(), 4); let field = vec![z(RawChunk::Normal("M\\\"etal S\\= ound"))]; let res = parse_field("", &field, &Vec::new()).unwrap(); assert_eq!(res[0].v, N("Mëtal Sōund")); let field = vec![z(RawChunk::Normal(r"L\^{e} D\~{u}ng Tr\'{a}ng"))]; let res = parse_field("", &field, &Vec::new()).unwrap(); assert_eq!(res[0].v, N("Lê Dũng Tráng")); } #[test] fn test_math() { let field = vec![z(RawChunk::Normal( "The $11^{th}$ International Conference on How To Make \\$\\$", ))]; let res = parse_field("", &field, &Vec::new()).unwrap(); assert_eq!(res[0].v, N("The ")); assert_eq!(res[1].v, M("11^{th}")); assert_eq!(res[2].v, N(" International Conference on How To Make $$")); assert_eq!(res.len(), 3); } #[test] fn test_commands() { let field = vec![z(RawChunk::Normal("Bose\\textendash{}Einstein uses Win\\-dows"))]; let res = parse_field("", &field, &Vec::new()).unwrap(); assert_eq!(res[0].v, N("Bose–Einstein uses Windows")); } #[test] fn test_hyphens() { let field = 
vec![z(RawChunk::Normal("- Knitting A--Z --- A practical guide -----"))]; let res = parse_field("", &field, &Vec::new()).unwrap(); assert_eq!(res[0].v, N("- Knitting A–Z — A practical guide —–")); } } biblatex-0.10.0/src/types/date.rs000064400000000000000000001013311046102023000147110ustar 00000000000000use std::cmp::Ordering; use std::fmt::{self, Display, Formatter}; use std::str::FromStr; use crate::chunk::*; use crate::{Span, Spanned, Type, TypeError, TypeErrorKind}; use unscanny::Scanner; /// A date or a range of dates and their certainty and exactness. #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub struct Date { /// The date or the date range. pub value: DateValue, /// Indicates whether the sources are sure about the date. pub uncertain: bool, /// Indicates the specificity of the date value. pub approximate: bool, } /// A single date or a range of dates. #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub enum DateValue { /// A single date. At(Datetime), /// A range of dates with known start, but open end. After(Datetime), /// A range of dates with open start, but known end. Before(Datetime), /// A range of dates with known start and end. Between(Datetime, Datetime), } /// Timezone-unaware date and time. /// /// Must specify a year and may specify month, day, and time. #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub struct Datetime { /// The year. /// /// AD years are counted starting from one and thus represented as exactly /// their year (e.g., 2000 AD is `2000`) whereas BC years are counted /// starting from zero downwards (e.g., 1000 BC is `999`) pub year: i32, /// The month (starting at zero). pub month: Option, /// The day (starting at zero). pub day: Option, /// The timezone-unaware time. pub time: Option