tl-0.7.8/.cargo_vcs_info.json 0000644 00000000136 00000000001 0011467 0 ustar {
"git": {
"sha1": "604f644ada25d8aac78f423b4117932d766d6b79"
},
"path_in_vcs": ""
} tl-0.7.8/.gitignore 0000644 0000000 0000000 00000000036 10461020230 0012246 0 ustar 0000000 0000000 /target
Cargo.lock
*.html
*.js tl-0.7.8/CHANGELOG.md 0000644 0000000 0000000 00000012606 10461020230 0012075 0 ustar 0000000 0000000 Changes annotated with `⚠` are breaking.
# 0.7.8
- Fixes a build error if compiled with the `simd` feature flag. See [y21/tl#60]
- Fixes MDN-related doc comments ([y21/tl#51])
# 0.7.7
- Fixes a bug in the query selector parser that made it fail to parse values containing `:`. See [y21/tl#46](https://github.com/y21/tl/issues/46) and [y21/tl#47] for more details.
# 0.7.6
- Fixes a build error if compiled with the `simd` feature flag. See [y21/tl#41](https://github.com/y21/tl/issues/41) for more details.
- ⚠ In prior versions, `innerHTML()` actually had the behavior of `Element#outerHTML`. This was changed and `innerHTML` now correctly only returns the markup of its subnodes, and not the markup of the own node.
- `outerHTML()` was added to nodes, which moves the old behavior to another function.
- Added `children_mut()`, which allows mutating the subnodes of an HTML Tag.
# 0.7.5
- Fixed a bug that caused the parser to parse closing tags incorrectly. See [y21/tl#37](https://github.com/y21/tl/issues/37) and [y21/tl#38](https://github.com/y21/tl/pull/38) for more details.
# 0.7.4
- Restructure internals (mainly SIMD functions)
- Add fuzzing targets for internals
- Optimize stable parser (adds stable alternatives when the `simd` feature isn't set)
# 0.7.3
- Fixed `HTMLTag::raw()` returning one byte less than it should have. See [y21/tl#31](https://github.com/y21/tl/issues/31).
# 0.7.2
- Add `Attributes::contains(key)` to check if an attribute exists.
- Add `Attributes::remove(key)` to remove an attribute.
- Add `Attributes::remove_value(key)` to delete the value of a given attribute key.
# 0.7.1
- Version bump in README.md
# 0.7.0
> **Warning: This release contains breaking changes**
- ⚠ Function signature of `Attributes::insert` has changed:
- It now takes two generic parameters `K, V` instead of just one.
Prior to this version, this meant that the key and value type had to match.
See [y21/tl#27](https://github.com/y21/tl/pull/26) for more details.
- Added a `TryFrom<String> for Bytes` implementation for convenience to create owned `Bytes`.
- Added `HTMLTag::boundaries` method for obtaining the start and end position of a tag in the source string.
- Fixed a panic when source string abruptly ends with ``) is interpreted as `>` and causes the next `>` to be interpreted as a text node on its own.
# 0.6.1
- Fixed an off-by-one error in the `QueryIterable` trait implementation for `HTMLTag` that caused query selectors on HTML tags to return one node less than they should.
# 0.6.0
> **Warning: This release contains breaking changes**
- ⚠ Removed deprecated method `VDom::find_node`
- Alternative: use `VDom::nodes().iter().find(...)` instead
- ⚠ `Attributes::get()` now returns a reference to `Bytes` instead of cloning.
- Prior to this version, it wasn't necessary to return a reference as the
`Bytes` type was just an immutable `&[u8]`. Now it can hold owned data.
- ⚠ `HTMLTag::children()` no longer returns an iterator, and instead returns a wrapper struct around the children of the HTML tag.
This wrapper struct makes it easy to obtain direct children of the tag (`Children::top()`),
or all children (including their children, etc...) (`Children::all()`).
- ⚠ `Node::children()` no longer returns an iterator (see above).
- ⚠ `HTMLTag::name()` now returns a reference to `Bytes` instead of cloning (see above).
- Ability to create/parse query selectors independent of any parser (`tl::parse_query_selector`)
- Ability to reuse query selectors
- Ability to apply query selectors on `HTMLTag`s (see [#18](https://github.com/y21/tl/issues/18))
- `queryselector` module is now public
- `InnerNodeHandle` is now u32
- Remove unused `max_depth` parser option
- Add convenience `PartialEq<str>` and `PartialEq<[u8]>` impls for Bytes
# 0.5.0
> **Warning: This release contains breaking changes**
- Allow `Bytes` to store owned data through `Bytes::set()`
- ⚠ The maximum length for `Bytes` is `u32::MAX`
- ⚠ `tl::parse()` now returns `Result<VDom, ParseError>`
- ⚠ `Attributes` fields are no longer public, instead use one of the provided methods
- ⚠ `HTMLTag::inner_html()` now takes a `&Parser` and no longer directly returns the substring
- Node mutations to the tag or any of its subnodes means `inner_html` needs to be recomputed
- Consider using `HTMLTag::raw()` if you never mutate any nodes
# 0.4.4
- Parse unquoted attribute values properly (``) [#12]
- Parse valueless attributes properly (`
"#;
let dom = parse(input, ParserOptions::default()).unwrap();
let parser = dom.parser();
let element = dom
.nodes()
.iter()
.find(|x| x.as_tag().map_or(false, |x| x.name().eq("a")));
assert_eq!(element.map(|x| x.inner_text(parser)), Some("nested".into()));
}
// Regression test for inputs that used to make the parser panic or never terminate.
// There is nothing to compare against: the test passes as soon as every `parse`
// call returns and its result unwraps.
#[test]
fn fuzz() {
// Some tests that would previously panic or end in an infinite loop
// We don't need to assert anything here, just see that they finish
parse("J\x00<", ParserOptions::default()).unwrap();
// NOTE(review): `count` is not defined anywhere in this function as shown, and the
// repeated literal is empty — part of this test appears to be missing here; verify
// against the upstream source before relying on it.
parse("".repeat(count), ParserOptions::default()).unwrap();
}
// Overwrites the value of a `src` attribute through a mutable parser borrow and
// checks that a later lookup of the same attribute observes the new value.
#[test]
fn mutate_dom() {
    // NOTE(review): this literal is empty as shown; the `[src]` selector below needs
    // an element carrying a `src` attribute — verify against the upstream source.
    let source = r#""#;
    let mut dom = parse(source, ParserOptions::default()).unwrap();

    // Handle of the first node matched by the attribute selector.
    let handle = dom.query_selector("[src]").unwrap().next().unwrap();

    // Resolve the handle mutably and drill down to the tag's attribute map.
    let attributes = handle
        .get_mut(dom.parser_mut())
        .unwrap()
        .as_tag_mut()
        .unwrap()
        .attributes_mut();

    // Replace the attribute value in place.
    attributes
        .get_mut("src")
        .flatten()
        .unwrap()
        .set("world.png")
        .unwrap();

    assert_eq!(attributes.get("src"), Some(Some(&"world.png".into())));
}
mod simd {
// These tests make sure that SIMD functions do the right thing
// Exercises `crate::simd::matches_case_insensitive` with one haystack that is
// expected to match the needle and three that are expected not to (one extra
// trailing byte, a different last byte, different first and last bytes).
#[test]
fn matches_case_insensitive_test() {
    const NEEDLE: [u8; 4] = *b"html";
    // (haystack, whether it is expected to match NEEDLE)
    const CASES: &[(&[u8], bool)] = &[
        (b"hTmL", true),
        (b"hTmLs", false),
        (b"hTmy", false),
        (b"/Tmy", false),
    ];

    for &(haystack, expected) in CASES {
        assert_eq!(
            crate::simd::matches_case_insensitive(haystack, NEEDLE),
            expected,
            "haystack: {:?}",
            haystack
        );
    }
}
// Exercises `crate::simd::find` (single-byte search) on haystacks of assorted
// lengths, with and without the needle, plus one long haystack whose only match
// is its very last byte.
#[test]
fn string_search() {
    // (haystack, expected result of searching for a space)
    const SPACE_CASES: &[(&[u8], Option<usize>)] = &[
        (b"a", None),
        (b"", None),
        (b"a ", Some(1)),
        (b"abcd ", Some(4)),
        (b"ab cd ", Some(2)),
        (b"abcdefgh ", Some(8)),
        (b"abcdefghi ", Some(9)),
        (b"abcdefghi", None),
        (b"abcdefghiabcdefghi .", Some(18)),
        (b"abcdefghiabcdefghi.", None),
    ];

    for &(haystack, expected) in SPACE_CASES {
        assert_eq!(
            crate::simd::find(haystack, b' '),
            expected,
            "haystack: {:?}",
            haystack
        );
    }

    // `count` filler bytes followed by the needle; a shorter input is used under Miri.
    let count: usize = if cfg!(miri) { 500 } else { 1000 };
    let mut long = "a".repeat(count);
    long.push('b');
    assert_eq!(crate::simd::find(long.as_bytes(), b'b'), Some(count));
}
// Exercises `crate::simd::find4` with a four-byte needle set. The cases cover
// haystacks of several lengths in which each of the four needle bytes appears
// in turn, as well as haystacks containing none of them.
#[test]
fn string_search_4() {
    const NEEDLE: [u8; 4] = *b"abcd";
    // (haystack, expected result for NEEDLE)
    const CASES: &[(&[u8], Option<usize>)] = &[
        (b"e", None),
        (b"a", Some(0)),
        (b"ea", Some(1)),
        (b"ef", None),
        (b"ef a", Some(3)),
        (b"ef g", None),
        (b"ef ghijk", None),
        (b"ef ghijkl", None),
        (b"ef ghijkla", Some(9)),
        (b"ef ghiajklm", Some(6)),
        (b"ef ghibjklm", Some(6)),
        (b"ef ghicjklm", Some(6)),
        (b"ef ghidjklm", Some(6)),
        (b"ef ghijklmnopqrstua", Some(18)),
        (b"ef ghijklmnopqrstub", Some(18)),
        (b"ef ghijklmnopqrstuc", Some(18)),
        (b"ef ghijklmnopqrstud", Some(18)),
        (b"ef ghijklmnopqrstu", None),
    ];

    for &(haystack, expected) in CASES {
        assert_eq!(
            crate::simd::find4(haystack, NEEDLE),
            expected,
            "haystack: {:?}",
            haystack
        );
    }
}
#[test]
#[rustfmt::skip]
fn search_non_ident() {
assert_eq!(crate::simd::search_non_ident(b"this-is-a-very-long-identifier<"), Some(30));
assert_eq!(crate::simd::search_non_ident(b"0123456789Abc_-<"), Some(15));
assert_eq!(crate::simd::search_non_ident(b"0123456789Abc-<"), Some(14));
assert_eq!(crate::simd::search_non_ident(b"0123456789Abcdef_-<"), Some(18));
assert_eq!(crate::simd::search_non_ident(b""), None);
assert_eq!(crate::simd::search_non_ident(b"short"), None);
assert_eq!(crate::simd::search_non_ident(b"short_<"), Some(6));
assert_eq!(crate::simd::search_non_ident(b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-_"), None);
assert_eq!(crate::simd::search_non_ident(b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-_<"), Some(64));
assert_eq!(crate::simd::search_non_ident(b"0123456789ab
let mut x4 = x1.clone();
x4.set(vec![0u8, 1, 2, 3, 4].into_boxed_slice()).unwrap(); // Box<[u8]>
let mut x5 = x1.clone();
x5.set(String::from("Tests are important")).unwrap(); // String
}
}
// Regression test: an element must still be discoverable by id when the
// document is parsed with default options.
#[test]
fn valueless_attribute() {
    // https://github.com/y21/tl/issues/11
    // NOTE(review): the markup literal contains only a newline as shown; verify
    // against the upstream source.
    let markup = r#"
"#;
    let dom = parse(markup, ParserOptions::default()).unwrap();
    assert!(dom.get_element_by_id("u54423").is_some());
}
// Regression test: looks an element up by id and checks its inner text.
#[test]
fn unquoted() {
    // https://github.com/y21/tl/issues/12
    let markup = r#"
Hello World
"#;
    let dom = parse(markup, ParserOptions::default()).unwrap();
    let parser = dom.parser();

    // Resolve the id to a handle, the handle to a node, and the node to its text.
    let text = dom
        .get_element_by_id("u54423")
        .and_then(|handle| handle.get(parser))
        .map(|node| node.inner_text(parser));

    assert_eq!(text, Some("Hello World".into()));
}
// Tests for query selectors, both on the whole document and on a single tag.
mod query_selector {
    use super::*;

    // A class selector on the document yields a tag whose inner text is "hello".
    #[test]
    fn query_selector_simple() {
        let input = "
hello
";
        let dom = parse(input, ParserOptions::default()).unwrap();
        let parser = dom.parser();

        let first_match = dom
            .query_selector(".hi")
            .unwrap()
            .next()
            .and_then(|handle| handle.get(parser))
            .unwrap();
        let el = force_as_tag(first_match);

        assert_eq!(dom.nodes().len(), 3);
        assert_eq!(el.inner_text(parser), "hello");
    }

    // Query selectors applied to the first tag of a document: no match, one
    // match, and (per the original section comment) a nested match.
    #[test]
    fn tag_query_selector() {
        // Parses `input`, runs `selector` on the first node (which must be a tag)
        // and expects exactly one match whose inner text is "PASS".
        fn expect_single_pass(input: &str, selector: &str) {
            let dom = parse(input, ParserOptions::default()).unwrap();
            let parser = dom.parser();
            let mut matches = dom.nodes()[0]
                .as_tag()
                .unwrap()
                .query_selector(parser, selector)
                .unwrap();

            // Count on a clone so the iterator itself is still fresh afterwards.
            assert_eq!(matches.clone().count(), 1);

            let text = matches
                .next()
                .unwrap()
                .get(parser)
                .unwrap()
                .inner_text(parser);
            assert_eq!(text, "PASS");
        }

        // empty
        let dom = parse("", ParserOptions::default()).unwrap();
        let parser = dom.parser();
        let match_count = dom.nodes()[0]
            .as_tag()
            .unwrap()
            .query_selector(parser, "div.z")
            .unwrap()
            .count();
        assert_eq!(match_count, 0);

        // one child
        expect_single_pass(
            r#"
PASS
"#,
            "div.z",
        );

        // nested
        expect_single_pass(
            r#"
PASS
"#,
            "div.y",
        );
    }

    // An attribute selector whose quoted value contains `:` must match, and the
    // matched tag's `content` attribute must read "hello".
    #[test]
    fn query_selector_with_quote() {
        let input = r#""#;
        let dom = parse(input, ParserOptions::default()).unwrap();
        let parser = dom.parser();

        let value = dom
            .query_selector(r#"meta[property="og:title"]"#)
            .and_then(|mut iter| iter.next())
            .map(|node| {
                node.get(parser)
                    .unwrap()
                    .as_tag()
                    .unwrap()
                    .attributes()
                    .get("content")
                    .flatten()
                    .unwrap()
                    .try_as_utf8_str()
                    .unwrap()
                    .to_string()
            });

        assert_eq!(value, Some("hello".to_string()));
    }
}
// Checks that `dom.nodes()` lists nodes in the expected order: a tag is followed
// by its text, and an outer tag is followed by its inner tag and that tag's text.
#[test]
fn nodes_order() {
    let input = r#"
test
test2
"#
    .trim();
    let dom = parse(input, Default::default()).unwrap();
    let nodes = dom.nodes();
    // 5 nodes in total
    assert_eq!(nodes.len(), 5);
    // First node is the `p` tag
    assert_eq!(&nodes[0].as_tag().unwrap()._name, "p");
    // Second node is the inner text of the first tag: test
    assert_eq!(nodes[1].as_raw().unwrap().as_bytes(), b"test");
    // Third node is the `div` tag
    assert_eq!(&nodes[2].as_tag().unwrap()._name, "div");
    // Fourth node is the inner `span` node
    assert_eq!(&nodes[3].as_tag().unwrap()._name, "span");
    // Fifth node is the inner text of that inner node: test2
    assert_eq!(nodes[4].as_raw().unwrap().as_bytes(), b"test2");
}
// Parses a document expected to consist of a single comment node and checks
// the comment's text.
#[test]
fn comment() {
    // NOTE(review): both the input and the expected text are empty as shown;
    // verify against the upstream source.
    let dom = parse("", Default::default()).unwrap();
    assert_eq!(dom.nodes().len(), 1);

    let text = dom.nodes()[0].as_comment().unwrap().as_utf8_str();
    assert_eq!(text, "");
}
// Exercises `children().all(..)` on the first tag of several documents: first
// the number of nodes it returns, then the inner text of the last node returned.
#[test]
fn tag_all_children() {
    // Expects `all()` on the first node's children to return `len` nodes.
    fn assert_len(input: &str, len: usize) {
        let dom = parse(input, Default::default()).unwrap();
        let parser = dom.parser();
        let children = dom.nodes()[0].as_tag().unwrap().children();
        assert_eq!(children.all(parser).len(), len);
    }

    // Expects the last node returned by `all()` to have inner text `last`.
    fn assert_last(input: &str, last: &str) {
        let dom = parse(input, Default::default()).unwrap();
        let parser = dom.parser();
        let children = dom.nodes()[0].as_tag().unwrap().children();
        let final_node = children.all(parser).last().unwrap();
        assert_eq!(final_node.inner_text(parser), last);
    }

    assert_len(r#""#, 0);
    assert_len(r#"
a
"#, 1);
    assert_len(r#"
"#, 1);
    assert_len(r#"
a
"#, 2);
    assert_len(r#"
"#, 2);
    assert_len(r#"
a
"#, 3);

    assert_last(r#"
a
"#, "a");
    assert_last(r#"
a
"#, "a");
    assert_last(r#"
b
a
"#, "a");
    assert_last(r#"
b
a
"#, "a");
}
// Checks how many nodes a query selector yields when it is run on the first
// tag of a document.
#[test]
fn assert_length() {
    // Runs `selector` on the first node (which must be a tag) and expects `len` matches.
    fn assert_len(input: &str, selector: &str, len: usize) {
        let dom = parse(input, Default::default()).unwrap();
        let match_count = dom.nodes()[0]
            .as_tag()
            .unwrap()
            .query_selector(dom.parser(), selector)
            .unwrap()
            .count();
        assert_eq!(match_count, len);
    }

    assert_len("", "a", 0);
    assert_len("
", "a", 1);
    assert_len("
", "a", 2);
    assert_len("
", "span", 1);
}
// The first tag of the document must have no children recorded, and its raw
// source must equal the expected string.
#[test]
fn self_closing_no_child() {
    let dom = parse("
test
", Default::default()).unwrap();
    let nodes = dom.nodes();
    assert_eq!(nodes.len(), 3);

    let first = nodes[0].as_tag().unwrap();
    assert_eq!(first._children.len(), 0);
    assert_eq!(first.raw(), " ");
}
// An attribute value built from an owned `String` can be inserted and is
// readable back under the same key.
#[test]
fn insert_attribute_owned() {
    // https://github.com/y21/tl/issues/27
    let mut attributes = Attributes::new();
    let owned_value = Bytes::try_from(String::from("some style")).unwrap();
    attributes.insert("style", Some(owned_value));
    assert_eq!(attributes.get("style"), Some(Some(&"some style".into())));
}
// `boundaries` on the second node of the document must report the pair (5, 15).
#[test]
fn boundaries() {
    // https://github.com/y21/tl/issues/25
    let dom = parse("
haha
", Default::default()).unwrap();
    let parser = dom.parser();
    let boundary = dom.nodes()[1].as_tag().unwrap().boundaries(parser);
    assert_eq!(boundary, (5, 15));
}
// Removes first the value and then the whole `contenteditable` attribute from
// the first tag, checking the document's outer HTML after each step.
#[test]
fn attributes_remove_inner_html() {
    let mut dom = parse("testing", Default::default()).unwrap();

    // Step 1: drop only the attribute's value.
    {
        let attributes = dom.nodes_mut()[0]
            .as_tag_mut()
            .unwrap()
            .attributes_mut();
        attributes.remove_value("contenteditable");
    }
    assert_eq!(dom.outer_html(), "testing");

    // Step 2: drop the attribute entirely.
    {
        let attributes = dom.nodes_mut()[0]
            .as_tag_mut()
            .unwrap()
            .attributes_mut();
        attributes.remove("contenteditable");
    }
    assert_eq!(dom.outer_html(), "testing");
}
// `raw()` on the first top-level tag must return the expected source text.
#[test]
fn tag_raw() {
    let input = "
abcd
";
    let vdom = parse(input, Default::default()).unwrap();
    let parser = vdom.parser();

    // First top-level child, resolved to a node and then viewed as a tag.
    let first_node = vdom.children()[0].get(parser).unwrap();
    let first_tag = first_node.as_tag().unwrap();

    let from_raw = first_tag.raw().try_as_utf8_str().unwrap();
    assert_eq!(from_raw, "
abcd
");
}
#[test]
fn tag_raw_abrupt_stop() {
let input = "