Skip to content

Commit

Permalink
feat: support parsing documents
Browse files Browse the repository at this point in the history
  • Loading branch information
Orion Gonzalez committed Jul 19, 2024
1 parent 25017a4 commit beb34a2
Showing 1 changed file with 21 additions and 2 deletions.
23 changes: 21 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,7 @@ pub struct Builder<'a> {
strip_comments: bool,
id_prefix: Option<&'a str>,
generic_attribute_prefixes: Option<HashSet<&'a str>>,
is_document: bool,
}

impl<'a> Default for Builder<'a> {
Expand Down Expand Up @@ -486,6 +487,7 @@ impl<'a> Default for Builder<'a> {
strip_comments: true,
id_prefix: None,
generic_attribute_prefixes: None,
is_document: false,
}
}
}
Expand Down Expand Up @@ -1705,6 +1707,16 @@ impl<'a> Builder<'a> {
}
}

/// Switches the builder to whole-document mode.
///
/// After calling this, the input is treated as a complete HTML document
/// rather than a document fragment (such as the contents of a `div`).
/// The `base`, `body`, `head` and `title` tags are added to the allowed
/// tags, the `name` and `content` attributes are allowed on `meta`, and the
/// `lang` attribute is allowed on `html`.
///
/// Returns `self` so that further builder calls can be chained.
pub fn parse_as_document(&mut self) -> &mut Self {
    self.is_document = true;

    // TODO: expand on this
    // Structural tags that only make sense in a full document.
    self.add_tags(["base", "body", "head", "title"]);

    // NOTE(review): `meta` and `html` only receive attribute allowances
    // here; neither is passed to `add_tags` in this method — confirm that
    // they are permitted elsewhere, otherwise these allowances may have no
    // effect.
    self.add_tag_attributes("meta", ["name", "content"]);
    self.add_tag_attributes("html", ["lang"]);

    self
}

/// Sanitizes an HTML fragment in a string according to the configured options.
///
/// # Examples
Expand All @@ -1725,7 +1737,11 @@ impl<'a> Builder<'a> {
/// # }
/// # fn main() { do_main().unwrap() }
pub fn clean(&self, src: &str) -> Document {
    // Build exactly one parser. In document mode the whole input is parsed
    // with `html::parse_document`; otherwise the parser returned by
    // `Self::make_parser()` is used. (An earlier unconditional
    // `Self::make_parser()` binding was immediately shadowed and has been
    // dropped so that no parser is constructed only to be discarded.)
    let parser = if self.is_document {
        html::parse_document(RcDom::default(), html::ParseOpts::default())
    } else {
        Self::make_parser()
    };

    // Feed the complete source in one go, then sanitize the resulting DOM
    // according to the configured options.
    let dom = parser.one(src);
    self.clean_dom(dom)
}
Expand Down Expand Up @@ -1788,7 +1804,10 @@ impl<'a> Builder<'a> {
.is_none());
}
for tag_name in &self.clean_content_tags {
assert!(!self.tags.contains(tag_name), "`{tag_name}` appears in `clean_content_tags` and in `tags` at the same time");
assert!(
!self.tags.contains(tag_name),
"`{tag_name}` appears in `clean_content_tags` and in `tags` at the same time"
);
assert!(!self.tag_attributes.contains_key(tag_name), "`{tag_name}` appears in `clean_content_tags` and in `tag_attributes` at the same time");
}
let body = {
Expand Down

0 comments on commit beb34a2

Please sign in to comment.