add scraping

This commit is contained in:
annieversary 2022-08-16 17:49:32 +01:00
parent 4cdb7919b2
commit 3241f5570d
3 changed files with 52 additions and 0 deletions

View File

@ -7,9 +7,11 @@ edition = "2021"
default = []
inventory = ["dep:inventory"]
scraping = ["dep:scraper"]
[dependencies]
inventory = { version = "0.3", optional = true }
scraper = { version = "0.13.0", optional = true }
tracing = "0.1.35"
[[example]]

View File

@ -16,6 +16,9 @@ mod tests;
#[macro_use]
pub mod inventory;
/// helpers for collecting the `class` attributes found in html (see `get_classes`)
///
/// only compiled when the `scraping` feature is enabled
#[cfg(feature = "scraping")]
pub mod scraping;
/// used to generate css out of classes
///
/// contains shorthands and replacements that can be modified

47
src/scraping.rs Normal file
View File

@ -0,0 +1,47 @@
use scraper::{ElementRef, Html};
/// Gets all classes from an html
///
/// ```
/// # use zephyr::{*, scraping::*};
/// # fn main() {
/// let c = get_classes("<h1 class=\"m[1rem]\">Hello world!</h1>");
/// let z = Zephyr::new();
/// let css = z.generate_classes(c.iter().map(String::as_str));
/// # }
/// ```
pub fn get_classes(html: &str) -> Vec<String> {
let document = Html::parse_document(html);
let mut classes = vec![];
let mut queue: Vec<ElementRef> = vec![document.root_element()];
while let Some(handle) = queue.pop() {
let el = handle.value();
if let Some(c) = el.attr("class") {
classes.push(c.to_string());
}
for child in handle.children() {
if let Some(child) = ElementRef::wrap(child) {
queue.push(child);
}
}
}
classes
}
#[cfg(test)]
mod tests {
    use super::get_classes;

    /// Two identical headings, each with a nested `<i>`, must yield one entry
    /// per `class` attribute, with multi-class attributes kept as one string.
    #[test]
    fn test_parse() {
        let html = "<h1 class=\"hey hello\">Hello, <i class=\"hiii\">world!</i></h1>
<h1 class=\"hey hello\">Hello, <i class=\"hiii\">world!</i></h1>";

        let found = get_classes(html);

        assert_eq!(found, ["hey hello", "hiii", "hey hello", "hiii"]);
    }
}