aboutsummaryrefslogtreecommitdiff
path: root/src/api/icons.rs
diff options
context:
space:
mode:
author Daniel García <[email protected]> 2021-02-07 22:28:02 +0100
committer Daniel García <[email protected]> 2021-02-07 22:28:02 +0100
commit c836f88ff2a7f94ee7427ae04b91e702a31ab52a (patch)
tree 804ae38cc16134a63829a7de62f81c3578375673 /src/api/icons.rs
parent 8b660ae090179248544c9dc713a5ae2d896aad37 (diff)
download vaultwarden-c836f88ff2a7f94ee7427ae04b91e702a31ab52a.tar.gz
vaultwarden-c836f88ff2a7f94ee7427ae04b91e702a31ab52a.zip
Remove soup and use a newer html5ever directly
Diffstat (limited to 'src/api/icons.rs')
-rw-r--r-- src/api/icons.rs 73
1 files changed, 46 insertions, 27 deletions
diff --git a/src/api/icons.rs b/src/api/icons.rs
index 60e5fc07..5abcf375 100644
--- a/src/api/icons.rs
+++ b/src/api/icons.rs
@@ -11,7 +11,6 @@ use once_cell::sync::Lazy;
use regex::Regex;
use reqwest::{blocking::Client, blocking::Response, header, Url};
use rocket::{http::ContentType, http::Cookie, response::Content, Route};
-use soup::prelude::*;
use crate::{error::Error, util::Cached, CONFIG};
@@ -332,6 +331,42 @@ impl Icon {
}
}
+/// Collects the favicon `<link>` elements found in the DOM subtree rooted at `node`.
+///
+/// A `link` element contributes an icon when it carries a `rel` attribute that
+/// matches `ICON_REL_REGEX` and an `href` that `url.join` can resolve. Each hit
+/// is appended to `icons` with the priority returned by `get_icon_priority` for
+/// the resolved href and the optional `sizes` attribute. Links whose href cannot
+/// be joined onto `url` are skipped silently.
+///
+/// The tree is walked in document order (pre-order) with an explicit stack
+/// instead of recursion, so the nesting depth of the parsed document cannot
+/// exhaust the call stack.
+fn get_favicons_node(node: &std::rc::Rc<markup5ever_rcdom::Node>, icons: &mut Vec<Icon>, url: &Url) {
+    // Nodes still waiting to be visited; the next node in document order is on top.
+    let mut pending = vec![std::rc::Rc::clone(node)];
+
+    while let Some(current) = pending.pop() {
+        if let markup5ever_rcdom::NodeData::Element { name, attrs, .. } = &current.data {
+            if name.local.as_ref() == "link" {
+                let mut has_rel = false;
+                let mut href = None;
+                let mut sizes = None;
+
+                let attrs = attrs.borrow();
+                for attr in attrs.iter() {
+                    let attr_name: &str = attr.name.local.as_ref();
+                    let attr_value: &str = attr.value.as_ref();
+
+                    if attr_name == "rel" && ICON_REL_REGEX.is_match(attr_value) {
+                        has_rel = true;
+                    } else if attr_name == "href" {
+                        href = Some(attr_value);
+                    } else if attr_name == "sizes" {
+                        sizes = Some(attr_value);
+                    }
+                }
+
+                // Only icon rels that also provide a href are usable.
+                if let (true, Some(href)) = (has_rel, href) {
+                    // Skip hrefs that do not form a valid URL relative to the page.
+                    if let Ok(full_href) = url.join(href).map(|h| h.into_string()) {
+                        let priority = get_icon_priority(&full_href, sizes);
+                        icons.push(Icon::new(priority, full_href));
+                    }
+                }
+            }
+        }
+
+        // Push children in reverse so they are popped, and therefore visited,
+        // in their original order.
+        pending.extend(current.children.borrow().iter().rev().cloned());
+    }
+}
+
struct IconUrlResult {
iconlist: Vec<Icon>,
cookies: String,
@@ -431,30 +466,14 @@ fn get_icon_url(domain: &str) -> Result<IconUrlResult, Error> {
// 512KB should be more than enough for the HTML, though as we only really need
// the HTML header, it could potentially be reduced even further
- let limited_reader = content.take(512 * 1024);
-
- let soup = Soup::from_reader(limited_reader)?;
- // Search for and filter
- let favicons = soup
- .tag("link")
- .attr("rel", ICON_REL_REGEX.clone()) // Only use icon rels
- .attr_name("href") // Make sure there is a href
- .find_all();
-
- // Loop through all the found icons and determine it's priority
- for favicon in favicons {
- let sizes = favicon.get("sizes");
- let href = favicon.get("href").unwrap();
- // Skip invalid url's
- let full_href = match url.join(&href) {
- Ok(h) => h.into_string(),
- _ => continue,
- };
-
- let priority = get_icon_priority(&full_href, sizes);
-
- iconlist.push(Icon::new(priority, full_href))
- }
+ let mut limited_reader = content.take(512 * 1024);
+
+ use html5ever::tendril::TendrilSink;
+ let dom = html5ever::parse_document(markup5ever_rcdom::RcDom::default(), Default::default())
+ .from_utf8()
+ .read_from(&mut limited_reader)?;
+
+ get_favicons_node(&dom.document, &mut iconlist, &url);
} else {
// Add the default favicon.ico to the list with just the given domain
iconlist.push(Icon::new(35, format!("{}/favicon.ico", ssldomain)));
@@ -506,7 +525,7 @@ fn get_page_with_cookies(url: &str, cookie_str: &str, referer: &str) -> Result<R
/// priority1 = get_icon_priority("http://example.com/path/to/a/favicon.png", "32x32");
/// priority2 = get_icon_priority("https://example.com/path/to/a/favicon.ico", "");
/// ```
-fn get_icon_priority(href: &str, sizes: Option<String>) -> u8 {
+fn get_icon_priority(href: &str, sizes: Option<&str>) -> u8 {
// Check if there is a dimension set
let (width, height) = parse_sizes(sizes);
@@ -554,7 +573,7 @@ fn get_icon_priority(href: &str, sizes: Option<String>) -> u8 {
/// let (width, height) = parse_sizes("x128x128"); // (128, 128)
/// let (width, height) = parse_sizes("32"); // (0, 0)
/// ```
-fn parse_sizes(sizes: Option<String>) -> (u16, u16) {
+fn parse_sizes(sizes: Option<&str>) -> (u16, u16) {
let mut width: u16 = 0;
let mut height: u16 = 0;