From 64b71087d8ac527be7354c322a5e1f0e5eddaa6a Mon Sep 17 00:00:00 2001 From: mindfreq <144544047+mindfreq@users.noreply.github.com> Date: Sat, 2 May 2026 12:17:23 +0200 Subject: [PATCH] add parser --- src-tauri/Cargo.lock | 214 ++++++++++++++++++++++++++++++-- src-tauri/Cargo.toml | 3 + src-tauri/exam.atom | 98 +++++++++++++++ src-tauri/feeds.json | 1 + src-tauri/src/client.rs | 3 +- src-tauri/src/commands.rs | 1 + src-tauri/src/commands/feeds.rs | 29 ++++- src-tauri/src/config.rs | 63 ++++++++++ src-tauri/src/lib.rs | 42 +++++++ src-tauri/src/main.rs | 6 +- src-tauri/src/parser.rs | 143 +++++++++++++++------ 11 files changed, 544 insertions(+), 59 deletions(-) create mode 100644 src-tauri/exam.atom create mode 100644 src-tauri/feeds.json create mode 100644 src-tauri/src/commands.rs create mode 100644 src-tauri/src/config.rs diff --git a/src-tauri/Cargo.lock b/src-tauri/Cargo.lock index 32348b6..feb3055 100644 --- a/src-tauri/Cargo.lock +++ b/src-tauri/Cargo.lock @@ -625,6 +625,19 @@ dependencies = [ "typenum", ] +[[package]] +name = "cssparser" +version = "0.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c66d1cd8ed61bf80b38432613a7a2f09401ab8d0501110655f8b341484a3e3" +dependencies = [ + "cssparser-macros", + "dtoa-short", + "itoa", + "phf 0.11.3", + "smallvec", +] + [[package]] name = "cssparser" version = "0.36.0" @@ -719,6 +732,17 @@ dependencies = [ "serde_core", ] +[[package]] +name = "derive_more" +version = "0.99.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6edb4b64a43d977b8e99788fe3a04d483834fba1215a7e02caa415b626497f7f" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "derive_more" version = "2.1.1" @@ -824,12 +848,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "521e380c0c8afb8d9a1e83a1822ee03556fc3e3e7dbc1fd30be14e37f9cb3f89" dependencies = [ "bit-set", - "cssparser", + "cssparser 0.36.0", "foldhash 0.2.0", - "html5ever", + "html5ever 0.38.0", "precomputed-hash", - "selectors", - "tendril", + "selectors 0.36.1", + "tendril 0.5.0", ] [[package]] @@ -883,6 +907,12 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" +[[package]] +name = "ego-tree" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2972feb8dffe7bc8c5463b1dacda1b0dfbed3710e50f977d965429692d74cd8" + [[package]] name = "embed-resource" version = "3.0.9" @@ -1105,6 +1135,16 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" +[[package]] +name = "futf" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843" +dependencies = [ + "mac", + "new_debug_unreachable", +] + [[package]] name = "futures-channel" version = "0.3.32" @@ -1189,6 +1229,15 @@ dependencies = [ "slab", ] +[[package]] +name = "fxhash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder", +] + [[package]] name = "gdk" version = "0.18.2" @@ -1298,6 +1347,15 @@ dependencies = [ "version_check", ] +[[package]] +name = "getopts" +version = "0.2.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe4fbac503b8d1f88e6676011885f34b7174f46e59956bba534ba83abded4df" +dependencies = [ + "unicode-width", +] + [[package]] name = "getrandom" version = "0.2.17" @@ -1550,6 +1608,18 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" +[[package]] +name = "html5ever" +version = "0.29.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b7410cae13cbc75623c98ac4cbfd1f0bedddf3227afc24f370cf0f50a44a11c" +dependencies = [ + "log", + "mac", + "markup5ever 0.14.1", + "match_token", +] + [[package]] name = "html5ever" version = "0.38.0" @@ -1557,7 +1627,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1054432bae2f14e0061e33d23402fbaa67a921d319d56adc6bcf887ddad1cbc2" dependencies = [ "log", - "markup5ever", + "markup5ever 0.38.0", ] [[package]] @@ -2131,6 +2201,26 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" +[[package]] +name = "mac" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" + +[[package]] +name = "markup5ever" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7a7213d12e1864c0f002f52c2923d4556935a43dec5e71355c2760e0f6e7a18" +dependencies = [ + "log", + "phf 0.11.3", + "phf_codegen 0.11.3", + "string_cache 0.8.9", + "string_cache_codegen 0.5.4", + "tendril 0.4.3", +] + [[package]] name = "markup5ever" version = "0.38.0" @@ -2138,10 +2228,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8983d30f2915feeaaab2d6babdd6bc7e9ed1a00b66b5e6d74df19aa9c0e91862" dependencies = [ "log", - "tendril", + "tendril 0.5.0", "web_atoms", ] +[[package]] +name = "match_token" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88a9689d8d44bf9964484516275f5cd4c9b59457a6940c1d5d0ecbb94510a36b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "mediatype" version = "0.19.20" @@ -2609,6 +2710,16 @@ dependencies = [ "serde", ] +[[package]] +name = "phf_codegen" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" +dependencies = [ + "phf_generator 0.11.3", + "phf_shared 0.11.3", +] + [[package]] name = "phf_codegen" version = "0.13.1" @@ -3140,6 +3251,7 @@ dependencies = [ "feed-rs", "once_cell", "reqwest", + "scraper", "serde", "serde_json", "specta", @@ -3147,7 +3259,9 @@ dependencies = [ "tauri-build", "tauri-plugin-opener", "tauri-specta", + "thiserror 2.0.18", "tokio", + "uuid", ] [[package]] @@ -3334,6 +3448,21 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "scraper" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc3d051b884f40e309de6c149734eab57aa8cc1347992710dc80bcc1c2194c15" +dependencies = [ + "cssparser 0.34.0", + "ego-tree", + "getopts", + "html5ever 0.29.1", + "precomputed-hash", + "selectors 0.26.0", + "tendril 0.4.3", +] + [[package]] name = "security-framework" version = "3.7.0" @@ -3357,6 +3486,25 @@ dependencies = [ "libc", ] +[[package]] +name = "selectors" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd568a4c9bb598e291a08244a5c1f5a8a6650bee243b5b0f8dbb3d9cc1d87fe8" +dependencies = [ + "bitflags 2.11.1", + "cssparser 0.34.0", + "derive_more 0.99.20", + "fxhash", + "log", + "new_debug_unreachable", + "phf 0.11.3", + "phf_codegen 0.11.3", + "precomputed-hash", + "servo_arc", + "smallvec", +] + [[package]] name = "selectors" version = "0.36.1" @@ -3364,12 +3512,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c5d9c0c92a92d33f08817311cf3f2c29a3538a8240e94a6a3c622ce652d7e00c" dependencies = [ "bitflags 2.11.1", - "cssparser", - "derive_more", + "cssparser 0.36.0", + "derive_more 2.1.1", "log", "new_debug_unreachable", "phf 0.13.1", - "phf_codegen", + "phf_codegen 0.13.1", "precomputed-hash", "rustc-hash", "servo_arc", @@ -3718,6 +3866,19 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" +[[package]] +name = "string_cache" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf776ba3fa74f83bf4b63c3dcbbf82173db2632ed8452cb2d891d33f459de70f" +dependencies = [ + "new_debug_unreachable", + "parking_lot", + "phf_shared 0.11.3", + "precomputed-hash", + "serde", +] + [[package]] name = "string_cache" version = "0.9.0" @@ -3730,6 +3891,18 @@ dependencies = [ "precomputed-hash", ] +[[package]] +name = "string_cache_codegen" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c711928715f1fe0fe509c53b43e993a9a557babc2d0a3567d0a3006f1ac931a0" +dependencies = [ + "phf_generator 0.11.3", + "phf_shared 0.11.3", + "proc-macro2", + "quote", +] + [[package]] name = "string_cache_codegen" version = "0.6.1" @@ -4190,6 +4363,17 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "tendril" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0" +dependencies = [ + "futf", + "mac", + "utf-8", +] + [[package]] name = "tendril" version = "0.5.0" @@ -4647,6 +4831,12 @@ version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9629274872b2bfaf8d66f5f15725007f635594914870f65218920345aa11aa8c" +[[package]] +name = "unicode-width" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" + [[package]] name = "unicode-xid" version = "0.2.6" @@ -4912,9 +5102,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7cff6eef815df1834fd250e3a2ff436044d82a9f1bc1980ca1dbdf07effc538" dependencies = [ "phf 0.13.1", - "phf_codegen", - "string_cache", - "string_cache_codegen", + "phf_codegen 0.13.1", + "string_cache 0.9.0", + "string_cache_codegen 0.6.1", ] [[package]] diff --git a/src-tauri/Cargo.toml b/src-tauri/Cargo.toml index 7f8c5a6..d6cafb8 100644 --- a/src-tauri/Cargo.toml +++ b/src-tauri/Cargo.toml @@ -28,3 +28,6 @@ once_cell = "1" tokio = { version = "1", features = ["full"] } specta = { version = "=2.0.0-rc.22" } tauri-specta = { version = "=2.0.0-rc.21", features = ["derive", "typescript"] } +thiserror = "2.0.18" +uuid = { version = "1", features = ["v4"] } +scraper = "0.22" diff --git a/src-tauri/exam.atom b/src-tauri/exam.atom new file mode 100644 index 0000000..ce7ac27 --- /dev/null +++ b/src-tauri/exam.atom @@ -0,0 +1,98 @@ + + + + + Example Tech Blog + + + urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6 + 2024-03-15T14:30:00Z + + + Latest posts about tech, programming, and web development + + Jane Developer + jane@example.com + https://example.com/jane + + © 2024 Example Tech Blog. All rights reserved. + Example CMS + https://example.com/favicon.ico + https://example.com/logo.png + + + + Getting Started with Rust + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2024-03-15T14:30:00Z + 2024-03-14T09:00:00Z + + + Jane Developer + jane@example.com + + + + + + + <h1>Getting Started with Rust</h1> + <p>Rust is a systems programming language that runs blazingly fast...</p> + <pre><code>fn main() { + println!("Hello, world!"); +} +</code></pre> + + + Learn the basics of Rust programming language in this beginner-friendly tutorial. + © 2024 Jane Developer + + + + + CSS Grid Layout Mastery + + + + urn:uuid:d8f3e7a2-9b4c-4f8e-9a3c-1e4f7b9d2c5a + 2024-03-10T11:15:00Z + 2024-03-08T16:20:00Z + + + John Smith + https://example.com/john-smith + + + + Maria Garcia + maria@example.com + + + + <div xmlns="http://www.w3.org/1999/xhtml"> + <h1>CSS Grid Layout Mastery</h1> + <p>CSS Grid is a powerful layout system...</p> + </div> + + + + + + New Version Released: v2.0.0 + + tag:example.com,2024-03-01:/blog/version-2-release + 2024-03-01T08:00:00Z + 2024-03-01T08:00:00Z + + Release Team + + + Version 2.0.0 is now available! Includes performance improvements and bug fixes. + + + + \ No newline at end of file diff --git a/src-tauri/feeds.json b/src-tauri/feeds.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/src-tauri/feeds.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/src-tauri/src/client.rs b/src-tauri/src/client.rs index 2ceb415..ffed60e 100644 --- a/src-tauri/src/client.rs +++ b/src-tauri/src/client.rs @@ -1,4 +1,3 @@ - use once_cell::sync::Lazy; pub static CLIENT: Lazy = Lazy::new(|| { @@ -6,4 +5,4 @@ pub static CLIENT: Lazy = Lazy::new(|| { .timeout(std::time::Duration::from_secs(10)) .build() .expect("Failed to build client") -}); \ No newline at end of file +}); diff --git a/src-tauri/src/commands.rs b/src-tauri/src/commands.rs new file mode 100644 index 0000000..abf705f --- /dev/null +++ b/src-tauri/src/commands.rs @@ -0,0 +1 @@ +pub mod feeds; diff --git a/src-tauri/src/commands/feeds.rs b/src-tauri/src/commands/feeds.rs index 7ba63de..f90ced6 100644 --- a/src-tauri/src/commands/feeds.rs +++ b/src-tauri/src/commands/feeds.rs @@ -1,7 +1,28 @@ +use crate::Error; + use crate::client::CLIENT; +use crate::parser::{FeedItemDetail, FeedItem}; +pub async fn get_summaries() -> Result, Error> { + let xml = CLIENT + .get("https://blog.rust-lang.org/feed") + .send() + .await? + .text() + .await?; + let parser_xml = FeedItem::from_feed(&xml); + println!("{:?}", parser_xml); + parser_xml +} -async fn get_feed() { - let rss = CLIENT.get("https://blog.rust-lang.org/feed").send().await.unwrap(); - println!("{}", res.body); -} \ No newline at end of file +pub async fn get_entry(taget_url: &str) -> Result { + let xml = CLIENT + .get("https://blog.rust-lang.org/feed") + .send() + .await? + .text() + .await?; + let parser_xml = FeedItemDetail::from_feed(&xml, taget_url); + println!("{:?}", parser_xml); + parser_xml +} diff --git a/src-tauri/src/config.rs b/src-tauri/src/config.rs new file mode 100644 index 0000000..5e081f3 --- /dev/null +++ b/src-tauri/src/config.rs @@ -0,0 +1,63 @@ +use crate::Error; +use serde::{Deserialize, Serialize}; +use std::fs; + +#[derive(Serialize, Deserialize, Debug)] +struct Feed { + id: String, + title: String, + url: String, + icon: String, +} + +impl Feed { + fn get(url: &str) -> Result { + let feeds = Self::get_content()?; + feeds + .into_iter() + .find(|feed| feed.url == url) + .ok_or_else(|| Error::MissingField(format!("Feed with url '{}' not found", url))) + } + fn get_all() -> Result, Error> { + Self::get_content() + } + + fn add(title: String, url: String, icon: String) -> Result { + let mut feeds = Self::get_content()?; + let new_feed = Self { + id: uuid::Uuid::new_v4().to_string(), + title, + url, + icon, + }; + feeds.push(new_feed); + let json = serde_json::to_string_pretty(&feeds)?; + fs::write("feeds.json", json)?; + + Ok(feeds.pop().unwrap()) + } + + fn remove(url: &str) -> Result { + let mut feeds = Self::get_content()?; + let index = feeds.iter() + .position(|feed| feed.url == url) + .ok_or_else(|| Error::MissingField(format!("Feed with url '{}' not found", url)))?; + + let removed = feeds.remove(index); + let json = serde_json::to_string_pretty(&feeds)?; + fs::write("feeds.json", json)?; + + Ok(removed) + + } + + fn get_content() -> Result, Error> { + let path = std::path::Path::new("feeds.json"); + if !path.exists() { + fs::write(path, "[]")?; + } + + let data = fs::read_to_string(&path)?; + Ok(serde_json::from_str(&data)?) + } +} diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index 4a277ef..c3096ac 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -1,3 +1,16 @@ +use feed_rs::parser::ParseFeedError; + +pub mod client; +pub mod commands; +pub mod config; +pub mod parser; + +use client::CLIENT; + +pub async fn test_thing() -> Result<(), Error> { + +} + // Learn more about Tauri commands at https://tauri.app/develop/calling-rust/ #[tauri::command] fn greet(name: &str) -> String { @@ -12,3 +25,32 @@ pub fn run() { .run(tauri::generate_context!()) .expect("error while running tauri application"); } + +// =================== Error Handle =================== +#[derive(Debug, thiserror::Error)] +pub enum Error { + #[error(transparent)] + Io(#[from] std::io::Error), + + #[error("Failed to parse feed: {0}")] + ParseFeed(#[from] ParseFeedError), + + #[error("Missing required field: {0}")] + MissingField(String), + + #[error("HTTP request failed: {0}")] + HttpRequest(#[from] reqwest::Error), + + #[error("Failed to parse JSON: {0}")] + Json(#[from] serde_json::Error), +} + +// we must manually implement serde::Serialize +impl serde::Serialize for Error { + fn serialize(&self, serializer: S) -> Result + where + S: serde::ser::Serializer, + { + serializer.serialize_str(self.to_string().as_ref()) + } +} diff --git a/src-tauri/src/main.rs b/src-tauri/src/main.rs index 3e7d814..17a77c5 100644 --- a/src-tauri/src/main.rs +++ b/src-tauri/src/main.rs @@ -1,6 +1,8 @@ // Prevents additional console window on Windows in release, DO NOT REMOVE!! #![cfg_attr(not(debug_assertions), windows_subsystem = "windows")] -fn main() { - rufeed_lib::run() +#[tokio::main] +async fn main() { + rufeed_lib::test_thing().await; + // rufeed_lib::run() } diff --git a/src-tauri/src/parser.rs b/src-tauri/src/parser.rs index d7f8ec9..065748e 100644 --- a/src-tauri/src/parser.rs +++ b/src-tauri/src/parser.rs @@ -1,55 +1,120 @@ - -use feed_rs::parser; use feed_rs::model::Person; +use feed_rs::parser; +use scraper::{Html, Selector}; + +use crate::Error; -pub struct AtomSummary { - title: String, - published: String, - url: String, +#[derive(Debug)] +pub struct FeedItem { + pub title: String, + pub published: String, + pub url: String, } -pub struct AtomFeed { // For Home page - entries: Vec +impl FeedItem { + pub fn from_feed(xml: &str) -> Result, Error> { + let feed = parser::parse(xml.as_bytes())?; + let mut items: Vec = Vec::new(); + + for entry in feed.entries { + let item = Self { + title: entry + .title + .map(|t| t.content) + .ok_or(Error::MissingField("title".into()))?, + published: entry + .published + .or(entry.updated) + .map(|d| d.to_string()) + .ok_or(Error::MissingField("published".into()))?, + url: entry + .links + .iter() + .find(|l| l.rel.as_deref() == Some("alternate")) + .or_else(|| entry.links.first()) + .map(|l| l.href.clone()) + .ok_or(Error::MissingField("url".into()))?, + }; + items.push(item); + } + Ok(items) + } } #[derive(Debug)] -pub struct AtomEntry { - id: String, - title: String, - link: String, - published: String, - updated: String, - summary: String, - content: String, - authors: Vec +pub struct FeedItemDetail { + pub id: String, + pub title: String, + pub url: String, + pub published: String, + pub updated: String, + pub summary: Option, + pub content: String, + pub authors: Vec, } -impl AtomEntry { - pub fn from_feed() -> Vec{ - let xml = std::fs::read_to_string("exam.atom").unwrap(); - let feed = parser::parse(xml.as_bytes()).unwrap(); - - let mut entries = Vec::new(); +impl FeedItemDetail { + pub fn from_feed(xml: &str, target_url: &str) -> Result { + let feed = parser::parse(xml.as_bytes())?; for entry in feed.entries { - let atom_entry = Self { - id: entry.id, - title: entry.title.map(|t| t.content).unwrap_or_default(), - link: entry.links.iter() - .find(|l| l.rel.as_deref() == Some("alternate")) - .or_else(|| entry.links.first()) - .map(|l| l.href.clone()) - .unwrap_or_default(), - published: entry.published.map(|d| d.to_string()).unwrap_or_default(), - updated: entry.updated.map(|d| d.to_string()).unwrap_or_default(), - summary: entry.summary.map(|t| t.content).unwrap_or_default(), - content: entry.content.map(|c| c.body).flatten().unwrap_or_default(), - authors: entry.authors, - }; - entries.push(atom_entry); + let url = entry + .links + .iter() + .find(|l| l.rel.as_deref() == Some("alternate")) + .or_else(|| entry.links.first()) + .map(|l| l.href.clone()) + .unwrap_or_default(); + + if url == target_url { + return Ok(Self { + id: entry.id, + title: entry + .title + .map(|t| t.content) + .ok_or(Error::MissingField("title".into()))?, + url, + published: entry + .published + .or(entry.updated) + .map(|d| d.to_string()) + .ok_or(Error::MissingField("published".into()))?, + updated: entry + .updated + .map(|d| d.to_string()) + .ok_or(Error::MissingField("updated".into()))?, + summary: entry.summary.map(|t| t.content), + content: entry + .content + .and_then(|c| c.body) + .ok_or(Error::MissingField("content".into()))?, + authors: entry.authors, + }); + } } - entries + + Err(Error::MissingField( + format!("no entry found for url: {}", target_url) + )) } +} + + + +pub fn extract_feed_urls(html: &str) -> Vec { + let document = Html::parse_document(html); + + let selector = Selector::parse( + r#"link[rel="alternate"][type="application/rss+xml"], + link[rel="alternate"][type="application/atom+xml"], + link[rel="alternate"][type="application/feed+json"]"# + ).unwrap(); + + document + .select(&selector) + .filter_map(|el| el.value().attr("href")) + .map(|href| href.to_string()) + .collect() } \ No newline at end of file