add parser

This commit is contained in:
mindfreq 2026-05-02 12:17:23 +02:00
parent a41a556afa
commit 64b71087d8
11 changed files with 544 additions and 59 deletions

214
src-tauri/Cargo.lock generated
View file

@ -625,6 +625,19 @@ dependencies = [
"typenum",
]
[[package]]
name = "cssparser"
version = "0.34.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7c66d1cd8ed61bf80b38432613a7a2f09401ab8d0501110655f8b341484a3e3"
dependencies = [
"cssparser-macros",
"dtoa-short",
"itoa",
"phf 0.11.3",
"smallvec",
]
[[package]]
name = "cssparser"
version = "0.36.0"
@ -719,6 +732,17 @@ dependencies = [
"serde_core",
]
[[package]]
name = "derive_more"
version = "0.99.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6edb4b64a43d977b8e99788fe3a04d483834fba1215a7e02caa415b626497f7f"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.117",
]
[[package]]
name = "derive_more"
version = "2.1.1"
@ -824,12 +848,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "521e380c0c8afb8d9a1e83a1822ee03556fc3e3e7dbc1fd30be14e37f9cb3f89"
dependencies = [
"bit-set",
"cssparser",
"cssparser 0.36.0",
"foldhash 0.2.0",
"html5ever",
"html5ever 0.38.0",
"precomputed-hash",
"selectors",
"tendril",
"selectors 0.36.1",
"tendril 0.5.0",
]
[[package]]
@ -883,6 +907,12 @@ version = "1.0.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555"
[[package]]
name = "ego-tree"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2972feb8dffe7bc8c5463b1dacda1b0dfbed3710e50f977d965429692d74cd8"
[[package]]
name = "embed-resource"
version = "3.0.9"
@ -1105,6 +1135,16 @@ version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]]
name = "futf"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843"
dependencies = [
"mac",
"new_debug_unreachable",
]
[[package]]
name = "futures-channel"
version = "0.3.32"
@ -1189,6 +1229,15 @@ dependencies = [
"slab",
]
[[package]]
name = "fxhash"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
dependencies = [
"byteorder",
]
[[package]]
name = "gdk"
version = "0.18.2"
@ -1298,6 +1347,15 @@ dependencies = [
"version_check",
]
[[package]]
name = "getopts"
version = "0.2.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cfe4fbac503b8d1f88e6676011885f34b7174f46e59956bba534ba83abded4df"
dependencies = [
"unicode-width",
]
[[package]]
name = "getrandom"
version = "0.2.17"
@ -1550,6 +1608,18 @@ version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
[[package]]
name = "html5ever"
version = "0.29.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b7410cae13cbc75623c98ac4cbfd1f0bedddf3227afc24f370cf0f50a44a11c"
dependencies = [
"log",
"mac",
"markup5ever 0.14.1",
"match_token",
]
[[package]]
name = "html5ever"
version = "0.38.0"
@ -1557,7 +1627,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1054432bae2f14e0061e33d23402fbaa67a921d319d56adc6bcf887ddad1cbc2"
dependencies = [
"log",
"markup5ever",
"markup5ever 0.38.0",
]
[[package]]
@ -2131,6 +2201,26 @@ version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154"
[[package]]
name = "mac"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
[[package]]
name = "markup5ever"
version = "0.14.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7a7213d12e1864c0f002f52c2923d4556935a43dec5e71355c2760e0f6e7a18"
dependencies = [
"log",
"phf 0.11.3",
"phf_codegen 0.11.3",
"string_cache 0.8.9",
"string_cache_codegen 0.5.4",
"tendril 0.4.3",
]
[[package]]
name = "markup5ever"
version = "0.38.0"
@ -2138,10 +2228,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8983d30f2915feeaaab2d6babdd6bc7e9ed1a00b66b5e6d74df19aa9c0e91862"
dependencies = [
"log",
"tendril",
"tendril 0.5.0",
"web_atoms",
]
[[package]]
name = "match_token"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "88a9689d8d44bf9964484516275f5cd4c9b59457a6940c1d5d0ecbb94510a36b"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.117",
]
[[package]]
name = "mediatype"
version = "0.19.20"
@ -2609,6 +2710,16 @@ dependencies = [
"serde",
]
[[package]]
name = "phf_codegen"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a"
dependencies = [
"phf_generator 0.11.3",
"phf_shared 0.11.3",
]
[[package]]
name = "phf_codegen"
version = "0.13.1"
@ -3140,6 +3251,7 @@ dependencies = [
"feed-rs",
"once_cell",
"reqwest",
"scraper",
"serde",
"serde_json",
"specta",
@ -3147,7 +3259,9 @@ dependencies = [
"tauri-build",
"tauri-plugin-opener",
"tauri-specta",
"thiserror 2.0.18",
"tokio",
"uuid",
]
[[package]]
@ -3334,6 +3448,21 @@ version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "scraper"
version = "0.22.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cc3d051b884f40e309de6c149734eab57aa8cc1347992710dc80bcc1c2194c15"
dependencies = [
"cssparser 0.34.0",
"ego-tree",
"getopts",
"html5ever 0.29.1",
"precomputed-hash",
"selectors 0.26.0",
"tendril 0.4.3",
]
[[package]]
name = "security-framework"
version = "3.7.0"
@ -3357,6 +3486,25 @@ dependencies = [
"libc",
]
[[package]]
name = "selectors"
version = "0.26.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd568a4c9bb598e291a08244a5c1f5a8a6650bee243b5b0f8dbb3d9cc1d87fe8"
dependencies = [
"bitflags 2.11.1",
"cssparser 0.34.0",
"derive_more 0.99.20",
"fxhash",
"log",
"new_debug_unreachable",
"phf 0.11.3",
"phf_codegen 0.11.3",
"precomputed-hash",
"servo_arc",
"smallvec",
]
[[package]]
name = "selectors"
version = "0.36.1"
@ -3364,12 +3512,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c5d9c0c92a92d33f08817311cf3f2c29a3538a8240e94a6a3c622ce652d7e00c"
dependencies = [
"bitflags 2.11.1",
"cssparser",
"derive_more",
"cssparser 0.36.0",
"derive_more 2.1.1",
"log",
"new_debug_unreachable",
"phf 0.13.1",
"phf_codegen",
"phf_codegen 0.13.1",
"precomputed-hash",
"rustc-hash",
"servo_arc",
@ -3718,6 +3866,19 @@ version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596"
[[package]]
name = "string_cache"
version = "0.8.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf776ba3fa74f83bf4b63c3dcbbf82173db2632ed8452cb2d891d33f459de70f"
dependencies = [
"new_debug_unreachable",
"parking_lot",
"phf_shared 0.11.3",
"precomputed-hash",
"serde",
]
[[package]]
name = "string_cache"
version = "0.9.0"
@ -3730,6 +3891,18 @@ dependencies = [
"precomputed-hash",
]
[[package]]
name = "string_cache_codegen"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c711928715f1fe0fe509c53b43e993a9a557babc2d0a3567d0a3006f1ac931a0"
dependencies = [
"phf_generator 0.11.3",
"phf_shared 0.11.3",
"proc-macro2",
"quote",
]
[[package]]
name = "string_cache_codegen"
version = "0.6.1"
@ -4190,6 +4363,17 @@ dependencies = [
"windows-sys 0.61.2",
]
[[package]]
name = "tendril"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0"
dependencies = [
"futf",
"mac",
"utf-8",
]
[[package]]
name = "tendril"
version = "0.5.0"
@ -4647,6 +4831,12 @@ version = "1.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9629274872b2bfaf8d66f5f15725007f635594914870f65218920345aa11aa8c"
[[package]]
name = "unicode-width"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254"
[[package]]
name = "unicode-xid"
version = "0.2.6"
@ -4912,9 +5102,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7cff6eef815df1834fd250e3a2ff436044d82a9f1bc1980ca1dbdf07effc538"
dependencies = [
"phf 0.13.1",
"phf_codegen",
"string_cache",
"string_cache_codegen",
"phf_codegen 0.13.1",
"string_cache 0.9.0",
"string_cache_codegen 0.6.1",
]
[[package]]

View file

@ -28,3 +28,6 @@ once_cell = "1"
tokio = { version = "1", features = ["full"] }
specta = { version = "=2.0.0-rc.22" }
tauri-specta = { version = "=2.0.0-rc.21", features = ["derive", "typescript"] }
thiserror = "2.0.18"
uuid = { version = "1", features = ["v4"] }
scraper = "0.22"

98
src-tauri/exam.atom Normal file
View file

@ -0,0 +1,98 @@
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<!-- Required feed metadata -->
<title>Example Tech Blog</title>
<link href="https://example.com/blog/feed/atom" rel="self" />
<link href="https://example.com/blog" rel="alternate" />
<id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
<updated>2024-03-15T14:30:00Z</updated>
<!-- Optional but recommended -->
<subtitle>Latest posts about tech, programming, and web development</subtitle>
<author>
<name>Jane Developer</name>
<email>jane@example.com</email>
<uri>https://example.com/jane</uri>
</author>
<rights>© 2024 Example Tech Blog. All rights reserved.</rights>
<generator uri="https://example.com/cms" version="1.0">Example CMS</generator>
<icon>https://example.com/favicon.ico</icon>
<logo>https://example.com/logo.png</logo>
<!-- Entry 1 -->
<entry>
<title>Getting Started with Rust</title>
<link href="https://example.com/blog/rust-getting-started" />
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
<updated>2024-03-15T14:30:00Z</updated>
<published>2024-03-14T09:00:00Z</published>
<author>
<name>Jane Developer</name>
<email>jane@example.com</email>
</author>
<category term="rust" scheme="https://example.com/tags" label="Rust Programming" />
<category term="tutorial" scheme="https://example.com/tags" />
<content type="html">
&lt;h1&gt;Getting Started with Rust&lt;/h1&gt;
&lt;p&gt;Rust is a systems programming language that runs blazingly fast...&lt;/p&gt;
&lt;pre&gt;&lt;code&gt;fn main() {
println!("Hello, world!");
}
&lt;/code&gt;&lt;/pre&gt;
</content>
<summary>Learn the basics of Rust programming language in this beginner-friendly tutorial.</summary>
<rights>© 2024 Jane Developer</rights>
</entry>
<!-- Entry 2 -->
<entry>
<title>CSS Grid Layout Mastery</title>
<link href="https://example.com/blog/css-grid-mastery" />
<link href="https://example.com/blog/css-grid-mastery/pdf"
rel="alternate"
type="application/pdf"
title="PDF Version" />
<id>urn:uuid:d8f3e7a2-9b4c-4f8e-9a3c-1e4f7b9d2c5a</id>
<updated>2024-03-10T11:15:00Z</updated>
<published>2024-03-08T16:20:00Z</published>
<author>
<name>John Smith</name>
<uri>https://example.com/john-smith</uri>
</author>
<contributor>
<name>Maria Garcia</name>
<email>maria@example.com</email>
</contributor>
<content type="xhtml">
<div xmlns="http://www.w3.org/1999/xhtml">
<h1>CSS Grid Layout Mastery</h1>
<p>CSS Grid is a powerful layout system...</p>
</div>
</content>
</entry>
<!-- Entry 3 - Minimal example -->
<entry>
<title>New Version Released: v2.0.0</title>
<link href="https://example.com/blog/version-2-release" />
<id>tag:example.com,2024-03-01:/blog/version-2-release</id>
<updated>2024-03-01T08:00:00Z</updated>
<published>2024-03-01T08:00:00Z</published>
<author>
<name>Release Team</name>
</author>
<content type="text">
Version 2.0.0 is now available! Includes performance improvements and bug fixes.
</content>
</entry>
</feed>

1
src-tauri/feeds.json Normal file
View file

@ -0,0 +1 @@
[]

View file

@ -1,4 +1,3 @@
use once_cell::sync::Lazy;
pub static CLIENT: Lazy<reqwest::Client> = Lazy::new(|| {

View file

@ -0,0 +1 @@
pub mod feeds;

View file

@ -1,7 +1,28 @@
use crate::Error;
use crate::client::CLIENT;
use crate::parser::{FeedItemDetail, FeedItem};
async fn get_feed() {
let rss = CLIENT.get("https://blog.rust-lang.org/feed").send().await.unwrap();
println!("{}", res.body);
/// Downloads the Rust blog feed and turns every entry into a [`FeedItem`] summary.
///
/// # Errors
///
/// Returns `Error::HttpRequest` when the request cannot be sent, the server
/// answers with a non-success status, or the body cannot be read, and
/// forwards any error produced by `FeedItem::from_feed`.
pub async fn get_summaries() -> Result<Vec<FeedItem>, Error> {
    let xml = CLIENT
        .get("https://blog.rust-lang.org/feed")
        .send()
        .await?
        // Reject 4xx/5xx responses here instead of feeding an error page to the parser.
        .error_for_status()?
        .text()
        .await?;

    FeedItem::from_feed(&xml)
}
pub async fn get_entry(taget_url: &str) -> Result<FeedItemDetail, Error> {
let xml = CLIENT
.get("https://blog.rust-lang.org/feed")
.send()
.await?
.text()
.await?;
let parser_xml = FeedItemDetail::from_feed(&xml, taget_url);
println!("{:?}", parser_xml);
parser_xml
}

63
src-tauri/src/config.rs Normal file
View file

@ -0,0 +1,63 @@
use crate::Error;
use serde::{Deserialize, Serialize};
use std::fs;
/// One subscribed feed as it is stored in `feeds.json`.
#[derive(Serialize, Deserialize, Debug)]
struct Feed {
/// Identifier assigned when the feed is added (a v4 UUID rendered as a string).
id: String,
/// Title of the feed.
title: String,
/// URL of the feed; `get` and `remove` look feeds up by this value.
url: String,
/// Icon of the feed. NOTE(review): presumably a URL or a path — confirm with the caller.
icon: String,
}
impl Feed {
fn get(url: &str) -> Result<Feed, Error> {
let feeds = Self::get_content()?;
feeds
.into_iter()
.find(|feed| feed.url == url)
.ok_or_else(|| Error::MissingField(format!("Feed with url '{}' not found", url)))
}
fn get_all() -> Result<Vec<Feed>, Error> {
Self::get_content()
}
fn add(title: String, url: String, icon: String) -> Result<Feed, Error> {
let mut feeds = Self::get_content()?;
let new_feed = Self {
id: uuid::Uuid::new_v4().to_string(),
title,
url,
icon,
};
feeds.push(new_feed);
let json = serde_json::to_string_pretty(&feeds)?;
fs::write("feeds.json", json)?;
Ok(feeds.pop().unwrap())
}
fn remove(url: &str) -> Result<Feed, Error> {
let mut feeds = Self::get_content()?;
let index = feeds.iter()
.position(|feed| feed.url == url)
.ok_or_else(|| Error::MissingField(format!("Feed with url '{}' not found", url)))?;
let removed = feeds.remove(index);
let json = serde_json::to_string_pretty(&feeds)?;
fs::write("feeds.json", json)?;
Ok(removed)
}
fn get_content() -> Result<Vec<Feed>, Error> {
let path = std::path::Path::new("feeds.json");
if !path.exists() {
fs::write(path, "[]")?;
}
let data = fs::read_to_string(&path)?;
Ok(serde_json::from_str(&data)?)
}
}

View file

@ -1,3 +1,16 @@
use feed_rs::parser::ParseFeedError;
pub mod client;
pub mod commands;
pub mod config;
pub mod parser;
use client::CLIENT;
/// Placeholder async entry point used for manual experiments; it currently
/// performs no work.
///
/// # Errors
///
/// Never fails at the moment; the `Result` return type is kept so fallible
/// calls can be added with `?`.
pub async fn test_thing() -> Result<(), Error> {
    // An empty body evaluates to `()`, which does not match the declared
    // `Result<(), Error>`; return success explicitly.
    Ok(())
}
// Learn more about Tauri commands at https://tauri.app/develop/calling-rust/
#[tauri::command]
fn greet(name: &str) -> String {
@ -12,3 +25,32 @@ pub fn run() {
.run(tauri::generate_context!())
.expect("error while running tauri application");
}
// =================== Error Handle ===================
/// Crate-wide error type; each variant's `#[error]` attribute defines its display message.
#[derive(Debug, thiserror::Error)]
pub enum Error {
/// I/O failure; `transparent` forwards the display message of the wrapped `std::io::Error`.
#[error(transparent)]
Io(#[from] std::io::Error),
/// Error reported by the `feed_rs` parser.
#[error("Failed to parse feed: {0}")]
ParseFeed(#[from] ParseFeedError),
/// A required value is absent; the payload describes what was missing.
#[error("Missing required field: {0}")]
MissingField(String),
/// Error reported by `reqwest`.
#[error("HTTP request failed: {0}")]
HttpRequest(#[from] reqwest::Error),
/// Error reported by `serde_json`; any `serde_json::Error` converts into this variant.
#[error("Failed to parse JSON: {0}")]
Json(#[from] serde_json::Error),
}
// we must manually implement serde::Serialize
impl serde::Serialize for Error {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::ser::Serializer,
{
serializer.serialize_str(self.to_string().as_ref())
}
}

View file

@ -1,6 +1,8 @@
// Prevents additional console window on Windows in release, DO NOT REMOVE!!
#![cfg_attr(not(debug_assertions), windows_subsystem = "windows")]
fn main() {
rufeed_lib::run()
/// Runs the experimental `test_thing` entry point on a Tokio runtime.
#[tokio::main]
async fn main() {
    // Report a failure instead of silently discarding the returned `Result`.
    if let Err(err) = rufeed_lib::test_thing().await {
        eprintln!("error: {}", err);
        std::process::exit(1);
    }
    // NOTE(review): the regular application start-up below is currently disabled.
    // rufeed_lib::run()
}

View file

@ -1,55 +1,120 @@
use feed_rs::parser;
use feed_rs::model::Person;
use feed_rs::parser;
use scraper::{Html, Selector};
use crate::Error;
pub struct AtomSummary {
title: String,
published: String,
url: String,
/// Summary of a single feed entry, built by `FeedItem::from_feed`.
#[derive(Debug)]
pub struct FeedItem {
/// Title of the entry.
pub title: String,
/// Publication date rendered as a string; the entry's update date is used when it has no publication date.
pub published: String,
/// Link to the entry: its `rel="alternate"` link if there is one, otherwise its first link.
pub url: String,
}
pub struct AtomFeed { // For Home page
entries: Vec<AtomSummary>
impl FeedItem {
pub fn from_feed(xml: &str) -> Result<Vec<Self>, Error> {
let feed = parser::parse(xml.as_bytes())?;
let mut items: Vec<Self> = Vec::new();
for entry in feed.entries {
let item = Self {
title: entry
.title
.map(|t| t.content)
.ok_or(Error::MissingField("title".into()))?,
published: entry
.published
.or(entry.updated)
.map(|d| d.to_string())
.ok_or(Error::MissingField("published".into()))?,
url: entry
.links
.iter()
.find(|l| l.rel.as_deref() == Some("alternate"))
.or_else(|| entry.links.first())
.map(|l| l.href.clone())
.ok_or(Error::MissingField("url".into()))?,
};
items.push(item);
}
Ok(items)
}
}
#[derive(Debug)]
pub struct AtomEntry {
id: String,
title: String,
link: String,
published: String,
updated: String,
summary: String,
content: String,
authors: Vec<Person>
/// Full details of a single feed entry, built by `FeedItemDetail::from_feed`.
pub struct FeedItemDetail {
/// Identifier of the entry as reported by the feed parser.
pub id: String,
/// Title of the entry.
pub title: String,
/// Link to the entry: its `rel="alternate"` link if there is one, otherwise its first link.
pub url: String,
/// Publication date rendered as a string; the update date is used when no publication date exists.
pub published: String,
/// Update date rendered as a string.
pub updated: String,
/// Summary text of the entry, if the feed provides one.
pub summary: Option<String>,
/// Body of the entry's content.
pub content: String,
/// Authors of the entry.
pub authors: Vec<Person>,
}
impl AtomEntry {
pub fn from_feed() -> Vec<Self>{
let xml = std::fs::read_to_string("exam.atom").unwrap();
let feed = parser::parse(xml.as_bytes()).unwrap();
let mut entries = Vec::new();
impl FeedItemDetail {
/// Parses `xml` as a feed and returns the details of the first entry whose
/// link equals `target_url`.
///
/// # Errors
///
/// Returns `Error::ParseFeed` when the parser rejects `xml`, and
/// `Error::MissingField` when the matching entry lacks a title, a
/// publication/update date, an update date or a content body, or when no
/// entry matches `target_url`.
pub fn from_feed(xml: &str, target_url: &str) -> Result<Self, Error> {
let feed = parser::parse(xml.as_bytes())?;
for entry in feed.entries {
let atom_entry = Self {
id: entry.id,
title: entry.title.map(|t| t.content).unwrap_or_default(),
link: entry.links.iter()
.find(|l| l.rel.as_deref() == Some("alternate"))
.or_else(|| entry.links.first())
.map(|l| l.href.clone())
.unwrap_or_default(),
published: entry.published.map(|d| d.to_string()).unwrap_or_default(),
updated: entry.updated.map(|d| d.to_string()).unwrap_or_default(),
summary: entry.summary.map(|t| t.content).unwrap_or_default(),
content: entry.content.map(|c| c.body).flatten().unwrap_or_default(),
authors: entry.authors,
};
entries.push(atom_entry);
// Link used for matching: the `rel="alternate"` link if present, otherwise
// the first link; an entry without links yields an empty string.
let url = entry
.links
.iter()
.find(|l| l.rel.as_deref() == Some("alternate"))
.or_else(|| entry.links.first())
.map(|l| l.href.clone())
.unwrap_or_default();
// The first entry whose link matches is returned; later entries are not inspected.
if url == target_url {
return Ok(Self {
id: entry.id,
title: entry
.title
.map(|t| t.content)
.ok_or(Error::MissingField("title".into()))?,
url,
published: entry
.published
.or(entry.updated)
.map(|d| d.to_string())
.ok_or(Error::MissingField("published".into()))?,
updated: entry
.updated
.map(|d| d.to_string())
.ok_or(Error::MissingField("updated".into()))?,
summary: entry.summary.map(|t| t.content),
content: entry
.content
.and_then(|c| c.body)
.ok_or(Error::MissingField("content".into()))?,
authors: entry.authors,
});
}
}
entries
// Reached only when no entry's link equals `target_url`.
Err(Error::MissingField(
format!("no entry found for url: {}", target_url)
))
}
}
/// Collects the `href` of every `<link>` element in `html` that advertises a feed.
///
/// A link qualifies when its `rel` is `alternate` and its `type` is the RSS,
/// Atom or JSON Feed media type. Qualifying links without an `href` are
/// skipped. Values are returned exactly as written in the document, in the
/// order the selector yields them.
pub fn extract_feed_urls(html: &str) -> Vec<String> {
    let document = Html::parse_document(html);
    // The selector is a fixed literal, so a parse failure here would be a programming error.
    let feed_links = Selector::parse(
        r#"link[rel="alternate"][type="application/rss+xml"],
link[rel="alternate"][type="application/atom+xml"],
link[rel="alternate"][type="application/feed+json"]"#,
    )
    .unwrap();

    let mut urls = Vec::new();
    for element in document.select(&feed_links) {
        if let Some(href) = element.value().attr("href") {
            urls.push(href.to_string());
        }
    }
    urls
}