Skip to content

Commit

Permalink
support OPML importing
Browse files Browse the repository at this point in the history
  • Loading branch information
fanzeyi committed Sep 8, 2020
1 parent 3ccbbb3 commit 7b06b88
Show file tree
Hide file tree
Showing 11 changed files with 509 additions and 24 deletions.
4 changes: 4 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ chrono = { version = "0.4.13" }
md-5 = "0.9.1"
feed-rs = "0.4.0"
serde_urlencoded = "0.6.1"
quick-xml = "0.18.1"
log = "0.4.11"
femme = "2.1.0"
url = "2.1.1"

[dev-dependencies]
rand = "0.7"
Expand Down
10 changes: 10 additions & 0 deletions fixtures/flat.opml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<opml version="2.0">
<head>
<title>Test OPML: Flat</title>
</head>
<body>
<outline xmlUrl="https://example.com/feed1" title="Feed 1 Title" type="rss" text="Feed 1 Text" htmlUrl="http://example.com/site1" />
<outline xmlUrl="https://example.com/feed2" title="Feed 2 Title" type="rss" text="Feed 2 Text" htmlUrl="http://example.com/site2" />
</body>
</opml>
11 changes: 11 additions & 0 deletions fixtures/missing.opml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<opml version="2.0">
<head>
<title>Test OPML: Missing Metadata</title>
</head>
<body>
<outline text="Group 1 Text" title="Group 1 Title">
<outline xmlUrl="__REPLACE__/rust.xml" />
</outline>
</body>
</opml>
16 changes: 16 additions & 0 deletions fixtures/normal.opml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8"?>
<opml version="2.0">
<head>
<title>Test OPML: Normal</title>
</head>
<body>
<outline text="Group 1 Text" title="Group 1 Title">
<outline xmlUrl="https://example.com/feed1" title="Feed 1 Title" type="rss" text="Feed 1 Text" htmlUrl="http://example.com/site1" />
<outline xmlUrl="https://example.com/feed2" type="rss" text="Feed 2 Text" htmlUrl="http://example.com/site2" />
</outline>
<outline text="Group 2 Text">
<outline xmlUrl="https://example.com/feed3" title="Feed 3 Title" type="rss" htmlUrl="http://example.com/site3" />
<outline xmlUrl="https://example.com/feed4" text="Feed 4 Text" htmlUrl="http://example.com/site4" />
</outline>
</body>
</opml>
112 changes: 93 additions & 19 deletions src/cli.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
use crate::model::{Feed, FeedGroup, Group, Item, ModelExt};
use crate::state::State;
use anyhow::{anyhow, Context, Result};
use async_std::prelude::FutureExt;
use futures::stream::{self, StreamExt};
use log::{info, warn};
use prettytable::{cell, format, row, Table};
use std::path::PathBuf;
use structopt::StructOpt;

use crate::model::{Feed, FeedGroup, Group, Item, ModelExt};
use crate::opml;
use crate::remote::RemoteFeed;
use crate::state::State;

#[derive(Debug, StructOpt)]
pub enum FeedCommand {
/// Lists all feeds
Expand All @@ -23,6 +28,9 @@ pub enum FeedCommand {

/// Crawls a feed manually
Crawl { id: u32 },

/// Imports OPML file
Import { file: PathBuf },
}

impl FeedCommand {
Expand Down Expand Up @@ -53,26 +61,14 @@ impl FeedCommand {
return Err(anyhow!("Feed `{}` already exists!", url));
}

let bytes = surf::get(&url)
.await
.map_err(|err| anyhow!("unable to fetch {}: {:?}", &url, err))?
.body_bytes()
.await?;
let raw_feed = feed_rs::parser::parse(&bytes[..])?;
let remote = RemoteFeed::new(&url).await?;

let feed = Feed::new(
raw_feed
.title
.map(|t| t.content)
remote
.get_title()
.ok_or_else(|| anyhow!("Feed doesn't have a title"))?,
url.clone(),
raw_feed
.links
.iter()
.map(|l| l.href.as_str())
.filter(|&link| link != url)
.next()
.map(|l| l.to_string())
.unwrap_or(url),
remote.get_site_url().unwrap_or(url),
);
let feed = {
let conn = state.db.get()?;
Expand Down Expand Up @@ -111,12 +107,81 @@ impl FeedCommand {
Ok(())
}

/// Imports the feeds described by the OPML file at `file` into the database.
///
/// The file is parsed with `opml::from_file`, yielding `(group, feeds)`
/// pairs. Each feed entry has `update()` called on it (a failure there is
/// only logged) and is then checked with `validate()`; entries that fail
/// validation are logged and dropped. The survivors are converted into
/// [`Feed`] values and inserted, and attached to their group when one is
/// available.
///
/// # Errors
///
/// Fails only if the OPML file cannot be loaded or a database connection
/// cannot be obtained. Problems with individual groups or feeds are logged
/// as warnings and the import carries on.
async fn import(state: State, file: PathBuf) -> Result<()> {
    let parsed = opml::from_file(&file)?;

    // All remote work happens here, before a database connection is taken.
    let resolved: Vec<_> = stream::iter(parsed)
        .then(|(group_title, candidates)| async move {
            let accepted = stream::iter(candidates)
                .filter_map(|mut candidate| async move {
                    // An update failure is not fatal by itself: validation
                    // below decides whether the entry is kept.
                    if let Err(e) = candidate.update().await {
                        warn!("failed to update feed {}: {:?}", candidate, e);
                    }

                    match candidate.validate() {
                        Ok(_) => Some(candidate),
                        Err(e) => {
                            warn!("invalid feed ({}): {:?}", candidate, e);
                            None
                        }
                    }
                })
                .map(Feed::from)
                .collect::<Vec<Feed>>()
                .await;

            (group_title, accepted)
        })
        .collect()
        .await;

    let conn = state.db.get()?;
    for (group_title, accepted) in resolved {
        // Reuse the group with this name when the lookup succeeds; otherwise
        // try to create it. If creation fails too, the feeds are still
        // imported, just without a group.
        let target = group_title.and_then(|title| {
            Group::get_by_name(&conn, &title).ok().or_else(|| {
                Group::new(title.clone())
                    .insert(&conn)
                    .map_err(|e| {
                        warn!("unable to create group {}: {:?}", title, e);
                    })
                    .ok()
            })
        });

        for feed in accepted {
            let stored = match feed.insert(&conn) {
                Ok(stored) => stored,
                Err(e) => {
                    warn!("unable to create feed: {:?}", e);
                    continue;
                }
            };

            let group = match target.as_ref() {
                Some(group) => group,
                None => continue,
            };

            if let Err(e) = group.add_feed(&conn, stored) {
                warn!("unable to add feed to group {:?}: {:?}", group, e);
            }
        }
    }

    info!("import completed.");

    Ok(())
}

/// Consumes the parsed subcommand and dispatches it to the matching handler,
/// handing over `state` together with the variant's arguments.
///
/// Returns whatever the selected handler returns.
async fn run(self, state: State) -> Result<()> {
    match self {
        // Handlers that complete synchronously.
        Self::List => Self::list(state),
        Self::Delete { id } => Self::delete(state, id),

        // Handlers that must be awaited.
        Self::Add { url, group } => Self::add(state, url, group).await,
        Self::Crawl { id } => Self::crawl(state, id).await,
        Self::Import { file } => Self::import(state, file).await,
    }
}
}
Expand Down Expand Up @@ -260,6 +325,9 @@ pub struct Options {
)]
database: PathBuf,

#[structopt(long)]
debug: bool,

#[structopt(subcommand)]
command: SubCommand,
}
Expand Down Expand Up @@ -287,6 +355,12 @@ impl Options {
let pool = crate::model::get_pool(&self.database)?;
let state = crate::state::State::new(pool);

if self.debug {
femme::with_level(log::LevelFilter::Debug);
} else {
femme::with_level(log::LevelFilter::Info);
}

match self.command {
SubCommand::Feed(cmd) => cmd.run(state).await,
SubCommand::Group(cmd) => cmd.run(state).await,
Expand Down
19 changes: 19 additions & 0 deletions src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,19 @@ pub enum Error {
#[error("Feed error")]
FeedError(#[from] feed_rs::parser::ParseFeedError),

#[error("XML error")]
XmlError(#[from] quick_xml::Error),

#[error("XML error at position {position}: {source}")]
XmlErrorWithPosition {
#[source]
source: quick_xml::Error,
position: usize,
},

#[error("url parsing error")]
UrlError(#[from] url::ParseError),

#[error("{}", _0)]
Message(String),
}
Expand All @@ -50,4 +63,10 @@ impl Error {
}
}

impl From<(quick_xml::Error, usize)> for Error {
fn from((source, position): (quick_xml::Error, usize)) -> Self {
Error::XmlErrorWithPosition { source, position }
}
}

pub type Result<T, E = Error> = std::result::Result<T, E>;
4 changes: 3 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ mod error;
mod api;
mod cli;
mod crawler;
mod model;
pub mod model;
mod opml;
mod remote;
mod state;
mod utils;

Expand Down
Loading

0 comments on commit 7b06b88

Please sign in to comment.