yep
This commit is contained in:
parent
6eb3097f3d
commit
9981647c5e
12 changed files with 1384 additions and 59 deletions
|
|
@ -1,5 +1,83 @@
|
|||
use anyhow::{Context, Result, bail};
|
||||
use std::{path::PathBuf, process::Command};
|
||||
use serde::Deserialize;
|
||||
use std::{fs, path::PathBuf, process::Command};
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct ScrapedTweet {
|
||||
pub id: String,
|
||||
pub text: String,
|
||||
pub author: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct RawScrapedTweet {
|
||||
pub id: String,
|
||||
#[serde(rename = "full_text")]
|
||||
pub text: String,
|
||||
pub author: Option<TweetAuthor>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct TweetAuthor {
|
||||
#[serde(rename = "screen_name")]
|
||||
pub handle: String,
|
||||
}
|
||||
|
||||
pub fn parse_scraped_tweet(path: &PathBuf) -> Result<ScrapedTweet> {
|
||||
let contents = fs::read_to_string(path)
|
||||
.with_context(|| format!("Failed to read scraped tweet file: {}", path.display()))?;
|
||||
|
||||
if let Ok(raw) = toml::from_str::<RawScrapedTweet>(&contents) {
|
||||
let author = raw
|
||||
.author
|
||||
.map(|author| author.handle)
|
||||
.unwrap_or_else(|| "unknown".to_string());
|
||||
return Ok(ScrapedTweet {
|
||||
id: raw.id,
|
||||
text: raw.text,
|
||||
author,
|
||||
});
|
||||
}
|
||||
|
||||
parse_scraped_tweet_fallback(&contents, path)
|
||||
}
|
||||
|
||||
fn parse_scraped_tweet_fallback(contents: &str, path: &PathBuf) -> Result<ScrapedTweet> {
|
||||
let mut id = None;
|
||||
let mut text = None;
|
||||
let mut author = None;
|
||||
|
||||
for line in contents.lines() {
|
||||
let trimmed = line.trim();
|
||||
if trimmed.starts_with("id = ") && id.is_none() {
|
||||
id = parse_quoted_value(trimmed);
|
||||
} else if trimmed.starts_with("full_text = ") && text.is_none() {
|
||||
text = parse_quoted_value(trimmed).map(unescape_toml_string);
|
||||
} else if trimmed.starts_with("screen_name = ") && author.is_none() {
|
||||
author = parse_quoted_value(trimmed);
|
||||
}
|
||||
}
|
||||
|
||||
let id = id.with_context(|| format!("Missing id in scraped tweet: {}", path.display()))?;
|
||||
let text =
|
||||
text.with_context(|| format!("Missing full_text in scraped tweet: {}", path.display()))?;
|
||||
let author = author.unwrap_or_else(|| "unknown".to_string());
|
||||
|
||||
Ok(ScrapedTweet { id, text, author })
|
||||
}
|
||||
|
||||
/// Extracts the text between the first and the last double quote on `line`.
///
/// Returns `None` when the line contains fewer than two double quotes. Inner
/// quotes, escaped or not, are kept as part of the returned value.
fn parse_quoted_value(line: &str) -> Option<String> {
    // Everything after the opening quote...
    let (_, after_open) = line.split_once('"')?;
    // ...up to the final quote; `None` if the opening quote was the only one.
    let (inner, _) = after_open.rsplit_once('"')?;
    Some(inner.to_string())
}
|
||||
|
||||
/// Decodes the backslash escapes of a TOML basic string.
///
/// Handles `\n`, `\t`, `\r`, `\b`, `\f`, `\"`, `\\`, `\uXXXX` and
/// `\UXXXXXXXX`. The input is decoded in a single left-to-right pass so that
/// an escaped backslash is never mistaken for the start of another escape
/// (for example `\\n` yields a backslash followed by `n`, not a newline).
///
/// Anything that is not a recognized escape -- an unknown letter, a malformed
/// or non-scalar Unicode escape, or a trailing lone backslash -- is copied to
/// the output unchanged.
fn unescape_toml_string(value: String) -> String {
    // Fast path: nothing to decode, hand the original allocation back.
    if !value.contains('\\') {
        return value;
    }

    let mut out = String::with_capacity(value.len());
    let mut chars = value.chars();

    while let Some(ch) = chars.next() {
        if ch != '\\' {
            out.push(ch);
            continue;
        }

        match chars.next() {
            Some('n') => out.push('\n'),
            Some('t') => out.push('\t'),
            Some('r') => out.push('\r'),
            Some('b') => out.push('\u{0008}'),
            Some('f') => out.push('\u{000C}'),
            Some('"') => out.push('"'),
            Some('\\') => out.push('\\'),
            Some(marker @ ('u' | 'U')) => {
                let width = if marker == 'u' { 4 } else { 8 };
                // Peek at the digits without consuming them, so a malformed
                // escape can be emitted verbatim.
                let digits: String = chars.clone().take(width).collect();
                let well_formed =
                    digits.len() == width && digits.bytes().all(|b| b.is_ascii_hexdigit());
                let decoded = if well_formed {
                    u32::from_str_radix(&digits, 16)
                        .ok()
                        .and_then(char::from_u32)
                } else {
                    None
                };
                match decoded {
                    Some(scalar) => {
                        out.push(scalar);
                        // Skip the hex digits that were just decoded.
                        chars.nth(width - 1);
                    }
                    None => {
                        out.push('\\');
                        out.push(marker);
                    }
                }
            }
            // Unknown escape: keep it exactly as written.
            Some(other) => {
                out.push('\\');
                out.push(other);
            }
            // Trailing lone backslash.
            None => out.push('\\'),
        }
    }

    out
}
|
||||
|
||||
pub fn scrape(url: &str) -> Result<PathBuf> {
|
||||
let tweet_id = url.split('/').next_back().unwrap();
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue