This commit is contained in:
TheGeneralist 2026-04-03 17:04:04 +02:00
parent 6eb3097f3d
commit 9981647c5e
Signed by: thegeneralist01
SSH key fingerprint: SHA256:pp9qddbCNmVNoSjevdvQvM5z0DHN7LTa8qBMbcMq/R4
12 changed files with 1384 additions and 59 deletions

View file

@ -1,5 +1,83 @@
use anyhow::{Context, Result, bail};
use std::{path::PathBuf, process::Command};
use serde::Deserialize;
use std::{fs, path::PathBuf, process::Command};
/// A tweet reduced to the three fields exposed to the rest of the program.
///
/// Values of this type are produced by `parse_scraped_tweet` (and its
/// line-based fallback) from the contents of a scraped tweet file.
#[derive(Debug, Deserialize)]
pub struct ScrapedTweet {
/// Tweet identifier, stored as text.
pub id: String,
/// Tweet body, taken from the file's `full_text` value.
pub text: String,
/// Author handle; the parsers in this file substitute `"unknown"` when the
/// scraped file carries no author information.
pub author: String,
}
/// Deserialization target for the scraped tweet file as read by
/// `parse_scraped_tweet`; converted into a `ScrapedTweet` afterwards.
#[derive(Debug, Deserialize)]
struct RawScrapedTweet {
/// Tweet identifier; must deserialize as a string.
pub id: String,
/// Tweet body; stored under the `full_text` key in the file.
#[serde(rename = "full_text")]
pub text: String,
/// Author record; optional, so files without one still deserialize.
pub author: Option<TweetAuthor>,
}
/// Author record nested inside `RawScrapedTweet`; only the handle is read.
#[derive(Debug, Deserialize)]
struct TweetAuthor {
/// Author handle; stored under the `screen_name` key in the file.
#[serde(rename = "screen_name")]
pub handle: String,
}
pub fn parse_scraped_tweet(path: &PathBuf) -> Result<ScrapedTweet> {
let contents = fs::read_to_string(path)
.with_context(|| format!("Failed to read scraped tweet file: {}", path.display()))?;
if let Ok(raw) = toml::from_str::<RawScrapedTweet>(&contents) {
let author = raw
.author
.map(|author| author.handle)
.unwrap_or_else(|| "unknown".to_string());
return Ok(ScrapedTweet {
id: raw.id,
text: raw.text,
author,
});
}
parse_scraped_tweet_fallback(&contents, path)
}
/// Line-oriented parser used when the file does not deserialize as TOML.
///
/// Scans every trimmed line for `id = `, `full_text = ` and `screen_name = `
/// assignments and keeps the first successfully extracted quoted value for
/// each key. `full_text` is additionally passed through
/// `unescape_toml_string`. `path` is only used to build error messages.
///
/// # Errors
///
/// Fails when no `id` or no `full_text` value could be extracted. A missing
/// `screen_name` is not an error; the author becomes `"unknown"`.
fn parse_scraped_tweet_fallback(contents: &str, path: &PathBuf) -> Result<ScrapedTweet> {
    let mut tweet_id: Option<String> = None;
    let mut body: Option<String> = None;
    let mut handle: Option<String> = None;

    for entry in contents.lines().map(str::trim) {
        // A line starts with "<key> = " exactly when the text before the
        // first " = " is "<key>", because none of the keys contains " = ".
        if let Some((key, _)) = entry.split_once(" = ") {
            match key {
                // A slot stays open until a value was actually extracted, so
                // a malformed first occurrence does not block a later one.
                "id" if tweet_id.is_none() => tweet_id = parse_quoted_value(entry),
                "full_text" if body.is_none() => {
                    body = parse_quoted_value(entry).map(unescape_toml_string);
                }
                "screen_name" if handle.is_none() => handle = parse_quoted_value(entry),
                _ => {}
            }
        }
    }

    // Field initializers run in order, so a missing id is reported first.
    Ok(ScrapedTweet {
        id: tweet_id
            .with_context(|| format!("Missing id in scraped tweet: {}", path.display()))?,
        text: body.with_context(|| {
            format!("Missing full_text in scraped tweet: {}", path.display())
        })?,
        author: handle.unwrap_or_else(|| "unknown".to_string()),
    })
}
/// Returns the text between the first and the last `"` on `line`.
///
/// Yields `None` when the line contains fewer than two double quotes.
/// Escape sequences inside the value are returned untouched.
fn parse_quoted_value(line: &str) -> Option<String> {
    // Drop everything up to and including the opening quote...
    let (_, after_open) = line.split_once('"')?;
    // ...then cut at the last remaining quote, which is the closing one.
    let (inner, _) = after_open.rsplit_once('"')?;
    Some(inner.to_string())
}
/// Decodes the escape sequences of a TOML basic string.
///
/// Handles `\n`, `\t`, `\r`, `\b`, `\f`, `\"`, `\\`, `\uXXXX` and
/// `\UXXXXXXXX`. The input is decoded in a single left-to-right pass so that
/// an escaped backslash followed by `n` stays a backslash and an `n` instead
/// of turning into a newline. Unknown or malformed escapes, and a trailing
/// lone backslash, are copied through unchanged.
fn unescape_toml_string(value: String) -> String {
    // Nothing to decode: hand the original allocation back.
    if !value.contains('\\') {
        return value;
    }

    let mut out = String::with_capacity(value.len());
    let mut chars = value.chars();
    while let Some(ch) = chars.next() {
        if ch != '\\' {
            out.push(ch);
            continue;
        }
        match chars.next() {
            Some('n') => out.push('\n'),
            Some('t') => out.push('\t'),
            Some('r') => out.push('\r'),
            Some('b') => out.push('\u{0008}'),
            Some('f') => out.push('\u{000C}'),
            Some('"') => out.push('"'),
            Some('\\') => out.push('\\'),
            Some(marker @ ('u' | 'U')) => {
                let width = if marker == 'u' { 4 } else { 8 };
                let rest = chars.as_str();
                // Require exactly `width` hex digits that name a valid scalar value.
                let decoded = rest
                    .get(..width)
                    .filter(|hex| hex.bytes().all(|b| b.is_ascii_hexdigit()))
                    .and_then(|hex| u32::from_str_radix(hex, 16).ok())
                    .and_then(char::from_u32);
                match decoded {
                    Some(scalar) => {
                        out.push(scalar);
                        // Skip the digits that were just consumed.
                        chars = rest[width..].chars();
                    }
                    None => {
                        out.push('\\');
                        out.push(marker);
                    }
                }
            }
            // Unknown escape: keep it verbatim rather than guessing.
            Some(other) => {
                out.push('\\');
                out.push(other);
            }
            // Trailing backslash with nothing after it.
            None => out.push('\\'),
        }
    }
    out
}
pub fn scrape(url: &str) -> Result<PathBuf> {
let tweet_id = url.split('/').next_back().unwrap();