diff --git a/src/downloader/local.rs b/src/downloader/local.rs index 6536aa7..df31a4e 100644 --- a/src/downloader/local.rs +++ b/src/downloader/local.rs @@ -31,12 +31,6 @@ pub fn save(path: String, store_path: &Path, timestamp: &String) -> Result//`. If the destination already -/// exists the source file is removed (deduplication); otherwise it is renamed. -/// Returns the store-relative destination path. pub fn archive_staged_file(file: &Path, store_path: &Path) -> Result { let hash = hash_file(file)?; let destination = raw_relative_path(file, &hash)?; @@ -55,9 +49,6 @@ pub fn archive_staged_file(file: &Path, store_path: &Path) -> Result { Ok(destination) } -/// Computes the store-relative path for a file given its `hash`. -/// The layout is `raw///` where `c1`/`c2` are the first -/// two characters of the hash, providing a two-level directory sharding. fn raw_relative_path(file: &Path, hash: &str) -> Result { let mut chars = hash.chars(); let first_letter = chars.next().context("hash must not be empty")?; diff --git a/src/downloader/tweets.rs b/src/downloader/tweets.rs index 57014f2..db5b993 100644 --- a/src/downloader/tweets.rs +++ b/src/downloader/tweets.rs @@ -7,28 +7,30 @@ use std::{ fs, path::{Path, PathBuf}, process::Command, - sync::OnceLock, + sync::{Mutex, OnceLock}, }; use super::local; -/// Returns `Some(id)` if `id` is a non-empty string of ASCII digits, otherwise `None`. -fn parse_tweet_id(id: &str) -> Option { - if !id.is_empty() && id.chars().all(|char| char.is_ascii_digit()) { - Some(id.to_string()) - } else { - None - } +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum TweetArchiveMode { + Tweet, + Thread, } -/// Extracts a tweet ID from an archivr path like `"tweet:123"` by taking the -/// last colon-separated segment and validating it as a numeric ID. -fn tweet_id_from_path(path: &str) -> Option { - path.split(':').next_back().and_then(parse_tweet_id) +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct TweetArchiveRequest { + pub tweet_id: String, + pub mode: TweetArchiveMode, } -/// Resolves `path` relative to `cwd` if it is not already absolute. -fn absolutize_path_from_cwd(path: PathBuf, cwd: &Path) -> PathBuf { +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum TweetArchiveResult { + Archived(PathBuf), + Skipped(PathBuf), +} + +fn resolve_from_cwd(path: PathBuf, cwd: &Path) -> PathBuf { if path.is_absolute() { path } else { @@ -36,18 +38,15 @@ fn absolutize_path_from_cwd(path: PathBuf, cwd: &Path) -> PathBuf { } } -/// Builds the CLI argument list for the Python tweet scraper. -/// When `thread` is true, recursive flags are added to follow reply chains. fn build_scraper_args( - tweet_id: &str, - thread: bool, + request: &TweetArchiveRequest, output_dir: &Path, temp_dir: &Path, credentials_file: &Path, ) -> Vec { let mut args = vec![ "--tweet-ids".to_string(), - tweet_id.to_string(), + request.tweet_id.clone(), "--output-dir".to_string(), output_dir.display().to_string(), "--media-dir".to_string(), @@ -57,41 +56,34 @@ fn build_scraper_args( credentials_file.display().to_string(), ]; - if thread { - args.push("--recursive-replied-to-tweets".to_string()); - args.push("--recursive-replied-to-tweets-quotes-retweets".to_string()); - args.push("--download-replied-to-tweets-media".to_string()); - } else { - args.push("--no-recursive".to_string()); + match request.mode { + TweetArchiveMode::Tweet => { + args.push("--no-recursive".to_string()); + } + TweetArchiveMode::Thread => { + args.push("--recursive-replied-to-tweets".to_string()); + args.push("--recursive-replied-to-tweets-quotes-retweets".to_string()); + args.push("--download-replied-to-tweets-media".to_string()); + } } args } -/// Archives a tweet (or full thread) identified by `path` (e.g. `"tweet:123"`). -/// -/// Invokes the Python scraper, then moves all produced media assets into the -/// content-addressed raw store and rewrites the TOML output to use the new -/// store-relative paths. Returns `true` if new content was archived, `false` -/// if the tweet was already present and `thread` is `false`. -/// -/// Requires `ARCHIVR_TWITTER_CREDENTIALS_FILE` to be set. The scraper binary -/// can be overridden via `ARCHIVR_TWEET_SCRAPER` and `ARCHIVR_TWEET_PYTHON`. -pub fn archive(path: &str, thread: bool, store_path: &Path, timestamp: &str) -> Result { +pub fn archive( + request: &TweetArchiveRequest, + store_path: &Path, + timestamp: &str, +) -> Result { let invocation_cwd = env::current_dir().context("Failed to read current working directory")?; - // Output directory for Tweet TOML files. let output_dir = store_path.join("raw_tweets"); - // Temporary directory for media assets downloaded by the scraper in `temp/...`. let temp_dir = store_path.join("temp").join(timestamp).join("tweets"); - let tweet_id = tweet_id_from_path(path).context("Invalid tweet ID")?; - fs::create_dir_all(&output_dir)?; fs::create_dir_all(&temp_dir)?; - // Path to the root - the to-be-archived tweet's TOML file. - let root_toml = output_dir.join(format!("tweet-{tweet_id}.toml")); - if !thread && root_toml.exists() { - return Ok(false); + let root_toml = output_dir.join(format!("tweet-{}.toml", request.tweet_id)); + if request.mode == TweetArchiveMode::Tweet && root_toml.exists() { + return Ok(TweetArchiveResult::Skipped(output_dir)); } let before = tweet_toml_files(&output_dir)?; @@ -100,12 +92,12 @@ pub fn archive(path: &str, thread: bool, store_path: &Path, timestamp: &str) -> let scraper_path = env::var_os("ARCHIVR_TWEET_SCRAPER") .map(PathBuf::from) .unwrap_or_else(|| PathBuf::from("vendor/twitter/scrape_user_tweet_contents.py")); - let scraper_path = absolutize_path_from_cwd(scraper_path, &invocation_cwd); + let scraper_path = resolve_from_cwd(scraper_path, &invocation_cwd); let credentials_file = if let Some(credentials_file) = env::var_os("ARCHIVR_TWITTER_CREDENTIALS_FILE") { - absolutize_path_from_cwd(PathBuf::from(credentials_file), &invocation_cwd) + resolve_from_cwd(PathBuf::from(credentials_file), &invocation_cwd) } else { bail!( "Twitter scraping requires ARCHIVR_TWITTER_CREDENTIALS_FILE to point to a cookies file." @@ -121,7 +113,7 @@ pub fn archive(path: &str, thread: bool, store_path: &Path, timestamp: &str) -> let mut cmd = Command::new(&python); cmd.current_dir(&temp_dir).arg(&scraper_path); - for arg in build_scraper_args(&tweet_id, thread, &output_dir, &temp_dir, &credentials_file) { + for arg in build_scraper_args(request, &output_dir, &temp_dir, &credentials_file) { cmd.arg(arg); } @@ -159,10 +151,9 @@ pub fn archive(path: &str, thread: bool, store_path: &Path, timestamp: &str) -> rewrite_tweet_outputs(&new_tomls, &output_dir, &temp_dir, store_path)?; let _ = fs::remove_dir_all(store_path.join("temp").join(timestamp)); - Ok(true) + Ok(TweetArchiveResult::Archived(output_dir)) } -/// Removes the `scraping_summary.toml` file left by the scraper, if present. fn cleanup_summary(output_dir: &Path) -> Result<()> { let summary_path = output_dir.join("scraping_summary.toml"); if summary_path.exists() { @@ -171,14 +162,11 @@ fn cleanup_summary(output_dir: &Path) -> Result<()> { Ok(()) } -/// Returns the set of `tweet-*.toml` files present in `output_dir`. fn tweet_toml_files(output_dir: &Path) -> Result> { let mut files = HashSet::new(); - for entry in fs::read_dir(output_dir)? { let entry = entry?; let path = entry.path(); - if path.is_file() && path .file_name() @@ -188,31 +176,25 @@ fn tweet_toml_files(output_dir: &Path) -> Result> { files.insert(path); } } - Ok(files) } -/// Returns the sorted list of TOML files present in `after` but not in `before`. fn new_tweet_tomls(before: &HashSet, after: &HashSet) -> Vec { let mut files = after.difference(before).cloned().collect::>(); files.sort(); files } -/// Returns a lazily-compiled regex matching `avatar_local_path = "..."` in TOML. fn avatar_regex() -> &'static Regex { static REGEX: OnceLock = OnceLock::new(); REGEX.get_or_init(|| Regex::new(r#"avatar_local_path = "([^"\n]+)""#).unwrap()) } -/// Returns a lazily-compiled regex matching `local_path = "..."` in TOML. fn media_regex() -> &'static Regex { static REGEX: OnceLock = OnceLock::new(); REGEX.get_or_init(|| Regex::new(r#"(?m)\blocal_path = "([^"\n]+)""#).unwrap()) } -/// Rewrites asset paths in each newly-created TOML file, moving assets into -/// the content-addressed store. Files are written back only if content changed. fn rewrite_tweet_outputs( tweet_tomls: &[PathBuf], output_dir: &Path, @@ -230,7 +212,6 @@ fn rewrite_tweet_outputs( store_path, &mut archived_assets, )?; - if rewritten != contents { fs::write(path, rewritten)?; } @@ -239,10 +220,6 @@ fn rewrite_tweet_outputs( Ok(()) } -/// Rewrites all `avatar_local_path` and `local_path` references in `contents`, -/// archiving each referenced file into the raw store and returning the updated -/// TOML string. `archived_assets` is a cache to avoid re-archiving the same -/// file when it is referenced by multiple tweets. fn rewrite_toml_asset_paths( contents: &str, output_dir: &Path, @@ -275,10 +252,6 @@ fn rewrite_toml_asset_paths( Ok(rewritten) } -/// Archives the asset at `old_path` (relative to `base_dir`) into the raw store -/// and returns its new store-relative path. Already-archived paths (starting -/// with `"raw/"`) are returned unchanged. Results are cached in `archived_assets` -/// by `":"` key to deduplicate work across TOML files. fn archive_asset_reference( old_path: &str, base_dir: &Path, @@ -314,7 +287,8 @@ fn archive_asset_reference( mod tests { use super::*; use std::{ - sync::{Mutex, MutexGuard}, + env, fs, + sync::MutexGuard, time::{SystemTime, UNIX_EPOCH}, }; @@ -346,8 +320,10 @@ mod tests { #[test] fn test_build_scraper_args_for_single_tweet() { let args = build_scraper_args( - "1234567890", - false, + &TweetArchiveRequest { + tweet_id: "1234567890".to_string(), + mode: TweetArchiveMode::Tweet, + }, Path::new("/tmp/raw_tweets"), Path::new("/tmp/temp/tweets"), Path::new("/tmp/twitter-creds.txt"), @@ -359,6 +335,7 @@ mod tests { assert!(args.contains(&"--download-media".to_string())); assert!(args.contains(&"--credentials-file".to_string())); assert!(args.contains(&"--no-recursive".to_string())); + assert!(!args.contains(&"--no-download-avatars".to_string())); assert!(!args.contains(&"--recursive-replied-to-tweets".to_string())); assert!(!args.contains(&"--recursive-replied-to-tweets-quotes-retweets".to_string())); assert!(!args.contains(&"--download-replied-to-tweets-media".to_string())); @@ -367,8 +344,10 @@ mod tests { #[test] fn test_build_scraper_args_for_thread() { let args = build_scraper_args( - "1234567890", - true, + &TweetArchiveRequest { + tweet_id: "1234567890".to_string(), + mode: TweetArchiveMode::Thread, + }, Path::new("/tmp/raw_tweets"), Path::new("/tmp/temp/tweets"), Path::new("/tmp/twitter-creds.txt"), @@ -454,13 +433,13 @@ avatar_local_path = "../temp/ts/tweets/media/avatars/avatar.jpg" #[test] fn test_resolve_from_cwd_keeps_absolute_paths() { - let path = absolutize_path_from_cwd(PathBuf::from("/tmp/creds.txt"), Path::new("/work")); + let path = resolve_from_cwd(PathBuf::from("/tmp/creds.txt"), Path::new("/work")); assert_eq!(path, PathBuf::from("/tmp/creds.txt")); } #[test] fn test_resolve_from_cwd_expands_relative_paths() { - let path = absolutize_path_from_cwd(PathBuf::from("creds.txt"), Path::new("/work")); + let path = resolve_from_cwd(PathBuf::from("creds.txt"), Path::new("/work")); assert_eq!(path, PathBuf::from("/work/creds.txt")); } @@ -477,9 +456,17 @@ avatar_local_path = "../temp/ts/tweets/media/avatars/avatar.jpg" fs::write(&credentials, "ct0=test;auth_token=test").unwrap(); set_test_env("ARCHIVR_TWITTER_CREDENTIALS_FILE", &credentials); - let archived = archive("tweet:123", false, &store_path, "ts").unwrap(); + let result = archive( + &TweetArchiveRequest { + tweet_id: "123".to_string(), + mode: TweetArchiveMode::Tweet, + }, + &store_path, + "ts", + ) + .unwrap(); - assert!(!archived); + assert_eq!(result, TweetArchiveResult::Skipped(output_dir)); remove_test_env("ARCHIVR_TWITTER_CREDENTIALS_FILE"); let _ = fs::remove_dir_all(store_path); @@ -542,7 +529,7 @@ EOF "#, ) .unwrap(); - Command::new("chmod") + std::process::Command::new("chmod") .arg("+x") .arg(&script) .status() @@ -552,11 +539,20 @@ EOF set_test_env("ARCHIVR_TWEET_SCRAPER", &script); set_test_env("ARCHIVR_TWEET_PYTHON", "/bin/sh"); - let archived = archive("tweet:123", false, &store_path, "ts").unwrap(); + let result = archive( + &TweetArchiveRequest { + tweet_id: "123".to_string(), + mode: TweetArchiveMode::Tweet, + }, + &store_path, + "ts", + ) + .unwrap(); + let tweet_file = output_dir.join("tweet-123.toml"); let contents = fs::read_to_string(&tweet_file).unwrap(); - assert!(archived); + assert_eq!(result, TweetArchiveResult::Archived(output_dir.clone())); assert!(tweet_file.exists()); assert!(!output_dir.join("scraping_summary.toml").exists()); assert!(contents.contains(r#"avatar_local_path = "raw/"#)); diff --git a/src/main.rs b/src/main.rs index 31bab27..b83f514 100644 --- a/src/main.rs +++ b/src/main.rs @@ -10,6 +10,12 @@ use std::{ mod downloader; mod hash; +#[derive(Debug, Clone, PartialEq, Eq)] +enum ExplicitArchiveRequest { + Tweet(downloader::tweets::TweetArchiveRequest), + TweetMedia { tweet_id: String }, +} + #[derive(Parser, Debug)] #[command(version, about, long_about = None)] struct Args { @@ -66,14 +72,12 @@ fn get_archive_path() -> Option { None } -#[derive(Debug, PartialEq, Eq, Clone, Copy)] +#[derive(Debug, PartialEq)] enum Source { YouTubeVideo, YouTubePlaylist, YouTubeChannel, X, - Tweet, - TweetThread, Instagram, Facebook, TikTok, @@ -91,19 +95,39 @@ fn parse_tweet_id(id: &str) -> Option { } } -fn tweet_id_from_path(path: &str) -> Option { - path.split(':').next_back().and_then(parse_tweet_id) +fn parse_explicit_archive_request(path: &str) -> Option { + let parts: Vec<&str> = path.split(':').collect(); + + match parts.as_slice() { + ["tweet", id] => parse_tweet_id(id).map(|tweet_id| { + ExplicitArchiveRequest::Tweet(downloader::tweets::TweetArchiveRequest { + tweet_id, + mode: downloader::tweets::TweetArchiveMode::Tweet, + }) + }), + ["tweet", "media", id] => { + parse_tweet_id(id).map(|tweet_id| ExplicitArchiveRequest::TweetMedia { tweet_id }) + } + ["x", "tweet", id] | ["x", "x", id] | ["twitter", "x", id] | ["twitter", "tweet", id] => { + parse_tweet_id(id).map(|tweet_id| { + ExplicitArchiveRequest::Tweet(downloader::tweets::TweetArchiveRequest { + tweet_id, + mode: downloader::tweets::TweetArchiveMode::Tweet, + }) + }) + } + ["x", "thread", id] | ["twitter", "thread", id] => parse_tweet_id(id).map(|tweet_id| { + ExplicitArchiveRequest::Tweet(downloader::tweets::TweetArchiveRequest { + tweet_id, + mode: downloader::tweets::TweetArchiveMode::Thread, + }) + }), + _ => None, + } } -fn resolve_source_path(path: &str, source: &Source) -> String { - if *source == Source::X && path.starts_with("tweet:media:") { - format!( - "https://x.com/i/status/{}", - tweet_id_from_path(path).unwrap() - ) - } else { - path.to_string() - } +fn tweet_media_path(tweet_id: &str) -> String { + format!("https://x.com/i/status/{tweet_id}") } // INFO: yt-dlp supports a lot of sites; so, when archiving (for example) a website, the user @@ -141,43 +165,8 @@ fn determine_source(path: &str) -> Source { } } - // Shorthand schemes: tweet:, x:, or twitter: - if let Some(after_scheme) = path.strip_prefix("tweet:") { - if after_scheme.starts_with("media:") - && after_scheme - .strip_prefix("media:") - .and_then(parse_tweet_id) - .is_some() - { - return Source::X; - } - - if parse_tweet_id(after_scheme).is_some() { - return Source::Tweet; - } - } - - if let Some(after_scheme) = path - .strip_prefix("x:") - .or_else(|| path.strip_prefix("twitter:")) - { - if after_scheme - .strip_prefix("thread:") - .and_then(parse_tweet_id) - .is_some() - { - return Source::TweetThread; - } - - if after_scheme - .strip_prefix("tweet:") - .or_else(|| after_scheme.strip_prefix("x:")) - .and_then(parse_tweet_id) - .is_some() - { - return Source::Tweet; - } - + // Shorthand schemes: x: or twitter: + if path.starts_with("x:") || path.starts_with("twitter:") { return Source::X; } @@ -355,46 +344,40 @@ fn main() -> Result<()> { } }; - let source = determine_source(path); - - // Sources: Tweets or Twitter Threads - match source { - Source::Other => { - eprintln!("Archiving from this source is not yet implemented."); - process::exit(1); - } - Source::Tweet | Source::TweetThread => { - match downloader::tweets::archive( - path, - source == Source::TweetThread, - &store_path, - ×tamp, - ) { - Ok(true) => { - println!( - "Tweet archived successfully to {}", - store_path.join("raw_tweets").display() - ); - return Ok(()); - } - Ok(false) => { - println!( - "Tweet already archived in {}", - store_path.join("raw_tweets").display() - ); - return Ok(()); - } - Err(e) => { - eprintln!("Failed to archive tweet: {e}"); - process::exit(1); - } + if let Some(ExplicitArchiveRequest::Tweet(request)) = + parse_explicit_archive_request(path) + { + match downloader::tweets::archive(&request, &store_path, ×tamp) { + Ok(downloader::tweets::TweetArchiveResult::Archived(output_dir)) => { + println!("Tweet archived successfully to {}", output_dir.display()); + return Ok(()); + } + Ok(downloader::tweets::TweetArchiveResult::Skipped(output_dir)) => { + println!("Tweet already archived in {}", output_dir.display()); + return Ok(()); + } + Err(e) => { + eprintln!("Failed to archive tweet: {e}"); + process::exit(1); } } - _ => {} } - // Sources, for which yt-dlp is needed - let path = resolve_source_path(path, &source); + let (resolved_path, source) = match parse_explicit_archive_request(path) { + Some(ExplicitArchiveRequest::TweetMedia { tweet_id }) => { + (tweet_media_path(&tweet_id), Source::X) + } + None => { + let source = determine_source(path); + if let Source::Other = source { + eprintln!("Archiving from this source is not yet implemented."); + process::exit(1); + } + (path.clone(), source) + } + Some(ExplicitArchiveRequest::Tweet(_)) => unreachable!(), + }; + let hash = match source { Source::YouTubeVideo | Source::X @@ -403,7 +386,11 @@ fn main() -> Result<()> { | Source::TikTok | Source::Reddit | Source::Snapchat => { - match downloader::ytdlp::download(path.clone(), &store_path, ×tamp) { + match downloader::ytdlp::download( + resolved_path.clone(), + &store_path, + ×tamp, + ) { Ok(h) => h, Err(e) => { eprintln!("Failed to download from YouTube: {e}"); @@ -412,7 +399,7 @@ fn main() -> Result<()> { } } Source::Local => { - match downloader::local::save(path.clone(), &store_path, ×tamp) { + match downloader::local::save(resolved_path.clone(), &store_path, ×tamp) { Ok(h) => h, Err(e) => { eprintln!("Failed to archive local file: {e}"); @@ -432,7 +419,7 @@ fn main() -> Result<()> { | Source::Reddit | Source::Snapchat => ".mp4", Source::Local => { - let p = Path::new(path.trim_start_matches("file://")); + let p = Path::new(resolved_path.trim_start_matches("file://")); &p.extension() .map_or(String::new(), |ext| format!(".{}", ext.to_string_lossy())) } @@ -535,7 +522,6 @@ fn main() -> Result<()> { #[cfg(test)] mod tests { use super::*; - use std::fs; struct TestCase<'a> { url: &'a str, @@ -543,93 +529,93 @@ mod tests { } #[test] - fn test_tweet_sources() { + fn test_explicit_tweet_archive_parsing() { let cases = [ - TestCase { - url: "tweet:1234567890", - expected: Source::Tweet, - }, - TestCase { - url: "x:tweet:1234567890", - expected: Source::Tweet, - }, - TestCase { - url: "x:x:1234567890", - expected: Source::Tweet, - }, - TestCase { - url: "twitter:x:1234567890", - expected: Source::Tweet, - }, - TestCase { - url: "twitter:tweet:1234567890", - expected: Source::Tweet, - }, - TestCase { - url: "tweet:media:1234567890", - expected: Source::X, - }, - TestCase { - url: "x:thread:1234567890", - expected: Source::TweetThread, - }, - TestCase { - url: "twitter:thread:1234567890", - expected: Source::TweetThread, - }, - TestCase { - url: "tweet:thread:1234567890", - expected: Source::Other, - }, - TestCase { - url: "tweet:not-a-number", - expected: Source::Other, - }, - TestCase { - url: "tweet:media:not-a-number", - expected: Source::Other, - }, + ( + "tweet:1234567890", + Some(ExplicitArchiveRequest::Tweet( + downloader::tweets::TweetArchiveRequest { + tweet_id: "1234567890".to_string(), + mode: downloader::tweets::TweetArchiveMode::Tweet, + }, + )), + ), + ( + "x:tweet:1234567890", + Some(ExplicitArchiveRequest::Tweet( + downloader::tweets::TweetArchiveRequest { + tweet_id: "1234567890".to_string(), + mode: downloader::tweets::TweetArchiveMode::Tweet, + }, + )), + ), + ( + "x:x:1234567890", + Some(ExplicitArchiveRequest::Tweet( + downloader::tweets::TweetArchiveRequest { + tweet_id: "1234567890".to_string(), + mode: downloader::tweets::TweetArchiveMode::Tweet, + }, + )), + ), + ( + "twitter:x:1234567890", + Some(ExplicitArchiveRequest::Tweet( + downloader::tweets::TweetArchiveRequest { + tweet_id: "1234567890".to_string(), + mode: downloader::tweets::TweetArchiveMode::Tweet, + }, + )), + ), + ( + "twitter:tweet:1234567890", + Some(ExplicitArchiveRequest::Tweet( + downloader::tweets::TweetArchiveRequest { + tweet_id: "1234567890".to_string(), + mode: downloader::tweets::TweetArchiveMode::Tweet, + }, + )), + ), + ( + "tweet:media:1234567890", + Some(ExplicitArchiveRequest::TweetMedia { + tweet_id: "1234567890".to_string(), + }), + ), + ( + "x:thread:1234567890", + Some(ExplicitArchiveRequest::Tweet( + downloader::tweets::TweetArchiveRequest { + tweet_id: "1234567890".to_string(), + mode: downloader::tweets::TweetArchiveMode::Thread, + }, + )), + ), + ( + "twitter:thread:1234567890", + Some(ExplicitArchiveRequest::Tweet( + downloader::tweets::TweetArchiveRequest { + tweet_id: "1234567890".to_string(), + mode: downloader::tweets::TweetArchiveMode::Thread, + }, + )), + ), + ("tweet:thread:1234567890", None), + ("x:media:1234567890", None), + ("tweet:not-a-number", None), + ("tweet:media:not-a-number", None), ]; - for case in &cases { + for (input, expected) in cases { assert_eq!( - determine_source(case.url), - case.expected, - "Failed for URL: {}", - case.url + parse_explicit_archive_request(input), + expected, + "Failed for input: {}", + input ); } } - #[test] - fn test_tweet_id_from_path() { - assert_eq!( - tweet_id_from_path("tweet:1234567890"), - Some("1234567890".to_string()) - ); - assert_eq!( - tweet_id_from_path("tweet:media:1234567890"), - Some("1234567890".to_string()) - ); - assert_eq!( - tweet_id_from_path("x:thread:1234567890"), - Some("1234567890".to_string()) - ); - assert_eq!(tweet_id_from_path("tweet:not-a-number"), None); - } - - #[test] - fn test_resolve_source_path() { - assert_eq!( - resolve_source_path("tweet:media:1234567890", &Source::X), - "https://x.com/i/status/1234567890" - ); - assert_eq!( - resolve_source_path("tweet:1234567890", &Source::Tweet), - "tweet:1234567890" - ); - } - #[test] fn test_youtube_sources() { // --- YouTube Video URLs ---