Rename resolve_from_cwd to absolutize_path

Update call sites and tests to use the new API. Adjust tweet scraper path/credentials handling and make small tweaks to local path hashing and raw store helpers. Signed-off-by: TheGeneralist <180094941+thegeneralist01@users.noreply.github.com>
Clean up some clanker-written code
2026-07-22 03:05:32 +02:00 · 2026-04-02 21:13:55 +02:00 · 2026-04-02 18:54:58 +02:00 · 2026-04-02 14:31:04 +02:00 · 2026-04-02 14:05:01 +02:00
3 changed files with 257 additions and 230 deletions
--- a/src/downloader/local.rs
+++ b/src/downloader/local.rs
@ -31,6 +31,12 @@ pub fn save(path: String, store_path: &Path, timestamp: &String) -> Result<Strin
    hash_file(&out_file)
 }
 /// Moves `file` into the content-addressed raw store under `store_path`.
 ///
 /// The destination path is derived from the file's SHA-256 hash:
 /// `raw/<first-char>/<second-char>/<hash><ext>`. If the destination already
 /// exists the source file is removed (deduplication); otherwise it is renamed.
 /// Returns the store-relative destination path.
 pub fn archive_staged_file(file: &Path, store_path: &Path) -> Result<PathBuf> {
    let hash = hash_file(file)?;
    let destination = raw_relative_path(file, &hash)?;
@ -49,6 +55,9 @@ pub fn archive_staged_file(file: &Path, store_path: &Path) -> Result<PathBuf> {
    Ok(destination)
 }
 /// Computes the store-relative path for a file given its `hash`.
 /// The layout is `raw/<c1>/<c2>/<hash><ext>` where `c1`/`c2` are the first
 /// two characters of the hash, providing a two-level directory sharding.
 fn raw_relative_path(file: &Path, hash: &str) -> Result<PathBuf> {
    let mut chars = hash.chars();
    let first_letter = chars.next().context("hash must not be empty")?;
--- a/src/downloader/tweets.rs
+++ b/src/downloader/tweets.rs
@ -7,30 +7,28 @@ use std::{
    fs,
    path::{Path, PathBuf},
    process::Command,
-    sync::{Mutex, OnceLock},
+    sync::OnceLock,
 };
 use super::local;
-#[derive(Debug, Clone, PartialEq, Eq)]
+/// Returns `Some(id)` if `id` is a non-empty string of ASCII digits, otherwise `None`.
-pub enum TweetArchiveMode {
+fn parse_tweet_id(id: &str) -> Option<String> {
-    Tweet,
+    if !id.is_empty() && id.chars().all(|char| char.is_ascii_digit()) {
-    Thread,
+        Some(id.to_string())
    } else {
        None
    }
 }
-#[derive(Debug, Clone, PartialEq, Eq)]
+/// Extracts a tweet ID from an archivr path like `"tweet:123"` by taking the
-pub struct TweetArchiveRequest {
+/// last colon-separated segment and validating it as a numeric ID.
-    pub tweet_id: String,
+fn tweet_id_from_path(path: &str) -> Option<String> {
-    pub mode: TweetArchiveMode,
+    path.split(':').next_back().and_then(parse_tweet_id)
 }
-#[derive(Debug, Clone, PartialEq, Eq)]
+/// Resolves `path` relative to `cwd` if it is not already absolute.
-pub enum TweetArchiveResult {
+fn absolutize_path_from_cwd(path: PathBuf, cwd: &Path) -> PathBuf {
    Archived(PathBuf),
    Skipped(PathBuf),
 }
 fn resolve_from_cwd(path: PathBuf, cwd: &Path) -> PathBuf {
    if path.is_absolute() {
        path
    } else {
@ -38,15 +36,18 @@ fn resolve_from_cwd(path: PathBuf, cwd: &Path) -> PathBuf {
    }
 }
 /// Builds the CLI argument list for the Python tweet scraper.
 /// When `thread` is true, recursive flags are added to follow reply chains.
 fn build_scraper_args(
-    request: &TweetArchiveRequest,
+    tweet_id: &str,
    thread: bool,
    output_dir: &Path,
    temp_dir: &Path,
    credentials_file: &Path,
 ) -> Vec<String> {
    let mut args = vec![
        "--tweet-ids".to_string(),
-        request.tweet_id.clone(),
+        tweet_id.to_string(),
        "--output-dir".to_string(),
        output_dir.display().to_string(),
        "--media-dir".to_string(),
@ -56,34 +57,41 @@ fn build_scraper_args(
        credentials_file.display().to_string(),
    ];
-    match request.mode {
+    if thread {
-        TweetArchiveMode::Tweet => {
+        args.push("--recursive-replied-to-tweets".to_string());
-            args.push("--no-recursive".to_string());
+        args.push("--recursive-replied-to-tweets-quotes-retweets".to_string());
-        }
+        args.push("--download-replied-to-tweets-media".to_string());
-        TweetArchiveMode::Thread => {
+    } else {
-            args.push("--recursive-replied-to-tweets".to_string());
+        args.push("--no-recursive".to_string());
            args.push("--recursive-replied-to-tweets-quotes-retweets".to_string());
            args.push("--download-replied-to-tweets-media".to_string());
        }
    }
    args
 }
-pub fn archive(
+/// Archives a tweet (or full thread) identified by `path` (e.g. `"tweet:123"`).
-    request: &TweetArchiveRequest,
+///
-    store_path: &Path,
+/// Invokes the Python scraper, then moves all produced media assets into the
-    timestamp: &str,
+/// content-addressed raw store and rewrites the TOML output to use the new
-) -> Result<TweetArchiveResult> {
+/// store-relative paths. Returns `true` if new content was archived, `false`
 /// if the tweet was already present and `thread` is `false`.
 ///
 /// Requires `ARCHIVR_TWITTER_CREDENTIALS_FILE` to be set. The scraper binary
 /// can be overridden via `ARCHIVR_TWEET_SCRAPER` and `ARCHIVR_TWEET_PYTHON`.
 pub fn archive(path: &str, thread: bool, store_path: &Path, timestamp: &str) -> Result<bool> {
    let invocation_cwd = env::current_dir().context("Failed to read current working directory")?;
    // Output directory for Tweet TOML files.
    let output_dir = store_path.join("raw_tweets");
    // Temporary directory for media assets downloaded by the scraper in `temp/...`.
    let temp_dir = store_path.join("temp").join(timestamp).join("tweets");
    let tweet_id = tweet_id_from_path(path).context("Invalid tweet ID")?;
    fs::create_dir_all(&output_dir)?;
    fs::create_dir_all(&temp_dir)?;
-    let root_toml = output_dir.join(format!("tweet-{}.toml", request.tweet_id));
+    // Path to the root - the to-be-archived tweet's TOML file.
-    if request.mode == TweetArchiveMode::Tweet && root_toml.exists() {
+    let root_toml = output_dir.join(format!("tweet-{tweet_id}.toml"));
-        return Ok(TweetArchiveResult::Skipped(output_dir));
+    if !thread && root_toml.exists() {
        return Ok(false);
    }
    let before = tweet_toml_files(&output_dir)?;
@ -92,12 +100,12 @@ pub fn archive(
    let scraper_path = env::var_os("ARCHIVR_TWEET_SCRAPER")
        .map(PathBuf::from)
        .unwrap_or_else(|| PathBuf::from("vendor/twitter/scrape_user_tweet_contents.py"));
-    let scraper_path = resolve_from_cwd(scraper_path, &invocation_cwd);
+    let scraper_path = absolutize_path_from_cwd(scraper_path, &invocation_cwd);
    let credentials_file = if let Some(credentials_file) =
        env::var_os("ARCHIVR_TWITTER_CREDENTIALS_FILE")
    {
-        resolve_from_cwd(PathBuf::from(credentials_file), &invocation_cwd)
+        absolutize_path_from_cwd(PathBuf::from(credentials_file), &invocation_cwd)
    } else {
        bail!(
            "Twitter scraping requires ARCHIVR_TWITTER_CREDENTIALS_FILE to point to a cookies file."
@ -113,7 +121,7 @@ pub fn archive(
    let mut cmd = Command::new(&python);
    cmd.current_dir(&temp_dir).arg(&scraper_path);
-    for arg in build_scraper_args(request, &output_dir, &temp_dir, &credentials_file) {
+    for arg in build_scraper_args(&tweet_id, thread, &output_dir, &temp_dir, &credentials_file) {
        cmd.arg(arg);
    }
@ -151,9 +159,10 @@ pub fn archive(
    rewrite_tweet_outputs(&new_tomls, &output_dir, &temp_dir, store_path)?;
    let _ = fs::remove_dir_all(store_path.join("temp").join(timestamp));
-    Ok(TweetArchiveResult::Archived(output_dir))
+    Ok(true)
 }
 /// Removes the `scraping_summary.toml` file left by the scraper, if present.
 fn cleanup_summary(output_dir: &Path) -> Result<()> {
    let summary_path = output_dir.join("scraping_summary.toml");
    if summary_path.exists() {
@ -162,11 +171,14 @@ fn cleanup_summary(output_dir: &Path) -> Result<()> {
    Ok(())
 }
 /// Returns the set of `tweet-*.toml` files present in `output_dir`.
 fn tweet_toml_files(output_dir: &Path) -> Result<HashSet<PathBuf>> {
    let mut files = HashSet::new();
    for entry in fs::read_dir(output_dir)? {
        let entry = entry?;
        let path = entry.path();
        if path.is_file()
            && path
                .file_name()
@ -176,25 +188,31 @@ fn tweet_toml_files(output_dir: &Path) -> Result<HashSet<PathBuf>> {
            files.insert(path);
        }
    }
    Ok(files)
 }
 /// Computes which TOML files appeared between two directory snapshots.
 ///
 /// Every path contained in `after` but missing from `before` is cloned into
 /// the result, which is returned in ascending path order.
 fn new_tweet_tomls(before: &HashSet<PathBuf>, after: &HashSet<PathBuf>) -> Vec<PathBuf> {
    let mut added: Vec<PathBuf> = after
        .iter()
        .filter(|candidate| !before.contains(*candidate))
        .cloned()
        .collect();
    // Set iteration order is unspecified, so sort to give callers a stable order.
    added.sort();
    added
 }
 /// Shared regex for `avatar_local_path = "..."` entries in tweet TOML.
 ///
 /// The pattern is compiled on the first call and the same instance is handed
 /// out afterwards. Capture group 1 holds the quoted path.
 fn avatar_regex() -> &'static Regex {
    static AVATAR_PATH: OnceLock<Regex> = OnceLock::new();
    AVATAR_PATH.get_or_init(|| {
        let pattern = r#"avatar_local_path = "([^"\n]+)""#;
        Regex::new(pattern).unwrap()
    })
 }
 /// Shared regex for `local_path = "..."` entries in tweet TOML.
 ///
 /// The pattern is compiled on the first call and the same instance is handed
 /// out afterwards. Capture group 1 holds the quoted path. The leading `\b`
 /// requires a word boundary before `local_path`.
 fn media_regex() -> &'static Regex {
    static MEDIA_PATH: OnceLock<Regex> = OnceLock::new();
    MEDIA_PATH.get_or_init(|| {
        let pattern = r#"(?m)\blocal_path = "([^"\n]+)""#;
        Regex::new(pattern).unwrap()
    })
 }
 /// Rewrites asset paths in each newly-created TOML file, moving assets into
 /// the content-addressed store. Files are written back only if content changed.
 fn rewrite_tweet_outputs(
    tweet_tomls: &[PathBuf],
    output_dir: &Path,
@ -212,6 +230,7 @@ fn rewrite_tweet_outputs(
            store_path,
            &mut archived_assets,
        )?;
        if rewritten != contents {
            fs::write(path, rewritten)?;
        }
@ -220,6 +239,10 @@ fn rewrite_tweet_outputs(
    Ok(())
 }
 /// Rewrites all `avatar_local_path` and `local_path` references in `contents`,
 /// archiving each referenced file into the raw store and returning the updated
 /// TOML string. `archived_assets` is a cache to avoid re-archiving the same
 /// file when it is referenced by multiple tweets.
 fn rewrite_toml_asset_paths(
    contents: &str,
    output_dir: &Path,
@ -252,6 +275,10 @@ fn rewrite_toml_asset_paths(
    Ok(rewritten)
 }
 /// Archives the asset at `old_path` (relative to `base_dir`) into the raw store
 /// and returns its new store-relative path. Already-archived paths (starting
 /// with `"raw/"`) are returned unchanged. Results are cached in `archived_assets`
 /// by `"<kind>:<old_path>"` key to deduplicate work across TOML files.
 fn archive_asset_reference(
    old_path: &str,
    base_dir: &Path,
@ -287,8 +314,7 @@ fn archive_asset_reference(
 mod tests {
    use super::*;
    use std::{
-        env, fs,
+        sync::{Mutex, MutexGuard},
        sync::MutexGuard,
        time::{SystemTime, UNIX_EPOCH},
    };
@ -320,10 +346,8 @@ mod tests {
    #[test]
    fn test_build_scraper_args_for_single_tweet() {
        let args = build_scraper_args(
-            &TweetArchiveRequest {
+            "1234567890",
-                tweet_id: "1234567890".to_string(),
+            false,
                mode: TweetArchiveMode::Tweet,
            },
            Path::new("/tmp/raw_tweets"),
            Path::new("/tmp/temp/tweets"),
            Path::new("/tmp/twitter-creds.txt"),
@ -335,7 +359,6 @@ mod tests {
        assert!(args.contains(&"--download-media".to_string()));
        assert!(args.contains(&"--credentials-file".to_string()));
        assert!(args.contains(&"--no-recursive".to_string()));
        assert!(!args.contains(&"--no-download-avatars".to_string()));
        assert!(!args.contains(&"--recursive-replied-to-tweets".to_string()));
        assert!(!args.contains(&"--recursive-replied-to-tweets-quotes-retweets".to_string()));
        assert!(!args.contains(&"--download-replied-to-tweets-media".to_string()));
@ -344,10 +367,8 @@ mod tests {
    #[test]
    fn test_build_scraper_args_for_thread() {
        let args = build_scraper_args(
-            &TweetArchiveRequest {
+            "1234567890",
-                tweet_id: "1234567890".to_string(),
+            true,
                mode: TweetArchiveMode::Thread,
            },
            Path::new("/tmp/raw_tweets"),
            Path::new("/tmp/temp/tweets"),
            Path::new("/tmp/twitter-creds.txt"),
@ -433,13 +454,13 @@ avatar_local_path = "../temp/ts/tweets/media/avatars/avatar.jpg"
    #[test]
    fn test_resolve_from_cwd_keeps_absolute_paths() {
-        let path = resolve_from_cwd(PathBuf::from("/tmp/creds.txt"), Path::new("/work"));
+        let path = absolutize_path_from_cwd(PathBuf::from("/tmp/creds.txt"), Path::new("/work"));
        assert_eq!(path, PathBuf::from("/tmp/creds.txt"));
    }
    #[test]
    fn test_resolve_from_cwd_expands_relative_paths() {
-        let path = resolve_from_cwd(PathBuf::from("creds.txt"), Path::new("/work"));
+        let path = absolutize_path_from_cwd(PathBuf::from("creds.txt"), Path::new("/work"));
        assert_eq!(path, PathBuf::from("/work/creds.txt"));
    }
@ -456,17 +477,9 @@ avatar_local_path = "../temp/ts/tweets/media/avatars/avatar.jpg"
        fs::write(&credentials, "ct0=test;auth_token=test").unwrap();
        set_test_env("ARCHIVR_TWITTER_CREDENTIALS_FILE", &credentials);
-        let result = archive(
+        let archived = archive("tweet:123", false, &store_path, "ts").unwrap();
            &TweetArchiveRequest {
                tweet_id: "123".to_string(),
                mode: TweetArchiveMode::Tweet,
            },
            &store_path,
            "ts",
        )
        .unwrap();
-        assert_eq!(result, TweetArchiveResult::Skipped(output_dir));
+        assert!(!archived);
        remove_test_env("ARCHIVR_TWITTER_CREDENTIALS_FILE");
        let _ = fs::remove_dir_all(store_path);
@ -529,7 +542,7 @@ EOF
 "#,
        )
        .unwrap();
-        std::process::Command::new("chmod")
+        Command::new("chmod")
            .arg("+x")
            .arg(&script)
            .status()
@ -539,20 +552,11 @@ EOF
        set_test_env("ARCHIVR_TWEET_SCRAPER", &script);
        set_test_env("ARCHIVR_TWEET_PYTHON", "/bin/sh");
-        let result = archive(
+        let archived = archive("tweet:123", false, &store_path, "ts").unwrap();
            &TweetArchiveRequest {
                tweet_id: "123".to_string(),
                mode: TweetArchiveMode::Tweet,
            },
            &store_path,
            "ts",
        )
        .unwrap();
        let tweet_file = output_dir.join("tweet-123.toml");
        let contents = fs::read_to_string(&tweet_file).unwrap();
-        assert_eq!(result, TweetArchiveResult::Archived(output_dir.clone()));
+        assert!(archived);
        assert!(tweet_file.exists());
        assert!(!output_dir.join("scraping_summary.toml").exists());
        assert!(contents.contains(r#"avatar_local_path = "raw/"#));
--- a/src/main.rs
+++ b/src/main.rs
@ -10,12 +10,6 @@ use std::{
 mod downloader;
 mod hash;
 #[derive(Debug, Clone, PartialEq, Eq)]
 enum ExplicitArchiveRequest {
    Tweet(downloader::tweets::TweetArchiveRequest),
    TweetMedia { tweet_id: String },
 }
 #[derive(Parser, Debug)]
 #[command(version, about, long_about = None)]
 struct Args {
@ -72,12 +66,14 @@ fn get_archive_path() -> Option<PathBuf> {
    None
 }
-#[derive(Debug, PartialEq)]
+#[derive(Debug, PartialEq, Eq, Clone, Copy)]
 enum Source {
    YouTubeVideo,
    YouTubePlaylist,
    YouTubeChannel,
    X,
    Tweet,
    TweetThread,
    Instagram,
    Facebook,
    TikTok,
@ -95,39 +91,19 @@ fn parse_tweet_id(id: &str) -> Option<String> {
    }
 }
-fn parse_explicit_archive_request(path: &str) -> Option<ExplicitArchiveRequest> {
+fn tweet_id_from_path(path: &str) -> Option<String> {
-    let parts: Vec<&str> = path.split(':').collect();
+    path.split(':').next_back().and_then(parse_tweet_id)
    match parts.as_slice() {
        ["tweet", id] => parse_tweet_id(id).map(|tweet_id| {
            ExplicitArchiveRequest::Tweet(downloader::tweets::TweetArchiveRequest {
                tweet_id,
                mode: downloader::tweets::TweetArchiveMode::Tweet,
            })
        }),
        ["tweet", "media", id] => {
            parse_tweet_id(id).map(|tweet_id| ExplicitArchiveRequest::TweetMedia { tweet_id })
        }
        ["x", "tweet", id] | ["x", "x", id] | ["twitter", "x", id] | ["twitter", "tweet", id] => {
            parse_tweet_id(id).map(|tweet_id| {
                ExplicitArchiveRequest::Tweet(downloader::tweets::TweetArchiveRequest {
                    tweet_id,
                    mode: downloader::tweets::TweetArchiveMode::Tweet,
                })
            })
        }
        ["x", "thread", id] | ["twitter", "thread", id] => parse_tweet_id(id).map(|tweet_id| {
            ExplicitArchiveRequest::Tweet(downloader::tweets::TweetArchiveRequest {
                tweet_id,
                mode: downloader::tweets::TweetArchiveMode::Thread,
            })
        }),
        _ => None,
    }
 }
-fn tweet_media_path(tweet_id: &str) -> String {
+fn resolve_source_path(path: &str, source: &Source) -> String {
-    format!("https://x.com/i/status/{tweet_id}")
+    if *source == Source::X && path.starts_with("tweet:media:") {
        format!(
            "https://x.com/i/status/{}",
            tweet_id_from_path(path).unwrap()
        )
    } else {
        path.to_string()
    }
 }
 // INFO: yt-dlp supports a lot of sites; so, when archiving (for example) a website, the user
@ -165,8 +141,43 @@ fn determine_source(path: &str) -> Source {
        }
    }
-    // Shorthand schemes: x: or twitter:
+    // Shorthand schemes: tweet:, x:, or twitter:
-    if path.starts_with("x:") || path.starts_with("twitter:") {
+    if let Some(after_scheme) = path.strip_prefix("tweet:") {
        if after_scheme.starts_with("media:")
            && after_scheme
                .strip_prefix("media:")
                .and_then(parse_tweet_id)
                .is_some()
        {
            return Source::X;
        }
        if parse_tweet_id(after_scheme).is_some() {
            return Source::Tweet;
        }
    }
    if let Some(after_scheme) = path
        .strip_prefix("x:")
        .or_else(|| path.strip_prefix("twitter:"))
    {
        if after_scheme
            .strip_prefix("thread:")
            .and_then(parse_tweet_id)
            .is_some()
        {
            return Source::TweetThread;
        }
        if after_scheme
            .strip_prefix("tweet:")
            .or_else(|| after_scheme.strip_prefix("x:"))
            .and_then(parse_tweet_id)
            .is_some()
        {
            return Source::Tweet;
        }
        return Source::X;
    }
@ -344,40 +355,46 @@ fn main() -> Result<()> {
                }
            };
-            if let Some(ExplicitArchiveRequest::Tweet(request)) =
+            let source = determine_source(path);
-                parse_explicit_archive_request(path)
+
-            {
+            // Sources: Tweets or Twitter Threads
-                match downloader::tweets::archive(&request, &store_path, &timestamp) {
+            match source {
-                    Ok(downloader::tweets::TweetArchiveResult::Archived(output_dir)) => {
+                Source::Other => {
-                        println!("Tweet archived successfully to {}", output_dir.display());
+                    eprintln!("Archiving from this source is not yet implemented.");
-                        return Ok(());
+                    process::exit(1);
-                    }
+                }
-                    Ok(downloader::tweets::TweetArchiveResult::Skipped(output_dir)) => {
+                Source::Tweet | Source::TweetThread => {
-                        println!("Tweet already archived in {}", output_dir.display());
+                    match downloader::tweets::archive(
-                        return Ok(());
+                        path,
-                    }
+                        source == Source::TweetThread,
-                    Err(e) => {
+                        &store_path,
-                        eprintln!("Failed to archive tweet: {e}");
+                        &timestamp,
-                        process::exit(1);
+                    ) {
                        Ok(true) => {
                            println!(
                                "Tweet archived successfully to {}",
                                store_path.join("raw_tweets").display()
                            );
                            return Ok(());
                        }
                        Ok(false) => {
                            println!(
                                "Tweet already archived in {}",
                                store_path.join("raw_tweets").display()
                            );
                            return Ok(());
                        }
                        Err(e) => {
                            eprintln!("Failed to archive tweet: {e}");
                            process::exit(1);
                        }
                    }
                }
                _ => {}
            }
-            let (resolved_path, source) = match parse_explicit_archive_request(path) {
+            // Sources, for which yt-dlp is needed
-                Some(ExplicitArchiveRequest::TweetMedia { tweet_id }) => {
+            let path = resolve_source_path(path, &source);
                    (tweet_media_path(&tweet_id), Source::X)
                }
                None => {
                    let source = determine_source(path);
                    if let Source::Other = source {
                        eprintln!("Archiving from this source is not yet implemented.");
                        process::exit(1);
                    }
                    (path.clone(), source)
                }
                Some(ExplicitArchiveRequest::Tweet(_)) => unreachable!(),
            };
            let hash = match source {
                Source::YouTubeVideo
                | Source::X
@ -386,11 +403,7 @@ fn main() -> Result<()> {
                | Source::TikTok
                | Source::Reddit
                | Source::Snapchat => {
-                    match downloader::ytdlp::download(
+                    match downloader::ytdlp::download(path.clone(), &store_path, &timestamp) {
                        resolved_path.clone(),
                        &store_path,
                        &timestamp,
                    ) {
                        Ok(h) => h,
                        Err(e) => {
                            eprintln!("Failed to download from YouTube: {e}");
@ -399,7 +412,7 @@ fn main() -> Result<()> {
                    }
                }
                Source::Local => {
-                    match downloader::local::save(resolved_path.clone(), &store_path, &timestamp) {
+                    match downloader::local::save(path.clone(), &store_path, &timestamp) {
                        Ok(h) => h,
                        Err(e) => {
                            eprintln!("Failed to archive local file: {e}");
@ -419,7 +432,7 @@ fn main() -> Result<()> {
                | Source::Reddit
                | Source::Snapchat => ".mp4",
                Source::Local => {
-                    let p = Path::new(resolved_path.trim_start_matches("file://"));
+                    let p = Path::new(path.trim_start_matches("file://"));
                    &p.extension()
                        .map_or(String::new(), |ext| format!(".{}", ext.to_string_lossy()))
                }
@ -522,6 +535,7 @@ fn main() -> Result<()> {
 #[cfg(test)]
 mod tests {
    use super::*;
    use std::fs;
    struct TestCase<'a> {
        url: &'a str,
@ -529,93 +543,93 @@ mod tests {
    }
    #[test]
-    fn test_explicit_tweet_archive_parsing() {
+    fn test_tweet_sources() {
        let cases = [
-            (
+            TestCase {
-                "tweet:1234567890",
+                url: "tweet:1234567890",
-                Some(ExplicitArchiveRequest::Tweet(
+                expected: Source::Tweet,
-                    downloader::tweets::TweetArchiveRequest {
+            },
-                        tweet_id: "1234567890".to_string(),
+            TestCase {
-                        mode: downloader::tweets::TweetArchiveMode::Tweet,
+                url: "x:tweet:1234567890",
-                    },
+                expected: Source::Tweet,
-                )),
+            },
-            ),
+            TestCase {
-            (
+                url: "x:x:1234567890",
-                "x:tweet:1234567890",
+                expected: Source::Tweet,
-                Some(ExplicitArchiveRequest::Tweet(
+            },
-                    downloader::tweets::TweetArchiveRequest {
+            TestCase {
-                        tweet_id: "1234567890".to_string(),
+                url: "twitter:x:1234567890",
-                        mode: downloader::tweets::TweetArchiveMode::Tweet,
+                expected: Source::Tweet,
-                    },
+            },
-                )),
+            TestCase {
-            ),
+                url: "twitter:tweet:1234567890",
-            (
+                expected: Source::Tweet,
-                "x:x:1234567890",
+            },
-                Some(ExplicitArchiveRequest::Tweet(
+            TestCase {
-                    downloader::tweets::TweetArchiveRequest {
+                url: "tweet:media:1234567890",
-                        tweet_id: "1234567890".to_string(),
+                expected: Source::X,
-                        mode: downloader::tweets::TweetArchiveMode::Tweet,
+            },
-                    },
+            TestCase {
-                )),
+                url: "x:thread:1234567890",
-            ),
+                expected: Source::TweetThread,
-            (
+            },
-                "twitter:x:1234567890",
+            TestCase {
-                Some(ExplicitArchiveRequest::Tweet(
+                url: "twitter:thread:1234567890",
-                    downloader::tweets::TweetArchiveRequest {
+                expected: Source::TweetThread,
-                        tweet_id: "1234567890".to_string(),
+            },
-                        mode: downloader::tweets::TweetArchiveMode::Tweet,
+            TestCase {
-                    },
+                url: "tweet:thread:1234567890",
-                )),
+                expected: Source::Other,
-            ),
+            },
-            (
+            TestCase {
-                "twitter:tweet:1234567890",
+                url: "tweet:not-a-number",
-                Some(ExplicitArchiveRequest::Tweet(
+                expected: Source::Other,
-                    downloader::tweets::TweetArchiveRequest {
+            },
-                        tweet_id: "1234567890".to_string(),
+            TestCase {
-                        mode: downloader::tweets::TweetArchiveMode::Tweet,
+                url: "tweet:media:not-a-number",
-                    },
+                expected: Source::Other,
-                )),
+            },
            ),
            (
                "tweet:media:1234567890",
                Some(ExplicitArchiveRequest::TweetMedia {
                    tweet_id: "1234567890".to_string(),
                }),
            ),
            (
                "x:thread:1234567890",
                Some(ExplicitArchiveRequest::Tweet(
                    downloader::tweets::TweetArchiveRequest {
                        tweet_id: "1234567890".to_string(),
                        mode: downloader::tweets::TweetArchiveMode::Thread,
                    },
                )),
            ),
            (
                "twitter:thread:1234567890",
                Some(ExplicitArchiveRequest::Tweet(
                    downloader::tweets::TweetArchiveRequest {
                        tweet_id: "1234567890".to_string(),
                        mode: downloader::tweets::TweetArchiveMode::Thread,
                    },
                )),
            ),
            ("tweet:thread:1234567890", None),
            ("x:media:1234567890", None),
            ("tweet:not-a-number", None),
            ("tweet:media:not-a-number", None),
        ];
-        for (input, expected) in cases {
+        for case in &cases {
            assert_eq!(
-                parse_explicit_archive_request(input),
+                determine_source(case.url),
-                expected,
+                case.expected,
-                "Failed for input: {}",
+                "Failed for URL: {}",
-                input
+                case.url
            );
        }
    }
    /// Each shorthand form ending in a numeric segment yields that trailing
    /// ID, while a non-numeric trailing segment yields `None`.
    #[test]
    fn test_tweet_id_from_path() {
        let numeric_paths = [
            "tweet:1234567890",
            "tweet:media:1234567890",
            "x:thread:1234567890",
        ];
        for path in numeric_paths {
            assert_eq!(tweet_id_from_path(path), Some("1234567890".to_string()));
        }

        assert_eq!(tweet_id_from_path("tweet:not-a-number"), None);
    }
    /// A `tweet:media:<id>` path with `Source::X` becomes an x.com status URL,
    /// while a plain `tweet:<id>` path with `Source::Tweet` is returned as-is.
    #[test]
    fn test_resolve_source_path() {
        let media_url = resolve_source_path("tweet:media:1234567890", &Source::X);
        assert_eq!(media_url, "https://x.com/i/status/1234567890");

        let untouched = resolve_source_path("tweet:1234567890", &Source::Tweet);
        assert_eq!(untouched, "tweet:1234567890");
    }
    #[test]
    fn test_youtube_sources() {
        // --- YouTube Video URLs ---
Author	SHA1	Message	Date
TheGeneralist	9837bda0c2	Rename resolve_from_cwd to absolutize_path Update call sites and tests to use the new API. Adjust tweet scraper path/credentials handling and make small tweaks to local path hashing and raw store helpers. Signed-off-by: TheGeneralist <180094941+thegeneralist01@users.noreply.github.com> Signed-off-by: TheGeneralist <180094941+thegeneralist01@users.noreply.github.com>	2026-04-02 21:13:55 +02:00
TheGeneralist	741e33c3af	Clean up some clanker-written code Signed-off-by: TheGeneralist <180094941+thegeneralist01@users.noreply.github.com>	2026-04-02 18:54:58 +02:00
TheGeneralist	26d94a8289	Refactor tweet archive source handling	2026-04-02 14:31:04 +02:00
TheGeneralist	514a5e99c7	refactor: simplify archive source parsing	2026-04-02 14:05:01 +02:00