From 514a5e99c7b0dab7dd8a2a7e8faf0aeb47e9ac32 Mon Sep 17 00:00:00 2001
From: TheGeneralist <180094941+thegeneralist01@users.noreply.github.com>
Date: Thu, 2 Apr 2026 14:05:01 +0200
Subject: [PATCH 1/4] refactor: simplify archive source parsing

---
 src/downloader/local.rs  |  30 ++-
 src/downloader/tweets.rs |   5 +-
 src/downloader/ytdlp.rs  |  12 +-
 src/main.rs              | 441 +++++++++++++++------------------------
 4 files changed, 205 insertions(+), 283 deletions(-)
diff --git a/src/downloader/local.rs b/src/downloader/local.rs
index df31a4e..d91b652 100644
--- a/src/downloader/local.rs
+++ b/src/downloader/local.rs
@@ -7,7 +7,21 @@ use std::{
 
 use crate::hash::hash_file;
 
-pub fn save(path: String, store_path: &Path, timestamp: &String) -> Result<String> {
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum RawArchiveResult {
+    Archived(PathBuf),
+    AlreadyArchived(PathBuf),
+}
+
+impl RawArchiveResult {
+    pub fn relative_path(&self) -> &Path {
+        match self {
+            Self::Archived(path) | Self::AlreadyArchived(path) => path,
+        }
+    }
+}
+
+pub fn save(path: String, store_path: &Path, timestamp: &str) -> Result<PathBuf> {
     println!("Saving path: {path}");
 
     let temp_dir = store_path.join("temp").join(timestamp);
@@ -28,10 +42,10 @@ pub fn save(path: String, store_path: &Path, timestamp: &String) -> Result<Strin
         bail!("yt-dlp failed: {stderr}");
     }
 
-    hash_file(&out_file)
+    Ok(out_file)
 }
 
-pub fn archive_staged_file(file: &Path, store_path: &Path) -> Result<PathBuf> {
+pub fn archive_staged_file(file: &Path, store_path: &Path) -> Result<RawArchiveResult> {
     let hash = hash_file(file)?;
     let destination = raw_relative_path(file, &hash)?;
     let absolute_destination = store_path.join(&destination);
@@ -42,11 +56,11 @@ pub fn archive_staged_file(file: &Path, store_path: &Path) -> Result<PathBuf> {
 
     if absolute_destination.exists() {
         fs::remove_file(file)?;
+        Ok(RawArchiveResult::AlreadyArchived(destination))
     } else {
         fs::rename(file, &absolute_destination)?;
+        Ok(RawArchiveResult::Archived(destination))
     }
-
-    Ok(destination)
 }
 
 fn raw_relative_path(file: &Path, hash: &str) -> Result<PathBuf> {
@@ -79,12 +93,12 @@ mod tests {
         let staged = root.join("temp").join("photo.jpg");
         fs::write(&staged, b"image-bytes").unwrap();
 
-        let relative = archive_staged_file(&staged, &root).unwrap();
-        let absolute = root.join(&relative);
+        let result = archive_staged_file(&staged, &root).unwrap();
+        let absolute = root.join(result.relative_path());
 
         assert!(absolute.is_file());
         assert!(!staged.exists());
-        assert!(relative.starts_with("raw"));
+        assert!(result.relative_path().starts_with("raw"));
 
         let _ = fs::remove_dir_all(&root);
     }
diff --git a/src/downloader/tweets.rs b/src/downloader/tweets.rs
index db5b993..c963bf3 100644
--- a/src/downloader/tweets.rs
+++ b/src/downloader/tweets.rs
@@ -277,7 +277,10 @@ fn archive_asset_reference(
     }
 
     let relative_path = local::archive_staged_file(&absolute_path, store_path)?;
-    let relative_path = relative_path.to_string_lossy().replace('\\', "/");
+    let relative_path = relative_path
+        .relative_path()
+        .to_string_lossy()
+        .replace('\\', "/");
     archived_assets.insert(key, relative_path.clone());
 
     Ok(relative_path)
diff --git a/src/downloader/ytdlp.rs b/src/downloader/ytdlp.rs
index 6ecd7b8..2417bb0 100644
--- a/src/downloader/ytdlp.rs
+++ b/src/downloader/ytdlp.rs
@@ -1,9 +1,11 @@
 use anyhow::{Context, Result, bail};
-use std::{env, path::Path, process::Command};
+use std::{
+    env,
+    path::{Path, PathBuf},
+    process::Command,
+};
 
-use crate::hash::hash_file;
-
-pub fn download(path: String, store_path: &Path, timestamp: &String) -> Result<String> {
+pub fn download(path: String, store_path: &Path, timestamp: &str) -> Result<PathBuf> {
     println!("Downloading with yt-dlp: {path}");
 
     let ytdlp = env::var("ARCHIVR_YT_DLP").unwrap_or_else(|_| "yt-dlp".to_string());
@@ -29,5 +31,5 @@ pub fn download(path: String, store_path: &Path, timestamp: &String) -> Result<S
         bail!("yt-dlp failed: {stderr}");
     }
 
-    hash_file(&out_file)
+    Ok(out_file)
 }
diff --git a/src/main.rs b/src/main.rs
index b83f514..487e2fd 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,4 +1,4 @@
-use anyhow::Result;
+use anyhow::{Result, bail};
 use chrono::Local;
 use clap::{Parser, Subcommand};
 use std::{
@@ -10,12 +10,6 @@ use std::{
 mod downloader;
 mod hash;
 
-#[derive(Debug, Clone, PartialEq, Eq)]
-enum ExplicitArchiveRequest {
-    Tweet(downloader::tweets::TweetArchiveRequest),
-    TweetMedia { tweet_id: String },
-}
-
 #[derive(Parser, Debug)]
 #[command(version, about, long_about = None)]
 struct Args {
@@ -72,8 +66,10 @@ fn get_archive_path() -> Option<PathBuf> {
     None
 }
 
-#[derive(Debug, PartialEq)]
+#[derive(Debug, Clone, PartialEq, Eq)]
 enum Source {
+    Tweet(downloader::tweets::TweetArchiveRequest),
+    TweetMedia { tweet_id: String },
     YouTubeVideo,
     YouTubePlaylist,
     YouTubeChannel,
@@ -95,37 +91,6 @@ fn parse_tweet_id(id: &str) -> Option<String> {
     }
 }
 
-fn parse_explicit_archive_request(path: &str) -> Option<ExplicitArchiveRequest> {
-    let parts: Vec<&str> = path.split(':').collect();
-
-    match parts.as_slice() {
-        ["tweet", id] => parse_tweet_id(id).map(|tweet_id| {
-            ExplicitArchiveRequest::Tweet(downloader::tweets::TweetArchiveRequest {
-                tweet_id,
-                mode: downloader::tweets::TweetArchiveMode::Tweet,
-            })
-        }),
-        ["tweet", "media", id] => {
-            parse_tweet_id(id).map(|tweet_id| ExplicitArchiveRequest::TweetMedia { tweet_id })
-        }
-        ["x", "tweet", id] | ["x", "x", id] | ["twitter", "x", id] | ["twitter", "tweet", id] => {
-            parse_tweet_id(id).map(|tweet_id| {
-                ExplicitArchiveRequest::Tweet(downloader::tweets::TweetArchiveRequest {
-                    tweet_id,
-                    mode: downloader::tweets::TweetArchiveMode::Tweet,
-                })
-            })
-        }
-        ["x", "thread", id] | ["twitter", "thread", id] => parse_tweet_id(id).map(|tweet_id| {
-            ExplicitArchiveRequest::Tweet(downloader::tweets::TweetArchiveRequest {
-                tweet_id,
-                mode: downloader::tweets::TweetArchiveMode::Thread,
-            })
-        }),
-        _ => None,
-    }
-}
-
 fn tweet_media_path(tweet_id: &str) -> String {
     format!("https://x.com/i/status/{tweet_id}")
 }
@@ -165,6 +130,40 @@ fn determine_source(path: &str) -> Source {
         }
     }
 
+    let parts: Vec<&str> = path.split(':').collect();
+    match parts.as_slice() {
+        ["tweet", id] => {
+            if let Some(tweet_id) = parse_tweet_id(id) {
+                return Source::Tweet(downloader::tweets::TweetArchiveRequest {
+                    tweet_id,
+                    mode: downloader::tweets::TweetArchiveMode::Tweet,
+                });
+            }
+        }
+        ["tweet", "media", id] => {
+            if let Some(tweet_id) = parse_tweet_id(id) {
+                return Source::TweetMedia { tweet_id };
+            }
+        }
+        ["x", "tweet", id] | ["x", "x", id] | ["twitter", "x", id] | ["twitter", "tweet", id] => {
+            if let Some(tweet_id) = parse_tweet_id(id) {
+                return Source::Tweet(downloader::tweets::TweetArchiveRequest {
+                    tweet_id,
+                    mode: downloader::tweets::TweetArchiveMode::Tweet,
+                });
+            }
+        }
+        ["x", "thread", id] | ["twitter", "thread", id] => {
+            if let Some(tweet_id) = parse_tweet_id(id) {
+                return Source::Tweet(downloader::tweets::TweetArchiveRequest {
+                    tweet_id,
+                    mode: downloader::tweets::TweetArchiveMode::Thread,
+                });
+            }
+        }
+        _ => {}
+    }
+
     // Shorthand schemes: x: or twitter:
     if path.starts_with("x:") || path.starts_with("twitter:") {
         return Source::X;
@@ -261,56 +260,6 @@ fn determine_source(path: &str) -> Source {
     Source::Other
 }
 
-fn hash_exists(filename: String, store_path: &Path) -> bool {
-    let mut chars = filename.chars();
-    let first_letter = chars.next().unwrap();
-    let second_letter = chars.next().unwrap();
-
-    let path = store_path
-        .join("raw")
-        .join(first_letter.to_string())
-        .join(second_letter.to_string())
-        .join(filename);
-
-    println!("Checking {}", path.display());
-
-    path.exists()
-}
-
-fn move_temp_to_raw(file: &Path, hash: &String, store_path: &Path) -> Result<()> {
-    let mut chars = hash.chars();
-    let first_letter = chars.next().unwrap().to_string();
-    let second_letter = chars.next().unwrap().to_string();
-    let file_extension = file
-        .extension()
-        .map_or(String::new(), |ext| format!(".{}", ext.to_string_lossy()));
-
-    fs::create_dir_all(
-        store_path
-            .join("raw")
-            .join(&first_letter)
-            .join(&second_letter),
-    )?;
-
-    fs::rename(
-        file,
-        store_path
-            .join("raw")
-            .join(&first_letter)
-            .join(&second_letter)
-            .join(format!(
-                "{hash}{}",
-                if file_extension.is_empty() {
-                    ""
-                } else {
-                    &file_extension
-                }
-            )),
-    )?;
-
-    Ok(())
-}
-
 fn initialize_store_directories(store_path: &Path) -> Result<()> {
     fs::create_dir_all(store_path.join("raw"))?;
     fs::create_dir_all(store_path.join("raw_tweets"))?;
@@ -319,6 +268,33 @@ fn initialize_store_directories(store_path: &Path) -> Result<()> {
     Ok(())
 }
 
+fn archive_non_tweet_source(
+    source: &Source,
+    path: &str,
+    store_path: &Path,
+    timestamp: &str,
+) -> Result<downloader::local::RawArchiveResult> {
+    let staged_file = match source {
+        Source::Tweet(_) | Source::Other => unreachable!(),
+        Source::TweetMedia { tweet_id } => {
+            downloader::ytdlp::download(tweet_media_path(tweet_id), store_path, timestamp)?
+        }
+        Source::YouTubeVideo
+        | Source::X
+        | Source::Instagram
+        | Source::Facebook
+        | Source::TikTok
+        | Source::Reddit
+        | Source::Snapchat => downloader::ytdlp::download(path.to_string(), store_path, timestamp)?,
+        Source::Local => downloader::local::save(path.to_string(), store_path, timestamp)?,
+        Source::YouTubePlaylist | Source::YouTubeChannel => {
+            bail!("Archiving from this source is not yet implemented.")
+        }
+    };
+
+    downloader::local::archive_staged_file(&staged_file, store_path)
+}
+
 fn main() -> Result<()> {
     let args = Args::parse();
 
@@ -344,118 +320,51 @@ fn main() -> Result<()> {
                 }
             };
 
-            if let Some(ExplicitArchiveRequest::Tweet(request)) =
-                parse_explicit_archive_request(path)
-            {
-                match downloader::tweets::archive(&request, &store_path, &timestamp) {
-                    Ok(downloader::tweets::TweetArchiveResult::Archived(output_dir)) => {
-                        println!("Tweet archived successfully to {}", output_dir.display());
-                        return Ok(());
-                    }
-                    Ok(downloader::tweets::TweetArchiveResult::Skipped(output_dir)) => {
-                        println!("Tweet already archived in {}", output_dir.display());
-                        return Ok(());
-                    }
-                    Err(e) => {
-                        eprintln!("Failed to archive tweet: {e}");
-                        process::exit(1);
-                    }
+            let source = determine_source(path);
+            match source {
+                Source::Other => {
+                    eprintln!("Archiving from this source is not yet implemented.");
+                    process::exit(1);
                 }
-            }
-
-            let (resolved_path, source) = match parse_explicit_archive_request(path) {
-                Some(ExplicitArchiveRequest::TweetMedia { tweet_id }) => {
-                    (tweet_media_path(&tweet_id), Source::X)
-                }
-                None => {
-                    let source = determine_source(path);
-                    if let Source::Other = source {
-                        eprintln!("Archiving from this source is not yet implemented.");
-                        process::exit(1);
-                    }
-                    (path.clone(), source)
-                }
-                Some(ExplicitArchiveRequest::Tweet(_)) => unreachable!(),
-            };
-
-            let hash = match source {
-                Source::YouTubeVideo
-                | Source::X
-                | Source::Instagram
-                | Source::Facebook
-                | Source::TikTok
-                | Source::Reddit
-                | Source::Snapchat => {
-                    match downloader::ytdlp::download(
-                        resolved_path.clone(),
-                        &store_path,
-                        &timestamp,
-                    ) {
-                        Ok(h) => h,
+                Source::Tweet(request) => {
+                    match downloader::tweets::archive(&request, &store_path, &timestamp) {
+                        Ok(downloader::tweets::TweetArchiveResult::Archived(output_dir)) => {
+                            println!("Tweet archived successfully to {}", output_dir.display());
+                            return Ok(());
+                        }
+                        Ok(downloader::tweets::TweetArchiveResult::Skipped(output_dir)) => {
+                            println!("Tweet already archived in {}", output_dir.display());
+                            return Ok(());
+                        }
                         Err(e) => {
-                            eprintln!("Failed to download from YouTube: {e}");
+                            eprintln!("Failed to archive tweet: {e}");
                             process::exit(1);
                         }
                     }
                 }
-                Source::Local => {
-                    match downloader::local::save(resolved_path.clone(), &store_path, &timestamp) {
-                        Ok(h) => h,
-                        Err(e) => {
-                            eprintln!("Failed to archive local file: {e}");
-                            process::exit(1);
+                source => {
+                    let result =
+                        match archive_non_tweet_source(&source, path, &store_path, &timestamp) {
+                            Ok(result) => result,
+                            Err(e) => {
+                                match source {
+                                    Source::Local => eprintln!("Failed to archive local file: {e}"),
+                                    _ => eprintln!("Failed to archive source: {e}"),
+                                }
+                                process::exit(1);
+                            }
+                        };
+
+                    let _ = fs::remove_dir_all(store_path.join("temp").join(&timestamp));
+                    match result {
+                        downloader::local::RawArchiveResult::Archived(_) => {
+                            println!("File archived successfully.");
+                        }
+                        downloader::local::RawArchiveResult::AlreadyArchived(_) => {
+                            println!("File already archived.");
                         }
                     }
                 }
-                _ => unreachable!(),
-            };
-
-            let file_extension = match source {
-                Source::YouTubeVideo
-                | Source::X
-                | Source::Instagram
-                | Source::Facebook
-                | Source::TikTok
-                | Source::Reddit
-                | Source::Snapchat => ".mp4",
-                Source::Local => {
-                    let p = Path::new(resolved_path.trim_start_matches("file://"));
-                    &p.extension()
-                        .map_or(String::new(), |ext| format!(".{}", ext.to_string_lossy()))
-                }
-                _ => "",
-            };
-
-            let hash_exists = hash_exists(format!("{hash}{file_extension}"), &store_path);
-
-            // TODO: check for repeated archives?
-            // There could be one of the following:
-            // - We are literally archiving the same path over again.
-            // - We are archiving a different path, which had this file. E.g.: we archived a
-            // website before which had this YouTube video, and while recursively archiving
-            // everything, we also archived the YouTube video although it wasn't our main
-            // target. This means that we should archive again; whereas with the first case...
-            // Not sure. Need to think about this.
-            // ----
-            // Thinking about it a day later...
-            // If we are specifically archiving a YouTube video, it could also be two of the
-            // above. So yeah, just create a new DB entry and symlink the Raw to the Structured
-            // Dir or whatever. it's midnight and my brain ain't wording/braining.
-            if hash_exists {
-                println!("File already archived.");
-                let _ = fs::remove_dir_all(store_path.join("temp").join(&timestamp));
-            } else {
-                move_temp_to_raw(
-                    &store_path
-                        .join("temp")
-                        .join(&timestamp)
-                        .join(format!("{timestamp}{file_extension}")),
-                    &hash,
-                    &store_path,
-                )?;
-                let _ = fs::remove_dir_all(store_path.join("temp").join(&timestamp));
-
-                println!("File archived successfully.");
             }
 
             // TODO: DB INSERT, inserting a record
@@ -529,89 +438,83 @@ mod tests {
     }
 
     #[test]
-    fn test_explicit_tweet_archive_parsing() {
+    fn test_tweet_and_thread_sources() {
         let cases = [
-            (
-                "tweet:1234567890",
-                Some(ExplicitArchiveRequest::Tweet(
-                    downloader::tweets::TweetArchiveRequest {
-                        tweet_id: "1234567890".to_string(),
-                        mode: downloader::tweets::TweetArchiveMode::Tweet,
-                    },
-                )),
-            ),
-            (
-                "x:tweet:1234567890",
-                Some(ExplicitArchiveRequest::Tweet(
-                    downloader::tweets::TweetArchiveRequest {
-                        tweet_id: "1234567890".to_string(),
-                        mode: downloader::tweets::TweetArchiveMode::Tweet,
-                    },
-                )),
-            ),
-            (
-                "x:x:1234567890",
-                Some(ExplicitArchiveRequest::Tweet(
-                    downloader::tweets::TweetArchiveRequest {
-                        tweet_id: "1234567890".to_string(),
-                        mode: downloader::tweets::TweetArchiveMode::Tweet,
-                    },
-                )),
-            ),
-            (
-                "twitter:x:1234567890",
-                Some(ExplicitArchiveRequest::Tweet(
-                    downloader::tweets::TweetArchiveRequest {
-                        tweet_id: "1234567890".to_string(),
-                        mode: downloader::tweets::TweetArchiveMode::Tweet,
-                    },
-                )),
-            ),
-            (
-                "twitter:tweet:1234567890",
-                Some(ExplicitArchiveRequest::Tweet(
-                    downloader::tweets::TweetArchiveRequest {
-                        tweet_id: "1234567890".to_string(),
-                        mode: downloader::tweets::TweetArchiveMode::Tweet,
-                    },
-                )),
-            ),
-            (
-                "tweet:media:1234567890",
-                Some(ExplicitArchiveRequest::TweetMedia {
+            TestCase {
+                url: "tweet:1234567890",
+                expected: Source::Tweet(downloader::tweets::TweetArchiveRequest {
                     tweet_id: "1234567890".to_string(),
+                    mode: downloader::tweets::TweetArchiveMode::Tweet,
                 }),
-            ),
-            (
-                "x:thread:1234567890",
-                Some(ExplicitArchiveRequest::Tweet(
-                    downloader::tweets::TweetArchiveRequest {
-                        tweet_id: "1234567890".to_string(),
-                        mode: downloader::tweets::TweetArchiveMode::Thread,
-                    },
-                )),
-            ),
-            (
-                "twitter:thread:1234567890",
-                Some(ExplicitArchiveRequest::Tweet(
-                    downloader::tweets::TweetArchiveRequest {
-                        tweet_id: "1234567890".to_string(),
-                        mode: downloader::tweets::TweetArchiveMode::Thread,
-                    },
-                )),
-            ),
-            ("tweet:thread:1234567890", None),
-            ("x:media:1234567890", None),
-            ("tweet:not-a-number", None),
-            ("tweet:media:not-a-number", None),
+            },
+            TestCase {
+                url: "x:tweet:1234567890",
+                expected: Source::Tweet(downloader::tweets::TweetArchiveRequest {
+                    tweet_id: "1234567890".to_string(),
+                    mode: downloader::tweets::TweetArchiveMode::Tweet,
+                }),
+            },
+            TestCase {
+                url: "x:x:1234567890",
+                expected: Source::Tweet(downloader::tweets::TweetArchiveRequest {
+                    tweet_id: "1234567890".to_string(),
+                    mode: downloader::tweets::TweetArchiveMode::Tweet,
+                }),
+            },
+            TestCase {
+                url: "twitter:x:1234567890",
+                expected: Source::Tweet(downloader::tweets::TweetArchiveRequest {
+                    tweet_id: "1234567890".to_string(),
+                    mode: downloader::tweets::TweetArchiveMode::Tweet,
+                }),
+            },
+            TestCase {
+                url: "twitter:tweet:1234567890",
+                expected: Source::Tweet(downloader::tweets::TweetArchiveRequest {
+                    tweet_id: "1234567890".to_string(),
+                    mode: downloader::tweets::TweetArchiveMode::Tweet,
+                }),
+            },
+            TestCase {
+                url: "tweet:media:1234567890",
+                expected: Source::TweetMedia {
+                    tweet_id: "1234567890".to_string(),
+                },
+            },
+            TestCase {
+                url: "x:thread:1234567890",
+                expected: Source::Tweet(downloader::tweets::TweetArchiveRequest {
+                    tweet_id: "1234567890".to_string(),
+                    mode: downloader::tweets::TweetArchiveMode::Thread,
+                }),
+            },
+            TestCase {
+                url: "twitter:thread:1234567890",
+                expected: Source::Tweet(downloader::tweets::TweetArchiveRequest {
+                    tweet_id: "1234567890".to_string(),
+                    mode: downloader::tweets::TweetArchiveMode::Thread,
+                }),
+            },
+            TestCase {
+                url: "tweet:thread:1234567890",
+                expected: Source::Other,
+            },
+            TestCase {
+                url: "tweet:not-a-number",
+                expected: Source::Other,
+            },
+            TestCase {
+                url: "tweet:media:not-a-number",
+                expected: Source::Other,
+            },
         ];
 
-        for (input, expected) in cases {
+        for case in &cases {
             assert_eq!(
-                parse_explicit_archive_request(input),
-                expected,
-                "Failed for input: {}",
-                input
+                determine_source(case.url),
+                case.expected,
+                "Failed for URL: {}",
+                case.url
             );
         }
     }

From 26d94a8289f2e351b6d4b726181b4a223a4f6d2b Mon Sep 17 00:00:00 2001
From: TheGeneralist <180094941+thegeneralist01@users.noreply.github.com>
Date: Thu, 2 Apr 2026 14:31:04 +0200
Subject: [PATCH 2/4] Refactor tweet archive source handling

---
 src/downloader/local.rs  |  30 +---
 src/downloader/tweets.rs | 110 +++++-------
 src/downloader/ytdlp.rs  |  12 +-
 src/main.rs              | 358 ++++++++++++++++++++++++++-------------
 4 files changed, 288 insertions(+), 222 deletions(-)

diff --git a/src/downloader/local.rs b/src/downloader/local.rs
index d91b652..df31a4e 100644
--- a/src/downloader/local.rs
+++ b/src/downloader/local.rs
@@ -7,21 +7,7 @@ use std::{
 
 use crate::hash::hash_file;
 
-#[derive(Debug, Clone, PartialEq, Eq)]
-pub enum RawArchiveResult {
-    Archived(PathBuf),
-    AlreadyArchived(PathBuf),
-}
-
-impl RawArchiveResult {
-    pub fn relative_path(&self) -> &Path {
-        match self {
-            Self::Archived(path) | Self::AlreadyArchived(path) => path,
-        }
-    }
-}
-
-pub fn save(path: String, store_path: &Path, timestamp: &str) -> Result<PathBuf> {
+pub fn save(path: String, store_path: &Path, timestamp: &String) -> Result<String> {
     println!("Saving path: {path}");
 
     let temp_dir = store_path.join("temp").join(timestamp);
@@ -42,10 +28,10 @@ pub fn save(path: String, store_path: &Path, timestamp: &str) -> Result<PathBuf>
         bail!("yt-dlp failed: {stderr}");
     }
 
-    Ok(out_file)
+    hash_file(&out_file)
 }
 
-pub fn archive_staged_file(file: &Path, store_path: &Path) -> Result<RawArchiveResult> {
+pub fn archive_staged_file(file: &Path, store_path: &Path) -> Result<PathBuf> {
     let hash = hash_file(file)?;
     let destination = raw_relative_path(file, &hash)?;
     let absolute_destination = store_path.join(&destination);
@@ -56,11 +42,11 @@ pub fn archive_staged_file(file: &Path, store_path: &Path) -> Result<RawArchiveR
 
     if absolute_destination.exists() {
         fs::remove_file(file)?;
-        Ok(RawArchiveResult::AlreadyArchived(destination))
     } else {
         fs::rename(file, &absolute_destination)?;
-        Ok(RawArchiveResult::Archived(destination))
     }
+
+    Ok(destination)
 }
 
 fn raw_relative_path(file: &Path, hash: &str) -> Result<PathBuf> {
@@ -93,12 +79,12 @@ mod tests {
         let staged = root.join("temp").join("photo.jpg");
         fs::write(&staged, b"image-bytes").unwrap();
 
-        let result = archive_staged_file(&staged, &root).unwrap();
-        let absolute = root.join(result.relative_path());
+        let relative = archive_staged_file(&staged, &root).unwrap();
+        let absolute = root.join(&relative);
 
         assert!(absolute.is_file());
         assert!(!staged.exists());
-        assert!(result.relative_path().starts_with("raw"));
+        assert!(relative.starts_with("raw"));
 
         let _ = fs::remove_dir_all(&root);
     }
diff --git a/src/downloader/tweets.rs b/src/downloader/tweets.rs
index c963bf3..9e43759 100644
--- a/src/downloader/tweets.rs
+++ b/src/downloader/tweets.rs
@@ -12,22 +12,16 @@ use std::{
 
 use super::local;
 
-#[derive(Debug, Clone, PartialEq, Eq)]
-pub enum TweetArchiveMode {
-    Tweet,
-    Thread,
+fn parse_tweet_id(id: &str) -> Option<String> {
+    if !id.is_empty() && id.chars().all(|char| char.is_ascii_digit()) {
+        Some(id.to_string())
+    } else {
+        None
+    }
 }
 
-#[derive(Debug, Clone, PartialEq, Eq)]
-pub struct TweetArchiveRequest {
-    pub tweet_id: String,
-    pub mode: TweetArchiveMode,
-}
-
-#[derive(Debug, Clone, PartialEq, Eq)]
-pub enum TweetArchiveResult {
-    Archived(PathBuf),
-    Skipped(PathBuf),
+fn tweet_id_from_path(path: &str) -> Option<String> {
+    path.split(':').next_back().and_then(parse_tweet_id)
 }
 
 fn resolve_from_cwd(path: PathBuf, cwd: &Path) -> PathBuf {
@@ -39,14 +33,15 @@ fn resolve_from_cwd(path: PathBuf, cwd: &Path) -> PathBuf {
 }
 
 fn build_scraper_args(
-    request: &TweetArchiveRequest,
+    tweet_id: &str,
+    thread: bool,
     output_dir: &Path,
     temp_dir: &Path,
     credentials_file: &Path,
 ) -> Vec<String> {
     let mut args = vec![
         "--tweet-ids".to_string(),
-        request.tweet_id.clone(),
+        tweet_id.to_string(),
         "--output-dir".to_string(),
         output_dir.display().to_string(),
         "--media-dir".to_string(),
@@ -56,34 +51,29 @@ fn build_scraper_args(
         credentials_file.display().to_string(),
     ];
 
-    match request.mode {
-        TweetArchiveMode::Tweet => {
-            args.push("--no-recursive".to_string());
-        }
-        TweetArchiveMode::Thread => {
-            args.push("--recursive-replied-to-tweets".to_string());
-            args.push("--recursive-replied-to-tweets-quotes-retweets".to_string());
-            args.push("--download-replied-to-tweets-media".to_string());
-        }
+    if thread {
+        args.push("--recursive-replied-to-tweets".to_string());
+        args.push("--recursive-replied-to-tweets-quotes-retweets".to_string());
+        args.push("--download-replied-to-tweets-media".to_string());
+    } else {
+        args.push("--no-recursive".to_string());
     }
 
     args
 }
 
-pub fn archive(
-    request: &TweetArchiveRequest,
-    store_path: &Path,
-    timestamp: &str,
-) -> Result<TweetArchiveResult> {
+pub fn archive(path: &str, thread: bool, store_path: &Path, timestamp: &str) -> Result<bool> {
     let invocation_cwd = env::current_dir().context("Failed to read current working directory")?;
     let output_dir = store_path.join("raw_tweets");
     let temp_dir = store_path.join("temp").join(timestamp).join("tweets");
+    let tweet_id = tweet_id_from_path(path).context("Invalid tweet ID")?;
+
     fs::create_dir_all(&output_dir)?;
     fs::create_dir_all(&temp_dir)?;
 
-    let root_toml = output_dir.join(format!("tweet-{}.toml", request.tweet_id));
-    if request.mode == TweetArchiveMode::Tweet && root_toml.exists() {
-        return Ok(TweetArchiveResult::Skipped(output_dir));
+    let root_toml = output_dir.join(format!("tweet-{tweet_id}.toml"));
+    if !thread && root_toml.exists() {
+        return Ok(false);
     }
 
     let before = tweet_toml_files(&output_dir)?;
@@ -113,7 +103,7 @@ pub fn archive(
 
     let mut cmd = Command::new(&python);
     cmd.current_dir(&temp_dir).arg(&scraper_path);
-    for arg in build_scraper_args(request, &output_dir, &temp_dir, &credentials_file) {
+    for arg in build_scraper_args(&tweet_id, thread, &output_dir, &temp_dir, &credentials_file) {
         cmd.arg(arg);
     }
 
@@ -151,7 +141,7 @@ pub fn archive(
     rewrite_tweet_outputs(&new_tomls, &output_dir, &temp_dir, store_path)?;
     let _ = fs::remove_dir_all(store_path.join("temp").join(timestamp));
 
-    Ok(TweetArchiveResult::Archived(output_dir))
+    Ok(true)
 }
 
 fn cleanup_summary(output_dir: &Path) -> Result<()> {
@@ -164,9 +154,11 @@ fn cleanup_summary(output_dir: &Path) -> Result<()> {
 
 fn tweet_toml_files(output_dir: &Path) -> Result<HashSet<PathBuf>> {
     let mut files = HashSet::new();
+
     for entry in fs::read_dir(output_dir)? {
         let entry = entry?;
         let path = entry.path();
+
         if path.is_file()
             && path
                 .file_name()
@@ -176,6 +168,7 @@ fn tweet_toml_files(output_dir: &Path) -> Result<HashSet<PathBuf>> {
             files.insert(path);
         }
     }
+
     Ok(files)
 }
 
@@ -212,6 +205,7 @@ fn rewrite_tweet_outputs(
             store_path,
             &mut archived_assets,
         )?;
+
         if rewritten != contents {
             fs::write(path, rewritten)?;
         }
@@ -277,10 +271,7 @@ fn archive_asset_reference(
     }
 
     let relative_path = local::archive_staged_file(&absolute_path, store_path)?;
-    let relative_path = relative_path
-        .relative_path()
-        .to_string_lossy()
-        .replace('\\', "/");
+    let relative_path = relative_path.to_string_lossy().replace('\\', "/");
     archived_assets.insert(key, relative_path.clone());
 
     Ok(relative_path)
@@ -290,7 +281,6 @@ fn archive_asset_reference(
 mod tests {
     use super::*;
     use std::{
-        env, fs,
         sync::MutexGuard,
         time::{SystemTime, UNIX_EPOCH},
     };
@@ -323,10 +313,8 @@ mod tests {
     #[test]
     fn test_build_scraper_args_for_single_tweet() {
         let args = build_scraper_args(
-            &TweetArchiveRequest {
-                tweet_id: "1234567890".to_string(),
-                mode: TweetArchiveMode::Tweet,
-            },
+            "1234567890",
+            false,
             Path::new("/tmp/raw_tweets"),
             Path::new("/tmp/temp/tweets"),
             Path::new("/tmp/twitter-creds.txt"),
@@ -338,7 +326,6 @@ mod tests {
         assert!(args.contains(&"--download-media".to_string()));
         assert!(args.contains(&"--credentials-file".to_string()));
         assert!(args.contains(&"--no-recursive".to_string()));
-        assert!(!args.contains(&"--no-download-avatars".to_string()));
         assert!(!args.contains(&"--recursive-replied-to-tweets".to_string()));
         assert!(!args.contains(&"--recursive-replied-to-tweets-quotes-retweets".to_string()));
         assert!(!args.contains(&"--download-replied-to-tweets-media".to_string()));
@@ -347,10 +334,8 @@ mod tests {
     #[test]
     fn test_build_scraper_args_for_thread() {
         let args = build_scraper_args(
-            &TweetArchiveRequest {
-                tweet_id: "1234567890".to_string(),
-                mode: TweetArchiveMode::Thread,
-            },
+            "1234567890",
+            true,
             Path::new("/tmp/raw_tweets"),
             Path::new("/tmp/temp/tweets"),
             Path::new("/tmp/twitter-creds.txt"),
@@ -459,17 +444,9 @@ avatar_local_path = "../temp/ts/tweets/media/avatars/avatar.jpg"
         fs::write(&credentials, "ct0=test;auth_token=test").unwrap();
         set_test_env("ARCHIVR_TWITTER_CREDENTIALS_FILE", &credentials);
 
-        let result = archive(
-            &TweetArchiveRequest {
-                tweet_id: "123".to_string(),
-                mode: TweetArchiveMode::Tweet,
-            },
-            &store_path,
-            "ts",
-        )
-        .unwrap();
+        let archived = archive("tweet:123", false, &store_path, "ts").unwrap();
 
-        assert_eq!(result, TweetArchiveResult::Skipped(output_dir));
+        assert!(!archived);
 
         remove_test_env("ARCHIVR_TWITTER_CREDENTIALS_FILE");
         let _ = fs::remove_dir_all(store_path);
@@ -532,7 +509,7 @@ EOF
 "#,
         )
         .unwrap();
-        std::process::Command::new("chmod")
+        Command::new("chmod")
             .arg("+x")
             .arg(&script)
             .status()
@@ -542,20 +519,11 @@ EOF
         set_test_env("ARCHIVR_TWEET_SCRAPER", &script);
         set_test_env("ARCHIVR_TWEET_PYTHON", "/bin/sh");
 
-        let result = archive(
-            &TweetArchiveRequest {
-                tweet_id: "123".to_string(),
-                mode: TweetArchiveMode::Tweet,
-            },
-            &store_path,
-            "ts",
-        )
-        .unwrap();
-
+        let archived = archive("tweet:123", false, &store_path, "ts").unwrap();
         let tweet_file = output_dir.join("tweet-123.toml");
         let contents = fs::read_to_string(&tweet_file).unwrap();
 
-        assert_eq!(result, TweetArchiveResult::Archived(output_dir.clone()));
+        assert!(archived);
         assert!(tweet_file.exists());
         assert!(!output_dir.join("scraping_summary.toml").exists());
         assert!(contents.contains(r#"avatar_local_path = "raw/"#));
diff --git a/src/downloader/ytdlp.rs b/src/downloader/ytdlp.rs
index 2417bb0..6ecd7b8 100644
--- a/src/downloader/ytdlp.rs
+++ b/src/downloader/ytdlp.rs
@@ -1,11 +1,9 @@
 use anyhow::{Context, Result, bail};
-use std::{
-    env,
-    path::{Path, PathBuf},
-    process::Command,
-};
+use std::{env, path::Path, process::Command};
 
-pub fn download(path: String, store_path: &Path, timestamp: &str) -> Result<PathBuf> {
+use crate::hash::hash_file;
+
+pub fn download(path: String, store_path: &Path, timestamp: &String) -> Result<String> {
     println!("Downloading with yt-dlp: {path}");
 
     let ytdlp = env::var("ARCHIVR_YT_DLP").unwrap_or_else(|_| "yt-dlp".to_string());
@@ -31,5 +29,5 @@ pub fn download(path: String, store_path: &Path, timestamp: &str) -> Result<Path
         bail!("yt-dlp failed: {stderr}");
     }
 
-    Ok(out_file)
+    hash_file(&out_file)
 }
diff --git a/src/main.rs b/src/main.rs
index 487e2fd..dba347c 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,4 +1,4 @@
-use anyhow::{Result, bail};
+use anyhow::Result;
 use chrono::Local;
 use clap::{Parser, Subcommand};
 use std::{
@@ -66,14 +66,14 @@ fn get_archive_path() -> Option<PathBuf> {
     None
 }
 
-#[derive(Debug, Clone, PartialEq, Eq)]
+#[derive(Debug, PartialEq, Eq, Clone, Copy)]
 enum Source {
-    Tweet(downloader::tweets::TweetArchiveRequest),
-    TweetMedia { tweet_id: String },
     YouTubeVideo,
     YouTubePlaylist,
     YouTubeChannel,
     X,
+    Tweet,
+    TweetThread,
     Instagram,
     Facebook,
     TikTok,
@@ -91,8 +91,19 @@ fn parse_tweet_id(id: &str) -> Option<String> {
     }
 }
 
-fn tweet_media_path(tweet_id: &str) -> String {
-    format!("https://x.com/i/status/{tweet_id}")
+fn tweet_id_from_path(path: &str) -> Option<String> {
+    path.split(':').next_back().and_then(parse_tweet_id)
+}
+
+fn resolve_source_path(path: &str, source: &Source) -> String {
+    if *source == Source::X && path.starts_with("tweet:media:") {
+        format!(
+            "https://x.com/i/status/{}",
+            tweet_id_from_path(path).unwrap()
+        )
+    } else {
+        path.to_string()
+    }
 }
 
 // INFO: yt-dlp supports a lot of sites; so, when archiving (for example) a website, the user
@@ -130,42 +141,43 @@ fn determine_source(path: &str) -> Source {
         }
     }
 
-    let parts: Vec<&str> = path.split(':').collect();
-    match parts.as_slice() {
-        ["tweet", id] => {
-            if let Some(tweet_id) = parse_tweet_id(id) {
-                return Source::Tweet(downloader::tweets::TweetArchiveRequest {
-                    tweet_id,
-                    mode: downloader::tweets::TweetArchiveMode::Tweet,
-                });
-            }
+    // Shorthand schemes: tweet:, x:, or twitter:
+    if let Some(after_scheme) = path.strip_prefix("tweet:") {
+        if after_scheme.starts_with("media:")
+            && after_scheme
+                .strip_prefix("media:")
+                .and_then(parse_tweet_id)
+                .is_some()
+        {
+            return Source::X;
         }
-        ["tweet", "media", id] => {
-            if let Some(tweet_id) = parse_tweet_id(id) {
-                return Source::TweetMedia { tweet_id };
-            }
+
+        if parse_tweet_id(after_scheme).is_some() {
+            return Source::Tweet;
         }
-        ["x", "tweet", id] | ["x", "x", id] | ["twitter", "x", id] | ["twitter", "tweet", id] => {
-            if let Some(tweet_id) = parse_tweet_id(id) {
-                return Source::Tweet(downloader::tweets::TweetArchiveRequest {
-                    tweet_id,
-                    mode: downloader::tweets::TweetArchiveMode::Tweet,
-                });
-            }
-        }
-        ["x", "thread", id] | ["twitter", "thread", id] => {
-            if let Some(tweet_id) = parse_tweet_id(id) {
-                return Source::Tweet(downloader::tweets::TweetArchiveRequest {
-                    tweet_id,
-                    mode: downloader::tweets::TweetArchiveMode::Thread,
-                });
-            }
-        }
-        _ => {}
     }
 
-    // Shorthand schemes: x: or twitter:
-    if path.starts_with("x:") || path.starts_with("twitter:") {
+    if let Some(after_scheme) = path
+        .strip_prefix("x:")
+        .or_else(|| path.strip_prefix("twitter:"))
+    {
+        if after_scheme
+            .strip_prefix("thread:")
+            .and_then(parse_tweet_id)
+            .is_some()
+        {
+            return Source::TweetThread;
+        }
+
+        if after_scheme
+            .strip_prefix("tweet:")
+            .or_else(|| after_scheme.strip_prefix("x:"))
+            .and_then(parse_tweet_id)
+            .is_some()
+        {
+            return Source::Tweet;
+        }
+
         return Source::X;
     }
 
@@ -260,6 +272,56 @@ fn determine_source(path: &str) -> Source {
     Source::Other
 }
 
+fn hash_exists(filename: String, store_path: &Path) -> bool {
+    let mut chars = filename.chars();
+    let first_letter = chars.next().unwrap();
+    let second_letter = chars.next().unwrap();
+
+    let path = store_path
+        .join("raw")
+        .join(first_letter.to_string())
+        .join(second_letter.to_string())
+        .join(filename);
+
+    println!("Checking {}", path.display());
+
+    path.exists()
+}
+
+fn move_temp_to_raw(file: &Path, hash: &String, store_path: &Path) -> Result<()> {
+    let mut chars = hash.chars();
+    let first_letter = chars.next().unwrap().to_string();
+    let second_letter = chars.next().unwrap().to_string();
+    let file_extension = file
+        .extension()
+        .map_or(String::new(), |ext| format!(".{}", ext.to_string_lossy()));
+
+    fs::create_dir_all(
+        store_path
+            .join("raw")
+            .join(&first_letter)
+            .join(&second_letter),
+    )?;
+
+    fs::rename(
+        file,
+        store_path
+            .join("raw")
+            .join(&first_letter)
+            .join(&second_letter)
+            .join(format!(
+                "{hash}{}",
+                if file_extension.is_empty() {
+                    ""
+                } else {
+                    &file_extension
+                }
+            )),
+    )?;
+
+    Ok(())
+}
+
 fn initialize_store_directories(store_path: &Path) -> Result<()> {
     fs::create_dir_all(store_path.join("raw"))?;
     fs::create_dir_all(store_path.join("raw_tweets"))?;
@@ -268,33 +330,6 @@ fn initialize_store_directories(store_path: &Path) -> Result<()> {
     Ok(())
 }
 
-fn archive_non_tweet_source(
-    source: &Source,
-    path: &str,
-    store_path: &Path,
-    timestamp: &str,
-) -> Result<downloader::local::RawArchiveResult> {
-    let staged_file = match source {
-        Source::Tweet(_) | Source::Other => unreachable!(),
-        Source::TweetMedia { tweet_id } => {
-            downloader::ytdlp::download(tweet_media_path(tweet_id), store_path, timestamp)?
-        }
-        Source::YouTubeVideo
-        | Source::X
-        | Source::Instagram
-        | Source::Facebook
-        | Source::TikTok
-        | Source::Reddit
-        | Source::Snapchat => downloader::ytdlp::download(path.to_string(), store_path, timestamp)?,
-        Source::Local => downloader::local::save(path.to_string(), store_path, timestamp)?,
-        Source::YouTubePlaylist | Source::YouTubeChannel => {
-            bail!("Archiving from this source is not yet implemented.")
-        }
-    };
-
-    downloader::local::archive_staged_file(&staged_file, store_path)
-}
-
 fn main() -> Result<()> {
     let args = Args::parse();
 
@@ -321,19 +356,32 @@ fn main() -> Result<()> {
             };
 
             let source = determine_source(path);
+            let resolved_path = resolve_source_path(path, &source);
+
             match source {
                 Source::Other => {
                     eprintln!("Archiving from this source is not yet implemented.");
                     process::exit(1);
                 }
-                Source::Tweet(request) => {
-                    match downloader::tweets::archive(&request, &store_path, &timestamp) {
-                        Ok(downloader::tweets::TweetArchiveResult::Archived(output_dir)) => {
-                            println!("Tweet archived successfully to {}", output_dir.display());
+                Source::Tweet | Source::TweetThread => {
+                    match downloader::tweets::archive(
+                        path,
+                        source == Source::TweetThread,
+                        &store_path,
+                        &timestamp,
+                    ) {
+                        Ok(true) => {
+                            println!(
+                                "Tweet archived successfully to {}",
+                                store_path.join("raw_tweets").display()
+                            );
                             return Ok(());
                         }
-                        Ok(downloader::tweets::TweetArchiveResult::Skipped(output_dir)) => {
-                            println!("Tweet already archived in {}", output_dir.display());
+                        Ok(false) => {
+                            println!(
+                                "Tweet already archived in {}",
+                                store_path.join("raw_tweets").display()
+                            );
                             return Ok(());
                         }
                         Err(e) => {
@@ -342,29 +390,88 @@ fn main() -> Result<()> {
                         }
                     }
                 }
-                source => {
-                    let result =
-                        match archive_non_tweet_source(&source, path, &store_path, &timestamp) {
-                            Ok(result) => result,
-                            Err(e) => {
-                                match source {
-                                    Source::Local => eprintln!("Failed to archive local file: {e}"),
-                                    _ => eprintln!("Failed to archive source: {e}"),
-                                }
-                                process::exit(1);
-                            }
-                        };
+                _ => {}
+            }
 
-                    let _ = fs::remove_dir_all(store_path.join("temp").join(&timestamp));
-                    match result {
-                        downloader::local::RawArchiveResult::Archived(_) => {
-                            println!("File archived successfully.");
-                        }
-                        downloader::local::RawArchiveResult::AlreadyArchived(_) => {
-                            println!("File already archived.");
+            // Other sources
+            let hash = match source {
+                Source::YouTubeVideo
+                | Source::X
+                | Source::Instagram
+                | Source::Facebook
+                | Source::TikTok
+                | Source::Reddit
+                | Source::Snapchat => {
+                    match downloader::ytdlp::download(
+                        resolved_path.clone(),
+                        &store_path,
+                        &timestamp,
+                    ) {
+                        Ok(h) => h,
+                        Err(e) => {
+                            eprintln!("Failed to download from YouTube: {e}");
+                            process::exit(1);
                         }
                     }
                 }
+                Source::Local => {
+                    match downloader::local::save(resolved_path.clone(), &store_path, &timestamp) {
+                        Ok(h) => h,
+                        Err(e) => {
+                            eprintln!("Failed to archive local file: {e}");
+                            process::exit(1);
+                        }
+                    }
+                }
+                _ => unreachable!(),
+            };
+
+            let file_extension = match source {
+                Source::YouTubeVideo
+                | Source::X
+                | Source::Instagram
+                | Source::Facebook
+                | Source::TikTok
+                | Source::Reddit
+                | Source::Snapchat => ".mp4",
+                Source::Local => {
+                    let p = Path::new(resolved_path.trim_start_matches("file://"));
+                    &p.extension()
+                        .map_or(String::new(), |ext| format!(".{}", ext.to_string_lossy()))
+                }
+                _ => "",
+            };
+
+            let hash_exists = hash_exists(format!("{hash}{file_extension}"), &store_path);
+
+            // TODO: check for repeated archives?
+            // There could be one of the following:
+            // - We are literally archiving the same path over again.
+            // - We are archiving a different path, which had this file. E.g.: we archived a
+            // website before which had this YouTube video, and while recursively archiving
+            // everything, we also archived the YouTube video although it wasn't our main
+            // target. This means that we should archive again; whereas with the first case...
+            // Not sure. Need to think about this.
+            // ----
+            // Thinking about it a day later...
+            // If we are specifically archiving a YouTube video, it could also be two of the
+            // above. So yeah, just create a new DB entry and symlink the Raw to the Structured
+            // Dir or whatever. it's midnight and my brain ain't wording/braining.
+            if hash_exists {
+                println!("File already archived.");
+                let _ = fs::remove_dir_all(store_path.join("temp").join(&timestamp));
+            } else {
+                move_temp_to_raw(
+                    &store_path
+                        .join("temp")
+                        .join(&timestamp)
+                        .join(format!("{timestamp}{file_extension}")),
+                    &hash,
+                    &store_path,
+                )?;
+                let _ = fs::remove_dir_all(store_path.join("temp").join(&timestamp));
+
+                println!("File archived successfully.");
             }
 
             // TODO: DB INSERT, inserting a record
@@ -431,6 +538,7 @@ fn main() -> Result<()> {
 #[cfg(test)]
 mod tests {
     use super::*;
+    use std::fs;
 
     struct TestCase<'a> {
         url: &'a str,
@@ -438,62 +546,39 @@ mod tests {
     }
 
     #[test]
-    fn test_tweet_and_thread_sources() {
+    fn test_tweet_sources() {
         let cases = [
             TestCase {
                 url: "tweet:1234567890",
-                expected: Source::Tweet(downloader::tweets::TweetArchiveRequest {
-                    tweet_id: "1234567890".to_string(),
-                    mode: downloader::tweets::TweetArchiveMode::Tweet,
-                }),
+                expected: Source::Tweet,
             },
             TestCase {
                 url: "x:tweet:1234567890",
-                expected: Source::Tweet(downloader::tweets::TweetArchiveRequest {
-                    tweet_id: "1234567890".to_string(),
-                    mode: downloader::tweets::TweetArchiveMode::Tweet,
-                }),
+                expected: Source::Tweet,
             },
             TestCase {
                 url: "x:x:1234567890",
-                expected: Source::Tweet(downloader::tweets::TweetArchiveRequest {
-                    tweet_id: "1234567890".to_string(),
-                    mode: downloader::tweets::TweetArchiveMode::Tweet,
-                }),
+                expected: Source::Tweet,
             },
             TestCase {
                 url: "twitter:x:1234567890",
-                expected: Source::Tweet(downloader::tweets::TweetArchiveRequest {
-                    tweet_id: "1234567890".to_string(),
-                    mode: downloader::tweets::TweetArchiveMode::Tweet,
-                }),
+                expected: Source::Tweet,
             },
             TestCase {
                 url: "twitter:tweet:1234567890",
-                expected: Source::Tweet(downloader::tweets::TweetArchiveRequest {
-                    tweet_id: "1234567890".to_string(),
-                    mode: downloader::tweets::TweetArchiveMode::Tweet,
-                }),
+                expected: Source::Tweet,
             },
             TestCase {
                 url: "tweet:media:1234567890",
-                expected: Source::TweetMedia {
-                    tweet_id: "1234567890".to_string(),
-                },
+                expected: Source::X,
             },
             TestCase {
                 url: "x:thread:1234567890",
-                expected: Source::Tweet(downloader::tweets::TweetArchiveRequest {
-                    tweet_id: "1234567890".to_string(),
-                    mode: downloader::tweets::TweetArchiveMode::Thread,
-                }),
+                expected: Source::TweetThread,
             },
             TestCase {
                 url: "twitter:thread:1234567890",
-                expected: Source::Tweet(downloader::tweets::TweetArchiveRequest {
-                    tweet_id: "1234567890".to_string(),
-                    mode: downloader::tweets::TweetArchiveMode::Thread,
-                }),
+                expected: Source::TweetThread,
             },
             TestCase {
                 url: "tweet:thread:1234567890",
@@ -519,6 +604,35 @@ mod tests {
         }
     }
 
+    #[test]
+    fn test_tweet_id_from_path() {
+        assert_eq!(
+            tweet_id_from_path("tweet:1234567890"),
+            Some("1234567890".to_string())
+        );
+        assert_eq!(
+            tweet_id_from_path("tweet:media:1234567890"),
+            Some("1234567890".to_string())
+        );
+        assert_eq!(
+            tweet_id_from_path("x:thread:1234567890"),
+            Some("1234567890".to_string())
+        );
+        assert_eq!(tweet_id_from_path("tweet:not-a-number"), None);
+    }
+
+    #[test]
+    fn test_resolve_source_path() {
+        assert_eq!(
+            resolve_source_path("tweet:media:1234567890", &Source::X),
+            "https://x.com/i/status/1234567890"
+        );
+        assert_eq!(
+            resolve_source_path("tweet:1234567890", &Source::Tweet),
+            "tweet:1234567890"
+        );
+    }
+
     #[test]
     fn test_youtube_sources() {
         // --- YouTube Video URLs ---

From 741e33c3afc20f31fae06c860bbdbea3cf60f3a9 Mon Sep 17 00:00:00 2001
From: TheGeneralist <180094941+thegeneralist01@users.noreply.github.com>
Date: Thu, 2 Apr 2026 18:54:58 +0200
Subject: [PATCH 3/4] Clean up some clanker-written code

Signed-off-by: TheGeneralist <180094941+thegeneralist01@users.noreply.github.com>
---
 src/downloader/tweets.rs |  4 ++--
 src/main.rs              | 12 ++++--------
 2 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/src/downloader/tweets.rs b/src/downloader/tweets.rs
index 9e43759..e00c2f1 100644
--- a/src/downloader/tweets.rs
+++ b/src/downloader/tweets.rs
@@ -7,7 +7,7 @@ use std::{
     fs,
     path::{Path, PathBuf},
     process::Command,
-    sync::{Mutex, OnceLock},
+    sync::OnceLock,
 };
 
 use super::local;
@@ -281,7 +281,7 @@ fn archive_asset_reference(
 mod tests {
     use super::*;
     use std::{
-        sync::MutexGuard,
+        sync::{Mutex, MutexGuard},
         time::{SystemTime, UNIX_EPOCH},
     };
 
diff --git a/src/main.rs b/src/main.rs
index dba347c..3352fad 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -356,7 +356,6 @@ fn main() -> Result<()> {
             };
 
             let source = determine_source(path);
-            let resolved_path = resolve_source_path(path, &source);
 
             match source {
                 Source::Other => {
@@ -394,6 +393,7 @@ fn main() -> Result<()> {
             }
 
             // Other sources
+            let path = resolve_source_path(path, &source);
             let hash = match source {
                 Source::YouTubeVideo
                 | Source::X
@@ -402,11 +402,7 @@ fn main() -> Result<()> {
                 | Source::TikTok
                 | Source::Reddit
                 | Source::Snapchat => {
-                    match downloader::ytdlp::download(
-                        resolved_path.clone(),
-                        &store_path,
-                        &timestamp,
-                    ) {
+                    match downloader::ytdlp::download(path.clone(), &store_path, &timestamp) {
                         Ok(h) => h,
                         Err(e) => {
                             eprintln!("Failed to download from YouTube: {e}");
@@ -415,7 +411,7 @@ fn main() -> Result<()> {
                     }
                 }
                 Source::Local => {
-                    match downloader::local::save(resolved_path.clone(), &store_path, &timestamp) {
+                    match downloader::local::save(path.clone(), &store_path, &timestamp) {
                         Ok(h) => h,
                         Err(e) => {
                             eprintln!("Failed to archive local file: {e}");
@@ -435,7 +431,7 @@ fn main() -> Result<()> {
                 | Source::Reddit
                 | Source::Snapchat => ".mp4",
                 Source::Local => {
-                    let p = Path::new(resolved_path.trim_start_matches("file://"));
+                    let p = Path::new(path.trim_start_matches("file://"));
                     &p.extension()
                         .map_or(String::new(), |ext| format!(".{}", ext.to_string_lossy()))
                 }

From 9837bda0c25aaf99328e31b932159311f6e485c8 Mon Sep 17 00:00:00 2001
From: TheGeneralist <180094941+thegeneralist01@users.noreply.github.com>
Date: Thu, 2 Apr 2026 20:59:57 +0200
Subject: [PATCH 4/4] Rename resolve_from_cwd to absolutize_path_from_cwd

Update call sites and tests to use the new API. Adjust tweet scraper
path/credentials handling and make small tweaks to local path hashing
and raw store helpers.

Signed-off-by: TheGeneralist <180094941+thegeneralist01@users.noreply.github.com>
---
 src/downloader/local.rs  |  9 +++++++++
 src/downloader/tweets.rs | 43 +++++++++++++++++++++++++++++++++++-----
 src/main.rs              |  3 ++-
 3 files changed, 49 insertions(+), 6 deletions(-)

diff --git a/src/downloader/local.rs b/src/downloader/local.rs
index df31a4e..6536aa7 100644
--- a/src/downloader/local.rs
+++ b/src/downloader/local.rs
@@ -31,6 +31,12 @@ pub fn save(path: String, store_path: &Path, timestamp: &String) -> Result<Strin
     hash_file(&out_file)
 }
 
+/// Moves `file` into the content-addressed raw store under `store_path`.
+///
+/// The destination path is derived from the file's SHA-256 hash:
+/// `raw/<first-char>/<second-char>/<hash><ext>`. If the destination already
+/// exists the source file is removed (deduplication); otherwise it is renamed.
+/// Returns the store-relative destination path.
 pub fn archive_staged_file(file: &Path, store_path: &Path) -> Result<PathBuf> {
     let hash = hash_file(file)?;
     let destination = raw_relative_path(file, &hash)?;
@@ -49,6 +55,9 @@ pub fn archive_staged_file(file: &Path, store_path: &Path) -> Result<PathBuf> {
     Ok(destination)
 }
 
+/// Computes the store-relative path for a file given its `hash`.
+/// The layout is `raw/<c1>/<c2>/<hash><ext>` where `c1`/`c2` are the first
+/// two characters of the hash, providing two-level directory sharding.
 fn raw_relative_path(file: &Path, hash: &str) -> Result<PathBuf> {
     let mut chars = hash.chars();
     let first_letter = chars.next().context("hash must not be empty")?;
diff --git a/src/downloader/tweets.rs b/src/downloader/tweets.rs
index e00c2f1..57014f2 100644
--- a/src/downloader/tweets.rs
+++ b/src/downloader/tweets.rs
@@ -12,6 +12,7 @@ use std::{
 
 use super::local;
 
+/// Returns `Some(id)` if `id` is a non-empty string of ASCII digits, otherwise `None`.
 fn parse_tweet_id(id: &str) -> Option<String> {
     if !id.is_empty() && id.chars().all(|char| char.is_ascii_digit()) {
         Some(id.to_string())
@@ -20,11 +21,14 @@ fn parse_tweet_id(id: &str) -> Option<String> {
     }
 }
 
+/// Extracts a tweet ID from an archivr path like `"tweet:123"` by taking the
+/// last colon-separated segment and validating it as a numeric ID.
 fn tweet_id_from_path(path: &str) -> Option<String> {
     path.split(':').next_back().and_then(parse_tweet_id)
 }
 
-fn resolve_from_cwd(path: PathBuf, cwd: &Path) -> PathBuf {
+/// Resolves `path` relative to `cwd` if it is not already absolute.
+fn absolutize_path_from_cwd(path: PathBuf, cwd: &Path) -> PathBuf {
     if path.is_absolute() {
         path
     } else {
@@ -32,6 +36,8 @@ fn resolve_from_cwd(path: PathBuf, cwd: &Path) -> PathBuf {
     }
 }
 
+/// Builds the CLI argument list for the Python tweet scraper.
+/// When `thread` is true, recursive flags are added to follow reply chains.
 fn build_scraper_args(
     tweet_id: &str,
     thread: bool,
@@ -62,15 +68,27 @@ fn build_scraper_args(
     args
 }
 
+/// Archives a tweet (or full thread) identified by `path` (e.g. `"tweet:123"`).
+///
+/// Invokes the Python scraper, then moves all produced media assets into the
+/// content-addressed raw store and rewrites the TOML output to use the new
+/// store-relative paths. Returns `true` if new content was archived, `false`
+/// if the tweet was already present and `thread` is `false`.
+///
+/// Requires `ARCHIVR_TWITTER_CREDENTIALS_FILE` to be set. The scraper script and its
+/// interpreter can be overridden via `ARCHIVR_TWEET_SCRAPER` and `ARCHIVR_TWEET_PYTHON`.
 pub fn archive(path: &str, thread: bool, store_path: &Path, timestamp: &str) -> Result<bool> {
     let invocation_cwd = env::current_dir().context("Failed to read current working directory")?;
+    // Output directory for Tweet TOML files.
     let output_dir = store_path.join("raw_tweets");
+    // Temporary directory for media assets downloaded by the scraper in `temp/...`.
     let temp_dir = store_path.join("temp").join(timestamp).join("tweets");
     let tweet_id = tweet_id_from_path(path).context("Invalid tweet ID")?;
 
     fs::create_dir_all(&output_dir)?;
     fs::create_dir_all(&temp_dir)?;
 
+    // Path to the TOML file of the root tweet (the tweet being archived).
     let root_toml = output_dir.join(format!("tweet-{tweet_id}.toml"));
     if !thread && root_toml.exists() {
         return Ok(false);
@@ -82,12 +100,12 @@ pub fn archive(path: &str, thread: bool, store_path: &Path, timestamp: &str) ->
     let scraper_path = env::var_os("ARCHIVR_TWEET_SCRAPER")
         .map(PathBuf::from)
         .unwrap_or_else(|| PathBuf::from("vendor/twitter/scrape_user_tweet_contents.py"));
-    let scraper_path = resolve_from_cwd(scraper_path, &invocation_cwd);
+    let scraper_path = absolutize_path_from_cwd(scraper_path, &invocation_cwd);
 
     let credentials_file = if let Some(credentials_file) =
         env::var_os("ARCHIVR_TWITTER_CREDENTIALS_FILE")
     {
-        resolve_from_cwd(PathBuf::from(credentials_file), &invocation_cwd)
+        absolutize_path_from_cwd(PathBuf::from(credentials_file), &invocation_cwd)
     } else {
         bail!(
             "Twitter scraping requires ARCHIVR_TWITTER_CREDENTIALS_FILE to point to a cookies file."
@@ -144,6 +162,7 @@ pub fn archive(path: &str, thread: bool, store_path: &Path, timestamp: &str) ->
     Ok(true)
 }
 
+/// Removes the `scraping_summary.toml` file left by the scraper, if present.
 fn cleanup_summary(output_dir: &Path) -> Result<()> {
     let summary_path = output_dir.join("scraping_summary.toml");
     if summary_path.exists() {
@@ -152,6 +171,7 @@ fn cleanup_summary(output_dir: &Path) -> Result<()> {
     Ok(())
 }
 
+/// Returns the set of `tweet-*.toml` files present in `output_dir`.
 fn tweet_toml_files(output_dir: &Path) -> Result<HashSet<PathBuf>> {
     let mut files = HashSet::new();
 
@@ -172,22 +192,27 @@ fn tweet_toml_files(output_dir: &Path) -> Result<HashSet<PathBuf>> {
     Ok(files)
 }
 
+/// Returns the sorted list of TOML files present in `after` but not in `before`.
 fn new_tweet_tomls(before: &HashSet<PathBuf>, after: &HashSet<PathBuf>) -> Vec<PathBuf> {
     let mut files = after.difference(before).cloned().collect::<Vec<_>>();
     files.sort();
     files
 }
 
+/// Returns a lazily-compiled regex matching `avatar_local_path = "..."` in TOML.
 fn avatar_regex() -> &'static Regex {
     static REGEX: OnceLock<Regex> = OnceLock::new();
     REGEX.get_or_init(|| Regex::new(r#"avatar_local_path = "([^"\n]+)""#).unwrap())
 }
 
+/// Returns a lazily-compiled regex matching `local_path = "..."` in TOML; `\b` excludes `avatar_local_path`.
 fn media_regex() -> &'static Regex {
     static REGEX: OnceLock<Regex> = OnceLock::new();
     REGEX.get_or_init(|| Regex::new(r#"(?m)\blocal_path = "([^"\n]+)""#).unwrap())
 }
 
+/// Rewrites asset paths in each newly-created TOML file, moving assets into
+/// the content-addressed store. Files are written back only if content changed.
 fn rewrite_tweet_outputs(
     tweet_tomls: &[PathBuf],
     output_dir: &Path,
@@ -214,6 +239,10 @@ fn rewrite_tweet_outputs(
     Ok(())
 }
 
+/// Rewrites all `avatar_local_path` and `local_path` references in `contents`,
+/// archiving each referenced file into the raw store and returning the updated
+/// TOML string. `archived_assets` is a cache to avoid re-archiving the same
+/// file when it is referenced by multiple tweets.
 fn rewrite_toml_asset_paths(
     contents: &str,
     output_dir: &Path,
@@ -246,6 +275,10 @@ fn rewrite_toml_asset_paths(
     Ok(rewritten)
 }
 
+/// Archives the asset at `old_path` (relative to `base_dir`) into the raw store
+/// and returns its new store-relative path. Already-archived paths (starting
+/// with `"raw/"`) are returned unchanged. Results are cached in `archived_assets`
+/// by `"<kind>:<old_path>"` key to deduplicate work across TOML files.
 fn archive_asset_reference(
     old_path: &str,
     base_dir: &Path,
@@ -421,13 +454,13 @@ avatar_local_path = "../temp/ts/tweets/media/avatars/avatar.jpg"
 
     #[test]
     fn test_resolve_from_cwd_keeps_absolute_paths() {
-        let path = resolve_from_cwd(PathBuf::from("/tmp/creds.txt"), Path::new("/work"));
+        let path = absolutize_path_from_cwd(PathBuf::from("/tmp/creds.txt"), Path::new("/work"));
         assert_eq!(path, PathBuf::from("/tmp/creds.txt"));
     }
 
     #[test]
     fn test_resolve_from_cwd_expands_relative_paths() {
-        let path = resolve_from_cwd(PathBuf::from("creds.txt"), Path::new("/work"));
+        let path = absolutize_path_from_cwd(PathBuf::from("creds.txt"), Path::new("/work"));
         assert_eq!(path, PathBuf::from("/work/creds.txt"));
     }
 
diff --git a/src/main.rs b/src/main.rs
index 3352fad..31bab27 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -357,6 +357,7 @@ fn main() -> Result<()> {
 
             let source = determine_source(path);
 
+            // Sources: Tweets or Twitter Threads
             match source {
                 Source::Other => {
                     eprintln!("Archiving from this source is not yet implemented.");
@@ -392,7 +393,7 @@ fn main() -> Result<()> {
                 _ => {}
             }
 
-            // Other sources
+            // Sources for which yt-dlp is needed
             let path = resolve_source_path(path, &source);
             let hash = match source {
                 Source::YouTubeVideo