From 955a5037e9af89d968b5bdf7c2d26431b16a88fc Mon Sep 17 00:00:00 2001 From: TheGeneralist <180094941+thegeneralist01@users.noreply.github.com> Date: Wed, 21 Jan 2026 20:26:58 +0100 Subject: [PATCH 1/9] chore: specify non-ignored `.md` files --- .gitignore | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 5bf848c..75ade1b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,9 @@ * !.gitignore -!*.md + +LICENSE.md +README.md !src !src/** From 60727c0f1e7a159f52b4fa2fb8379738c092f6f5 Mon Sep 17 00:00:00 2001 From: TheGeneralist <180094941+thegeneralist01@users.noreply.github.com> Date: Wed, 21 Jan 2026 20:29:16 +0100 Subject: [PATCH 2/9] refactor: rename youtube downloader to ytdlp More generic name since yt-dlp supports many sites beyond YouTube. --- src/downloader/mod.rs | 3 ++- src/downloader/{youtube.rs => ytdlp.rs} | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) rename src/downloader/{youtube.rs => ytdlp.rs} (88%) diff --git a/src/downloader/mod.rs b/src/downloader/mod.rs index 5e12721..e896201 100644 --- a/src/downloader/mod.rs +++ b/src/downloader/mod.rs @@ -1 +1,2 @@ -pub mod youtube; +pub mod local; +pub mod ytdlp; diff --git a/src/downloader/youtube.rs b/src/downloader/ytdlp.rs similarity index 88% rename from src/downloader/youtube.rs rename to src/downloader/ytdlp.rs index 3af75fa..6ecd7b8 100644 --- a/src/downloader/youtube.rs +++ b/src/downloader/ytdlp.rs @@ -4,12 +4,13 @@ use std::{env, path::Path, process::Command}; use crate::hash::hash_file; pub fn download(path: String, store_path: &Path, timestamp: &String) -> Result { - println!("Downloading from YouTube: {path}"); + println!("Downloading with yt-dlp: {path}"); let ytdlp = env::var("ARCHIVR_YT_DLP").unwrap_or_else(|_| "yt-dlp".to_string()); - let temp_dir = store_path.join("temp"); + let temp_dir = store_path.join("temp").join(timestamp); std::fs::create_dir_all(&temp_dir)?; + let out_file = temp_dir.join(format!("{timestamp}.mp4")); let 
out = Command::new(&ytdlp)

From 935729ac9340ce7a724aa4e54ff2806bfbf8c1ac Mon Sep 17 00:00:00 2001
From: TheGeneralist <180094941+thegeneralist01@users.noreply.github.com>
Date: Wed, 21 Jan 2026 20:29:24 +0100
Subject: [PATCH 3/9] feat: add local file downloader

Supports file:// URLs for archiving local files.
---
 src/downloader/local.rs | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)
 create mode 100644 src/downloader/local.rs

diff --git a/src/downloader/local.rs b/src/downloader/local.rs
new file mode 100644
index 0000000..f946a2e
--- /dev/null
+++ b/src/downloader/local.rs
@@ -0,0 +1,28 @@
+use anyhow::{Context, Result, bail};
+use std::{path::Path, process::Command};
+
+use crate::hash::hash_file;
+
+pub fn save(path: String, store_path: &Path, timestamp: &String) -> Result<String> {
+    println!("Saving path: {path}");
+
+    let temp_dir = store_path.join("temp").join(timestamp);
+    std::fs::create_dir_all(&temp_dir)?;
+
+    let in_file = Path::new(path.trim_start_matches("file://"));
+    let extension = in_file
+        .extension()
+        .map_or(String::new(), |ext| format!(".{}", ext.to_string_lossy()));
+    let out_file = temp_dir.join(format!("{timestamp}{extension}"));
+
+    let mut binding = Command::new("cp");
+    let cmd = binding.arg(in_file).arg(&out_file);
+    let out = cmd.output().with_context(|| "failed to spawn cp process")?;
+
+    if !out.status.success() {
+        let stderr = String::from_utf8_lossy(&out.stderr);
+        bail!("cp failed: {stderr}");
+    }
+
+    hash_file(&out_file)
+}

From e455f18932fd0024ce043706f97c813dae279031 Mon Sep 17 00:00:00 2001
From: TheGeneralist <180094941+thegeneralist01@users.noreply.github.com>
Date: Wed, 21 Jan 2026 20:29:31 +0100
Subject: [PATCH 4/9] deps: add regex crate for URL pattern matching

---
 Cargo.lock | 45 +++++++++++++++++++++++++++++++++++++++++++++
 Cargo.toml | 1 +
 2 files changed, 46 insertions(+)

diff --git a/Cargo.lock b/Cargo.lock
index 8678d20..155a9fc 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2,6 +2,15 @@
 # It is not
intended for manual editing. version = 4 +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + [[package]] name = "android_system_properties" version = "0.1.5" @@ -75,6 +84,7 @@ dependencies = [ "chrono", "clap", "hex", + "regex", "sha3", "uuid", ] @@ -311,6 +321,12 @@ version = "0.4.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" +[[package]] +name = "memchr" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" + [[package]] name = "num-traits" version = "0.2.19" @@ -356,6 +372,35 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "regex" +version = "1.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" + [[package]] name = "rustversion" version = "1.0.22" diff --git a/Cargo.toml b/Cargo.toml index de61350..f40ba88 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,5 +8,6 @@ anyhow = "1.0.100" chrono = 
"0.4.42" clap = { version = "4.5.48", features = ["derive"] } hex = "0.4.3" +regex = "1.12.2" sha3 = "0.10.8" uuid = { version = "1.18.1", features = ["v4"] } From 56fa1eaeb9b43c1fcfef37421907d56e3c60fe24 Mon Sep 17 00:00:00 2001 From: TheGeneralist <180094941+thegeneralist01@users.noreply.github.com> Date: Wed, 21 Jan 2026 20:29:59 +0100 Subject: [PATCH 5/9] feat: expand source detection with granular YouTube types - Split Source::YouTube into YouTubeVideo, YouTubePlaylist, YouTubeChannel - Add Source::X for Twitter/X posts - Add Source::Local for file:// URLs - Add regex-based URL pattern matching for YouTube URLs - Add shorthand schemes (yt:video/ID, youtube:playlist/ID, etc.) - Add comprehensive tests for all URL patterns --- src/hash.rs | 2 +- src/main.rs | 391 +++++++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 343 insertions(+), 50 deletions(-) diff --git a/src/hash.rs b/src/hash.rs index 4bb1da9..cbf1194 100644 --- a/src/hash.rs +++ b/src/hash.rs @@ -1,6 +1,6 @@ +use anyhow::Result; use sha3::{Digest, Sha3_256}; use std::{fs::File, io::Read, path::Path}; -use anyhow::Result; pub fn hash_file(path: &Path) -> Result { let mut file = File::open(path)?; diff --git a/src/main.rs b/src/main.rs index 4deb3b8..f2e6de1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -32,6 +32,8 @@ enum Command { /// Store path - path to store the archived files in. /// Structure will be: /// store_path/ + /// temp/ + /// ... /// raw/ /// ... 
/// structured/ @@ -42,6 +44,10 @@ enum Command { /// Name of the archive #[arg(short, long)] name: String, + + /// Wipe existing .archivr repository data + #[arg(long = "force-with-info-removal")] + force_with_info_removal: bool, }, } @@ -58,15 +64,80 @@ fn get_archive_path() -> Option { None } -#[derive(Debug)] +#[derive(Debug, PartialEq)] enum Source { - YouTube, + YouTubeVideo, + YouTubePlaylist, + YouTubeChannel, + X, + Local, Other, } +// INFO: yt-dlp supports a lot of sites; so, when archiving (for example) a website, the user +// -> should be asked whether they want to archive the whole website or just the video(s) on it. fn determine_source(path: &str) -> Source { - if path.starts_with("http://") || path.starts_with("https://") { - return Source::YouTube; + // INFO: Extractors' URLs can be found here: + // -> https://github.com/yt-dlp/yt-dlp/tree/dfc0a84c192a7357dd1768cc345d590253a14fe5/yt_dlp/extractor + // TEST: X posts can have multiple videos. + + // Shorthand schemes: yt: or youtube: + if path.starts_with("yt:") || path.starts_with("youtube:") { + let after_scheme = if path.starts_with("yt:") { + &path[3..] + } else { + &path[8..] 
+ }; + + // video/ID, short/ID, shorts/ID + if after_scheme.starts_with("video/") + || after_scheme.starts_with("short/") + || after_scheme.starts_with("shorts/") + { + return Source::YouTubeVideo; + } + + // playlist/ID + if after_scheme.starts_with("playlist/") { + return Source::YouTubePlaylist; + } + + // channel/ID, c/ID, user/ID, @handle + if after_scheme.starts_with("channel/") + || after_scheme.starts_with("c/") + || after_scheme.starts_with("user/") + || after_scheme.starts_with("@") + { + return Source::YouTubeChannel; + } + } + + if path.starts_with("file://") { + return Source::Local; + } else if path.starts_with("http://") || path.starts_with("https://") { + // Video URLs (watch, youtu.be, shorts) + let video_re = regex::Regex::new(r"^https?://(?:www\.)?(?:youtu\.be/[0-9A-Za-z_-]+|youtube\.com/watch\?v=[0-9A-Za-z_-]+|youtube\.com/shorts/[0-9A-Za-z_-]+)").unwrap(); + if video_re.is_match(path) { + return Source::YouTubeVideo; + } + + // Playlist URLs + let playlist_re = + regex::Regex::new(r"^https?://(?:www\.)?youtube\.com/playlist\?list=[0-9A-Za-z_-]+") + .unwrap(); + if playlist_re.is_match(path) { + return Source::YouTubePlaylist; + } + + // Channel or user URLs (channel IDs, /c/, /user/, or @handles) + let channel_re = regex::Regex::new(r"^https?://(?:www\.)?youtube\.com/(?:channel/[0-9A-Za-z_-]+|c/[0-9A-Za-z_-]+|user/[0-9A-Za-z_-]+|@[0-9A-Za-z_-]+)").unwrap(); + if channel_re.is_match(path) { + return Source::YouTubeChannel; + } + + if path.starts_with("https://x.com/") { + return Source::X; + } } Source::Other } @@ -136,54 +207,92 @@ fn main() -> Result<()> { let timestamp = Local::now().format("%Y-%m-%dT%H-%M-%S%.3f").to_string(); let source = determine_source(path); - if let Source::YouTube = source { - let store_path_string_file = archive_path.unwrap().join("store_path"); - let store_path = match fs::read_to_string(store_path_string_file) { - Ok(p) => PathBuf::from(p.trim()), - Err(e) => { - eprintln!("Failed to read store path: {e}"); - 
process::exit(1); - } - }; + if let Source::Other = source { + eprintln!("Archiving from this source is not yet implemented."); + process::exit(1); + } - let hash = - match downloader::youtube::download(path.clone(), &store_path, ×tamp) { + let store_path_string_file = archive_path.unwrap().join("store_path"); + let store_path = match fs::read_to_string(store_path_string_file) { + Ok(p) => PathBuf::from(p.trim()), + Err(e) => { + eprintln!("Failed to read store path: {e}"); + process::exit(1); + } + }; + + let hash = match source { + Source::YouTubeVideo | Source::X => { + match downloader::ytdlp::download(path.clone(), &store_path, ×tamp) { Ok(h) => h, Err(e) => { eprintln!("Failed to download from YouTube: {e}"); process::exit(1); } - }; - - let hash_exists = hash_exists(format!("{hash}.mp4"), &store_path); - // TODO: check for repeated archives? - // There could be one of the following: - // - We are literally archiving the same path over again. - // - We are archiving a different path, which had this file. E.g.: we archived a - // website before which had this YouTube video, and while recursively archiving - // everything, we also archived the YouTube video although it wasn't our main - // target. This means that we should archive again; whereas with the first case... - // Not sure. Need to think about this. - // ---- - // Thinking about it a day later... - // If we are specifically archiving a YouTube video, it could also be two of the - // above. So yeah, just create a new DB entry and symlink the Raw to the Structured - // Dir or whatever. it's midnight and my brain ain't wording/braining. 
- if hash_exists { - println!("File already archived."); - process::exit(0); - } else { - move_temp_to_raw( - &store_path.join("temp").join(format!("{timestamp}.mp4")), - &hash, - &store_path, - )?; - - println!("File archived successfully."); + } } + Source::Local => { + match downloader::local::save(path.clone(), &store_path, ×tamp) { + Ok(h) => h, + Err(e) => { + eprintln!("Failed to archive local file: {e}"); + process::exit(1); + } + } + } + _ => unreachable!(), + }; + + let file_extension = match source { + Source::YouTubeVideo | Source::X => ".mp4", + Source::Local => { + let p = Path::new(path.trim_start_matches("file://")); + &p.extension() + .map_or(String::new(), |ext| format!(".{}", ext.to_string_lossy())) + } + _ => "", + }; + + let hash_exists = hash_exists(format!("{hash}{file_extension}"), &store_path); + + // TODO: check for repeated archives? + // There could be one of the following: + // - We are literally archiving the same path over again. + // - We are archiving a different path, which had this file. E.g.: we archived a + // website before which had this YouTube video, and while recursively archiving + // everything, we also archived the YouTube video although it wasn't our main + // target. This means that we should archive again; whereas with the first case... + // Not sure. Need to think about this. + // ---- + // Thinking about it a day later... + // If we are specifically archiving a YouTube video, it could also be two of the + // above. So yeah, just create a new DB entry and symlink the Raw to the Structured + // Dir or whatever. it's midnight and my brain ain't wording/braining. 
+ if hash_exists { + println!("File already archived."); + let _ = fs::remove_file( + store_path + .join("temp") + .join(×tamp) + .join(format!("{timestamp}{file_extension}")), + ); + process::exit(0); + } else { + move_temp_to_raw( + &store_path + .join("temp") + .join(×tamp) + .join(format!("{timestamp}{file_extension}")), + &hash, + &store_path, + )?; + + println!("File archived successfully."); } // TODO: DB INSERT, inserting a record + // https://github.com/rusqlite/rusqlite + // Think of the DB schema Ok(()) } @@ -192,6 +301,7 @@ fn main() -> Result<()> { path: ref archive_path_string, store_path: ref store_path_string, name: ref archive_name, + force_with_info_removal, } => { let archive_path = Path::new(&archive_path_string).join(".archivr"); let store_path = if Path::new(&store_path_string).is_relative() { @@ -201,16 +311,26 @@ fn main() -> Result<()> { }; if archive_path.exists() { - // TODO: check if there is nothing inside. if there is nothing inside, use it - eprintln!("Archive already exists at {}", archive_path.display()); - if store_path.exists() { - eprintln!("Store path already exists at {}", store_path.display()); + if !archive_path.is_dir() { + eprintln!( + "Archive path exists and is not a directory: {}", + archive_path.display() + ); + process::exit(1); + } + + if force_with_info_removal { + fs::remove_dir_all(&archive_path)?; + } else if fs::read_dir(&archive_path)?.next().is_some() { + eprintln!( + "Archive already exists at {} and is not empty. Use --force-with-info-removal to reinitialize.", + archive_path.display() + ); process::exit(1); } - process::exit(1); } - if store_path.exists() { - // TODO: check if the structure is correct. If so, use it. 
+ + if store_path.exists() && !force_with_info_removal { eprintln!("Store path already exists at {}", store_path.display()); process::exit(1); } @@ -232,3 +352,176 @@ fn main() -> Result<()> { } // _ => eprintln!("Unknown command: {:?}", args.command), } } + +#[cfg(test)] +mod tests { + use super::*; + + struct TestCase<'a> { + url: &'a str, + expected: Source, + } + + #[test] + fn test_youtube_sources() { + // --- YouTube Video URLs --- + let video_cases = [ + TestCase { + url: "https://www.youtube.com/watch?v=UHxw-L2WyyY", + expected: Source::YouTubeVideo, + }, + TestCase { + url: "https://youtu.be/UHxw-L2WyyY", + expected: Source::YouTubeVideo, + }, + TestCase { + url: "https://www.youtube.com/shorts/EtC99eWiwRI", + expected: Source::YouTubeVideo, + }, + ]; + + for case in &video_cases { + assert_eq!( + determine_source(case.url), + case.expected, + "Failed for URL: {}", + case.url + ); + } + + // --- YouTube Playlist URLs --- + let playlist_cases = [TestCase { + url: "https://www.youtube.com/playlist?list=PL9vTTBa7QaQOoMfpP3ztvgyQkPWDPfJez", + expected: Source::YouTubePlaylist, + }]; + + for case in &playlist_cases { + assert_eq!( + determine_source(case.url), + case.expected, + "Failed for URL: {}", + case.url + ); + } + + // --- YouTube Channel URLs --- + let channel_cases = [ + TestCase { + url: "https://www.youtube.com/channel/CoreDumpped", + expected: Source::YouTubeChannel, + }, + TestCase { + url: "https://www.youtube.com/@CoreDumpped", + expected: Source::YouTubeChannel, + }, + TestCase { + url: "https://www.youtube.com/c/YouTubeCreators", + expected: Source::YouTubeChannel, + }, + TestCase { + url: "https://www.youtube.com/user/pewdiepie", + expected: Source::YouTubeChannel, + }, + TestCase { + url: "https://youtube.com/@pewdiepie?si=KOcLN_KPYNpe5f_8", + expected: Source::YouTubeChannel, + }, + ]; + + for case in &channel_cases { + assert_eq!( + determine_source(case.url), + case.expected, + "Failed for URL: {}", + case.url + ); + } + + // --- 
Shorthand scheme URLs --- + let shorthand_cases = [ + // Videos + TestCase { + url: "yt:video/UHxw-L2WyyY", + expected: Source::YouTubeVideo, + }, + TestCase { + url: "youtube:video/UHxw-L2WyyY", + expected: Source::YouTubeVideo, + }, + TestCase { + url: "yt:short/EtC99eWiwRI", + expected: Source::YouTubeVideo, + }, + TestCase { + url: "yt:shorts/EtC99eWiwRI", + expected: Source::YouTubeVideo, + }, + TestCase { + url: "youtube:shorts/EtC99eWiwRI", + expected: Source::YouTubeVideo, + }, + // Playlists + TestCase { + url: "yt:playlist/PL9vTTBa7QaQOoMfpP3ztvgyQkPWDPfJez", + expected: Source::YouTubePlaylist, + }, + TestCase { + url: "youtube:playlist/PL9vTTBa7QaQOoMfpP3ztvgyQkPWDPfJez", + expected: Source::YouTubePlaylist, + }, + // Channels + TestCase { + url: "yt:channel/UCxyz123", + expected: Source::YouTubeChannel, + }, + TestCase { + url: "yt:c/YouTubeCreators", + expected: Source::YouTubeChannel, + }, + TestCase { + url: "yt:user/pewdiepie", + expected: Source::YouTubeChannel, + }, + TestCase { + url: "youtube:@CoreDumpped", + expected: Source::YouTubeChannel, + }, + ]; + + for case in &shorthand_cases { + assert_eq!( + determine_source(case.url), + case.expected, + "Failed for URL: {}", + case.url + ); + } + } + + #[test] + fn test_non_youtube_sources() { + let other_cases = [ + TestCase { + url: "file:///local/path/file.mp4", + expected: Source::Local, + }, + TestCase { + url: "https://x.com/some_post", + expected: Source::X, + }, + TestCase { + url: "https://example.com/", + expected: Source::Other, + }, + ]; + + for case in &other_cases { + assert_eq!( + determine_source(case.url), + case.expected, + "Failed for URL: {}", + case.url + ); + } + } +} From b3677d627c684ff0ae8d82e472bdf7098d5948cd Mon Sep 17 00:00:00 2001 From: TheGeneralist <180094941+thegeneralist01@users.noreply.github.com> Date: Wed, 21 Jan 2026 20:30:15 +0100 Subject: [PATCH 6/9] docs: update README milestones Mark YouTube videos, Twitter videos, and local files as done. 
--- README.md | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index f59bfab..12c6af6 100644 --- a/README.md +++ b/README.md @@ -1,29 +1,26 @@ # archivr -An open-source self-hosted archiving solution. Work in progress. +An open-source self-hosted archiving tool. Work in progress. ## Milestones - [ ] Archiving - [ ] Archiving media files from social media platforms - - [ ] YouTube - - [ ] Twitter + - [X] YouTube Videos + - [X] Twitter Videos - [ ] Instagram - [ ] Facebook - [ ] TikTok - [ ] Reddit - [ ] Snapchat + - [ ] YouTube Posts (?) - (Some of these could be postponed for later.) - - [ ] Archiving local files - - [ ] Archive videos (MP4, WebM) - - [ ] Archive audio files (MP3, WAV) - - [ ] Archive documents (DOCX, XLSX, PPTX) - - [ ] Archive PDFs - - [ ] Archive images (JPEG, PNG, GIF) + - [X] Archiving local files - [ ] Archiving files from cloud storage services (Google Drive, Dropbox, OneDrive) and from URLs - [ ] URLs - [ ] Google Drive - [ ] Dropbox - [ ] OneDrive + - [ ] Archiving Twitter threads - [ ] Archive web pages (HTML, CSS, JS, images) - [ ] Archiving emails (???) 
- [ ] Gmail From 76fce7f91e34979cb326ee3af282df4bc4ba78d7 Mon Sep 17 00:00:00 2001 From: TheGeneralist <180094941+thegeneralist01@users.noreply.github.com> Date: Wed, 21 Jan 2026 20:30:23 +0100 Subject: [PATCH 7/9] chore: update flake.lock --- flake.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flake.lock b/flake.lock index fd86e53..d406848 100644 --- a/flake.lock +++ b/flake.lock @@ -2,11 +2,11 @@ "nodes": { "nixpkgs": { "locked": { - "lastModified": 1760284886, - "narHash": "sha256-TK9Kr0BYBQ/1P5kAsnNQhmWWKgmZXwUQr4ZMjCzWf2c=", + "lastModified": 1761672384, + "narHash": "sha256-o9KF3DJL7g7iYMZq9SWgfS1BFlNbsm6xplRjVlOCkXI=", "owner": "nixos", "repo": "nixpkgs", - "rev": "cf3f5c4def3c7b5f1fc012b3d839575dbe552d43", + "rev": "08dacfca559e1d7da38f3cf05f1f45ee9bfd213c", "type": "github" }, "original": { From 20d85146961d6108acbf6a0785ce13f8aa8abc07 Mon Sep 17 00:00:00 2001 From: TheGeneralist <180094941+thegeneralist01@users.noreply.github.com> Date: Wed, 21 Jan 2026 20:42:00 +0100 Subject: [PATCH 8/9] feat: add shorthand schemes for X/Twitter media --- src/main.rs | 47 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/src/main.rs b/src/main.rs index f2e6de1..240ea55 100644 --- a/src/main.rs +++ b/src/main.rs @@ -82,13 +82,10 @@ fn determine_source(path: &str) -> Source { // TEST: X posts can have multiple videos. // Shorthand schemes: yt: or youtube: - if path.starts_with("yt:") || path.starts_with("youtube:") { - let after_scheme = if path.starts_with("yt:") { - &path[3..] - } else { - &path[8..] 
- }; - + if let Some(after_scheme) = path + .strip_prefix("yt:") + .or_else(|| path.strip_prefix("youtube:")) + { // video/ID, short/ID, shorts/ID if after_scheme.starts_with("video/") || after_scheme.starts_with("short/") @@ -112,6 +109,11 @@ fn determine_source(path: &str) -> Source { } } + // Shorthand schemes: x: or twitter: + if path.starts_with("x:") || path.starts_with("twitter:") { + return Source::X; + } + if path.starts_with("file://") { return Source::Local; } else if path.starts_with("http://") || path.starts_with("https://") { @@ -498,6 +500,33 @@ mod tests { } } + #[test] + fn test_x_sources() { + let x_cases = [ + TestCase { + url: "https://x.com/some_post", + expected: Source::X, + }, + TestCase { + url: "x:1234567890", + expected: Source::X, + }, + TestCase { + url: "twitter:1234567890", + expected: Source::X, + }, + ]; + + for case in &x_cases { + assert_eq!( + determine_source(case.url), + case.expected, + "Failed for URL: {}", + case.url + ); + } + } + #[test] fn test_non_youtube_sources() { let other_cases = [ @@ -505,10 +534,6 @@ mod tests { url: "file:///local/path/file.mp4", expected: Source::Local, }, - TestCase { - url: "https://x.com/some_post", - expected: Source::X, - }, TestCase { url: "https://example.com/", expected: Source::Other, From df05687ad97a5c7ed5bb49bdcad32861e638d1f1 Mon Sep 17 00:00:00 2001 From: TheGeneralist <180094941+thegeneralist01@users.noreply.github.com> Date: Wed, 21 Jan 2026 20:47:46 +0100 Subject: [PATCH 9/9] chore: move docs into docs dir --- .gitignore | 4 ++-- LICENSE.md => docs/LICENSE.md | 0 README.md => docs/README.md | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename LICENSE.md => docs/LICENSE.md (100%) rename README.md => docs/README.md (100%) diff --git a/.gitignore b/.gitignore index 75ade1b..c8ea956 100644 --- a/.gitignore +++ b/.gitignore @@ -2,8 +2,8 @@ !.gitignore -LICENSE.md -README.md +!docs +!docs/** !src !src/** diff --git a/LICENSE.md b/docs/LICENSE.md similarity index 100% rename 
from LICENSE.md rename to docs/LICENSE.md diff --git a/README.md b/docs/README.md similarity index 100% rename from README.md rename to docs/README.md