From 955a5037e9af89d968b5bdf7c2d26431b16a88fc Mon Sep 17 00:00:00 2001 From: TheGeneralist <180094941+thegeneralist01@users.noreply.github.com> Date: Wed, 21 Jan 2026 20:26:58 +0100 Subject: [PATCH 1/9] chore: specify non-ignored `.md` files --- .gitignore | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 5bf848c..75ade1b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,9 @@ * !.gitignore -!*.md + +LICENSE.md +README.md !src !src/** From 60727c0f1e7a159f52b4fa2fb8379738c092f6f5 Mon Sep 17 00:00:00 2001 From: TheGeneralist <180094941+thegeneralist01@users.noreply.github.com> Date: Wed, 21 Jan 2026 20:29:16 +0100 Subject: [PATCH 2/9] refactor: rename youtube downloader to ytdlp More generic name since yt-dlp supports many sites beyond YouTube. --- src/downloader/mod.rs | 3 ++- src/downloader/{youtube.rs => ytdlp.rs} | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) rename src/downloader/{youtube.rs => ytdlp.rs} (88%) diff --git a/src/downloader/mod.rs b/src/downloader/mod.rs index 5e12721..e896201 100644 --- a/src/downloader/mod.rs +++ b/src/downloader/mod.rs @@ -1 +1,2 @@ -pub mod youtube; +pub mod local; +pub mod ytdlp; diff --git a/src/downloader/youtube.rs b/src/downloader/ytdlp.rs similarity index 88% rename from src/downloader/youtube.rs rename to src/downloader/ytdlp.rs index 3af75fa..6ecd7b8 100644 --- a/src/downloader/youtube.rs +++ b/src/downloader/ytdlp.rs @@ -4,12 +4,13 @@ use std::{env, path::Path, process::Command}; use crate::hash::hash_file; pub fn download(path: String, store_path: &Path, timestamp: &String) -> Result { - println!("Downloading from YouTube: {path}"); + println!("Downloading with yt-dlp: {path}"); let ytdlp = env::var("ARCHIVR_YT_DLP").unwrap_or_else(|_| "yt-dlp".to_string()); - let temp_dir = store_path.join("temp"); + let temp_dir = store_path.join("temp").join(timestamp); std::fs::create_dir_all(&temp_dir)?; + let out_file = temp_dir.join(format!("{timestamp}.mp4")); let 
out = Command::new(&ytdlp)

From 935729ac9340ce7a724aa4e54ff2806bfbf8c1ac Mon Sep 17 00:00:00 2001
From: TheGeneralist <180094941+thegeneralist01@users.noreply.github.com>
Date: Wed, 21 Jan 2026 20:29:24 +0100
Subject: [PATCH 3/9] feat: add local file downloader

Supports file:// URLs for archiving local files.
---
 src/downloader/local.rs | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)
 create mode 100644 src/downloader/local.rs

diff --git a/src/downloader/local.rs b/src/downloader/local.rs
new file mode 100644
index 0000000..f946a2e
--- /dev/null
+++ b/src/downloader/local.rs
@@ -0,0 +1,28 @@
+use anyhow::{Context, Result, bail};
+use std::{path::Path, process::Command};
+
+use crate::hash::hash_file;
+
+pub fn save(path: String, store_path: &Path, timestamp: &String) -> Result<String> {
+    println!("Saving path: {path}");
+
+    let temp_dir = store_path.join("temp").join(timestamp);
+    std::fs::create_dir_all(&temp_dir)?;
+
+    let in_file = Path::new(path.trim_start_matches("file://"));
+    let extension = in_file
+        .extension()
+        .map_or(String::new(), |ext| format!(".{}", ext.to_string_lossy()));
+    let out_file = temp_dir.join(format!("{timestamp}{extension}"));
+
+    let mut binding = Command::new("cp");
+    let cmd = binding.arg(in_file).arg(&out_file);
+    let out = cmd.output().with_context(|| "failed to spawn cp process")?;
+
+    if !out.status.success() {
+        let stderr = String::from_utf8_lossy(&out.stderr);
+        bail!("cp failed: {stderr}");
+    }
+
+    hash_file(&out_file)
+}

From e455f18932fd0024ce043706f97c813dae279031 Mon Sep 17 00:00:00 2001
From: TheGeneralist <180094941+thegeneralist01@users.noreply.github.com>
Date: Wed, 21 Jan 2026 20:29:31 +0100
Subject: [PATCH 4/9] deps: add regex crate for URL pattern matching

---
 Cargo.lock | 45 +++++++++++++++++++++++++++++++++++++++++++++
 Cargo.toml | 1 +
 2 files changed, 46 insertions(+)

diff --git a/Cargo.lock b/Cargo.lock
index 8678d20..155a9fc 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2,6 +2,15 @@
 # It is not
intended for manual editing. version = 4 +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + [[package]] name = "android_system_properties" version = "0.1.5" @@ -75,6 +84,7 @@ dependencies = [ "chrono", "clap", "hex", + "regex", "sha3", "uuid", ] @@ -311,6 +321,12 @@ version = "0.4.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" +[[package]] +name = "memchr" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" + [[package]] name = "num-traits" version = "0.2.19" @@ -356,6 +372,35 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "regex" +version = "1.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" + [[package]] name = "rustversion" version = "1.0.22" diff --git a/Cargo.toml b/Cargo.toml index de61350..f40ba88 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,5 +8,6 @@ anyhow = "1.0.100" chrono = 
"0.4.42" clap = { version = "4.5.48", features = ["derive"] } hex = "0.4.3" +regex = "1.12.2" sha3 = "0.10.8" uuid = { version = "1.18.1", features = ["v4"] } From 56fa1eaeb9b43c1fcfef37421907d56e3c60fe24 Mon Sep 17 00:00:00 2001 From: TheGeneralist <180094941+thegeneralist01@users.noreply.github.com> Date: Wed, 21 Jan 2026 20:29:59 +0100 Subject: [PATCH 5/9] feat: expand source detection with granular YouTube types - Split Source::YouTube into YouTubeVideo, YouTubePlaylist, YouTubeChannel - Add Source::X for Twitter/X posts - Add Source::Local for file:// URLs - Add regex-based URL pattern matching for YouTube URLs - Add shorthand schemes (yt:video/ID, youtube:playlist/ID, etc.) - Add comprehensive tests for all URL patterns --- src/hash.rs | 2 +- src/main.rs | 391 +++++++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 343 insertions(+), 50 deletions(-) diff --git a/src/hash.rs b/src/hash.rs index 4bb1da9..cbf1194 100644 --- a/src/hash.rs +++ b/src/hash.rs @@ -1,6 +1,6 @@ +use anyhow::Result; use sha3::{Digest, Sha3_256}; use std::{fs::File, io::Read, path::Path}; -use anyhow::Result; pub fn hash_file(path: &Path) -> Result { let mut file = File::open(path)?; diff --git a/src/main.rs b/src/main.rs index 4deb3b8..f2e6de1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -32,6 +32,8 @@ enum Command { /// Store path - path to store the archived files in. /// Structure will be: /// store_path/ + /// temp/ + /// ... /// raw/ /// ... 
/// structured/ @@ -42,6 +44,10 @@ enum Command { /// Name of the archive #[arg(short, long)] name: String, + + /// Wipe existing .archivr repository data + #[arg(long = "force-with-info-removal")] + force_with_info_removal: bool, }, } @@ -58,15 +64,80 @@ fn get_archive_path() -> Option { None } -#[derive(Debug)] +#[derive(Debug, PartialEq)] enum Source { - YouTube, + YouTubeVideo, + YouTubePlaylist, + YouTubeChannel, + X, + Local, Other, } +// INFO: yt-dlp supports a lot of sites; so, when archiving (for example) a website, the user +// -> should be asked whether they want to archive the whole website or just the video(s) on it. fn determine_source(path: &str) -> Source { - if path.starts_with("http://") || path.starts_with("https://") { - return Source::YouTube; + // INFO: Extractors' URLs can be found here: + // -> https://github.com/yt-dlp/yt-dlp/tree/dfc0a84c192a7357dd1768cc345d590253a14fe5/yt_dlp/extractor + // TEST: X posts can have multiple videos. + + // Shorthand schemes: yt: or youtube: + if path.starts_with("yt:") || path.starts_with("youtube:") { + let after_scheme = if path.starts_with("yt:") { + &path[3..] + } else { + &path[8..] 
+ }; + + // video/ID, short/ID, shorts/ID + if after_scheme.starts_with("video/") + || after_scheme.starts_with("short/") + || after_scheme.starts_with("shorts/") + { + return Source::YouTubeVideo; + } + + // playlist/ID + if after_scheme.starts_with("playlist/") { + return Source::YouTubePlaylist; + } + + // channel/ID, c/ID, user/ID, @handle + if after_scheme.starts_with("channel/") + || after_scheme.starts_with("c/") + || after_scheme.starts_with("user/") + || after_scheme.starts_with("@") + { + return Source::YouTubeChannel; + } + } + + if path.starts_with("file://") { + return Source::Local; + } else if path.starts_with("http://") || path.starts_with("https://") { + // Video URLs (watch, youtu.be, shorts) + let video_re = regex::Regex::new(r"^https?://(?:www\.)?(?:youtu\.be/[0-9A-Za-z_-]+|youtube\.com/watch\?v=[0-9A-Za-z_-]+|youtube\.com/shorts/[0-9A-Za-z_-]+)").unwrap(); + if video_re.is_match(path) { + return Source::YouTubeVideo; + } + + // Playlist URLs + let playlist_re = + regex::Regex::new(r"^https?://(?:www\.)?youtube\.com/playlist\?list=[0-9A-Za-z_-]+") + .unwrap(); + if playlist_re.is_match(path) { + return Source::YouTubePlaylist; + } + + // Channel or user URLs (channel IDs, /c/, /user/, or @handles) + let channel_re = regex::Regex::new(r"^https?://(?:www\.)?youtube\.com/(?:channel/[0-9A-Za-z_-]+|c/[0-9A-Za-z_-]+|user/[0-9A-Za-z_-]+|@[0-9A-Za-z_-]+)").unwrap(); + if channel_re.is_match(path) { + return Source::YouTubeChannel; + } + + if path.starts_with("https://x.com/") { + return Source::X; + } } Source::Other } @@ -136,54 +207,92 @@ fn main() -> Result<()> { let timestamp = Local::now().format("%Y-%m-%dT%H-%M-%S%.3f").to_string(); let source = determine_source(path); - if let Source::YouTube = source { - let store_path_string_file = archive_path.unwrap().join("store_path"); - let store_path = match fs::read_to_string(store_path_string_file) { - Ok(p) => PathBuf::from(p.trim()), - Err(e) => { - eprintln!("Failed to read store path: {e}"); - 
process::exit(1); - } - }; + if let Source::Other = source { + eprintln!("Archiving from this source is not yet implemented."); + process::exit(1); + } - let hash = - match downloader::youtube::download(path.clone(), &store_path, ×tamp) { + let store_path_string_file = archive_path.unwrap().join("store_path"); + let store_path = match fs::read_to_string(store_path_string_file) { + Ok(p) => PathBuf::from(p.trim()), + Err(e) => { + eprintln!("Failed to read store path: {e}"); + process::exit(1); + } + }; + + let hash = match source { + Source::YouTubeVideo | Source::X => { + match downloader::ytdlp::download(path.clone(), &store_path, ×tamp) { Ok(h) => h, Err(e) => { eprintln!("Failed to download from YouTube: {e}"); process::exit(1); } - }; - - let hash_exists = hash_exists(format!("{hash}.mp4"), &store_path); - // TODO: check for repeated archives? - // There could be one of the following: - // - We are literally archiving the same path over again. - // - We are archiving a different path, which had this file. E.g.: we archived a - // website before which had this YouTube video, and while recursively archiving - // everything, we also archived the YouTube video although it wasn't our main - // target. This means that we should archive again; whereas with the first case... - // Not sure. Need to think about this. - // ---- - // Thinking about it a day later... - // If we are specifically archiving a YouTube video, it could also be two of the - // above. So yeah, just create a new DB entry and symlink the Raw to the Structured - // Dir or whatever. it's midnight and my brain ain't wording/braining. 
- if hash_exists { - println!("File already archived."); - process::exit(0); - } else { - move_temp_to_raw( - &store_path.join("temp").join(format!("{timestamp}.mp4")), - &hash, - &store_path, - )?; - - println!("File archived successfully."); + } } + Source::Local => { + match downloader::local::save(path.clone(), &store_path, ×tamp) { + Ok(h) => h, + Err(e) => { + eprintln!("Failed to archive local file: {e}"); + process::exit(1); + } + } + } + _ => unreachable!(), + }; + + let file_extension = match source { + Source::YouTubeVideo | Source::X => ".mp4", + Source::Local => { + let p = Path::new(path.trim_start_matches("file://")); + &p.extension() + .map_or(String::new(), |ext| format!(".{}", ext.to_string_lossy())) + } + _ => "", + }; + + let hash_exists = hash_exists(format!("{hash}{file_extension}"), &store_path); + + // TODO: check for repeated archives? + // There could be one of the following: + // - We are literally archiving the same path over again. + // - We are archiving a different path, which had this file. E.g.: we archived a + // website before which had this YouTube video, and while recursively archiving + // everything, we also archived the YouTube video although it wasn't our main + // target. This means that we should archive again; whereas with the first case... + // Not sure. Need to think about this. + // ---- + // Thinking about it a day later... + // If we are specifically archiving a YouTube video, it could also be two of the + // above. So yeah, just create a new DB entry and symlink the Raw to the Structured + // Dir or whatever. it's midnight and my brain ain't wording/braining. 
+ if hash_exists { + println!("File already archived."); + let _ = fs::remove_file( + store_path + .join("temp") + .join(×tamp) + .join(format!("{timestamp}{file_extension}")), + ); + process::exit(0); + } else { + move_temp_to_raw( + &store_path + .join("temp") + .join(×tamp) + .join(format!("{timestamp}{file_extension}")), + &hash, + &store_path, + )?; + + println!("File archived successfully."); } // TODO: DB INSERT, inserting a record + // https://github.com/rusqlite/rusqlite + // Think of the DB schema Ok(()) } @@ -192,6 +301,7 @@ fn main() -> Result<()> { path: ref archive_path_string, store_path: ref store_path_string, name: ref archive_name, + force_with_info_removal, } => { let archive_path = Path::new(&archive_path_string).join(".archivr"); let store_path = if Path::new(&store_path_string).is_relative() { @@ -201,16 +311,26 @@ fn main() -> Result<()> { }; if archive_path.exists() { - // TODO: check if there is nothing inside. if there is nothing inside, use it - eprintln!("Archive already exists at {}", archive_path.display()); - if store_path.exists() { - eprintln!("Store path already exists at {}", store_path.display()); + if !archive_path.is_dir() { + eprintln!( + "Archive path exists and is not a directory: {}", + archive_path.display() + ); + process::exit(1); + } + + if force_with_info_removal { + fs::remove_dir_all(&archive_path)?; + } else if fs::read_dir(&archive_path)?.next().is_some() { + eprintln!( + "Archive already exists at {} and is not empty. Use --force-with-info-removal to reinitialize.", + archive_path.display() + ); process::exit(1); } - process::exit(1); } - if store_path.exists() { - // TODO: check if the structure is correct. If so, use it. 
+ + if store_path.exists() && !force_with_info_removal { eprintln!("Store path already exists at {}", store_path.display()); process::exit(1); } @@ -232,3 +352,176 @@ fn main() -> Result<()> { } // _ => eprintln!("Unknown command: {:?}", args.command), } } + +#[cfg(test)] +mod tests { + use super::*; + + struct TestCase<'a> { + url: &'a str, + expected: Source, + } + + #[test] + fn test_youtube_sources() { + // --- YouTube Video URLs --- + let video_cases = [ + TestCase { + url: "https://www.youtube.com/watch?v=UHxw-L2WyyY", + expected: Source::YouTubeVideo, + }, + TestCase { + url: "https://youtu.be/UHxw-L2WyyY", + expected: Source::YouTubeVideo, + }, + TestCase { + url: "https://www.youtube.com/shorts/EtC99eWiwRI", + expected: Source::YouTubeVideo, + }, + ]; + + for case in &video_cases { + assert_eq!( + determine_source(case.url), + case.expected, + "Failed for URL: {}", + case.url + ); + } + + // --- YouTube Playlist URLs --- + let playlist_cases = [TestCase { + url: "https://www.youtube.com/playlist?list=PL9vTTBa7QaQOoMfpP3ztvgyQkPWDPfJez", + expected: Source::YouTubePlaylist, + }]; + + for case in &playlist_cases { + assert_eq!( + determine_source(case.url), + case.expected, + "Failed for URL: {}", + case.url + ); + } + + // --- YouTube Channel URLs --- + let channel_cases = [ + TestCase { + url: "https://www.youtube.com/channel/CoreDumpped", + expected: Source::YouTubeChannel, + }, + TestCase { + url: "https://www.youtube.com/@CoreDumpped", + expected: Source::YouTubeChannel, + }, + TestCase { + url: "https://www.youtube.com/c/YouTubeCreators", + expected: Source::YouTubeChannel, + }, + TestCase { + url: "https://www.youtube.com/user/pewdiepie", + expected: Source::YouTubeChannel, + }, + TestCase { + url: "https://youtube.com/@pewdiepie?si=KOcLN_KPYNpe5f_8", + expected: Source::YouTubeChannel, + }, + ]; + + for case in &channel_cases { + assert_eq!( + determine_source(case.url), + case.expected, + "Failed for URL: {}", + case.url + ); + } + + // --- 
Shorthand scheme URLs --- + let shorthand_cases = [ + // Videos + TestCase { + url: "yt:video/UHxw-L2WyyY", + expected: Source::YouTubeVideo, + }, + TestCase { + url: "youtube:video/UHxw-L2WyyY", + expected: Source::YouTubeVideo, + }, + TestCase { + url: "yt:short/EtC99eWiwRI", + expected: Source::YouTubeVideo, + }, + TestCase { + url: "yt:shorts/EtC99eWiwRI", + expected: Source::YouTubeVideo, + }, + TestCase { + url: "youtube:shorts/EtC99eWiwRI", + expected: Source::YouTubeVideo, + }, + // Playlists + TestCase { + url: "yt:playlist/PL9vTTBa7QaQOoMfpP3ztvgyQkPWDPfJez", + expected: Source::YouTubePlaylist, + }, + TestCase { + url: "youtube:playlist/PL9vTTBa7QaQOoMfpP3ztvgyQkPWDPfJez", + expected: Source::YouTubePlaylist, + }, + // Channels + TestCase { + url: "yt:channel/UCxyz123", + expected: Source::YouTubeChannel, + }, + TestCase { + url: "yt:c/YouTubeCreators", + expected: Source::YouTubeChannel, + }, + TestCase { + url: "yt:user/pewdiepie", + expected: Source::YouTubeChannel, + }, + TestCase { + url: "youtube:@CoreDumpped", + expected: Source::YouTubeChannel, + }, + ]; + + for case in &shorthand_cases { + assert_eq!( + determine_source(case.url), + case.expected, + "Failed for URL: {}", + case.url + ); + } + } + + #[test] + fn test_non_youtube_sources() { + let other_cases = [ + TestCase { + url: "file:///local/path/file.mp4", + expected: Source::Local, + }, + TestCase { + url: "https://x.com/some_post", + expected: Source::X, + }, + TestCase { + url: "https://example.com/", + expected: Source::Other, + }, + ]; + + for case in &other_cases { + assert_eq!( + determine_source(case.url), + case.expected, + "Failed for URL: {}", + case.url + ); + } + } +} From b3677d627c684ff0ae8d82e472bdf7098d5948cd Mon Sep 17 00:00:00 2001 From: TheGeneralist <180094941+thegeneralist01@users.noreply.github.com> Date: Wed, 21 Jan 2026 20:30:15 +0100 Subject: [PATCH 6/9] docs: update README milestones Mark YouTube videos, Twitter videos, and local files as done. 
--- README.md | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index f59bfab..12c6af6 100644 --- a/README.md +++ b/README.md @@ -1,29 +1,26 @@ # archivr -An open-source self-hosted archiving solution. Work in progress. +An open-source self-hosted archiving tool. Work in progress. ## Milestones - [ ] Archiving - [ ] Archiving media files from social media platforms - - [ ] YouTube - - [ ] Twitter + - [X] YouTube Videos + - [X] Twitter Videos - [ ] Instagram - [ ] Facebook - [ ] TikTok - [ ] Reddit - [ ] Snapchat + - [ ] YouTube Posts (?) - (Some of these could be postponed for later.) - - [ ] Archiving local files - - [ ] Archive videos (MP4, WebM) - - [ ] Archive audio files (MP3, WAV) - - [ ] Archive documents (DOCX, XLSX, PPTX) - - [ ] Archive PDFs - - [ ] Archive images (JPEG, PNG, GIF) + - [X] Archiving local files - [ ] Archiving files from cloud storage services (Google Drive, Dropbox, OneDrive) and from URLs - [ ] URLs - [ ] Google Drive - [ ] Dropbox - [ ] OneDrive + - [ ] Archiving Twitter threads - [ ] Archive web pages (HTML, CSS, JS, images) - [ ] Archiving emails (???) 
- [ ] Gmail From 76fce7f91e34979cb326ee3af282df4bc4ba78d7 Mon Sep 17 00:00:00 2001 From: TheGeneralist <180094941+thegeneralist01@users.noreply.github.com> Date: Wed, 21 Jan 2026 20:30:23 +0100 Subject: [PATCH 7/9] chore: update flake.lock --- flake.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flake.lock b/flake.lock index fd86e53..d406848 100644 --- a/flake.lock +++ b/flake.lock @@ -2,11 +2,11 @@ "nodes": { "nixpkgs": { "locked": { - "lastModified": 1760284886, - "narHash": "sha256-TK9Kr0BYBQ/1P5kAsnNQhmWWKgmZXwUQr4ZMjCzWf2c=", + "lastModified": 1761672384, + "narHash": "sha256-o9KF3DJL7g7iYMZq9SWgfS1BFlNbsm6xplRjVlOCkXI=", "owner": "nixos", "repo": "nixpkgs", - "rev": "cf3f5c4def3c7b5f1fc012b3d839575dbe552d43", + "rev": "08dacfca559e1d7da38f3cf05f1f45ee9bfd213c", "type": "github" }, "original": { From 20d85146961d6108acbf6a0785ce13f8aa8abc07 Mon Sep 17 00:00:00 2001 From: TheGeneralist <180094941+thegeneralist01@users.noreply.github.com> Date: Wed, 21 Jan 2026 20:42:00 +0100 Subject: [PATCH 8/9] feat: add shorthand schemes for X/Twitter media --- src/main.rs | 47 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/src/main.rs b/src/main.rs index f2e6de1..240ea55 100644 --- a/src/main.rs +++ b/src/main.rs @@ -82,13 +82,10 @@ fn determine_source(path: &str) -> Source { // TEST: X posts can have multiple videos. // Shorthand schemes: yt: or youtube: - if path.starts_with("yt:") || path.starts_with("youtube:") { - let after_scheme = if path.starts_with("yt:") { - &path[3..] - } else { - &path[8..] 
- }; - + if let Some(after_scheme) = path + .strip_prefix("yt:") + .or_else(|| path.strip_prefix("youtube:")) + { // video/ID, short/ID, shorts/ID if after_scheme.starts_with("video/") || after_scheme.starts_with("short/") @@ -112,6 +109,11 @@ fn determine_source(path: &str) -> Source { } } + // Shorthand schemes: x: or twitter: + if path.starts_with("x:") || path.starts_with("twitter:") { + return Source::X; + } + if path.starts_with("file://") { return Source::Local; } else if path.starts_with("http://") || path.starts_with("https://") { @@ -498,6 +500,33 @@ mod tests { } } + #[test] + fn test_x_sources() { + let x_cases = [ + TestCase { + url: "https://x.com/some_post", + expected: Source::X, + }, + TestCase { + url: "x:1234567890", + expected: Source::X, + }, + TestCase { + url: "twitter:1234567890", + expected: Source::X, + }, + ]; + + for case in &x_cases { + assert_eq!( + determine_source(case.url), + case.expected, + "Failed for URL: {}", + case.url + ); + } + } + #[test] fn test_non_youtube_sources() { let other_cases = [ @@ -505,10 +534,6 @@ mod tests { url: "file:///local/path/file.mp4", expected: Source::Local, }, - TestCase { - url: "https://x.com/some_post", - expected: Source::X, - }, TestCase { url: "https://example.com/", expected: Source::Other, From df05687ad97a5c7ed5bb49bdcad32861e638d1f1 Mon Sep 17 00:00:00 2001 From: TheGeneralist <180094941+thegeneralist01@users.noreply.github.com> Date: Wed, 21 Jan 2026 20:47:46 +0100 Subject: [PATCH 9/9] chore: move docs into docs dir --- .gitignore | 4 ++-- LICENSE.md => docs/LICENSE.md | 0 README.md => docs/README.md | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename LICENSE.md => docs/LICENSE.md (100%) rename README.md => docs/README.md (100%) diff --git a/.gitignore b/.gitignore index 75ade1b..c8ea956 100644 --- a/.gitignore +++ b/.gitignore @@ -2,8 +2,8 @@ !.gitignore -LICENSE.md -README.md +!docs +!docs/** !src !src/** diff --git a/LICENSE.md b/docs/LICENSE.md similarity index 100% rename 
from LICENSE.md rename to docs/LICENSE.md diff --git a/README.md b/docs/README.md similarity index 100% rename from README.md rename to docs/README.md