1
Fork 0
mirror of https://github.com/thegeneralist01/archivr synced 2026-05-30 08:36:47 +02:00

Compare commits

..

No commits in common. "9441a9d9fbada8dc95eb4292a38570ec56081a04" and "553cca99ca1b45e50a840eab90bf3f5234356f20" have entirely different histories.

12 changed files with 74 additions and 616 deletions

4
.gitignore vendored
View file

@ -1,9 +1,7 @@
* *
!.gitignore !.gitignore
!*.md
!docs
!docs/**
!src !src
!src/** !src/**

45
Cargo.lock generated
View file

@ -2,15 +2,6 @@
# It is not intended for manual editing. # It is not intended for manual editing.
version = 4 version = 4
[[package]]
name = "aho-corasick"
version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
dependencies = [
"memchr",
]
[[package]] [[package]]
name = "android_system_properties" name = "android_system_properties"
version = "0.1.5" version = "0.1.5"
@ -84,7 +75,6 @@ dependencies = [
"chrono", "chrono",
"clap", "clap",
"hex", "hex",
"regex",
"sha3", "sha3",
"uuid", "uuid",
] ]
@ -321,12 +311,6 @@ version = "0.4.28"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432"
[[package]]
name = "memchr"
version = "2.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
[[package]] [[package]]
name = "num-traits" name = "num-traits"
version = "0.2.19" version = "0.2.19"
@ -372,35 +356,6 @@ version = "5.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
[[package]]
name = "regex"
version = "1.12.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58"
[[package]] [[package]]
name = "rustversion" name = "rustversion"
version = "1.0.22" version = "1.0.22"

View file

@ -8,6 +8,5 @@ anyhow = "1.0.100"
chrono = "0.4.42" chrono = "0.4.42"
clap = { version = "4.5.48", features = ["derive"] } clap = { version = "4.5.48", features = ["derive"] }
hex = "0.4.3" hex = "0.4.3"
regex = "1.12.2"
sha3 = "0.10.8" sha3 = "0.10.8"
uuid = { version = "1.18.1", features = ["v4"] } uuid = { version = "1.18.1", features = ["v4"] }

View file

@ -1,26 +1,29 @@
# archivr # archivr
An open-source self-hosted archiving tool. Work in progress. An open-source self-hosted archiving solution. Work in progress.
## Milestones ## Milestones
- [ ] Archiving - [ ] Archiving
- [X] Archiving media files from social media platforms - [ ] Archiving media files from social media platforms
- [X] YouTube Videos - [ ] YouTube
- [X] Twitter Videos - [ ] Twitter
- [X] Instagram - [ ] Instagram
- [X] Facebook - [ ] Facebook
- [X] TikTok - [ ] TikTok
- [X] Reddit - [ ] Reddit
- [X] Snapchat - [ ] Snapchat
- [ ] YouTube Posts (postponed) - (Some of these could be postponed for later.)
- [X] Archiving local files - [ ] Archiving local files
- [ ] Archive videos (MP4, WebM)
- [ ] Archive audio files (MP3, WAV)
- [ ] Archive documents (DOCX, XLSX, PPTX)
- [ ] Archive PDFs
- [ ] Archive images (JPEG, PNG, GIF)
- [ ] Archiving files from cloud storage services (Google Drive, Dropbox, OneDrive) and from URLs - [ ] Archiving files from cloud storage services (Google Drive, Dropbox, OneDrive) and from URLs
- [ ] URLs - [ ] URLs
- [ ] Google Drive - [ ] Google Drive
- [ ] Dropbox - [ ] Dropbox
- [ ] OneDrive - [ ] OneDrive
- (Some of these could be postponed for later.)
- [ ] Archiving Twitter threads
- [ ] Archive web pages (HTML, CSS, JS, images) - [ ] Archive web pages (HTML, CSS, JS, images)
- [ ] Archiving emails (???) - [ ] Archiving emails (???)
- [ ] Gmail - [ ] Gmail

6
flake.lock generated
View file

@ -2,11 +2,11 @@
"nodes": { "nodes": {
"nixpkgs": { "nixpkgs": {
"locked": { "locked": {
"lastModified": 1761672384, "lastModified": 1760284886,
"narHash": "sha256-o9KF3DJL7g7iYMZq9SWgfS1BFlNbsm6xplRjVlOCkXI=", "narHash": "sha256-TK9Kr0BYBQ/1P5kAsnNQhmWWKgmZXwUQr4ZMjCzWf2c=",
"owner": "nixos", "owner": "nixos",
"repo": "nixpkgs", "repo": "nixpkgs",
"rev": "08dacfca559e1d7da38f3cf05f1f45ee9bfd213c", "rev": "cf3f5c4def3c7b5f1fc012b3d839575dbe552d43",
"type": "github" "type": "github"
}, },
"original": { "original": {

View file

@ -1,18 +1,6 @@
{ {
description = "Archivr - An open-source archive manager"; description = "Archivr - An open-source archive manager";
nixConfig = {
extra-substituters = [
"https://cache.thegeneralist01.com/"
"https://cache.garnix.io/"
"https://cache.nixos.org/"
];
extra-trusted-public-keys = [
"cache.thegeneralist01.com:jkKcenR877r7fQuWq6cr0JKv2piqBWmYLAYsYsSJnT4="
"cache.garnix.io:CTFPyKSLcx5RMJKfLo5EEPUObbA78b0YQ2DTCJXqr9g="
];
};
inputs.nixpkgs.url = "github:nixos/nixpkgs/nixos-unstable"; inputs.nixpkgs.url = "github:nixos/nixpkgs/nixos-unstable";
outputs = outputs =
@ -33,7 +21,7 @@
pname = "archivr"; pname = "archivr";
version = "0.1.0"; version = "0.1.0";
src = pkgs.lib.cleanSource ./.; src = pkgs.lib.cleanSource ./.;
cargoHash = "sha256-4m+4SMYA/rJ0eHEOc32zA2VdZI1pqzB5NenD0R0f2zM="; cargoHash = "sha256-y47+Fmp3BID86aPnLtrvzg40lOr9cHyg/38+onisK7w=";
nativeBuildInputs = [ pkgs.pkg-config ]; nativeBuildInputs = [ pkgs.pkg-config ];
}; };
archivr = pkgs.stdenv.mkDerivation { archivr = pkgs.stdenv.mkDerivation {
@ -61,7 +49,6 @@
}; };
in in
{ {
default = archivr;
archivr = archivr; archivr = archivr;
archivr-unwrapped = archivr_unwrapped; archivr-unwrapped = archivr_unwrapped;
} }

View file

@ -1,28 +0,0 @@
use anyhow::{Context, Result, bail};
use std::{path::Path, process::Command};
use crate::hash::hash_file;
pub fn save(path: String, store_path: &Path, timestamp: &String) -> Result<String> {
println!("Saving path: {path}");
let temp_dir = store_path.join("temp").join(timestamp);
std::fs::create_dir_all(&temp_dir)?;
let in_file = Path::new(path.trim_start_matches("file://"));
let extension = in_file
.extension()
.map_or(String::new(), |ext| format!(".{}", ext.to_string_lossy()));
let out_file = temp_dir.join(format!("{timestamp}{extension}"));
let mut binding = Command::new("cp");
let cmd = binding.arg(in_file).arg(&out_file);
let out = cmd.output().with_context(|| "failed to spawn cp process")?;
if !out.status.success() {
let stderr = String::from_utf8_lossy(&out.stderr);
bail!("yt-dlp failed: {stderr}");
}
hash_file(&out_file)
}

View file

@ -1,2 +1 @@
pub mod local; pub mod youtube;
pub mod ytdlp;

View file

@ -4,13 +4,12 @@ use std::{env, path::Path, process::Command};
use crate::hash::hash_file; use crate::hash::hash_file;
pub fn download(path: String, store_path: &Path, timestamp: &String) -> Result<String> { pub fn download(path: String, store_path: &Path, timestamp: &String) -> Result<String> {
println!("Downloading with yt-dlp: {path}"); println!("Downloading from YouTube: {path}");
let ytdlp = env::var("ARCHIVR_YT_DLP").unwrap_or_else(|_| "yt-dlp".to_string()); let ytdlp = env::var("ARCHIVR_YT_DLP").unwrap_or_else(|_| "yt-dlp".to_string());
let temp_dir = store_path.join("temp").join(timestamp); let temp_dir = store_path.join("temp");
std::fs::create_dir_all(&temp_dir)?; std::fs::create_dir_all(&temp_dir)?;
let out_file = temp_dir.join(format!("{timestamp}.mp4")); let out_file = temp_dir.join(format!("{timestamp}.mp4"));
let out = Command::new(&ytdlp) let out = Command::new(&ytdlp)

View file

@ -1,6 +1,6 @@
use anyhow::Result;
use sha3::{Digest, Sha3_256}; use sha3::{Digest, Sha3_256};
use std::{fs::File, io::Read, path::Path}; use std::{fs::File, io::Read, path::Path};
use anyhow::Result;
pub fn hash_file(path: &Path) -> Result<String> { pub fn hash_file(path: &Path) -> Result<String> {
let mut file = File::open(path)?; let mut file = File::open(path)?;

View file

@ -32,8 +32,6 @@ enum Command {
/// Store path - path to store the archived files in. /// Store path - path to store the archived files in.
/// Structure will be: /// Structure will be:
/// store_path/ /// store_path/
/// temp/
/// ...
/// raw/ /// raw/
/// ... /// ...
/// structured/ /// structured/
@ -44,10 +42,6 @@ enum Command {
/// Name of the archive /// Name of the archive
#[arg(short, long)] #[arg(short, long)]
name: String, name: String,
/// Wipe existing .archivr repository data
#[arg(long = "force-with-info-removal")]
force_with_info_removal: bool,
}, },
} }
@ -64,148 +58,15 @@ fn get_archive_path() -> Option<PathBuf> {
None None
} }
#[derive(Debug, PartialEq)] #[derive(Debug)]
enum Source { enum Source {
YouTubeVideo, YouTube,
YouTubePlaylist,
YouTubeChannel,
X,
Instagram,
Facebook,
TikTok,
Reddit,
Snapchat,
Local,
Other, Other,
} }
// INFO: yt-dlp supports a lot of sites; so, when archiving (for example) a website, the user
// -> should be asked whether they want to archive the whole website or just the video(s) on it.
fn determine_source(path: &str) -> Source { fn determine_source(path: &str) -> Source {
// INFO: Extractor URLs can be found here: if path.starts_with("http://") || path.starts_with("https://") {
// -> https://github.com/yt-dlp/yt-dlp/tree/dfc0a84c192a7357dd1768cc345d590253a14fe5/yt_dlp/extractor return Source::YouTube;
// TEST: X posts can have multiple videos.
// Shorthand schemes: yt: or youtube:
if let Some(after_scheme) = path
.strip_prefix("yt:")
.or_else(|| path.strip_prefix("youtube:"))
{
// video/ID, short/ID, shorts/ID
if after_scheme.starts_with("video/")
|| after_scheme.starts_with("short/")
|| after_scheme.starts_with("shorts/")
{
return Source::YouTubeVideo;
}
// playlist/ID
if after_scheme.starts_with("playlist/") {
return Source::YouTubePlaylist;
}
// channel/ID, c/ID, user/ID, @handle
if after_scheme.starts_with("channel/")
|| after_scheme.starts_with("c/")
|| after_scheme.starts_with("user/")
|| after_scheme.starts_with("@")
{
return Source::YouTubeChannel;
}
}
// Shorthand schemes: x: or twitter:
if path.starts_with("x:") || path.starts_with("twitter:") {
return Source::X;
}
// Shorthand schemes for other yt-dlp extractors
if path.starts_with("instagram:") {
return Source::Instagram;
}
if path.starts_with("facebook:") {
return Source::Facebook;
}
if path.starts_with("tiktok:") {
return Source::TikTok;
}
if path.starts_with("reddit:") {
return Source::Reddit;
}
if path.starts_with("snapchat:") {
return Source::Snapchat;
}
if path.starts_with("file://") {
return Source::Local;
} else if path.starts_with("http://") || path.starts_with("https://") {
// Video URLs (watch, youtu.be, shorts)
let video_re = regex::Regex::new(r"^https?://(?:www\.)?(?:youtu\.be/[0-9A-Za-z_-]+|youtube\.com/watch\?v=[0-9A-Za-z_-]+|youtube\.com/shorts/[0-9A-Za-z_-]+)").unwrap();
if video_re.is_match(path) {
return Source::YouTubeVideo;
}
// Playlist URLs
let playlist_re =
regex::Regex::new(r"^https?://(?:www\.)?youtube\.com/playlist\?list=[0-9A-Za-z_-]+")
.unwrap();
if playlist_re.is_match(path) {
return Source::YouTubePlaylist;
}
// Channel or user URLs (channel IDs, /c/, /user/, or @handles)
let channel_re = regex::Regex::new(r"^https?://(?:www\.)?youtube\.com/(?:channel/[0-9A-Za-z_-]+|c/[0-9A-Za-z_-]+|user/[0-9A-Za-z_-]+|@[0-9A-Za-z_-]+)").unwrap();
if channel_re.is_match(path) {
return Source::YouTubeChannel;
}
if path.starts_with("https://x.com/") {
return Source::X;
}
if path.starts_with("https://instagram.com/")
|| path.starts_with("https://www.instagram.com/")
|| path.starts_with("http://instagram.com/")
|| path.starts_with("http://www.instagram.com/")
{
return Source::Instagram;
}
if path.starts_with("https://facebook.com/")
|| path.starts_with("https://www.facebook.com/")
|| path.starts_with("http://facebook.com/")
|| path.starts_with("http://www.facebook.com/")
|| path.starts_with("https://fb.watch/")
|| path.starts_with("http://fb.watch/")
{
return Source::Facebook;
}
if path.starts_with("https://tiktok.com/")
|| path.starts_with("https://www.tiktok.com/")
|| path.starts_with("http://tiktok.com/")
|| path.starts_with("http://www.tiktok.com/")
{
return Source::TikTok;
}
if path.starts_with("https://reddit.com/")
|| path.starts_with("https://www.reddit.com/")
|| path.starts_with("http://reddit.com/")
|| path.starts_with("http://www.reddit.com/")
|| path.starts_with("https://redd.it/")
|| path.starts_with("http://redd.it/")
{
return Source::Reddit;
}
if path.starts_with("https://snapchat.com/")
|| path.starts_with("https://www.snapchat.com/")
|| path.starts_with("http://snapchat.com/")
|| path.starts_with("http://www.snapchat.com/")
{
return Source::Snapchat;
}
} }
Source::Other Source::Other
} }
@ -275,99 +136,54 @@ fn main() -> Result<()> {
let timestamp = Local::now().format("%Y-%m-%dT%H-%M-%S%.3f").to_string(); let timestamp = Local::now().format("%Y-%m-%dT%H-%M-%S%.3f").to_string();
let source = determine_source(path); let source = determine_source(path);
if let Source::Other = source { if let Source::YouTube = source {
eprintln!("Archiving from this source is not yet implemented."); let store_path_string_file = archive_path.unwrap().join("store_path");
process::exit(1); let store_path = match fs::read_to_string(store_path_string_file) {
} Ok(p) => PathBuf::from(p.trim()),
Err(e) => {
eprintln!("Failed to read store path: {e}");
process::exit(1);
}
};
let store_path_string_file = archive_path.unwrap().join("store_path"); let hash =
let store_path = match fs::read_to_string(store_path_string_file) { match downloader::youtube::download(path.clone(), &store_path, &timestamp) {
Ok(p) => PathBuf::from(p.trim()),
Err(e) => {
eprintln!("Failed to read store path: {e}");
process::exit(1);
}
};
let hash = match source {
Source::YouTubeVideo
| Source::X
| Source::Instagram
| Source::Facebook
| Source::TikTok
| Source::Reddit
| Source::Snapchat => {
match downloader::ytdlp::download(path.clone(), &store_path, &timestamp) {
Ok(h) => h, Ok(h) => h,
Err(e) => { Err(e) => {
eprintln!("Failed to download from YouTube: {e}"); eprintln!("Failed to download from YouTube: {e}");
process::exit(1); process::exit(1);
} }
} };
let hash_exists = hash_exists(format!("{hash}.mp4"), &store_path);
// TODO: check for repeated archives?
// There could be one of the following:
// - We are literally archiving the same path over again.
// - We are archiving a different path, which had this file. E.g.: we archived a
// website before which had this YouTube video, and while recursively archiving
// everything, we also archived the YouTube video although it wasn't our main
// target. This means that we should archive again; whereas with the first case...
// Not sure. Need to think about this.
// ----
// Thinking about it a day later...
// If we are specifically archiving a YouTube video, it could also be two of the
// above. So yeah, just create a new DB entry and symlink the Raw to the Structured
// Dir or whatever. it's midnight and my brain ain't wording/braining.
if hash_exists {
println!("File already archived.");
process::exit(0);
} else {
move_temp_to_raw(
&store_path.join("temp").join(format!("{timestamp}.mp4")),
&hash,
&store_path,
)?;
println!("File archived successfully.");
} }
Source::Local => {
match downloader::local::save(path.clone(), &store_path, &timestamp) {
Ok(h) => h,
Err(e) => {
eprintln!("Failed to archive local file: {e}");
process::exit(1);
}
}
}
_ => unreachable!(),
};
let file_extension = match source {
Source::YouTubeVideo
| Source::X
| Source::Instagram
| Source::Facebook
| Source::TikTok
| Source::Reddit
| Source::Snapchat => ".mp4",
Source::Local => {
let p = Path::new(path.trim_start_matches("file://"));
&p.extension()
.map_or(String::new(), |ext| format!(".{}", ext.to_string_lossy()))
}
_ => "",
};
let hash_exists = hash_exists(format!("{hash}{file_extension}"), &store_path);
// TODO: check for repeated archives?
// There could be one of the following:
// - We are literally archiving the same path over again.
// - We are archiving a different path, which had this file. E.g.: we archived a
// website before which had this YouTube video, and while recursively archiving
// everything, we also archived the YouTube video although it wasn't our main
// target. This means that we should archive again; whereas with the first case...
// Not sure. Need to think about this.
// ----
// Thinking about it a day later...
// If we are specifically archiving a YouTube video, it could also be two of the
// above. So yeah, just create a new DB entry and symlink the Raw to the Structured
// Dir or whatever. it's midnight and my brain ain't wording/braining.
if hash_exists {
println!("File already archived.");
let _ = fs::remove_dir_all(store_path.join("temp").join(&timestamp));
} else {
move_temp_to_raw(
&store_path
.join("temp")
.join(&timestamp)
.join(format!("{timestamp}{file_extension}")),
&hash,
&store_path,
)?;
let _ = fs::remove_dir_all(store_path.join("temp").join(&timestamp));
println!("File archived successfully.");
} }
// TODO: DB INSERT, inserting a record // TODO: DB INSERT, inserting a record
// https://github.com/rusqlite/rusqlite
// Think of the DB schema
Ok(()) Ok(())
} }
@ -376,7 +192,6 @@ fn main() -> Result<()> {
path: ref archive_path_string, path: ref archive_path_string,
store_path: ref store_path_string, store_path: ref store_path_string,
name: ref archive_name, name: ref archive_name,
force_with_info_removal,
} => { } => {
let archive_path = Path::new(&archive_path_string).join(".archivr"); let archive_path = Path::new(&archive_path_string).join(".archivr");
let store_path = if Path::new(&store_path_string).is_relative() { let store_path = if Path::new(&store_path_string).is_relative() {
@ -386,26 +201,16 @@ fn main() -> Result<()> {
}; };
if archive_path.exists() { if archive_path.exists() {
if !archive_path.is_dir() { // TODO: check if there is nothing inside. if there is nothing inside, use it
eprintln!( eprintln!("Archive already exists at {}", archive_path.display());
"Archive path exists and is not a directory: {}", if store_path.exists() {
archive_path.display() eprintln!("Store path already exists at {}", store_path.display());
);
process::exit(1);
}
if force_with_info_removal {
fs::remove_dir_all(&archive_path)?;
} else if fs::read_dir(&archive_path)?.next().is_some() {
eprintln!(
"Archive already exists at {} and is not empty. Use --force-with-info-removal to reinitialize.",
archive_path.display()
);
process::exit(1); process::exit(1);
} }
process::exit(1);
} }
if store_path.exists() {
if store_path.exists() && !force_with_info_removal { // TODO: check if the structure is correct. If so, use it.
eprintln!("Store path already exists at {}", store_path.display()); eprintln!("Store path already exists at {}", store_path.display());
process::exit(1); process::exit(1);
} }
@ -427,262 +232,3 @@ fn main() -> Result<()> {
} // _ => eprintln!("Unknown command: {:?}", args.command), } // _ => eprintln!("Unknown command: {:?}", args.command),
} }
} }
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `determine_source` on each URL in `cases`, in order, and asserts
    /// that it yields the paired expected `Source`.
    fn check_all(cases: &[(&str, Source)]) {
        for (url, expected) in cases {
            assert_eq!(
                determine_source(*url),
                *expected,
                "Failed for URL: {}",
                url
            );
        }
    }

    #[test]
    fn test_youtube_sources() {
        // Full video URLs: watch pages, youtu.be short links, and shorts.
        check_all(&[
            (
                "https://www.youtube.com/watch?v=UHxw-L2WyyY",
                Source::YouTubeVideo,
            ),
            ("https://youtu.be/UHxw-L2WyyY", Source::YouTubeVideo),
            (
                "https://www.youtube.com/shorts/EtC99eWiwRI",
                Source::YouTubeVideo,
            ),
        ]);

        // Full playlist URLs.
        check_all(&[(
            "https://www.youtube.com/playlist?list=PL9vTTBa7QaQOoMfpP3ztvgyQkPWDPfJez",
            Source::YouTubePlaylist,
        )]);

        // Full channel URLs: channel IDs, /c/, /user/, and @handles.
        check_all(&[
            (
                "https://www.youtube.com/channel/CoreDumpped",
                Source::YouTubeChannel,
            ),
            (
                "https://www.youtube.com/@CoreDumpped",
                Source::YouTubeChannel,
            ),
            (
                "https://www.youtube.com/c/YouTubeCreators",
                Source::YouTubeChannel,
            ),
            (
                "https://www.youtube.com/user/pewdiepie",
                Source::YouTubeChannel,
            ),
            (
                "https://youtube.com/@pewdiepie?si=KOcLN_KPYNpe5f_8",
                Source::YouTubeChannel,
            ),
        ]);

        // Shorthand `yt:` / `youtube:` schemes.
        check_all(&[
            // Videos
            ("yt:video/UHxw-L2WyyY", Source::YouTubeVideo),
            ("youtube:video/UHxw-L2WyyY", Source::YouTubeVideo),
            ("yt:short/EtC99eWiwRI", Source::YouTubeVideo),
            ("yt:shorts/EtC99eWiwRI", Source::YouTubeVideo),
            ("youtube:shorts/EtC99eWiwRI", Source::YouTubeVideo),
            // Playlists
            (
                "yt:playlist/PL9vTTBa7QaQOoMfpP3ztvgyQkPWDPfJez",
                Source::YouTubePlaylist,
            ),
            (
                "youtube:playlist/PL9vTTBa7QaQOoMfpP3ztvgyQkPWDPfJez",
                Source::YouTubePlaylist,
            ),
            // Channels
            ("yt:channel/UCxyz123", Source::YouTubeChannel),
            ("yt:c/YouTubeCreators", Source::YouTubeChannel),
            ("yt:user/pewdiepie", Source::YouTubeChannel),
            ("youtube:@CoreDumpped", Source::YouTubeChannel),
        ]);
    }

    #[test]
    fn test_x_sources() {
        check_all(&[
            ("https://x.com/some_post", Source::X),
            ("x:1234567890", Source::X),
            ("twitter:1234567890", Source::X),
        ]);
    }

    #[test]
    fn test_other_social_sources() {
        // Each platform is exercised once by full URL and once by shorthand.
        check_all(&[
            (
                "https://www.instagram.com/reel/ABC123/",
                Source::Instagram,
            ),
            ("instagram:reel/ABC123", Source::Instagram),
            (
                "https://www.facebook.com/watch/?v=123456",
                Source::Facebook,
            ),
            ("facebook:watch?v=123456", Source::Facebook),
            (
                "https://www.tiktok.com/@someone/video/123456789",
                Source::TikTok,
            ),
            ("tiktok:@someone/video/123456789", Source::TikTok),
            (
                "https://www.reddit.com/r/videos/comments/abc123/example/",
                Source::Reddit,
            ),
            ("reddit:r/videos/comments/abc123/example", Source::Reddit),
            (
                "https://www.snapchat.com/discover/some-story/1234567890",
                Source::Snapchat,
            ),
            (
                "snapchat:discover/some-story/1234567890",
                Source::Snapchat,
            ),
        ]);
    }

    #[test]
    fn test_non_youtube_sources() {
        // Local files, plus URLs that merely mention or resemble a supported
        // host and must therefore fall through to `Source::Other`.
        check_all(&[
            ("file:///local/path/file.mp4", Source::Local),
            ("https://example.com/", Source::Other),
            (
                "https://example.com/?redirect=instagram.com/reel/ABC123",
                Source::Other,
            ),
            ("https://notfacebook.com/watch?v=123456", Source::Other),
        ]);
    }
}