1
Fork 0
mirror of https://github.com/thegeneralist01/archivr synced 2026-05-30 08:36:47 +02:00

Add social platform source detection and update milestones

This commit is contained in:
TheGeneralist 2026-03-31 11:54:33 +02:00
parent b9f45d148b
commit c988bea4aa
2 changed files with 116 additions and 7 deletions

View file

@ -7,11 +7,11 @@ An open-source self-hosted archiving tool. Work in progress.
- [ ] Archiving media files from social media platforms - [ ] Archiving media files from social media platforms
- [X] YouTube Videos - [X] YouTube Videos
- [X] Twitter Videos - [X] Twitter Videos
- [ ] Instagram - [X] Instagram
- [ ] Facebook - [X] Facebook
- [ ] TikTok - [X] TikTok
- [ ] Reddit - [X] Reddit
- [ ] Snapchat - [X] Snapchat
- [ ] YouTube Posts (?) - [ ] YouTube Posts (?)
- (Some of these could be postponed for later.) - (Some of these could be postponed for later.)
- [X] Archiving local files - [X] Archiving local files

View file

@ -70,6 +70,11 @@ enum Source {
YouTubePlaylist, YouTubePlaylist,
YouTubeChannel, YouTubeChannel,
X, X,
Instagram,
Facebook,
TikTok,
Reddit,
Snapchat,
Local, Local,
Other, Other,
} }
@ -114,6 +119,23 @@ fn determine_source(path: &str) -> Source {
return Source::X; return Source::X;
} }
// Shorthand schemes for other yt-dlp extractors
if path.starts_with("instagram:") {
return Source::Instagram;
}
if path.starts_with("facebook:") {
return Source::Facebook;
}
if path.starts_with("tiktok:") {
return Source::TikTok;
}
if path.starts_with("reddit:") {
return Source::Reddit;
}
if path.starts_with("snapchat:") {
return Source::Snapchat;
}
if path.starts_with("file://") { if path.starts_with("file://") {
return Source::Local; return Source::Local;
} else if path.starts_with("http://") || path.starts_with("https://") { } else if path.starts_with("http://") || path.starts_with("https://") {
@ -140,6 +162,26 @@ fn determine_source(path: &str) -> Source {
if path.starts_with("https://x.com/") { if path.starts_with("https://x.com/") {
return Source::X; return Source::X;
} }
if path.contains("instagram.com/") {
return Source::Instagram;
}
if path.contains("facebook.com/") || path.contains("fb.watch/") {
return Source::Facebook;
}
if path.contains("tiktok.com/") {
return Source::TikTok;
}
if path.contains("reddit.com/") || path.contains("redd.it/") {
return Source::Reddit;
}
if path.contains("snapchat.com/") {
return Source::Snapchat;
}
} }
Source::Other Source::Other
} }
@ -224,7 +266,13 @@ fn main() -> Result<()> {
}; };
let hash = match source { let hash = match source {
Source::YouTubeVideo | Source::X => { Source::YouTubeVideo
| Source::X
| Source::Instagram
| Source::Facebook
| Source::TikTok
| Source::Reddit
| Source::Snapchat => {
match downloader::ytdlp::download(path.clone(), &store_path, &timestamp) { match downloader::ytdlp::download(path.clone(), &store_path, &timestamp) {
Ok(h) => h, Ok(h) => h,
Err(e) => { Err(e) => {
@ -246,7 +294,13 @@ fn main() -> Result<()> {
}; };
let file_extension = match source { let file_extension = match source {
Source::YouTubeVideo | Source::X => ".mp4", Source::YouTubeVideo
| Source::X
| Source::Instagram
| Source::Facebook
| Source::TikTok
| Source::Reddit
| Source::Snapchat => ".mp4",
Source::Local => { Source::Local => {
let p = Path::new(path.trim_start_matches("file://")); let p = Path::new(path.trim_start_matches("file://"));
&p.extension() &p.extension()
@ -522,6 +576,61 @@ mod tests {
} }
} }
/// Verifies that `determine_source` maps Instagram, Facebook, TikTok, Reddit
/// and Snapchat inputs to their matching `Source` variant.
///
/// Each platform is exercised twice: once with a full `https://` URL and once
/// with the `platform:` shorthand form.
#[test]
fn test_other_social_sources() {
    let cases = [
        // Instagram
        TestCase {
            url: "https://www.instagram.com/reel/ABC123/",
            expected: Source::Instagram,
        },
        TestCase {
            url: "instagram:reel/ABC123",
            expected: Source::Instagram,
        },
        // Facebook
        TestCase {
            url: "https://www.facebook.com/watch/?v=123456",
            expected: Source::Facebook,
        },
        TestCase {
            url: "facebook:watch?v=123456",
            expected: Source::Facebook,
        },
        // TikTok
        TestCase {
            url: "https://www.tiktok.com/@someone/video/123456789",
            expected: Source::TikTok,
        },
        TestCase {
            url: "tiktok:@someone/video/123456789",
            expected: Source::TikTok,
        },
        // Reddit
        TestCase {
            url: "https://www.reddit.com/r/videos/comments/abc123/example/",
            expected: Source::Reddit,
        },
        TestCase {
            url: "reddit:r/videos/comments/abc123/example",
            expected: Source::Reddit,
        },
        // Snapchat
        TestCase {
            url: "https://www.snapchat.com/discover/some-story/1234567890",
            expected: Source::Snapchat,
        },
        TestCase {
            url: "snapchat:discover/some-story/1234567890",
            expected: Source::Snapchat,
        },
    ];

    // Walk the table in declaration order so the first mismatch reported is
    // the first failing entry above.
    for TestCase { url, expected } in cases.iter() {
        assert_eq!(determine_source(url), *expected, "Failed for URL: {}", url);
    }
}
#[test] #[test]
fn test_non_youtube_sources() { fn test_non_youtube_sources() {
let other_cases = [ let other_cases = [