mirror of
https://github.com/thegeneralist01/archivr
synced 2026-05-30 08:36:47 +02:00
Implement social shorthand URL expansion and tweet alias parsing
This commit is contained in:
parent
63897f6a63
commit
423883d96f
2 changed files with 82 additions and 28 deletions
|
|
@ -15,13 +15,14 @@ An open-source self-hosted archiving tool. Work in progress.
|
||||||
- [x] Snapchat
|
- [x] Snapchat
|
||||||
- [ ] YouTube Posts (postponed)
|
- [ ] YouTube Posts (postponed)
|
||||||
- [x] Archiving local files
|
- [x] Archiving local files
|
||||||
|
- [x] Archiving Twitter Tweets & Threads
|
||||||
- [ ] Archiving files from cloud storage services (Google Drive, Dropbox, OneDrive) and from URLs
|
- [ ] Archiving files from cloud storage services (Google Drive, Dropbox, OneDrive) and from URLs
|
||||||
- [ ] URLs
|
- [ ] URLs
|
||||||
- [ ] Google Drive
|
- [ ] Google Drive
|
||||||
- [ ] Dropbox
|
- [ ] Dropbox
|
||||||
- [ ] OneDrive
|
- [ ] OneDrive
|
||||||
- (Some of these could be postponed for later.)
|
- (Some of these could be postponed for later.)
|
||||||
- [x] Archiving Twitter threads
|
- [ ] Archiving Twitter articles
|
||||||
- [ ] Archive web pages (HTML, CSS, JS, images)
|
- [ ] Archive web pages (HTML, CSS, JS, images)
|
||||||
- [ ] Archiving emails (???)
|
- [ ] Archiving emails (???)
|
||||||
- [ ] Gmail
|
- [ ] Gmail
|
||||||
|
|
|
||||||
107
src/main.rs
107
src/main.rs
|
|
@ -95,17 +95,36 @@ fn parse_tweet_id(id: &str) -> Option<String> {
|
||||||
// TODO: Get rid of this somehow, probably encoding the ID logic into a struct.
|
// TODO: Get rid of this somehow, probably encoding the ID logic into a struct.
|
||||||
// TODO: Error handling for inputs?
|
// TODO: Error handling for inputs?
|
||||||
fn expand_shorthand_to_url(path: &str, source: &Source) -> String {
|
fn expand_shorthand_to_url(path: &str, source: &Source) -> String {
|
||||||
if *source == Source::X && path.starts_with("tweet:media:") {
|
if *source == Source::X && (path.starts_with("tweet:media:") || path.starts_with("x:media:")) {
|
||||||
format!(
|
return format!(
|
||||||
"https://x.com/i/status/{}",
|
"https://x.com/i/status/{}",
|
||||||
path.split(':')
|
path.split(':')
|
||||||
.next_back()
|
.next_back()
|
||||||
.and_then(parse_tweet_id)
|
.and_then(parse_tweet_id)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
)
|
);
|
||||||
} else {
|
|
||||||
path.to_string()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if let Some(path) = path.strip_prefix("instagram:") {
|
||||||
|
if let Some(id) = path.strip_prefix("reel:") {
|
||||||
|
return format!("https://www.instagram.com/reel/{id}");
|
||||||
|
}
|
||||||
|
return format!("https://www.instagram.com/{path}");
|
||||||
|
}
|
||||||
|
if let Some(path) = path.strip_prefix("facebook:") {
|
||||||
|
return format!("https://www.facebook.com/{path}");
|
||||||
|
}
|
||||||
|
if let Some(path) = path.strip_prefix("tiktok:") {
|
||||||
|
return format!("https://www.tiktok.com/{path}");
|
||||||
|
}
|
||||||
|
if let Some(path) = path.strip_prefix("reddit:") {
|
||||||
|
return format!("https://www.reddit.com/{path}");
|
||||||
|
}
|
||||||
|
if let Some(path) = path.strip_prefix("snapchat:") {
|
||||||
|
return format!("https://www.snapchat.com/{path}");
|
||||||
|
}
|
||||||
|
|
||||||
|
path.to_string()
|
||||||
}
|
}
|
||||||
|
|
||||||
// INFO: yt-dlp supports a lot of sites; so, when archiving (for example) a website, the user
|
// INFO: yt-dlp supports a lot of sites; so, when archiving (for example) a website, the user
|
||||||
|
|
@ -144,7 +163,14 @@ fn determine_source(path: &str) -> Source {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Shorthand schemes: tweet:, x:, or twitter:
|
// Shorthand schemes: tweet:, x:, or twitter:
|
||||||
if let Some(after_scheme) = path.strip_prefix("tweet:") {
|
if let Some(after_scheme) = path
|
||||||
|
.strip_prefix("x:")
|
||||||
|
.or_else(|| path.strip_prefix("twitter:"))
|
||||||
|
.or_else(|| path.strip_prefix("tweet:"))
|
||||||
|
{
|
||||||
|
// For this scope, in comments, N is an alias for a string of type ('twitter' | 'x' | 'tweet').
|
||||||
|
|
||||||
|
// N:media:id
|
||||||
if after_scheme.starts_with("media:")
|
if after_scheme.starts_with("media:")
|
||||||
&& after_scheme
|
&& after_scheme
|
||||||
.strip_prefix("media:")
|
.strip_prefix("media:")
|
||||||
|
|
@ -154,23 +180,7 @@ fn determine_source(path: &str) -> Source {
|
||||||
return Source::X;
|
return Source::X;
|
||||||
}
|
}
|
||||||
|
|
||||||
if parse_tweet_id(after_scheme).is_some() {
|
// N:tweet:id or N:x:id
|
||||||
return Source::Tweet;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(after_scheme) = path
|
|
||||||
.strip_prefix("x:")
|
|
||||||
.or_else(|| path.strip_prefix("twitter:"))
|
|
||||||
{
|
|
||||||
if after_scheme
|
|
||||||
.strip_prefix("thread:")
|
|
||||||
.and_then(parse_tweet_id)
|
|
||||||
.is_some()
|
|
||||||
{
|
|
||||||
return Source::TweetThread;
|
|
||||||
}
|
|
||||||
|
|
||||||
if after_scheme
|
if after_scheme
|
||||||
.strip_prefix("tweet:")
|
.strip_prefix("tweet:")
|
||||||
.or_else(|| after_scheme.strip_prefix("x:"))
|
.or_else(|| after_scheme.strip_prefix("x:"))
|
||||||
|
|
@ -180,7 +190,22 @@ fn determine_source(path: &str) -> Source {
|
||||||
return Source::Tweet;
|
return Source::Tweet;
|
||||||
}
|
}
|
||||||
|
|
||||||
return Source::X;
|
// N:thread:id
|
||||||
|
if after_scheme
|
||||||
|
.strip_prefix("thread:")
|
||||||
|
.and_then(parse_tweet_id)
|
||||||
|
.is_some()
|
||||||
|
{
|
||||||
|
return Source::TweetThread;
|
||||||
|
}
|
||||||
|
|
||||||
|
// N:id
|
||||||
|
if parse_tweet_id(after_scheme).is_some() {
|
||||||
|
return Source::Tweet;
|
||||||
|
}
|
||||||
|
|
||||||
|
// N:non-id
|
||||||
|
return Source::Other;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Shorthand schemes for other yt-dlp extractors
|
// Shorthand schemes for other yt-dlp extractors
|
||||||
|
|
@ -571,6 +596,10 @@ mod tests {
|
||||||
url: "tweet:media:1234567890",
|
url: "tweet:media:1234567890",
|
||||||
expected: Source::X,
|
expected: Source::X,
|
||||||
},
|
},
|
||||||
|
TestCase {
|
||||||
|
url: "x:media:1234567890",
|
||||||
|
expected: Source::X,
|
||||||
|
},
|
||||||
TestCase {
|
TestCase {
|
||||||
url: "x:thread:1234567890",
|
url: "x:thread:1234567890",
|
||||||
expected: Source::TweetThread,
|
expected: Source::TweetThread,
|
||||||
|
|
@ -581,7 +610,7 @@ mod tests {
|
||||||
},
|
},
|
||||||
TestCase {
|
TestCase {
|
||||||
url: "tweet:thread:1234567890",
|
url: "tweet:thread:1234567890",
|
||||||
expected: Source::Other,
|
expected: Source::TweetThread,
|
||||||
},
|
},
|
||||||
TestCase {
|
TestCase {
|
||||||
url: "tweet:not-a-number",
|
url: "tweet:not-a-number",
|
||||||
|
|
@ -591,6 +620,10 @@ mod tests {
|
||||||
url: "tweet:media:not-a-number",
|
url: "tweet:media:not-a-number",
|
||||||
expected: Source::Other,
|
expected: Source::Other,
|
||||||
},
|
},
|
||||||
|
TestCase {
|
||||||
|
url: "x:media:not-a-number",
|
||||||
|
expected: Source::Other,
|
||||||
|
},
|
||||||
];
|
];
|
||||||
|
|
||||||
for case in &cases {
|
for case in &cases {
|
||||||
|
|
@ -609,6 +642,26 @@ mod tests {
|
||||||
expand_shorthand_to_url("tweet:media:1234567890", &Source::X),
|
expand_shorthand_to_url("tweet:media:1234567890", &Source::X),
|
||||||
"https://x.com/i/status/1234567890"
|
"https://x.com/i/status/1234567890"
|
||||||
);
|
);
|
||||||
|
assert_eq!(
|
||||||
|
expand_shorthand_to_url("instagram:reel/ABC123", &Source::Instagram),
|
||||||
|
"https://www.instagram.com/reel/ABC123"
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
expand_shorthand_to_url("facebook:watch?v=123456", &Source::Facebook),
|
||||||
|
"https://www.facebook.com/watch?v=123456"
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
expand_shorthand_to_url("tiktok:@someone/video/123456789", &Source::TikTok),
|
||||||
|
"https://www.tiktok.com/@someone/video/123456789"
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
expand_shorthand_to_url("reddit:r/videos/comments/abc123/example", &Source::Reddit),
|
||||||
|
"https://www.reddit.com/r/videos/comments/abc123/example"
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
expand_shorthand_to_url("snapchat:discover/some-story/1234567890", &Source::Snapchat),
|
||||||
|
"https://www.snapchat.com/discover/some-story/1234567890"
|
||||||
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
expand_shorthand_to_url("tweet:1234567890", &Source::Tweet),
|
expand_shorthand_to_url("tweet:1234567890", &Source::Tweet),
|
||||||
"tweet:1234567890"
|
"tweet:1234567890"
|
||||||
|
|
@ -760,11 +813,11 @@ mod tests {
|
||||||
},
|
},
|
||||||
TestCase {
|
TestCase {
|
||||||
url: "x:1234567890",
|
url: "x:1234567890",
|
||||||
expected: Source::X,
|
expected: Source::Tweet,
|
||||||
},
|
},
|
||||||
TestCase {
|
TestCase {
|
||||||
url: "twitter:1234567890",
|
url: "twitter:1234567890",
|
||||||
expected: Source::X,
|
expected: Source::Tweet,
|
||||||
},
|
},
|
||||||
];
|
];
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue