mirror of
https://github.com/thegeneralist01/archivr
synced 2026-05-30 08:36:47 +02:00
Revert "feat: add generic media source handling and local file archiving"
This commit is contained in:
parent
cd7dfd7c8a
commit
9e38c18bd4
8 changed files with 22 additions and 2252 deletions
247
src/main.rs
247
src/main.rs
|
|
@ -36,8 +36,6 @@ enum Command {
|
|||
/// ...
|
||||
/// raw/
|
||||
/// ...
|
||||
/// raw_tweets/
|
||||
/// ...
|
||||
/// structured/
|
||||
/// ...
|
||||
#[arg(default_value = "./.archivr/store")]
|
||||
|
|
@ -66,14 +64,12 @@ fn get_archive_path() -> Option<PathBuf> {
|
|||
None
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
#[derive(Debug, PartialEq)]
|
||||
enum Source {
|
||||
YouTubeVideo,
|
||||
YouTubePlaylist,
|
||||
YouTubeChannel,
|
||||
X,
|
||||
Tweet,
|
||||
TweetThread,
|
||||
Instagram,
|
||||
Facebook,
|
||||
TikTok,
|
||||
|
|
@ -83,29 +79,6 @@ enum Source {
|
|||
Other,
|
||||
}
|
||||
|
||||
/// Validates a tweet id candidate.
///
/// Returns an owned copy of `id` when it is non-empty and made up solely of
/// ASCII digits (`0`-`9`); returns `None` for the empty string and for any
/// input containing another character (signs, whitespace, non-ASCII digits).
fn parse_tweet_id(id: &str) -> Option<String> {
    // ASCII digits are single bytes, and every byte of a multi-byte character
    // is >= 0x80, so a byte-wise check accepts exactly the same inputs as a
    // char-wise one.
    let is_numeric = !id.is_empty() && id.bytes().all(|byte| byte.is_ascii_digit());
    is_numeric.then(|| id.to_string())
}
|
||||
|
||||
fn tweet_id_from_path(path: &str) -> Option<String> {
|
||||
path.split(':').next_back().and_then(parse_tweet_id)
|
||||
}
|
||||
|
||||
fn resolve_source_path(path: &str, source: &Source) -> String {
|
||||
if *source == Source::X && path.starts_with("tweet:media:") {
|
||||
format!(
|
||||
"https://x.com/i/status/{}",
|
||||
tweet_id_from_path(path).unwrap()
|
||||
)
|
||||
} else {
|
||||
path.to_string()
|
||||
}
|
||||
}
|
||||
|
||||
// INFO: yt-dlp supports a lot of sites; so, when archiving (for example) a website, the user
|
||||
// -> should be asked whether they want to archive the whole website or just the video(s) on it.
|
||||
fn determine_source(path: &str) -> Source {
|
||||
|
|
@ -141,43 +114,8 @@ fn determine_source(path: &str) -> Source {
|
|||
}
|
||||
}
|
||||
|
||||
// Shorthand schemes: tweet:, x:, or twitter:
|
||||
if let Some(after_scheme) = path.strip_prefix("tweet:") {
|
||||
if after_scheme.starts_with("media:")
|
||||
&& after_scheme
|
||||
.strip_prefix("media:")
|
||||
.and_then(parse_tweet_id)
|
||||
.is_some()
|
||||
{
|
||||
return Source::X;
|
||||
}
|
||||
|
||||
if parse_tweet_id(after_scheme).is_some() {
|
||||
return Source::Tweet;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(after_scheme) = path
|
||||
.strip_prefix("x:")
|
||||
.or_else(|| path.strip_prefix("twitter:"))
|
||||
{
|
||||
if after_scheme
|
||||
.strip_prefix("thread:")
|
||||
.and_then(parse_tweet_id)
|
||||
.is_some()
|
||||
{
|
||||
return Source::TweetThread;
|
||||
}
|
||||
|
||||
if after_scheme
|
||||
.strip_prefix("tweet:")
|
||||
.or_else(|| after_scheme.strip_prefix("x:"))
|
||||
.and_then(parse_tweet_id)
|
||||
.is_some()
|
||||
{
|
||||
return Source::Tweet;
|
||||
}
|
||||
|
||||
// Shorthand schemes: x: or twitter:
|
||||
if path.starts_with("x:") || path.starts_with("twitter:") {
|
||||
return Source::X;
|
||||
}
|
||||
|
||||
|
|
@ -322,31 +260,27 @@ fn move_temp_to_raw(file: &Path, hash: &String, store_path: &Path) -> Result<()>
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Creates the directory layout of a store rooted at `store_path`.
///
/// The sub-directories `raw`, `raw_tweets`, `structured` and `temp` are
/// created in that order. Missing parent directories are created as well, and
/// directories that already exist are left untouched.
///
/// # Errors
///
/// Returns the first I/O error reported while creating a directory; any
/// directories created before the failure are not removed.
fn initialize_store_directories(store_path: &Path) -> Result<()> {
    for dir in ["raw", "raw_tweets", "structured", "temp"] {
        fs::create_dir_all(store_path.join(dir))?;
    }

    Ok(())
}
|
||||
|
||||
fn main() -> Result<()> {
|
||||
let args = Args::parse();
|
||||
|
||||
match args.command {
|
||||
Command::Archive { ref path } => {
|
||||
let archive_path = match get_archive_path() {
|
||||
Some(path) => path,
|
||||
None => {
|
||||
eprintln!("Not in an archive. Use 'archivr init' to create one.");
|
||||
process::exit(1);
|
||||
}
|
||||
};
|
||||
let archive_path = get_archive_path();
|
||||
if get_archive_path().is_none() {
|
||||
eprintln!("Not in an archive. Use 'archivr init' to create one.");
|
||||
process::exit(1);
|
||||
}
|
||||
|
||||
// let download_id = uuid::Uuid::new_v4();
|
||||
let timestamp = Local::now().format("%Y-%m-%dT%H-%M-%S%.3f").to_string();
|
||||
|
||||
let store_path_string_file = archive_path.join("store_path");
|
||||
let source = determine_source(path);
|
||||
if let Source::Other = source {
|
||||
eprintln!("Archiving from this source is not yet implemented.");
|
||||
process::exit(1);
|
||||
}
|
||||
|
||||
let store_path_string_file = archive_path.unwrap().join("store_path");
|
||||
let store_path = match fs::read_to_string(store_path_string_file) {
|
||||
Ok(p) => PathBuf::from(p.trim()),
|
||||
Err(e) => {
|
||||
|
|
@ -355,46 +289,6 @@ fn main() -> Result<()> {
|
|||
}
|
||||
};
|
||||
|
||||
let source = determine_source(path);
|
||||
|
||||
// Sources: Tweets or Twitter Threads
|
||||
match source {
|
||||
Source::Other => {
|
||||
eprintln!("Archiving from this source is not yet implemented.");
|
||||
process::exit(1);
|
||||
}
|
||||
Source::Tweet | Source::TweetThread => {
|
||||
match downloader::tweets::archive(
|
||||
path,
|
||||
source == Source::TweetThread,
|
||||
&store_path,
|
||||
&timestamp,
|
||||
) {
|
||||
Ok(true) => {
|
||||
println!(
|
||||
"Tweet archived successfully to {}",
|
||||
store_path.join("raw_tweets").display()
|
||||
);
|
||||
return Ok(());
|
||||
}
|
||||
Ok(false) => {
|
||||
println!(
|
||||
"Tweet already archived in {}",
|
||||
store_path.join("raw_tweets").display()
|
||||
);
|
||||
return Ok(());
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!("Failed to archive tweet: {e}");
|
||||
process::exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// Sources, for which yt-dlp is needed
|
||||
let path = resolve_source_path(path, &source);
|
||||
let hash = match source {
|
||||
Source::YouTubeVideo
|
||||
| Source::X
|
||||
|
|
@ -523,7 +417,9 @@ fn main() -> Result<()> {
|
|||
archive_path.join("store_path"),
|
||||
store_path.canonicalize().unwrap().to_str().unwrap(),
|
||||
);
|
||||
initialize_store_directories(&store_path).unwrap();
|
||||
fs::create_dir_all(store_path.join("raw")).unwrap();
|
||||
fs::create_dir_all(store_path.join("structured")).unwrap();
|
||||
fs::create_dir_all(store_path.join("tmp")).unwrap();
|
||||
|
||||
println!("Initialized empty archive in {}", archive_path.display());
|
||||
|
||||
|
|
@ -535,101 +431,12 @@ fn main() -> Result<()> {
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::fs;
|
||||
|
||||
struct TestCase<'a> {
|
||||
url: &'a str,
|
||||
expected: Source,
|
||||
}
|
||||
|
||||
/// Checks how `determine_source` classifies the `tweet:`, `x:` and `twitter:`
/// shorthands, including malformed ids that must fall through to `Source::Other`.
#[test]
fn test_tweet_sources() {
    let cases = [
        // Plain tweets.
        TestCase {
            url: "tweet:1234567890",
            expected: Source::Tweet,
        },
        TestCase {
            url: "x:tweet:1234567890",
            expected: Source::Tweet,
        },
        TestCase {
            url: "x:x:1234567890",
            expected: Source::Tweet,
        },
        TestCase {
            url: "twitter:x:1234567890",
            expected: Source::Tweet,
        },
        TestCase {
            url: "twitter:tweet:1234567890",
            expected: Source::Tweet,
        },
        // Media shorthand.
        TestCase {
            url: "tweet:media:1234567890",
            expected: Source::X,
        },
        // Threads are only recognised under the `x:` / `twitter:` schemes.
        TestCase {
            url: "x:thread:1234567890",
            expected: Source::TweetThread,
        },
        TestCase {
            url: "twitter:thread:1234567890",
            expected: Source::TweetThread,
        },
        TestCase {
            url: "tweet:thread:1234567890",
            expected: Source::Other,
        },
        // Non-numeric ids.
        TestCase {
            url: "tweet:not-a-number",
            expected: Source::Other,
        },
        TestCase {
            url: "tweet:media:not-a-number",
            expected: Source::Other,
        },
    ];

    for case in &cases {
        assert_eq!(
            determine_source(case.url),
            case.expected,
            "Failed for URL: {}",
            case.url
        );
    }
}
|
||||
|
||||
/// Checks that `tweet_id_from_path` returns the trailing numeric id of a
/// shorthand and rejects a non-numeric one.
#[test]
fn test_tweet_id_from_path() {
    assert_eq!(
        tweet_id_from_path("tweet:1234567890"),
        Some("1234567890".to_string())
    );
    assert_eq!(
        tweet_id_from_path("tweet:media:1234567890"),
        Some("1234567890".to_string())
    );
    assert_eq!(
        tweet_id_from_path("x:thread:1234567890"),
        Some("1234567890".to_string())
    );
    assert_eq!(tweet_id_from_path("tweet:not-a-number"), None);
}
|
||||
|
||||
/// Checks that only the `tweet:media:` shorthand is rewritten to a status URL
/// and that a plain tweet shorthand is passed through unchanged.
#[test]
fn test_resolve_source_path() {
    assert_eq!(
        resolve_source_path("tweet:media:1234567890", &Source::X),
        "https://x.com/i/status/1234567890"
    );
    assert_eq!(
        resolve_source_path("tweet:1234567890", &Source::Tweet),
        "tweet:1234567890"
    );
}
|
||||
|
||||
#[test]
|
||||
fn test_youtube_sources() {
|
||||
// --- YouTube Video URLs ---
|
||||
|
|
@ -878,22 +685,4 @@ mod tests {
|
|||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Checks that `initialize_store_directories` creates `raw`, `raw_tweets`,
/// `structured` and `temp`, and does not create a `tmp` directory.
#[test]
fn test_initialize_store_directories() {
    // Timestamped name under the system temp dir keeps runs from colliding.
    let store_path = env::temp_dir().join(format!(
        "archivr-test-{}",
        Local::now().format("%Y%m%d%H%M%S%3f")
    ));

    initialize_store_directories(&store_path).unwrap();

    assert!(store_path.join("raw").is_dir());
    assert!(store_path.join("raw_tweets").is_dir());
    assert!(store_path.join("structured").is_dir());
    assert!(store_path.join("temp").is_dir());
    assert!(!store_path.join("tmp").exists());

    fs::remove_dir_all(store_path).unwrap();
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue