diff --git a/src/downloader/local.rs b/src/downloader/local.rs index 6536aa7..f946a2e 100644 --- a/src/downloader/local.rs +++ b/src/downloader/local.rs @@ -1,9 +1,5 @@ use anyhow::{Context, Result, bail}; -use std::{ - fs, - path::{Path, PathBuf}, - process::Command, -}; +use std::{path::Path, process::Command}; use crate::hash::hash_file; @@ -30,71 +26,3 @@ pub fn save(path: String, store_path: &Path, timestamp: &String) -> Result//`. If the destination already -/// exists the source file is removed (deduplication); otherwise it is renamed. -/// Returns the store-relative destination path. -pub fn archive_staged_file(file: &Path, store_path: &Path) -> Result { - let hash = hash_file(file)?; - let destination = raw_relative_path(file, &hash)?; - let absolute_destination = store_path.join(&destination); - - if let Some(parent) = absolute_destination.parent() { - fs::create_dir_all(parent)?; - } - - if absolute_destination.exists() { - fs::remove_file(file)?; - } else { - fs::rename(file, &absolute_destination)?; - } - - Ok(destination) -} - -/// Computes the store-relative path for a file given its `hash`. -/// The layout is `raw///` where `c1`/`c2` are the first -/// two characters of the hash, providing a two-level directory sharding. 
-fn raw_relative_path(file: &Path, hash: &str) -> Result<PathBuf> { - let mut chars = hash.chars(); - let first_letter = chars.next().context("hash must not be empty")?; - let second_letter = chars - .next() - .context("hash must be at least two characters")?; - let extension = file - .extension() - .map_or(String::new(), |ext| format!(".{}", ext.to_string_lossy())); - - Ok(PathBuf::from("raw") - .join(first_letter.to_string()) - .join(second_letter.to_string()) - .join(format!("{hash}{extension}"))) -} - -#[cfg(test)] -mod tests { - use super::*; - use std::{env, fs}; - - #[test] - fn test_archive_staged_file_moves_into_raw_store() { - let root = env::temp_dir().join(format!("archivr-local-test-{}", std::process::id())); - let _ = fs::remove_dir_all(&root); - fs::create_dir_all(root.join("temp")).unwrap(); - - let staged = root.join("temp").join("photo.jpg"); - fs::write(&staged, b"image-bytes").unwrap(); - - let relative = archive_staged_file(&staged, &root).unwrap(); - let absolute = root.join(&relative); - - assert!(absolute.is_file()); - assert!(!staged.exists()); - assert!(relative.starts_with("raw")); - - let _ = fs::remove_dir_all(&root); - } -} diff --git a/src/downloader/mod.rs b/src/downloader/mod.rs index 0811854..de5d604 100644 --- a/src/downloader/mod.rs +++ b/src/downloader/mod.rs @@ -1,3 +1,4 @@ pub mod local; +pub mod store; pub mod tweets; pub mod ytdlp; diff --git a/src/downloader/store.rs b/src/downloader/store.rs new file mode 100644 index 0000000..f83d428 --- /dev/null +++ b/src/downloader/store.rs @@ -0,0 +1,75 @@ +use anyhow::{Context, Result}; +use std::{ + fs, + path::{Path, PathBuf}, +}; + +use crate::hash::hash_file; + +/// Moves `file` into the content-addressed raw store under `store_path`. +/// +/// The destination path is derived from the file's SHA-256 hash: +/// `raw/<c1>/<c2>/<hash><ext>`. If the destination already +/// exists the source file is removed (deduplication); otherwise it is renamed. +/// Returns the store-relative destination path. 
+pub fn archive_staged_file(file: &Path, store_path: &Path) -> Result<PathBuf> { + let hash = hash_file(file)?; + let destination = raw_relative_path(file, &hash)?; + let absolute_destination = store_path.join(&destination); + + if let Some(parent) = absolute_destination.parent() { + fs::create_dir_all(parent)?; + } + + if absolute_destination.exists() { + fs::remove_file(file)?; + } else { + fs::rename(file, &absolute_destination)?; + } + + Ok(destination) +} + +/// Computes the store-relative path for a file given its `hash`. +/// The layout is `raw/<c1>/<c2>/<hash><ext>` where `c1`/`c2` are the first +/// two characters of the hash, providing a two-level Trie. +fn raw_relative_path(file: &Path, hash: &str) -> Result<PathBuf> { + let mut chars = hash.chars(); + let first_letter = chars.next().context("hash must not be empty")?; + let second_letter = chars + .next() + .context("hash must be at least two characters")?; + let extension = file + .extension() + .map_or(String::new(), |ext| format!(".{}", ext.to_string_lossy())); + + Ok(PathBuf::from("raw") + .join(first_letter.to_string()) + .join(second_letter.to_string()) + .join(format!("{hash}{extension}"))) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::{env, fs}; + + #[test] + fn test_archive_staged_file_moves_into_raw_store() { + let root = env::temp_dir().join(format!("archivr-store-test-{}", std::process::id())); + let _ = fs::remove_dir_all(&root); + fs::create_dir_all(root.join("temp")).unwrap(); + + let staged = root.join("temp").join("photo.jpg"); + fs::write(&staged, b"image-bytes").unwrap(); + + let relative = archive_staged_file(&staged, &root).unwrap(); + let absolute = root.join(&relative); + + assert!(absolute.is_file()); + assert!(!staged.exists()); + assert!(relative.starts_with("raw")); + + let _ = fs::remove_dir_all(&root); + } +} diff --git a/src/downloader/tweets.rs b/src/downloader/tweets.rs index 57014f2..1e66063 100644 --- a/src/downloader/tweets.rs +++ b/src/downloader/tweets.rs @@ -10,16 +10,9 @@ use std::{ 
sync::OnceLock, }; -use super::local; +use crate::twitter::parse_tweet_id; -/// Returns `Some(id)` if `id` is a non-empty string of ASCII digits, otherwise `None`. -fn parse_tweet_id(id: &str) -> Option<String> { - if !id.is_empty() && id.chars().all(|char| char.is_ascii_digit()) { - Some(id.to_string()) - } else { - None - } -} +use super::store; /// Extracts a tweet ID from an archivr path like `"tweet:123"` by taking the /// last colon-separated segment and validating it as a numeric ID. @@ -303,7 +296,7 @@ fn archive_asset_reference( ); } - let relative_path = local::archive_staged_file(&absolute_path, store_path)?; + let relative_path = store::archive_staged_file(&absolute_path, store_path)?; let relative_path = relative_path.to_string_lossy().replace('\\', "/"); archived_assets.insert(key, relative_path.clone()); diff --git a/src/main.rs b/src/main.rs index 4bc6e35..177194b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -9,6 +9,7 @@ use std::{ mod downloader; mod hash; +mod twitter; #[derive(Parser, Debug)] #[command(version, about, long_about = None)] @@ -83,17 +84,8 @@ enum Source { Other, } -/// Returns the tweet ID when `id` is non-empty and contains only ASCII digits. -fn parse_tweet_id(id: &str) -> Option<String> { - if !id.is_empty() && id.chars().all(|char| char.is_ascii_digit()) { - Some(id.to_string()) - } else { - None - } -} +use crate::twitter::parse_tweet_id; -// TODO: Get rid of this somehow, probably encoding the ID logic into a struct. -// TODO: Error handling for inputs? fn expand_shorthand_to_url(path: &str, source: &Source) -> String { if *source == Source::X && (path.starts_with("tweet:media:") || path.starts_with("x:media:")) { return format!( diff --git a/src/twitter.rs b/src/twitter.rs new file mode 100644 index 0000000..5678c1a --- /dev/null +++ b/src/twitter.rs @@ -0,0 +1,8 @@ +/// Returns the tweet ID if `id` is non-empty and contains only ASCII digits. 
+pub fn parse_tweet_id(id: &str) -> Option<String> { + if !id.is_empty() && id.chars().all(|char| char.is_ascii_digit()) { + Some(id.to_string()) + } else { + None + } +}