1
Fork 0
mirror of https://github.com/thegeneralist01/archivr synced 2026-05-30 08:36:47 +02:00

Extract store and Twitter helpers into shared modules

This commit is contained in:
TheGeneralist 2026-04-03 15:24:09 +02:00
parent 423883d96f
commit b23c548fd0
Signed by: thegeneralist01
SSH key fingerprint: SHA256:pp9qddbCNmVNoSjevdvQvM5z0DHN7LTa8qBMbcMq/R4
6 changed files with 90 additions and 93 deletions

View file

@ -1,9 +1,5 @@
use anyhow::{Context, Result, bail};
use std::{
fs,
path::{Path, PathBuf},
process::Command,
};
use std::{path::Path, process::Command};
use crate::hash::hash_file;
@ -30,71 +26,3 @@ pub fn save(path: String, store_path: &Path, timestamp: &String) -> Result<Strin
hash_file(&out_file)
}
/// Moves `file` into the content-addressed raw store under `store_path`.
///
/// The destination path is derived from the file's SHA-256 hash:
/// `raw/<first-char>/<second-char>/<hash><ext>`. If the destination already
/// exists the source file is removed (deduplication); otherwise it is renamed.
/// Returns the store-relative destination path.
pub fn archive_staged_file(file: &Path, store_path: &Path) -> Result<PathBuf> {
let hash = hash_file(file)?;
let destination = raw_relative_path(file, &hash)?;
let absolute_destination = store_path.join(&destination);
if let Some(parent) = absolute_destination.parent() {
fs::create_dir_all(parent)?;
}
if absolute_destination.exists() {
fs::remove_file(file)?;
} else {
fs::rename(file, &absolute_destination)?;
}
Ok(destination)
}
/// Builds the store-relative location for `file` from its `hash`.
///
/// The result has the form `raw/<c1>/<c2>/<hash><ext>`, where `c1` and `c2`
/// are the first and second characters of `hash` and `<ext>` is the extension
/// of `file` (with a leading dot), or nothing when `file` has no extension.
///
/// Returns an error when `hash` has fewer than two characters.
fn raw_relative_path(file: &Path, hash: &str) -> Result<PathBuf> {
    let mut prefix = hash.chars();
    let outer_dir = prefix.next().context("hash must not be empty")?;
    let inner_dir = prefix
        .next()
        .context("hash must be at least two characters")?;

    // The stored file name is the full hash plus the original extension, if any.
    let file_name = match file.extension() {
        Some(ext) => format!("{hash}.{}", ext.to_string_lossy()),
        None => hash.to_string(),
    };

    let mut relative = PathBuf::from("raw");
    relative.push(outer_dir.to_string());
    relative.push(inner_dir.to_string());
    relative.push(file_name);
    Ok(relative)
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::{env, fs};

    /// Archiving a staged file should relocate it beneath `raw/` inside the
    /// store root and leave nothing behind at the staged path.
    #[test]
    fn test_archive_staged_file_moves_into_raw_store() {
        let dir_name = format!("archivr-local-test-{}", std::process::id());
        let store_root = env::temp_dir().join(dir_name);
        // Best-effort: clear leftovers from an earlier run.
        let _ = fs::remove_dir_all(&store_root);

        let staging_dir = store_root.join("temp");
        fs::create_dir_all(&staging_dir).unwrap();
        let staged_file = staging_dir.join("photo.jpg");
        fs::write(&staged_file, b"image-bytes").unwrap();

        let stored_relative = archive_staged_file(&staged_file, &store_root).unwrap();
        let stored_absolute = store_root.join(&stored_relative);

        assert!(stored_absolute.is_file());
        assert!(!staged_file.exists());
        assert!(stored_relative.starts_with("raw"));

        // Best-effort cleanup of the scratch directory.
        let _ = fs::remove_dir_all(&store_root);
    }
}

View file

@ -1,3 +1,4 @@
pub mod local;
pub mod store;
pub mod tweets;
pub mod ytdlp;

75
src/downloader/store.rs Normal file
View file

@ -0,0 +1,75 @@
use anyhow::{Context, Result};
use std::{
fs,
path::{Path, PathBuf},
};
use crate::hash::hash_file;
/// Moves `file` into the content-addressed raw store under `store_path`.
///
/// The destination path is derived from the file's SHA-256 hash:
/// `raw/<first-char>/<second-char>/<hash><ext>`. If the destination already
/// exists the source file is removed (deduplication); otherwise it is renamed.
/// Returns the store-relative destination path.
pub fn archive_staged_file(file: &Path, store_path: &Path) -> Result<PathBuf> {
let hash = hash_file(file)?;
let destination = raw_relative_path(file, &hash)?;
let absolute_destination = store_path.join(&destination);
if let Some(parent) = absolute_destination.parent() {
fs::create_dir_all(parent)?;
}
if absolute_destination.exists() {
fs::remove_file(file)?;
} else {
fs::rename(file, &absolute_destination)?;
}
Ok(destination)
}
/// Builds the store-relative location for `file` from its `hash`.
///
/// The result has the form `raw/<c1>/<c2>/<hash><ext>`: two nested
/// single-character directories named after the first and second characters
/// of `hash`, followed by the full hash plus the extension of `file` (with a
/// leading dot), or just the hash when `file` has no extension.
///
/// Returns an error when `hash` has fewer than two characters.
fn raw_relative_path(file: &Path, hash: &str) -> Result<PathBuf> {
    let mut prefix = hash.chars();
    let outer_dir = prefix.next().context("hash must not be empty")?;
    let inner_dir = prefix
        .next()
        .context("hash must be at least two characters")?;

    // The stored file name is the full hash plus the original extension, if any.
    let file_name = match file.extension() {
        Some(ext) => format!("{hash}.{}", ext.to_string_lossy()),
        None => hash.to_string(),
    };

    let mut relative = PathBuf::from("raw");
    relative.push(outer_dir.to_string());
    relative.push(inner_dir.to_string());
    relative.push(file_name);
    Ok(relative)
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::{env, fs, path::PathBuf};

    /// Creates an empty scratch store root containing a `temp` staging
    /// directory. `label` keeps roots of different tests apart so they can
    /// run in parallel within the same process.
    fn fresh_store_root(label: &str) -> PathBuf {
        let root = env::temp_dir().join(format!(
            "archivr-store-test-{label}-{}",
            std::process::id()
        ));
        // Best-effort: clear leftovers from an earlier run.
        let _ = fs::remove_dir_all(&root);
        fs::create_dir_all(root.join("temp")).unwrap();
        root
    }

    /// A staged file is moved beneath `raw/` and keeps its extension.
    #[test]
    fn test_archive_staged_file_moves_into_raw_store() {
        let root = fresh_store_root("move");
        let staged = root.join("temp").join("photo.jpg");
        fs::write(&staged, b"image-bytes").unwrap();

        let relative = archive_staged_file(&staged, &root).unwrap();
        let absolute = root.join(&relative);

        assert!(absolute.is_file());
        assert!(!staged.exists());
        assert!(relative.starts_with("raw"));
        assert_eq!(relative.extension().and_then(|ext| ext.to_str()), Some("jpg"));

        let _ = fs::remove_dir_all(&root);
    }

    /// Archiving a second file with identical content and extension resolves
    /// to the same destination: the stored copy stays and the duplicate
    /// staged file is removed.
    #[test]
    fn test_archive_staged_file_deduplicates_identical_content() {
        let root = fresh_store_root("dedup");
        let first_staged = root.join("temp").join("photo.jpg");
        fs::write(&first_staged, b"image-bytes").unwrap();
        let first = archive_staged_file(&first_staged, &root).unwrap();

        let second_staged = root.join("temp").join("copy.jpg");
        fs::write(&second_staged, b"image-bytes").unwrap();
        let second = archive_staged_file(&second_staged, &root).unwrap();

        assert_eq!(first, second);
        assert!(root.join(&first).is_file());
        assert!(!second_staged.exists());

        let _ = fs::remove_dir_all(&root);
    }
}

View file

@ -10,16 +10,9 @@ use std::{
sync::OnceLock,
};
use super::local;
use crate::twitter::parse_tweet_id;
/// Validates `id` as a tweet ID.
///
/// Yields an owned copy of `id` when it is non-empty and consists solely of
/// ASCII digits; yields `None` for anything else.
fn parse_tweet_id(id: &str) -> Option<String> {
    // Any non-ASCII character contains a byte >= 0x80, which is never an ASCII
    // digit, so a byte-wise check accepts exactly the same inputs.
    let digits_only = id.bytes().all(|byte| byte.is_ascii_digit());
    if id.is_empty() || !digits_only {
        return None;
    }
    Some(id.to_owned())
}
use super::store;
/// Extracts a tweet ID from an archivr path like `"tweet:123"` by taking the
/// last colon-separated segment and validating it as a numeric ID.
@ -303,7 +296,7 @@ fn archive_asset_reference(
);
}
let relative_path = local::archive_staged_file(&absolute_path, store_path)?;
let relative_path = store::archive_staged_file(&absolute_path, store_path)?;
let relative_path = relative_path.to_string_lossy().replace('\\', "/");
archived_assets.insert(key, relative_path.clone());

View file

@ -9,6 +9,7 @@ use std::{
mod downloader;
mod hash;
mod twitter;
#[derive(Parser, Debug)]
#[command(version, about, long_about = None)]
@ -83,17 +84,8 @@ enum Source {
Other,
}
/// Validates `id` as a tweet ID.
///
/// Yields an owned copy of `id` when it is non-empty and consists solely of
/// ASCII digits; yields `None` for anything else.
fn parse_tweet_id(id: &str) -> Option<String> {
    // Any non-ASCII character contains a byte >= 0x80, which is never an ASCII
    // digit, so a byte-wise check accepts exactly the same inputs.
    let digits_only = id.bytes().all(|byte| byte.is_ascii_digit());
    if id.is_empty() || !digits_only {
        return None;
    }
    Some(id.to_owned())
}
use crate::twitter::parse_tweet_id;
// TODO: Get rid of this somehow, probably encoding the ID logic into a struct.
// TODO: Error handling for inputs?
fn expand_shorthand_to_url(path: &str, source: &Source) -> String {
if *source == Source::X && (path.starts_with("tweet:media:") || path.starts_with("x:media:")) {
return format!(

8
src/twitter.rs Normal file
View file

@ -0,0 +1,8 @@
/// Validates `id` as a tweet ID.
///
/// Yields an owned copy of `id` when it is non-empty and consists solely of
/// ASCII digits; yields `None` for anything else.
pub fn parse_tweet_id(id: &str) -> Option<String> {
    // Any non-ASCII character contains a byte >= 0x80, which is never an ASCII
    // digit, so a byte-wise check accepts exactly the same inputs.
    let digits_only = id.bytes().all(|byte| byte.is_ascii_digit());
    if id.is_empty() || !digits_only {
        return None;
    }
    Some(id.to_owned())
}