mirror of
https://github.com/thegeneralist01/archivr
synced 2026-05-30 08:36:47 +02:00
Extract store and Twitter helpers into shared modules
This commit is contained in:
parent
423883d96f
commit
b23c548fd0
6 changed files with 90 additions and 93 deletions
|
|
@ -1,9 +1,5 @@
|
|||
use anyhow::{Context, Result, bail};
|
||||
use std::{
|
||||
fs,
|
||||
path::{Path, PathBuf},
|
||||
process::Command,
|
||||
};
|
||||
use std::{path::Path, process::Command};
|
||||
|
||||
use crate::hash::hash_file;
|
||||
|
||||
|
|
@ -30,71 +26,3 @@ pub fn save(path: String, store_path: &Path, timestamp: &String) -> Result<Strin
|
|||
|
||||
hash_file(&out_file)
|
||||
}
|
||||
|
||||
/// Moves `file` into the content-addressed raw store under `store_path`.
|
||||
///
|
||||
/// The destination path is derived from the file's SHA-256 hash:
|
||||
/// `raw/<first-char>/<second-char>/<hash><ext>`. If the destination already
|
||||
/// exists the source file is removed (deduplication); otherwise it is renamed.
|
||||
/// Returns the store-relative destination path.
|
||||
pub fn archive_staged_file(file: &Path, store_path: &Path) -> Result<PathBuf> {
|
||||
let hash = hash_file(file)?;
|
||||
let destination = raw_relative_path(file, &hash)?;
|
||||
let absolute_destination = store_path.join(&destination);
|
||||
|
||||
if let Some(parent) = absolute_destination.parent() {
|
||||
fs::create_dir_all(parent)?;
|
||||
}
|
||||
|
||||
if absolute_destination.exists() {
|
||||
fs::remove_file(file)?;
|
||||
} else {
|
||||
fs::rename(file, &absolute_destination)?;
|
||||
}
|
||||
|
||||
Ok(destination)
|
||||
}
|
||||
|
||||
/// Computes the store-relative path for a file given its `hash`.
|
||||
/// The layout is `raw/<c1>/<c2>/<hash><ext>` where `c1`/`c2` are the first
|
||||
/// two characters of the hash, providing a two-level directory sharding.
|
||||
fn raw_relative_path(file: &Path, hash: &str) -> Result<PathBuf> {
|
||||
let mut chars = hash.chars();
|
||||
let first_letter = chars.next().context("hash must not be empty")?;
|
||||
let second_letter = chars
|
||||
.next()
|
||||
.context("hash must be at least two characters")?;
|
||||
let extension = file
|
||||
.extension()
|
||||
.map_or(String::new(), |ext| format!(".{}", ext.to_string_lossy()));
|
||||
|
||||
Ok(PathBuf::from("raw")
|
||||
.join(first_letter.to_string())
|
||||
.join(second_letter.to_string())
|
||||
.join(format!("{hash}{extension}")))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::{env, fs};
|
||||
|
||||
#[test]
|
||||
fn test_archive_staged_file_moves_into_raw_store() {
|
||||
let root = env::temp_dir().join(format!("archivr-local-test-{}", std::process::id()));
|
||||
let _ = fs::remove_dir_all(&root);
|
||||
fs::create_dir_all(root.join("temp")).unwrap();
|
||||
|
||||
let staged = root.join("temp").join("photo.jpg");
|
||||
fs::write(&staged, b"image-bytes").unwrap();
|
||||
|
||||
let relative = archive_staged_file(&staged, &root).unwrap();
|
||||
let absolute = root.join(&relative);
|
||||
|
||||
assert!(absolute.is_file());
|
||||
assert!(!staged.exists());
|
||||
assert!(relative.starts_with("raw"));
|
||||
|
||||
let _ = fs::remove_dir_all(&root);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
pub mod local;
|
||||
pub mod store;
|
||||
pub mod tweets;
|
||||
pub mod ytdlp;
|
||||
|
|
|
|||
75
src/downloader/store.rs
Normal file
75
src/downloader/store.rs
Normal file
|
|
@ -0,0 +1,75 @@
|
|||
use anyhow::{Context, Result};
|
||||
use std::{
|
||||
fs,
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
use crate::hash::hash_file;
|
||||
|
||||
/// Moves `file` into the content-addressed raw store under `store_path`.
|
||||
///
|
||||
/// The destination path is derived from the file's SHA-256 hash:
|
||||
/// `raw/<first-char>/<second-char>/<hash><ext>`. If the destination already
|
||||
/// exists the source file is removed (deduplication); otherwise it is renamed.
|
||||
/// Returns the store-relative destination path.
|
||||
pub fn archive_staged_file(file: &Path, store_path: &Path) -> Result<PathBuf> {
|
||||
let hash = hash_file(file)?;
|
||||
let destination = raw_relative_path(file, &hash)?;
|
||||
let absolute_destination = store_path.join(&destination);
|
||||
|
||||
if let Some(parent) = absolute_destination.parent() {
|
||||
fs::create_dir_all(parent)?;
|
||||
}
|
||||
|
||||
if absolute_destination.exists() {
|
||||
fs::remove_file(file)?;
|
||||
} else {
|
||||
fs::rename(file, &absolute_destination)?;
|
||||
}
|
||||
|
||||
Ok(destination)
|
||||
}
|
||||
|
||||
/// Computes the store-relative path for a file given its `hash`.
|
||||
/// The layout is `raw/<c1>/<c2>/<hash><ext>` where `c1`/`c2` are the first
|
||||
/// two characters of the hash, providing a two-level Trie.
|
||||
fn raw_relative_path(file: &Path, hash: &str) -> Result<PathBuf> {
|
||||
let mut chars = hash.chars();
|
||||
let first_letter = chars.next().context("hash must not be empty")?;
|
||||
let second_letter = chars
|
||||
.next()
|
||||
.context("hash must be at least two characters")?;
|
||||
let extension = file
|
||||
.extension()
|
||||
.map_or(String::new(), |ext| format!(".{}", ext.to_string_lossy()));
|
||||
|
||||
Ok(PathBuf::from("raw")
|
||||
.join(first_letter.to_string())
|
||||
.join(second_letter.to_string())
|
||||
.join(format!("{hash}{extension}")))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::{env, fs};

    /// Stages a file under a scratch store, archives it, and checks that it
    /// ends up below `raw/` while the staged copy is gone.
    #[test]
    fn test_archive_staged_file_moves_into_raw_store() {
        // The process ID keeps concurrent test binaries from sharing a directory.
        let scratch_name = format!("archivr-store-test-{}", std::process::id());
        let store_root = env::temp_dir().join(scratch_name);
        // Best-effort cleanup of leftovers from an earlier run.
        let _ = fs::remove_dir_all(&store_root);

        let staging_dir = store_root.join("temp");
        fs::create_dir_all(&staging_dir).unwrap();
        let staged_file = staging_dir.join("photo.jpg");
        fs::write(&staged_file, b"image-bytes").unwrap();

        let stored_relative = archive_staged_file(&staged_file, &store_root).unwrap();
        let stored_absolute = store_root.join(&stored_relative);

        assert!(stored_absolute.is_file());
        assert!(!staged_file.exists());
        assert!(stored_relative.starts_with("raw"));

        let _ = fs::remove_dir_all(&store_root);
    }
}
|
||||
|
|
@ -10,16 +10,9 @@ use std::{
|
|||
sync::OnceLock,
|
||||
};
|
||||
|
||||
use super::local;
|
||||
use crate::twitter::parse_tweet_id;
|
||||
|
||||
/// Returns `Some(id)` if `id` is a non-empty string of ASCII digits, otherwise `None`.
|
||||
fn parse_tweet_id(id: &str) -> Option<String> {
|
||||
if !id.is_empty() && id.chars().all(|char| char.is_ascii_digit()) {
|
||||
Some(id.to_string())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
use super::store;
|
||||
|
||||
/// Extracts a tweet ID from an archivr path like `"tweet:123"` by taking the
|
||||
/// last colon-separated segment and validating it as a numeric ID.
|
||||
|
|
@ -303,7 +296,7 @@ fn archive_asset_reference(
|
|||
);
|
||||
}
|
||||
|
||||
let relative_path = local::archive_staged_file(&absolute_path, store_path)?;
|
||||
let relative_path = store::archive_staged_file(&absolute_path, store_path)?;
|
||||
let relative_path = relative_path.to_string_lossy().replace('\\', "/");
|
||||
archived_assets.insert(key, relative_path.clone());
|
||||
|
||||
|
|
|
|||
12
src/main.rs
12
src/main.rs
|
|
@ -9,6 +9,7 @@ use std::{
|
|||
|
||||
mod downloader;
|
||||
mod hash;
|
||||
mod twitter;
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
#[command(version, about, long_about = None)]
|
||||
|
|
@ -83,17 +84,8 @@ enum Source {
|
|||
Other,
|
||||
}
|
||||
|
||||
/// Returns the tweet ID when `id` is non-empty and contains only ASCII digits.
|
||||
fn parse_tweet_id(id: &str) -> Option<String> {
|
||||
if !id.is_empty() && id.chars().all(|char| char.is_ascii_digit()) {
|
||||
Some(id.to_string())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
use crate::twitter::parse_tweet_id;
|
||||
|
||||
// TODO: Get rid of this somehow, probably encoding the ID logic into a struct.
|
||||
// TODO: Error handling for inputs?
|
||||
fn expand_shorthand_to_url(path: &str, source: &Source) -> String {
|
||||
if *source == Source::X && (path.starts_with("tweet:media:") || path.starts_with("x:media:")) {
|
||||
return format!(
|
||||
|
|
|
|||
8
src/twitter.rs
Normal file
8
src/twitter.rs
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
/// Validates a candidate tweet ID.
///
/// Returns `Some` with an owned copy of `id` when it is non-empty and consists
/// solely of ASCII digits (`0`-`9`); returns `None` otherwise.
pub fn parse_tweet_id(id: &str) -> Option<String> {
    // Checking bytes is equivalent to checking chars here: every byte of a
    // multi-byte UTF-8 sequence is >= 0x80 and therefore never an ASCII digit.
    let is_numeric = !id.is_empty() && id.bytes().all(|byte| byte.is_ascii_digit());
    is_numeric.then(|| id.to_string())
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue