mirror of
https://github.com/thegeneralist01/archivr
synced 2026-05-30 08:36:47 +02:00
Extract store and Twitter helpers into shared modules
This commit is contained in:
parent
423883d96f
commit
b23c548fd0
6 changed files with 90 additions and 93 deletions
|
|
@ -1,9 +1,5 @@
|
||||||
use anyhow::{Context, Result, bail};
|
use anyhow::{Context, Result, bail};
|
||||||
use std::{
|
use std::{path::Path, process::Command};
|
||||||
fs,
|
|
||||||
path::{Path, PathBuf},
|
|
||||||
process::Command,
|
|
||||||
};
|
|
||||||
|
|
||||||
use crate::hash::hash_file;
|
use crate::hash::hash_file;
|
||||||
|
|
||||||
|
|
@ -30,71 +26,3 @@ pub fn save(path: String, store_path: &Path, timestamp: &String) -> Result<Strin
|
||||||
|
|
||||||
hash_file(&out_file)
|
hash_file(&out_file)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Moves `file` into the content-addressed raw store under `store_path`.
|
|
||||||
///
|
|
||||||
/// The destination path is derived from the file's SHA-256 hash:
|
|
||||||
/// `raw/<first-char>/<second-char>/<hash><ext>`. If the destination already
|
|
||||||
/// exists the source file is removed (deduplication); otherwise it is renamed.
|
|
||||||
/// Returns the store-relative destination path.
|
|
||||||
pub fn archive_staged_file(file: &Path, store_path: &Path) -> Result<PathBuf> {
|
|
||||||
let hash = hash_file(file)?;
|
|
||||||
let destination = raw_relative_path(file, &hash)?;
|
|
||||||
let absolute_destination = store_path.join(&destination);
|
|
||||||
|
|
||||||
if let Some(parent) = absolute_destination.parent() {
|
|
||||||
fs::create_dir_all(parent)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
if absolute_destination.exists() {
|
|
||||||
fs::remove_file(file)?;
|
|
||||||
} else {
|
|
||||||
fs::rename(file, &absolute_destination)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(destination)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Computes the store-relative path for a file given its `hash`.
|
|
||||||
/// The layout is `raw/<c1>/<c2>/<hash><ext>` where `c1`/`c2` are the first
|
|
||||||
/// two characters of the hash, providing a two-level directory sharding.
|
|
||||||
fn raw_relative_path(file: &Path, hash: &str) -> Result<PathBuf> {
|
|
||||||
let mut chars = hash.chars();
|
|
||||||
let first_letter = chars.next().context("hash must not be empty")?;
|
|
||||||
let second_letter = chars
|
|
||||||
.next()
|
|
||||||
.context("hash must be at least two characters")?;
|
|
||||||
let extension = file
|
|
||||||
.extension()
|
|
||||||
.map_or(String::new(), |ext| format!(".{}", ext.to_string_lossy()));
|
|
||||||
|
|
||||||
Ok(PathBuf::from("raw")
|
|
||||||
.join(first_letter.to_string())
|
|
||||||
.join(second_letter.to_string())
|
|
||||||
.join(format!("{hash}{extension}")))
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod tests {
|
|
||||||
use super::*;
|
|
||||||
use std::{env, fs};
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_archive_staged_file_moves_into_raw_store() {
|
|
||||||
let root = env::temp_dir().join(format!("archivr-local-test-{}", std::process::id()));
|
|
||||||
let _ = fs::remove_dir_all(&root);
|
|
||||||
fs::create_dir_all(root.join("temp")).unwrap();
|
|
||||||
|
|
||||||
let staged = root.join("temp").join("photo.jpg");
|
|
||||||
fs::write(&staged, b"image-bytes").unwrap();
|
|
||||||
|
|
||||||
let relative = archive_staged_file(&staged, &root).unwrap();
|
|
||||||
let absolute = root.join(&relative);
|
|
||||||
|
|
||||||
assert!(absolute.is_file());
|
|
||||||
assert!(!staged.exists());
|
|
||||||
assert!(relative.starts_with("raw"));
|
|
||||||
|
|
||||||
let _ = fs::remove_dir_all(&root);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
pub mod local;
|
pub mod local;
|
||||||
|
pub mod store;
|
||||||
pub mod tweets;
|
pub mod tweets;
|
||||||
pub mod ytdlp;
|
pub mod ytdlp;
|
||||||
|
|
|
||||||
75
src/downloader/store.rs
Normal file
75
src/downloader/store.rs
Normal file
|
|
@ -0,0 +1,75 @@
|
||||||
|
use anyhow::{Context, Result};
|
||||||
|
use std::{
|
||||||
|
fs,
|
||||||
|
path::{Path, PathBuf},
|
||||||
|
};
|
||||||
|
|
||||||
|
use crate::hash::hash_file;
|
||||||
|
|
||||||
|
/// Moves `file` into the content-addressed raw store under `store_path`.
|
||||||
|
///
|
||||||
|
/// The destination path is derived from the file's SHA-256 hash:
|
||||||
|
/// `raw/<first-char>/<second-char>/<hash><ext>`. If the destination already
|
||||||
|
/// exists the source file is removed (deduplication); otherwise it is renamed.
|
||||||
|
/// Returns the store-relative destination path.
|
||||||
|
pub fn archive_staged_file(file: &Path, store_path: &Path) -> Result<PathBuf> {
|
||||||
|
let hash = hash_file(file)?;
|
||||||
|
let destination = raw_relative_path(file, &hash)?;
|
||||||
|
let absolute_destination = store_path.join(&destination);
|
||||||
|
|
||||||
|
if let Some(parent) = absolute_destination.parent() {
|
||||||
|
fs::create_dir_all(parent)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
if absolute_destination.exists() {
|
||||||
|
fs::remove_file(file)?;
|
||||||
|
} else {
|
||||||
|
fs::rename(file, &absolute_destination)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(destination)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Computes the store-relative path for a file given its `hash`.
|
||||||
|
/// The layout is `raw/<c1>/<c2>/<hash><ext>` where `c1`/`c2` are the first
|
||||||
|
/// two characters of the hash, providing a two-level directory sharding.
|
||||||
|
fn raw_relative_path(file: &Path, hash: &str) -> Result<PathBuf> {
|
||||||
|
let mut chars = hash.chars();
|
||||||
|
let first_letter = chars.next().context("hash must not be empty")?;
|
||||||
|
let second_letter = chars
|
||||||
|
.next()
|
||||||
|
.context("hash must be at least two characters")?;
|
||||||
|
let extension = file
|
||||||
|
.extension()
|
||||||
|
.map_or(String::new(), |ext| format!(".{}", ext.to_string_lossy()));
|
||||||
|
|
||||||
|
Ok(PathBuf::from("raw")
|
||||||
|
.join(first_letter.to_string())
|
||||||
|
.join(second_letter.to_string())
|
||||||
|
.join(format!("{hash}{extension}")))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
use std::{env, fs};
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_archive_staged_file_moves_into_raw_store() {
|
||||||
|
let root = env::temp_dir().join(format!("archivr-store-test-{}", std::process::id()));
|
||||||
|
let _ = fs::remove_dir_all(&root);
|
||||||
|
fs::create_dir_all(root.join("temp")).unwrap();
|
||||||
|
|
||||||
|
let staged = root.join("temp").join("photo.jpg");
|
||||||
|
fs::write(&staged, b"image-bytes").unwrap();
|
||||||
|
|
||||||
|
let relative = archive_staged_file(&staged, &root).unwrap();
|
||||||
|
let absolute = root.join(&relative);
|
||||||
|
|
||||||
|
assert!(absolute.is_file());
|
||||||
|
assert!(!staged.exists());
|
||||||
|
assert!(relative.starts_with("raw"));
|
||||||
|
|
||||||
|
let _ = fs::remove_dir_all(&root);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -10,16 +10,9 @@ use std::{
|
||||||
sync::OnceLock,
|
sync::OnceLock,
|
||||||
};
|
};
|
||||||
|
|
||||||
use super::local;
|
use crate::twitter::parse_tweet_id;
|
||||||
|
|
||||||
/// Returns `Some(id)` if `id` is a non-empty string of ASCII digits, otherwise `None`.
|
use super::store;
|
||||||
fn parse_tweet_id(id: &str) -> Option<String> {
|
|
||||||
if !id.is_empty() && id.chars().all(|char| char.is_ascii_digit()) {
|
|
||||||
Some(id.to_string())
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Extracts a tweet ID from an archivr path like `"tweet:123"` by taking the
|
/// Extracts a tweet ID from an archivr path like `"tweet:123"` by taking the
|
||||||
/// last colon-separated segment and validating it as a numeric ID.
|
/// last colon-separated segment and validating it as a numeric ID.
|
||||||
|
|
@ -303,7 +296,7 @@ fn archive_asset_reference(
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
let relative_path = local::archive_staged_file(&absolute_path, store_path)?;
|
let relative_path = store::archive_staged_file(&absolute_path, store_path)?;
|
||||||
let relative_path = relative_path.to_string_lossy().replace('\\', "/");
|
let relative_path = relative_path.to_string_lossy().replace('\\', "/");
|
||||||
archived_assets.insert(key, relative_path.clone());
|
archived_assets.insert(key, relative_path.clone());
|
||||||
|
|
||||||
|
|
|
||||||
12
src/main.rs
12
src/main.rs
|
|
@ -9,6 +9,7 @@ use std::{
|
||||||
|
|
||||||
mod downloader;
|
mod downloader;
|
||||||
mod hash;
|
mod hash;
|
||||||
|
mod twitter;
|
||||||
|
|
||||||
#[derive(Parser, Debug)]
|
#[derive(Parser, Debug)]
|
||||||
#[command(version, about, long_about = None)]
|
#[command(version, about, long_about = None)]
|
||||||
|
|
@ -83,17 +84,8 @@ enum Source {
|
||||||
Other,
|
Other,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the tweet ID when `id` is non-empty and contains only ASCII digits.
|
use crate::twitter::parse_tweet_id;
|
||||||
fn parse_tweet_id(id: &str) -> Option<String> {
|
|
||||||
if !id.is_empty() && id.chars().all(|char| char.is_ascii_digit()) {
|
|
||||||
Some(id.to_string())
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: Get rid of this somehow, probably encoding the ID logic into a struct.
|
|
||||||
// TODO: Error handling for inputs?
|
|
||||||
fn expand_shorthand_to_url(path: &str, source: &Source) -> String {
|
fn expand_shorthand_to_url(path: &str, source: &Source) -> String {
|
||||||
if *source == Source::X && (path.starts_with("tweet:media:") || path.starts_with("x:media:")) {
|
if *source == Source::X && (path.starts_with("tweet:media:") || path.starts_with("x:media:")) {
|
||||||
return format!(
|
return format!(
|
||||||
|
|
|
||||||
8
src/twitter.rs
Normal file
8
src/twitter.rs
Normal file
|
|
@ -0,0 +1,8 @@
|
||||||
|
/// Returns the tweet ID if `id` is non-empty and contains only ASCII digits.
|
||||||
|
pub fn parse_tweet_id(id: &str) -> Option<String> {
|
||||||
|
if !id.is_empty() && id.chars().all(|char| char.is_ascii_digit()) {
|
||||||
|
Some(id.to_string())
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue