mirror of
https://github.com/thegeneralist01/archivr
synced 2026-03-07 11:39:55 +01:00
feat: finish YouTube downloading; primitive "raw" archiving
This commit is contained in:
commit
01cc7826bf
9 changed files with 1059 additions and 0 deletions
1
src/downloader/mod.rs
Normal file
1
src/downloader/mod.rs
Normal file
|
|
@ -0,0 +1 @@
|
|||
pub mod youtube;
|
||||
32
src/downloader/youtube.rs
Normal file
32
src/downloader/youtube.rs
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
use anyhow::{Context, Result, bail};
|
||||
use std::{env, path::Path, process::Command};
|
||||
|
||||
use crate::hash::hash_file;
|
||||
|
||||
pub fn download(path: String, store_path: &Path, timestamp: &String) -> Result<String> {
|
||||
println!("Downloading from YouTube: {path}");
|
||||
|
||||
let ytdlp = env::var("ARCHIVR_YT_DLP").unwrap_or_else(|_| "yt-dlp".to_string());
|
||||
|
||||
let temp_dir = store_path.join("temp");
|
||||
std::fs::create_dir_all(&temp_dir)?;
|
||||
let out_file = temp_dir.join(format!("{timestamp}.mp4"));
|
||||
|
||||
let out = Command::new(&ytdlp)
|
||||
.arg(&path)
|
||||
.arg("-f")
|
||||
.arg("bestvideo+bestaudio/best")
|
||||
.arg("--merge-output-format")
|
||||
.arg("mp4")
|
||||
.arg("-o")
|
||||
.arg(&out_file)
|
||||
.output()
|
||||
.with_context(|| format!("failed to spawn {ytdlp} process"))?;
|
||||
|
||||
if !out.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&out.stderr);
|
||||
bail!("yt-dlp failed: {stderr}");
|
||||
}
|
||||
|
||||
hash_file(&out_file)
|
||||
}
|
||||
17
src/hash.rs
Normal file
17
src/hash.rs
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
use sha3::{Digest, Sha3_256};
|
||||
use std::{fs::File, io::Read, path::Path};
|
||||
use anyhow::Result;
|
||||
|
||||
pub fn hash_file(path: &Path) -> Result<String> {
|
||||
let mut file = File::open(path)?;
|
||||
let mut buf = Vec::new();
|
||||
file.read_to_end(&mut buf)?;
|
||||
Ok(hash(String::from_utf8_lossy(&buf).to_string()))
|
||||
}
|
||||
|
||||
pub fn hash(path: String) -> String {
|
||||
let mut hasher = Sha3_256::new();
|
||||
hasher.update(path.as_bytes());
|
||||
let result = hasher.finalize();
|
||||
hex::encode(result)
|
||||
}
|
||||
234
src/main.rs
Normal file
234
src/main.rs
Normal file
|
|
@ -0,0 +1,234 @@
|
|||
use anyhow::Result;
|
||||
use chrono::Local;
|
||||
use clap::{Parser, Subcommand};
|
||||
use std::{
|
||||
env, fs,
|
||||
path::{Path, PathBuf},
|
||||
process,
|
||||
};
|
||||
|
||||
mod downloader;
|
||||
mod hash;
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
#[command(version, about, long_about = None)]
|
||||
struct Args {
|
||||
#[command(subcommand)]
|
||||
command: Command,
|
||||
}
|
||||
|
||||
#[derive(Subcommand, Debug)]
|
||||
enum Command {
|
||||
/// Archive the specified file or directory
|
||||
Archive {
|
||||
/// URL or Path to archive
|
||||
path: String,
|
||||
},
|
||||
Init {
|
||||
/// Path to initialize the archive in
|
||||
#[arg(default_value = ".")]
|
||||
path: String,
|
||||
|
||||
/// Store path - path to store the archived files in.
|
||||
/// Structure will be:
|
||||
/// store_path/
|
||||
/// raw/
|
||||
/// ...
|
||||
/// structured/
|
||||
/// ...
|
||||
#[arg(default_value = "./.archivr/store")]
|
||||
store_path: String,
|
||||
|
||||
/// Name of the archive
|
||||
#[arg(short, long)]
|
||||
name: String,
|
||||
},
|
||||
}
|
||||
|
||||
fn get_archive_path() -> Option<PathBuf> {
|
||||
let mut dir = env::current_dir().unwrap();
|
||||
loop {
|
||||
if dir.join(".archivr").is_dir() {
|
||||
return Some(dir.join(".archivr"));
|
||||
}
|
||||
if !dir.pop() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
enum Source {
|
||||
YouTube,
|
||||
Other,
|
||||
}
|
||||
|
||||
fn determine_source(path: &str) -> Source {
|
||||
if path.starts_with("http://") || path.starts_with("https://") {
|
||||
return Source::YouTube;
|
||||
}
|
||||
Source::Other
|
||||
}
|
||||
|
||||
fn hash_exists(filename: String, store_path: &Path) -> bool {
|
||||
let mut chars = filename.chars();
|
||||
let first_letter = chars.next().unwrap();
|
||||
let second_letter = chars.next().unwrap();
|
||||
|
||||
let path = store_path
|
||||
.join("raw")
|
||||
.join(first_letter.to_string())
|
||||
.join(second_letter.to_string())
|
||||
.join(filename);
|
||||
|
||||
println!("Checking {}", path.display());
|
||||
|
||||
path.exists()
|
||||
}
|
||||
|
||||
fn move_temp_to_raw(file: &Path, hash: &String, store_path: &Path) -> Result<()> {
|
||||
let mut chars = hash.chars();
|
||||
let first_letter = chars.next().unwrap().to_string();
|
||||
let second_letter = chars.next().unwrap().to_string();
|
||||
let file_extension = file
|
||||
.extension()
|
||||
.map_or(String::new(), |ext| format!(".{}", ext.to_string_lossy()));
|
||||
|
||||
fs::create_dir_all(
|
||||
store_path
|
||||
.join("raw")
|
||||
.join(&first_letter)
|
||||
.join(&second_letter),
|
||||
)?;
|
||||
|
||||
fs::rename(
|
||||
file,
|
||||
store_path
|
||||
.join("raw")
|
||||
.join(&first_letter)
|
||||
.join(&second_letter)
|
||||
.join(format!(
|
||||
"{hash}{}",
|
||||
if file_extension.is_empty() {
|
||||
""
|
||||
} else {
|
||||
&file_extension
|
||||
}
|
||||
)),
|
||||
)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn main() -> Result<()> {
|
||||
let args = Args::parse();
|
||||
|
||||
match args.command {
|
||||
Command::Archive { ref path } => {
|
||||
let archive_path = get_archive_path();
|
||||
if get_archive_path().is_none() {
|
||||
eprintln!("Not in an archive. Use 'archivr init' to create one.");
|
||||
process::exit(1);
|
||||
}
|
||||
|
||||
// let download_id = uuid::Uuid::new_v4();
|
||||
let timestamp = Local::now().format("%Y-%m-%dT%H-%M-%S%.3f").to_string();
|
||||
|
||||
let source = determine_source(path);
|
||||
if let Source::YouTube = source {
|
||||
let store_path_string_file = archive_path.unwrap().join("store_path");
|
||||
let store_path = match fs::read_to_string(store_path_string_file) {
|
||||
Ok(p) => PathBuf::from(p.trim()),
|
||||
Err(e) => {
|
||||
eprintln!("Failed to read store path: {e}");
|
||||
process::exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
let hash =
|
||||
match downloader::youtube::download(path.clone(), &store_path, ×tamp) {
|
||||
Ok(h) => h,
|
||||
Err(e) => {
|
||||
eprintln!("Failed to download from YouTube: {e}");
|
||||
process::exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
let hash_exists = hash_exists(format!("{hash}.mp4"), &store_path);
|
||||
// TODO: check for repeated archives?
|
||||
// There could be one of the following:
|
||||
// - We are literally archiving the same path over again.
|
||||
// - We are archiving a different path, which had this file. E.g.: we archived a
|
||||
// website before which had this YouTube video, and while recursively archiving
|
||||
// everything, we also archived the YouTube video although it wasn't our main
|
||||
// target. This means that we should archive again; whereas with the first case...
|
||||
// Not sure. Need to think about this.
|
||||
// ----
|
||||
// Thinking about it a day later...
|
||||
// If we are specifically archiving a YouTube video, it could also be two of the
|
||||
// above. So yeah, just create a new DB entry and symlink the Raw to the Structured
|
||||
// Dir or whatever. it's midnight and my brain ain't wording/braining.
|
||||
if hash_exists {
|
||||
println!("File already archived.");
|
||||
process::exit(0);
|
||||
} else {
|
||||
move_temp_to_raw(
|
||||
&store_path.join("temp").join(format!("{timestamp}.mp4")),
|
||||
&hash,
|
||||
&store_path,
|
||||
)?;
|
||||
|
||||
println!("File archived successfully.");
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: DB INSERT, inserting a record
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
Command::Init {
|
||||
path: ref archive_path_string,
|
||||
store_path: ref store_path_string,
|
||||
name: ref archive_name,
|
||||
} => {
|
||||
let archive_path = Path::new(&archive_path_string).join(".archivr");
|
||||
let store_path = if Path::new(&store_path_string).is_relative() {
|
||||
env::current_dir().unwrap().join(store_path_string)
|
||||
} else {
|
||||
Path::new(store_path_string).to_path_buf()
|
||||
};
|
||||
|
||||
if archive_path.exists() {
|
||||
// TODO: check if there is nothing inside. if there is nothing inside, use it
|
||||
eprintln!("Archive already exists at {}", archive_path.display());
|
||||
if store_path.exists() {
|
||||
eprintln!("Store path already exists at {}", store_path.display());
|
||||
process::exit(1);
|
||||
}
|
||||
process::exit(1);
|
||||
}
|
||||
if store_path.exists() {
|
||||
// TODO: check if the structure is correct. If so, use it.
|
||||
eprintln!("Store path already exists at {}", store_path.display());
|
||||
process::exit(1);
|
||||
}
|
||||
|
||||
fs::create_dir_all(&archive_path).unwrap();
|
||||
fs::create_dir_all(&store_path).unwrap();
|
||||
fs::write(archive_path.join("name"), archive_name).unwrap();
|
||||
let _ = fs::write(
|
||||
archive_path.join("store_path"),
|
||||
store_path.canonicalize().unwrap().to_str().unwrap(),
|
||||
);
|
||||
fs::create_dir_all(store_path.join("raw")).unwrap();
|
||||
fs::create_dir_all(store_path.join("structured")).unwrap();
|
||||
fs::create_dir_all(store_path.join("tmp")).unwrap();
|
||||
|
||||
println!("Initialized empty archive in {}", archive_path.display());
|
||||
|
||||
Ok(())
|
||||
} // _ => eprintln!("Unknown command: {:?}", args.command),
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue