diff --git a/Cargo.lock b/Cargo.lock index 2f77637..155a9fc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,18 +2,6 @@ # It is not intended for manual editing. version = 4 -[[package]] -name = "ahash" -version = "0.8.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" -dependencies = [ - "cfg-if", - "once_cell", - "version_check", - "zerocopy", -] - [[package]] name = "aho-corasick" version = "1.1.4" @@ -97,7 +85,6 @@ dependencies = [ "clap", "hex", "regex", - "rusqlite", "sha3", "uuid", ] @@ -108,12 +95,6 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" -[[package]] -name = "bitflags" -version = "2.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" - [[package]] name = "block-buffer" version = "0.10.4" @@ -239,18 +220,6 @@ dependencies = [ "crypto-common", ] -[[package]] -name = "fallible-iterator" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" - -[[package]] -name = "fallible-streaming-iterator" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" - [[package]] name = "find-msvc-tools" version = "0.1.4" @@ -279,24 +248,6 @@ dependencies = [ "wasi", ] -[[package]] -name = "hashbrown" -version = "0.14.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" -dependencies = [ - "ahash", -] - -[[package]] -name = "hashlink" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ba4ff7128dee98c7dc9794b6a411377e1404dba1c97deb8d1a55297bd25d8af" -dependencies = [ - "hashbrown", -] - [[package]] name = "heck" version = "0.5.0" @@ -364,17 +315,6 @@ version = "0.2.177" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" -[[package]] -name = "libsqlite3-sys" -version = "0.30.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e99fb7a497b1e3339bc746195567ed8d3e24945ecd636e3619d20b9de9e9149" -dependencies = [ - "cc", - "pkg-config", - "vcpkg", -] - [[package]] name = "log" version = "0.4.28" @@ -408,12 +348,6 @@ version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" -[[package]] -name = "pkg-config" -version = "0.3.33" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e" - [[package]] name = "proc-macro2" version = "1.0.101" @@ -467,20 +401,6 @@ version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" -[[package]] -name = "rusqlite" -version = "0.32.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7753b721174eb8ff87a9a0e799e2d7bc3749323e773db92e0984debb00019d6e" -dependencies = [ - "bitflags", - "fallible-iterator", - "fallible-streaming-iterator", - "hashlink", - "libsqlite3-sys", - "smallvec", -] - [[package]] name = "rustversion" version = "1.0.22" @@ -503,12 +423,6 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" -[[package]] -name = "smallvec" -version = "1.15.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" - [[package]] name = "strsim" version = "0.11.1" @@ -555,12 +469,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "vcpkg" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" - [[package]] name = "version_check" version = "0.9.5" @@ -782,23 +690,3 @@ name = "wit-bindgen" version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" - -[[package]] -name = "zerocopy" -version = "0.8.48" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" -dependencies = [ - "zerocopy-derive", -] - -[[package]] -name = "zerocopy-derive" -version = "0.8.48" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] diff --git a/Cargo.toml b/Cargo.toml index b3ed74d..f40ba88 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,5 @@ chrono = "0.4.42" clap = { version = "4.5.48", features = ["derive"] } hex = "0.4.3" regex = "1.12.2" -rusqlite = { version = "0.32.1", features = ["bundled"] } sha3 = "0.10.8" uuid = { version = "1.18.1", features = ["v4"] } diff --git a/flake.nix b/flake.nix index 3978a04..a050caa 100644 --- a/flake.nix +++ b/flake.nix @@ -63,7 +63,7 @@ pname = "archivr"; version = "0.1.0"; src = pkgs.lib.cleanSource ./.; - cargoHash = ""; + cargoHash = "sha256-4m+4SMYA/rJ0eHEOc32zA2VdZI1pqzB5NenD0R0f2zM="; nativeBuildInputs = [ pkgs.pkg-config ]; }; archivr = pkgs.stdenv.mkDerivation { diff --git a/src/database.rs b/src/database.rs deleted file mode 100644 index 9f87a42..0000000 --- a/src/database.rs +++ /dev/null @@ -1,929 +0,0 @@ -use anyhow::{Context, Result, bail}; -use chrono::Utc; -use rusqlite::{Connection, OptionalExtension, params}; -use std::path::{Path, PathBuf}; -use uuid::Uuid; - -pub const DATABASE_FILE_NAME: &str = "archivr.sqlite"; -pub const DEFAULT_USERNAME: &str = "local-admin"; - -#[derive(Debug, Clone)] -pub struct ArchiveRun { - pub id: i64, - pub run_uid: String, -} - -#[derive(Debug, Clone)] -pub struct ArchiveRunItem { - pub id: i64, - pub item_uid: String, -} - -#[derive(Debug, Clone)] -pub struct ArchivedEntry { - pub id: i64, - pub entry_uid: String, - pub structured_root_relpath: String, -} - -#[derive(Debug, Clone)] -pub struct BlobRecord { - pub sha256: String, - pub byte_size: i64, - pub mime_type: Option, - pub extension: Option, - pub raw_relpath: String, -} - -#[derive(Debug, Clone)] -pub struct NewEntry { - pub source_identity_id: i64, - pub archive_run_id: i64, - pub parent_entry_id: Option, - pub root_entry_id: Option, - pub created_by_user_id: i64, - pub owned_by_user_id: i64, - pub source_kind: String, - pub entity_kind: String, - pub title: Option, - pub visibility: String, - pub representation_kind: String, - pub source_metadata_json: String, - pub display_metadata_json: Option, -} - -#[derive(Debug, Clone)] -pub struct NewArtifact { - pub entry_id: i64, - pub artifact_role: String, - pub storage_area: String, - pub relpath: String, - pub blob_id: Option, - pub logical_path: Option, - pub metadata_json: Option, -} - -pub fn database_path(archive_path: &Path) -> PathBuf { - archive_path.join(DATABASE_FILE_NAME) -} - -pub fn open_or_initialize(archive_path: &Path) -> Result { - let conn = Connection::open(database_path(archive_path)).with_context(|| { - format!( - "failed to open archive database in {}", - archive_path.display() - ) - })?; - initialize_schema(&conn)?; - Ok(conn) -} - -pub fn initialize_schema(conn: &Connection) -> Result<()> { - conn.pragma_update(None, "foreign_keys", "ON")?; - conn.execute_batch( - r#" - CREATE TABLE IF NOT EXISTS users ( - id INTEGER PRIMARY KEY, - user_uid TEXT NOT NULL UNIQUE, - username TEXT NOT NULL UNIQUE, - email TEXT UNIQUE, - password_hash TEXT NOT NULL, - status TEXT NOT NULL CHECK (status IN ('active', 'disabled')), - role TEXT NOT NULL CHECK (role IN ('admin', 'user')), - created_at TEXT NOT NULL, - last_login_at TEXT - ); - - CREATE TABLE IF NOT EXISTS instance_settings ( - id INTEGER PRIMARY KEY CHECK (id = 1), - public_index_enabled INTEGER NOT NULL DEFAULT 0 CHECK (public_index_enabled IN (0, 1)), - public_entry_content_enabled INTEGER NOT NULL DEFAULT 0 CHECK (public_entry_content_enabled IN (0, 1)), - public_archive_submission_enabled INTEGER NOT NULL DEFAULT 0 CHECK (public_archive_submission_enabled IN (0, 1)) - ); - - INSERT OR IGNORE INTO instance_settings ( - id, - public_index_enabled, - public_entry_content_enabled, - public_archive_submission_enabled - ) VALUES (1, 0, 0, 0); - - CREATE TABLE IF NOT EXISTS archive_runs ( - id INTEGER PRIMARY KEY, - run_uid TEXT NOT NULL UNIQUE, - created_by_user_id INTEGER NOT NULL REFERENCES users(id), - started_at TEXT NOT NULL, - finished_at TEXT, - status TEXT NOT NULL CHECK (status IN ('in_progress', 'completed', 'failed')), - requested_count INTEGER NOT NULL DEFAULT 0, - discovered_count INTEGER NOT NULL DEFAULT 0, - completed_count INTEGER NOT NULL DEFAULT 0, - failed_count INTEGER NOT NULL DEFAULT 0, - error_summary TEXT - ); - - CREATE TABLE IF NOT EXISTS archive_run_items ( - id INTEGER PRIMARY KEY, - run_id INTEGER NOT NULL REFERENCES archive_runs(id) ON DELETE CASCADE, - item_uid TEXT NOT NULL UNIQUE, - parent_item_id INTEGER REFERENCES archive_run_items(id), - ordinal INTEGER NOT NULL, - requested_locator TEXT NOT NULL, - canonical_locator TEXT, - source_kind TEXT NOT NULL, - entity_kind TEXT NOT NULL, - status TEXT NOT NULL CHECK (status IN ('pending', 'in_progress', 'completed', 'failed')), - error_text TEXT, - produced_entry_id INTEGER REFERENCES archived_entries(id) - ); - - CREATE TABLE IF NOT EXISTS source_identities ( - id INTEGER PRIMARY KEY, - source_kind TEXT NOT NULL, - entity_kind TEXT NOT NULL, - external_id TEXT, - canonical_url TEXT, - normalized_locator TEXT NOT NULL, - identity_key TEXT NOT NULL UNIQUE - ); - - CREATE TABLE IF NOT EXISTS archived_entries ( - id INTEGER PRIMARY KEY, - entry_uid TEXT NOT NULL UNIQUE, - source_identity_id INTEGER NOT NULL REFERENCES source_identities(id), - archive_run_id INTEGER NOT NULL REFERENCES archive_runs(id), - parent_entry_id INTEGER REFERENCES archived_entries(id), - root_entry_id INTEGER NOT NULL REFERENCES archived_entries(id), - created_by_user_id INTEGER NOT NULL REFERENCES users(id), - owned_by_user_id INTEGER NOT NULL REFERENCES users(id), - source_kind TEXT NOT NULL, - entity_kind TEXT NOT NULL, - title TEXT, - visibility TEXT NOT NULL CHECK (visibility IN ('private', 'unlisted', 'public')), - archived_at TEXT NOT NULL, - original_published_at TEXT, - structured_root_relpath TEXT NOT NULL, - representation_kind TEXT NOT NULL, - source_metadata_json TEXT NOT NULL DEFAULT '{}', - display_metadata_json TEXT - ); - - CREATE TABLE IF NOT EXISTS blobs ( - id INTEGER PRIMARY KEY, - sha256 TEXT NOT NULL UNIQUE, - byte_size INTEGER NOT NULL, - mime_type TEXT, - extension TEXT, - raw_relpath TEXT NOT NULL, - created_at TEXT NOT NULL - ); - - CREATE TABLE IF NOT EXISTS entry_artifacts ( - id INTEGER PRIMARY KEY, - entry_id INTEGER NOT NULL REFERENCES archived_entries(id) ON DELETE CASCADE, - artifact_role TEXT NOT NULL, - storage_area TEXT NOT NULL CHECK (storage_area IN ('raw', 'raw_tweets', 'structured')), - relpath TEXT NOT NULL, - blob_id INTEGER REFERENCES blobs(id), - logical_path TEXT, - metadata_json TEXT - ); - - CREATE TABLE IF NOT EXISTS taxonomy_nodes ( - id INTEGER PRIMARY KEY, - node_uid TEXT NOT NULL UNIQUE, - parent_id INTEGER REFERENCES taxonomy_nodes(id), - name TEXT NOT NULL, - slug TEXT NOT NULL, - full_path TEXT NOT NULL UNIQUE - ); - - CREATE TABLE IF NOT EXISTS entry_taxonomy_assignments ( - entry_id INTEGER NOT NULL REFERENCES archived_entries(id) ON DELETE CASCADE, - node_id INTEGER NOT NULL REFERENCES taxonomy_nodes(id) ON DELETE CASCADE, - PRIMARY KEY (entry_id, node_id) - ); - - CREATE INDEX IF NOT EXISTS idx_archive_run_items_run_id ON archive_run_items(run_id); - CREATE INDEX IF NOT EXISTS idx_archived_entries_parent_entry_id ON archived_entries(parent_entry_id); - CREATE INDEX IF NOT EXISTS idx_archived_entries_root_entry_id ON archived_entries(root_entry_id); - CREATE INDEX IF NOT EXISTS idx_archived_entries_visibility ON archived_entries(visibility); - CREATE INDEX IF NOT EXISTS idx_entry_artifacts_entry_id ON entry_artifacts(entry_id); - CREATE INDEX IF NOT EXISTS idx_entry_artifacts_blob_id ON entry_artifacts(blob_id); - CREATE INDEX IF NOT EXISTS idx_taxonomy_nodes_parent_id ON taxonomy_nodes(parent_id); - "#, - )?; - Ok(()) -} - -pub fn ensure_default_user(conn: &Connection) -> Result { - if let Some(id) = conn - .query_row( - "SELECT id FROM users WHERE username = ?1", - [DEFAULT_USERNAME], - |row| row.get(0), - ) - .optional()? - { - return Ok(id); - } - - conn.execute( - "INSERT INTO users ( - user_uid, username, email, password_hash, status, role, created_at, last_login_at - ) VALUES (?1, ?2, NULL, ?3, 'active', 'admin', ?4, NULL)", - params![ - public_id("usr"), - DEFAULT_USERNAME, - "disabled-local-password", - now_timestamp() - ], - )?; - - Ok(conn.last_insert_rowid()) -} - -pub fn create_archive_run( - conn: &Connection, - created_by_user_id: i64, - requested_count: i64, -) -> Result { - let run_uid = public_id("run"); - conn.execute( - "INSERT INTO archive_runs ( - run_uid, created_by_user_id, started_at, status, requested_count - ) VALUES (?1, ?2, ?3, 'in_progress', ?4)", - params![ - run_uid, - created_by_user_id, - now_timestamp(), - requested_count - ], - )?; - - Ok(ArchiveRun { - id: conn.last_insert_rowid(), - run_uid, - }) -} - -pub fn create_archive_run_item( - conn: &Connection, - run_id: i64, - parent_item_id: Option, - ordinal: i64, - requested_locator: &str, - canonical_locator: Option<&str>, - source_kind: &str, - entity_kind: &str, -) -> Result { - let item_uid = public_id("item"); - conn.execute( - "INSERT INTO archive_run_items ( - run_id, item_uid, parent_item_id, ordinal, requested_locator, canonical_locator, - source_kind, entity_kind, status - ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, 'in_progress')", - params![ - run_id, - item_uid, - parent_item_id, - ordinal, - requested_locator, - canonical_locator, - source_kind, - entity_kind - ], - )?; - - Ok(ArchiveRunItem { - id: conn.last_insert_rowid(), - item_uid, - }) -} - -pub fn complete_archive_run_item( - conn: &Connection, - item_id: i64, - produced_entry_id: i64, -) -> Result<()> { - conn.execute( - "UPDATE archive_run_items - SET status = 'completed', produced_entry_id = ?1, error_text = NULL - WHERE id = ?2", - params![produced_entry_id, item_id], - )?; - refresh_run_counters(conn, run_id_for_item(conn, item_id)?)?; - Ok(()) -} - -pub fn fail_archive_run_item(conn: &Connection, item_id: i64, error_text: &str) -> Result<()> { - conn.execute( - "UPDATE archive_run_items - SET status = 'failed', error_text = ?1 - WHERE id = ?2", - params![error_text, item_id], - )?; - refresh_run_counters(conn, run_id_for_item(conn, item_id)?)?; - Ok(()) -} - -pub fn finish_archive_run(conn: &Connection, run_id: i64) -> Result<()> { - refresh_run_counters(conn, run_id)?; - let failed_count: i64 = conn.query_row( - "SELECT failed_count FROM archive_runs WHERE id = ?1", - [run_id], - |row| row.get(0), - )?; - let status = if failed_count > 0 { - "failed" - } else { - "completed" - }; - conn.execute( - "UPDATE archive_runs SET status = ?1, finished_at = ?2 WHERE id = ?3", - params![status, now_timestamp(), run_id], - )?; - Ok(()) -} - -pub fn fail_archive_run(conn: &Connection, run_id: i64, error_summary: &str) -> Result<()> { - refresh_run_counters(conn, run_id)?; - conn.execute( - "UPDATE archive_runs - SET status = 'failed', finished_at = ?1, error_summary = ?2 - WHERE id = ?3", - params![now_timestamp(), error_summary, run_id], - )?; - Ok(()) -} - -pub fn upsert_source_identity( - conn: &Connection, - source_kind: &str, - entity_kind: &str, - external_id: Option<&str>, - canonical_url: Option<&str>, - normalized_locator: &str, -) -> Result { - let identity_key = identity_key( - source_kind, - entity_kind, - external_id, - canonical_url, - normalized_locator, - ); - conn.execute( - "INSERT OR IGNORE INTO source_identities ( - source_kind, entity_kind, external_id, canonical_url, normalized_locator, identity_key - ) VALUES (?1, ?2, ?3, ?4, ?5, ?6)", - params![ - source_kind, - entity_kind, - external_id, - canonical_url, - normalized_locator, - identity_key - ], - )?; - - let id = conn.query_row( - "SELECT id FROM source_identities WHERE identity_key = ?1", - [identity_key], - |row| row.get(0), - )?; - Ok(id) -} - -pub fn upsert_blob(conn: &Connection, blob: &BlobRecord) -> Result { - conn.execute( - "INSERT OR IGNORE INTO blobs ( - sha256, byte_size, mime_type, extension, raw_relpath, created_at - ) VALUES (?1, ?2, ?3, ?4, ?5, ?6)", - params![ - blob.sha256, - blob.byte_size, - blob.mime_type, - blob.extension, - blob.raw_relpath, - now_timestamp() - ], - )?; - - let id = conn.query_row( - "SELECT id FROM blobs WHERE sha256 = ?1", - [blob.sha256.as_str()], - |row| row.get(0), - )?; - Ok(id) -} - -pub fn create_archived_entry(conn: &Connection, entry: &NewEntry) -> Result { - validate_visibility(&entry.visibility)?; - let id: i64 = conn.query_row( - "SELECT COALESCE(MAX(id), 0) + 1 FROM archived_entries", - [], - |row| row.get(0), - )?; - let entry_uid = public_id("entry"); - let root_entry_id = entry.root_entry_id.unwrap_or(id); - let structured_root_relpath = format!("structured/{entry_uid}"); - - conn.execute( - "INSERT INTO archived_entries ( - id, entry_uid, source_identity_id, archive_run_id, parent_entry_id, root_entry_id, - created_by_user_id, owned_by_user_id, source_kind, entity_kind, title, visibility, - archived_at, original_published_at, structured_root_relpath, representation_kind, - source_metadata_json, display_metadata_json - ) VALUES ( - ?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, - ?13, NULL, ?14, ?15, ?16, ?17 - )", - params![ - id, - entry_uid, - entry.source_identity_id, - entry.archive_run_id, - entry.parent_entry_id, - root_entry_id, - entry.created_by_user_id, - entry.owned_by_user_id, - entry.source_kind, - entry.entity_kind, - entry.title, - entry.visibility, - now_timestamp(), - structured_root_relpath, - entry.representation_kind, - entry.source_metadata_json, - entry.display_metadata_json - ], - )?; - - Ok(ArchivedEntry { - id, - entry_uid, - structured_root_relpath, - }) -} - -pub fn add_entry_artifact(conn: &Connection, artifact: &NewArtifact) -> Result { - conn.execute( - "INSERT INTO entry_artifacts ( - entry_id, artifact_role, storage_area, relpath, blob_id, logical_path, metadata_json - ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)", - params![ - artifact.entry_id, - artifact.artifact_role, - artifact.storage_area, - artifact.relpath, - artifact.blob_id, - artifact.logical_path, - artifact.metadata_json - ], - )?; - Ok(conn.last_insert_rowid()) -} - -#[allow(dead_code)] -pub fn set_public_settings( - conn: &Connection, - public_index_enabled: bool, - public_entry_content_enabled: bool, - public_archive_submission_enabled: bool, -) -> Result<()> { - conn.execute( - "UPDATE instance_settings - SET public_index_enabled = ?1, - public_entry_content_enabled = ?2, - public_archive_submission_enabled = ?3 - WHERE id = 1", - params![ - public_index_enabled as i64, - public_entry_content_enabled as i64, - public_archive_submission_enabled as i64 - ], - )?; - Ok(()) -} - -#[allow(dead_code)] -pub fn public_index_entry_count(conn: &Connection) -> Result { - let count = conn.query_row( - "SELECT COUNT(*) - FROM archived_entries - WHERE parent_entry_id IS NULL - AND visibility = 'public' - AND (SELECT public_index_enabled FROM instance_settings WHERE id = 1) = 1 - AND (SELECT public_entry_content_enabled FROM instance_settings WHERE id = 1) = 1", - [], - |row| row.get(0), - )?; - Ok(count) -} - -#[allow(dead_code)] -pub fn main_archive_entry_count(conn: &Connection) -> Result { - let count = conn.query_row( - "SELECT COUNT(*) FROM archived_entries WHERE parent_entry_id IS NULL", - [], - |row| row.get(0), - )?; - Ok(count) -} - -#[allow(dead_code)] -pub fn create_taxonomy_path(conn: &Connection, full_path: &str) -> Result { - let segments = normalized_taxonomy_segments(full_path)?; - let mut parent_id = None; - let mut current_path = String::new(); - let mut current_id = 0; - - for segment in segments { - current_path.push('/'); - current_path.push_str(segment); - - if let Some(id) = conn - .query_row( - "SELECT id FROM taxonomy_nodes WHERE full_path = ?1", - [current_path.as_str()], - |row| row.get(0), - ) - .optional()? - { - current_id = id; - parent_id = Some(id); - continue; - } - - conn.execute( - "INSERT INTO taxonomy_nodes (node_uid, parent_id, name, slug, full_path) - VALUES (?1, ?2, ?3, ?4, ?5)", - params![ - public_id("node"), - parent_id, - humanize_slug(segment), - segment, - current_path - ], - )?; - current_id = conn.last_insert_rowid(); - parent_id = Some(current_id); - } - - Ok(current_id) -} - -#[allow(dead_code)] -pub fn assign_entry_to_taxonomy(conn: &Connection, entry_id: i64, node_id: i64) -> Result<()> { - conn.execute( - "INSERT OR IGNORE INTO entry_taxonomy_assignments (entry_id, node_id) - VALUES (?1, ?2)", - params![entry_id, node_id], - )?; - Ok(()) -} - -#[allow(dead_code)] -pub fn entry_count_for_taxonomy_path(conn: &Connection, full_path: &str) -> Result { - let count = conn.query_row( - "WITH RECURSIVE descendants(id) AS ( - SELECT id FROM taxonomy_nodes WHERE full_path = ?1 - UNION ALL - SELECT child.id - FROM taxonomy_nodes child - JOIN descendants parent ON child.parent_id = parent.id - ) - SELECT COUNT(DISTINCT eta.entry_id) - FROM entry_taxonomy_assignments eta - JOIN descendants d ON eta.node_id = d.id", - [full_path], - |row| row.get(0), - )?; - Ok(count) -} - -fn refresh_run_counters(conn: &Connection, run_id: i64) -> Result<()> { - conn.execute( - "UPDATE archive_runs - SET discovered_count = (SELECT COUNT(*) FROM archive_run_items WHERE run_id = ?1), - completed_count = (SELECT COUNT(*) FROM archive_run_items WHERE run_id = ?1 AND status = 'completed'), - failed_count = (SELECT COUNT(*) FROM archive_run_items WHERE run_id = ?1 AND status = 'failed') - WHERE id = ?1", - [run_id], - )?; - Ok(()) -} - -fn run_id_for_item(conn: &Connection, item_id: i64) -> Result { - let run_id = conn.query_row( - "SELECT run_id FROM archive_run_items WHERE id = ?1", - [item_id], - |row| row.get(0), - )?; - Ok(run_id) -} - -fn public_id(prefix: &str) -> String { - format!("{prefix}_{}", Uuid::new_v4().simple()) -} - -fn now_timestamp() -> String { - Utc::now().to_rfc3339() -} - -fn identity_key( - source_kind: &str, - entity_kind: &str, - external_id: Option<&str>, - canonical_url: Option<&str>, - normalized_locator: &str, -) -> String { - let stable_locator = canonical_url.or(external_id).unwrap_or(normalized_locator); - format!("{source_kind}:{entity_kind}:{stable_locator}") -} - -fn validate_visibility(visibility: &str) -> Result<()> { - match visibility { - "private" | "unlisted" | "public" => Ok(()), - _ => bail!("invalid archived entry visibility: {visibility}"), - } -} - -#[allow(dead_code)] -fn normalized_taxonomy_segments(full_path: &str) -> Result> { - let segments = full_path - .trim() - .trim_matches('/') - .split('/') - .filter(|segment| !segment.is_empty()) - .collect::>(); - - if segments.is_empty() { - bail!("taxonomy path must contain at least one segment"); - } - - Ok(segments) -} - -#[allow(dead_code)] -fn humanize_slug(slug: &str) -> String { - slug.split('-') - .map(|part| { - let mut chars = part.chars(); - match chars.next() { - Some(first) => format!("{}{}", first.to_uppercase(), chars.as_str()), - None => String::new(), - } - }) - .collect::>() - .join(" ") -} - -#[cfg(test)] -mod tests { - use super::*; - - fn conn() -> Connection { - let conn = Connection::open_in_memory().unwrap(); - initialize_schema(&conn).unwrap(); - conn - } - - fn create_entry_fixture( - conn: &Connection, - visibility: &str, - parent_entry_id: Option, - root_entry_id: Option, - ) -> ArchivedEntry { - let user_id = ensure_default_user(conn).unwrap(); - let run = create_archive_run(conn, user_id, 1).unwrap(); - let source_id = upsert_source_identity( - conn, - "youtube", - "video", - Some("video-1"), - Some("https://youtube.com/watch?v=video-1"), - "https://youtube.com/watch?v=video-1", - ) - .unwrap(); - - create_archived_entry( - conn, - &NewEntry { - source_identity_id: source_id, - archive_run_id: run.id, - parent_entry_id, - root_entry_id, - created_by_user_id: user_id, - owned_by_user_id: user_id, - source_kind: "youtube".to_string(), - entity_kind: "video".to_string(), - title: None, - visibility: visibility.to_string(), - representation_kind: "video".to_string(), - source_metadata_json: "{}".to_string(), - display_metadata_json: None, - }, - ) - .unwrap() - } - - #[test] - fn schema_defaults_public_settings_to_private() { - let conn = conn(); - let defaults: (i64, i64, i64) = conn - .query_row( - "SELECT public_index_enabled, public_entry_content_enabled, public_archive_submission_enabled - FROM instance_settings WHERE id = 1", - [], - |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)), - ) - .unwrap(); - - assert_eq!(defaults, (0, 0, 0)); - } - - #[test] - fn rearchiving_reuses_source_identity_and_blob_but_creates_entries() { - let conn = conn(); - let user_id = ensure_default_user(&conn).unwrap(); - let blob = BlobRecord { - sha256: "abc123".to_string(), - byte_size: 123, - mime_type: Some("video/mp4".to_string()), - extension: Some("mp4".to_string()), - raw_relpath: "raw/a/b/abc123.mp4".to_string(), - }; - let blob_id = upsert_blob(&conn, &blob).unwrap(); - let duplicate_blob_id = upsert_blob(&conn, &blob).unwrap(); - assert_eq!(blob_id, duplicate_blob_id); - - let first_source_id = upsert_source_identity( - &conn, - "youtube", - "video", - Some("video-1"), - Some("https://youtube.com/watch?v=video-1"), - "https://youtube.com/watch?v=video-1", - ) - .unwrap(); - let second_source_id = upsert_source_identity( - &conn, - "youtube", - "video", - Some("video-1"), - Some("https://youtube.com/watch?v=video-1"), - "https://youtube.com/watch?v=video-1", - ) - .unwrap(); - assert_eq!(first_source_id, second_source_id); - - for _ in 0..2 { - let run = create_archive_run(&conn, user_id, 1).unwrap(); - let entry = create_archived_entry( - &conn, - &NewEntry { - source_identity_id: first_source_id, - archive_run_id: run.id, - parent_entry_id: None, - root_entry_id: None, - created_by_user_id: user_id, - owned_by_user_id: user_id, - source_kind: "youtube".to_string(), - entity_kind: "video".to_string(), - title: None, - visibility: "private".to_string(), - representation_kind: "video".to_string(), - source_metadata_json: "{}".to_string(), - display_metadata_json: None, - }, - ) - .unwrap(); - add_entry_artifact( - &conn, - &NewArtifact { - entry_id: entry.id, - artifact_role: "primary_media".to_string(), - storage_area: "raw".to_string(), - relpath: blob.raw_relpath.clone(), - blob_id: Some(blob_id), - logical_path: None, - metadata_json: None, - }, - ) - .unwrap(); - } - - let entry_count: i64 = conn - .query_row("SELECT COUNT(*) FROM archived_entries", [], |row| { - row.get(0) - }) - .unwrap(); - let source_count: i64 = conn - .query_row("SELECT COUNT(*) FROM source_identities", [], |row| { - row.get(0) - }) - .unwrap(); - let blob_count: i64 = conn - .query_row("SELECT COUNT(*) FROM blobs", [], |row| row.get(0)) - .unwrap(); - - assert_eq!(entry_count, 2); - assert_eq!(source_count, 1); - assert_eq!(blob_count, 1); - } - - #[test] - fn run_items_refresh_progress_counters() { - let conn = conn(); - let user_id = ensure_default_user(&conn).unwrap(); - let run = create_archive_run(&conn, user_id, 2).unwrap(); - let source_id = - upsert_source_identity(&conn, "local", "file", None, None, "file:///a").unwrap(); - let entry = create_archived_entry( - &conn, - &NewEntry { - source_identity_id: source_id, - archive_run_id: run.id, - parent_entry_id: None, - root_entry_id: None, - created_by_user_id: user_id, - owned_by_user_id: user_id, - source_kind: "local".to_string(), - entity_kind: "file".to_string(), - title: None, - visibility: "private".to_string(), - representation_kind: "file".to_string(), - source_metadata_json: "{}".to_string(), - display_metadata_json: None, - }, - ) - .unwrap(); - let first = - create_archive_run_item(&conn, run.id, None, 0, "file:///a", None, "local", "file") - .unwrap(); - let second = - create_archive_run_item(&conn, run.id, None, 1, "file:///b", None, "local", "file") - .unwrap(); - - complete_archive_run_item(&conn, first.id, entry.id).unwrap(); - fail_archive_run_item(&conn, second.id, "copy failed").unwrap(); - finish_archive_run(&conn, run.id).unwrap(); - - let counters: (i64, i64, i64, String) = conn - .query_row( - "SELECT discovered_count, completed_count, failed_count, status - FROM archive_runs WHERE id = ?1", - [run.id], - |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?)), - ) - .unwrap(); - - assert_eq!(counters, (2, 1, 1, "failed".to_string())); - } - - #[test] - fn main_archive_query_only_counts_roots() { - let conn = conn(); - let parent = create_entry_fixture(&conn, "private", None, None); - let _child = create_entry_fixture(&conn, "private", Some(parent.id), Some(parent.id)); - - assert_eq!(main_archive_entry_count(&conn).unwrap(), 1); - } - - #[test] - fn public_entries_require_instance_flags_and_public_visibility() { - let conn = conn(); - let _public = create_entry_fixture(&conn, "public", None, None); - let _private = create_entry_fixture(&conn, "private", None, None); - - assert_eq!(public_index_entry_count(&conn).unwrap(), 0); - - set_public_settings(&conn, true, false, false).unwrap(); - assert_eq!(public_index_entry_count(&conn).unwrap(), 0); - - set_public_settings(&conn, true, true, false).unwrap(); - assert_eq!(public_index_entry_count(&conn).unwrap(), 1); - } - - #[test] - fn taxonomy_assignments_are_discoverable_through_ancestors() { - let conn = conn(); - let entry = create_entry_fixture(&conn, "private", None, None); - let node_id = create_taxonomy_path(&conn, "/sciences/computer-science/compilers").unwrap(); - assign_entry_to_taxonomy(&conn, entry.id, node_id).unwrap(); - - assert_eq!( - entry_count_for_taxonomy_path(&conn, "/sciences/computer-science/compilers").unwrap(), - 1 - ); - assert_eq!( - entry_count_for_taxonomy_path(&conn, "/sciences/computer-science").unwrap(), - 1 - ); - assert_eq!( - entry_count_for_taxonomy_path(&conn, "/sciences").unwrap(), - 1 - ); - } -} diff --git a/src/main.rs b/src/main.rs index 833bb59..177194b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,14 +1,12 @@ -use anyhow::{Context, Result}; +use anyhow::Result; use chrono::Local; use clap::{Parser, Subcommand}; use std::{ - collections::HashSet, env, fs, path::{Path, PathBuf}, process, }; -mod database; mod downloader; mod hash; mod twitter; @@ -351,298 +349,6 @@ fn initialize_store_directories(store_path: &Path) -> Result<()> { Ok(()) } -fn raw_relative_path_from_hash(hash: &str, file_extension: &str) -> Result { - let mut chars = hash.chars(); - let first_letter = chars.next().context("hash must not be empty")?; - let second_letter = chars - .next() - .context("hash must be at least two characters")?; - - Ok(PathBuf::from("raw") - .join(first_letter.to_string()) - .join(second_letter.to_string()) - .join(format!("{hash}{file_extension}"))) -} - -fn path_to_store_string(path: &Path) -> String { - path.to_string_lossy().replace('\\', "/") -} - -fn extension_without_dot(file_extension: &str) -> Option { - file_extension - .strip_prefix('.') - .filter(|extension| !extension.is_empty()) - .map(|extension| extension.to_string()) -} - -fn blob_record_for_raw_relpath( - store_path: &Path, - raw_relpath: &Path, -) -> Result { - let absolute_path = store_path.join(raw_relpath); - let file_name = raw_relpath - .file_name() - .and_then(|name| name.to_str()) - .context("raw artifact path must have a UTF-8 file name")?; - let (sha256, extension) = match file_name.rsplit_once('.') { - Some((hash, extension)) => (hash.to_string(), Some(extension.to_string())), - None => (file_name.to_string(), None), - }; - - Ok(database::BlobRecord { - sha256, - byte_size: fs::metadata(&absolute_path) - .with_context(|| format!("failed to stat raw artifact {}", absolute_path.display()))? - .len() as i64, - mime_type: None, - extension, - raw_relpath: path_to_store_string(raw_relpath), - }) -} - -fn source_metadata(source: Source) -> (&'static str, &'static str, &'static str) { - match source { - Source::YouTubeVideo => ("youtube", "video", "video"), - Source::YouTubePlaylist => ("youtube", "playlist", "container"), - Source::YouTubeChannel => ("youtube", "channel", "container"), - Source::X => ("x", "post", "video"), - Source::Tweet => ("x", "tweet", "tweet_json"), - Source::TweetThread => ("x", "tweet_thread", "tweet_json"), - Source::Instagram => ("instagram", "post", "video"), - Source::Facebook => ("facebook", "post", "video"), - Source::TikTok => ("tiktok", "video", "video"), - Source::Reddit => ("reddit", "post", "video"), - Source::Snapchat => ("snapchat", "story", "video"), - Source::Local => ("local", "file", "file"), - Source::Other => ("other", "unknown", "unknown"), - } -} - -fn local_file_extension(path: &str) -> String { - Path::new(path.trim_start_matches("file://")) - .extension() - .map_or(String::new(), |ext| format!(".{}", ext.to_string_lossy())) -} - -fn media_file_extension(source: Source, path: &str) -> String { - match source { - Source::YouTubeVideo - | Source::X - | Source::Instagram - | Source::Facebook - | Source::TikTok - | Source::Reddit - | Source::Snapchat => ".mp4".to_string(), - Source::Local => local_file_extension(path), - _ => String::new(), - } -} - -fn tweet_id_from_archive_path(path: &str) -> Option { - path.split(':').next_back().and_then(parse_tweet_id) -} - -fn create_structured_root(store_path: &Path, entry: &database::ArchivedEntry) -> Result<()> { - debug_assert!(entry.entry_uid.starts_with("entry_")); - fs::create_dir_all(store_path.join(&entry.structured_root_relpath))?; - Ok(()) -} - -fn record_media_entry( - conn: &rusqlite::Connection, - store_path: &Path, - user_id: i64, - run: &database::ArchiveRun, - item: &database::ArchiveRunItem, - requested_locator: &str, - canonical_locator: &str, - source: Source, - hash: &str, - file_extension: &str, - byte_size: i64, -) -> Result { - debug_assert!(run.run_uid.starts_with("run_")); - debug_assert!(item.item_uid.starts_with("item_")); - let (source_kind, entity_kind, representation_kind) = source_metadata(source); - let raw_relpath = raw_relative_path_from_hash(hash, file_extension)?; - let blob = database::BlobRecord { - sha256: hash.to_string(), - byte_size, - mime_type: None, - extension: extension_without_dot(file_extension), - raw_relpath: path_to_store_string(&raw_relpath), - }; - let blob_id = database::upsert_blob(conn, &blob)?; - let source_identity_id = database::upsert_source_identity( - conn, - source_kind, - entity_kind, - None, - Some(canonical_locator), - canonical_locator, - )?; - let entry = database::create_archived_entry( - conn, - &database::NewEntry { - source_identity_id, - archive_run_id: run.id, - parent_entry_id: None, - root_entry_id: None, - created_by_user_id: user_id, - owned_by_user_id: user_id, - source_kind: source_kind.to_string(), - entity_kind: entity_kind.to_string(), - title: None, - visibility: "private".to_string(), - representation_kind: representation_kind.to_string(), - source_metadata_json: format!( - r#"{{"requested_locator":"{}","canonical_locator":"{}"}}"#, - json_escape(requested_locator), - json_escape(canonical_locator) - ), - display_metadata_json: None, - }, - )?; - create_structured_root(store_path, &entry)?; - database::add_entry_artifact( - conn, - &database::NewArtifact { - entry_id: entry.id, - artifact_role: "primary_media".to_string(), - storage_area: "raw".to_string(), - relpath: blob.raw_relpath, - blob_id: Some(blob_id), - logical_path: None, - metadata_json: None, - }, - )?; - database::complete_archive_run_item(conn, item.id, entry.id)?; - database::finish_archive_run(conn, run.id)?; - Ok(entry) -} - -fn record_tweet_entry( - conn: &rusqlite::Connection, - store_path: &Path, - user_id: i64, - run: &database::ArchiveRun, - item: &database::ArchiveRunItem, - requested_locator: &str, - source: Source, - tweet_id: &str, -) -> Result { - debug_assert!(run.run_uid.starts_with("run_")); - debug_assert!(item.item_uid.starts_with("item_")); - let (source_kind, entity_kind, representation_kind) = source_metadata(source); - let canonical_locator = format!("https://x.com/i/status/{tweet_id}"); - let source_identity_id = database::upsert_source_identity( - conn, - source_kind, - entity_kind, - Some(tweet_id), - Some(&canonical_locator), - &canonical_locator, - )?; - let entry = database::create_archived_entry( - conn, - &database::NewEntry { - source_identity_id, - archive_run_id: run.id, - parent_entry_id: None, - root_entry_id: None, - created_by_user_id: user_id, - owned_by_user_id: user_id, - source_kind: source_kind.to_string(), - entity_kind: entity_kind.to_string(), - title: None, - visibility: "private".to_string(), - representation_kind: representation_kind.to_string(), - source_metadata_json: format!( - r#"{{"tweet_id":"{}","requested_locator":"{}"}}"#, - json_escape(tweet_id), - json_escape(requested_locator) - ), - display_metadata_json: None, - }, - )?; - create_structured_root(store_path, &entry)?; - - let tweet_json_relpath = PathBuf::from("raw_tweets").join(format!("tweet-{tweet_id}.json")); - database::add_entry_artifact( - conn, - &database::NewArtifact { - entry_id: entry.id, - artifact_role: "raw_tweet_json".to_string(), - storage_area: "raw_tweets".to_string(), - relpath: path_to_store_string(&tweet_json_relpath), - blob_id: None, - logical_path: None, - metadata_json: None, - }, - )?; - - let tweet_json = fs::read_to_string(store_path.join(&tweet_json_relpath))?; - for (role, raw_relpath) in tweet_raw_artifacts(&tweet_json) { - let raw_path = PathBuf::from(&raw_relpath); - let blob = blob_record_for_raw_relpath(store_path, &raw_path)?; - let blob_id = database::upsert_blob(conn, &blob)?; - database::add_entry_artifact( - conn, - &database::NewArtifact { - entry_id: entry.id, - artifact_role: role, - storage_area: "raw".to_string(), - relpath: raw_relpath, - blob_id: Some(blob_id), - logical_path: None, - metadata_json: None, - }, - )?; - } - - database::complete_archive_run_item(conn, item.id, entry.id)?; - database::finish_archive_run(conn, run.id)?; - Ok(entry) -} - -fn tweet_raw_artifacts(tweet_json: &str) -> Vec<(String, String)> { - let regex = regex::Regex::new(r#""(avatar_local_path|local_path)": "([^"\n]+)""#).unwrap(); - let mut seen = HashSet::new(); - let mut artifacts = Vec::new(); - - for captures in regex.captures_iter(tweet_json) { - let relpath = captures[2].to_string(); - if !relpath.starts_with("raw/") || !seen.insert(relpath.clone()) { - continue; - } - - let role = if &captures[1] == "avatar_local_path" { - "avatar" - } else { - "media" - }; - artifacts.push((role.to_string(), relpath)); - } - - artifacts -} - -fn json_escape(input: &str) -> String { - input.replace('\\', "\\\\").replace('"', "\\\"") -} - -fn fail_archive_and_exit( - conn: &rusqlite::Connection, - run: &database::ArchiveRun, - item: &database::ArchiveRunItem, - message: &str, -) -> ! { - let _ = database::fail_archive_run_item(conn, item.id, message); - let _ = database::fail_archive_run(conn, run.id, message); - eprintln!("{message}"); - process::exit(1); -} - fn main() -> Result<()> { let args = Args::parse(); @@ -669,42 +375,14 @@ fn main() -> Result<()> { }; let source = determine_source(path); - let (source_kind, entity_kind, _) = source_metadata(source); - let conn = database::open_or_initialize(&archive_path)?; - let user_id = database::ensure_default_user(&conn)?; - let run = database::create_archive_run(&conn, user_id, 1)?; - let item = database::create_archive_run_item( - &conn, - run.id, - None, - 0, - path, - None, - source_kind, - entity_kind, - )?; // Sources: Tweets or Twitter Threads match source { Source::Other => { - fail_archive_and_exit( - &conn, - &run, - &item, - "Archiving from this source is not yet implemented.", - ); + eprintln!("Archiving from this source is not yet implemented."); + process::exit(1); } Source::Tweet | Source::TweetThread => { - let tweet_id = match tweet_id_from_archive_path(path) { - Some(tweet_id) => tweet_id, - None => fail_archive_and_exit( - &conn, - &run, - &item, - "Failed to archive tweet: invalid tweet ID", - ), - }; - match downloader::tweets::archive( path, source == Source::TweetThread, @@ -712,16 +390,6 @@ fn main() -> Result<()> { ×tamp, ) { Ok(true) => { - record_tweet_entry( - &conn, - &store_path, - user_id, - &run, - &item, - path, - source, - &tweet_id, - )?; println!( "Tweet archived successfully to {}", store_path.join("raw_tweets").display() @@ -729,16 +397,6 @@ fn main() -> Result<()> { return Ok(()); } Ok(false) => { - record_tweet_entry( - &conn, - &store_path, - user_id, - &run, - &item, - path, - source, - &tweet_id, - )?; println!( "Tweet already archived in {}", store_path.join("raw_tweets").display() @@ -746,12 +404,8 @@ fn main() -> Result<()> { return Ok(()); } Err(e) => { - fail_archive_and_exit( - &conn, - &run, - &item, - &format!("Failed to archive tweet: {e}"), - ); + eprintln!("Failed to archive tweet: {e}"); + process::exit(1); } } } @@ -759,7 +413,6 @@ fn main() -> Result<()> { } // Sources, for which yt-dlp is needed - let requested_path = path.to_string(); let path = expand_shorthand_to_url(path, &source); let hash = match source { Source::YouTubeVideo @@ -772,12 +425,8 @@ fn main() -> Result<()> { match downloader::ytdlp::download(path.clone(), &store_path, ×tamp) { Ok(h) => h, Err(e) => { - fail_archive_and_exit( - &conn, - &run, - &item, - &format!("Failed to download media: {e}"), - ); + eprintln!("Failed to download from YouTube: {e}"); + process::exit(1); } } } @@ -785,34 +434,29 @@ fn main() -> Result<()> { match downloader::local::save(path.clone(), &store_path, ×tamp) { Ok(h) => h, Err(e) => { - fail_archive_and_exit( - &conn, - &run, - &item, - &format!("Failed to archive local file: {e}"), - ); + eprintln!("Failed to archive local file: {e}"); + process::exit(1); } } } - Source::YouTubePlaylist | Source::YouTubeChannel => { - fail_archive_and_exit( - &conn, - &run, - &item, - "Playlist and channel container expansion are not yet implemented.", - ); - } _ => unreachable!(), }; - let file_extension = media_file_extension(source, &path); - let temp_file = store_path - .join("temp") - .join(×tamp) - .join(format!("{timestamp}{file_extension}")); - let byte_size = fs::metadata(&temp_file) - .with_context(|| format!("failed to stat staged file {}", temp_file.display()))? - .len() as i64; + let file_extension = match source { + Source::YouTubeVideo + | Source::X + | Source::Instagram + | Source::Facebook + | Source::TikTok + | Source::Reddit + | Source::Snapchat => ".mp4", + Source::Local => { + let p = Path::new(path.trim_start_matches("file://")); + &p.extension() + .map_or(String::new(), |ext| format!(".{}", ext.to_string_lossy())) + } + _ => "", + }; let hash_exists = hash_exists(format!("{hash}{file_extension}"), &store_path); @@ -846,19 +490,9 @@ fn main() -> Result<()> { println!("File archived successfully."); } - record_media_entry( - &conn, - &store_path, - user_id, - &run, - &item, - &requested_path, - &path, - source, - &hash, - &file_extension, - byte_size, - )?; + // TODO: DB INSERT, inserting a record + // https://github.com/rusqlite/rusqlite + // Think of the DB schema Ok(()) } @@ -909,8 +543,6 @@ fn main() -> Result<()> { store_path.canonicalize().unwrap().to_str().unwrap(), ); initialize_store_directories(&store_path).unwrap(); - let conn = database::open_or_initialize(&archive_path)?; - let _ = database::ensure_default_user(&conn)?; println!("Initialized empty archive in {}", archive_path.display());