diff --git a/.gitignore b/.gitignore index 04ae708..ea8c4bf 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1 @@ /target -__pycache__/ -*.pyc diff --git a/README.md b/README.md index 5117bfc..ad1203e 100644 --- a/README.md +++ b/README.md @@ -30,21 +30,3 @@ user_id = "/run/agenix/readlater-user-id" ```toml user_id = { file = "/run/agenix/readlater-user-id" } ``` - -### `sync_x` - -`/sync_x` imports X/Twitter bookmarks into Read Later. - -- The bot prompts for the Cloudflare cookie header string (`auth_token` + `ct0`). -- It runs `isolate_cookies.py`, then `main.py --mode a`. -- Extracted URLs are prepended to Read Later. -- Temporary `creds.txt` / `bookmarks.txt` files are removed after import. - -Config example: - -```toml -[sync_x] -source_project_path = "/Users/thegeneralist/personal/bookkeeper/vendor/extract-x-bookmarks" -work_dir = "/var/lib/readlater-bot/sync-x" -python_bin = "/Users/thegeneralist/personal/extract-x-bookmarks/.venv/bin/python" -``` diff --git a/flake.nix b/flake.nix index 160bfd9..273b35d 100644 --- a/flake.nix +++ b/flake.nix @@ -1,15 +1,6 @@ { description = "Read Later Telegram bot"; - nixConfig = { - extra-substituters = [ - "https://cache.garnix.io" - ]; - extra-trusted-public-keys = [ - "cache.garnix.io:CTFPyKSLcx5RMJKfLo5EEPUObbA78b0YQ2DTCJXqr9g=" - ]; - }; - inputs = { nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; flake-utils.url = "github:numtide/flake-utils"; diff --git a/src/main.rs b/src/main.rs index 3fee9b8..ac97051 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,7 +3,7 @@ use std::fs; use std::io::Write; use std::os::unix::fs::PermissionsExt; use std::path::{Path, PathBuf}; -use std::process::{Command, Stdio}; +use std::process::Command; use std::time::{Duration, SystemTime, UNIX_EPOCH}; use anyhow::{anyhow, Context, Result}; @@ -26,7 +26,6 @@ const RESOURCE_PROMPT_TTL_SECS: u64 = 5 * 60; const PAGE_SIZE: usize = 3; const DOWNLOAD_PROMPT_TTL_SECS: u64 = 5 * 60; const FINISH_TITLE_PROMPT_TTL_SECS: u64 = 5 * 60; -const SYNC_X_PROMPT_TTL_SECS: u64 = 10 * 60; #[derive(Debug, Clone)] struct Config { @@ -39,7 +38,6 @@ struct Config { data_dir: PathBuf, retry_interval_seconds: Option, sync: Option, - sync_x: Option, } #[derive(Debug, Deserialize, Clone)] @@ -53,7 +51,6 @@ struct ConfigFile { data_dir: PathBuf, retry_interval_seconds: Option, sync: Option, - sync_x: Option, } #[derive(Debug, Deserialize, Clone)] @@ -70,15 +67,6 @@ struct SyncConfig { token_file: PathBuf, } -#[derive(Debug, Deserialize, Clone)] -struct SyncXConfig { - source_project_path: PathBuf, - #[serde(default)] - work_dir: Option, - #[serde(default)] - python_bin: Option, -} - #[derive(Parser, Debug)] struct Args { #[arg(long)] @@ -269,12 +257,6 @@ struct FinishTitlePrompt { expires_at: u64, } -#[derive(Clone, Debug)] -struct SyncXCookiePrompt { - prompt_message_id: MessageId, - expires_at: u64, -} - #[derive(Clone, Debug)] struct UndoSession { chat_id: i64, @@ -337,7 +319,6 @@ struct AppState { download_pickers: Mutex>, download_link_prompts: Mutex>, finish_title_prompts: Mutex>, - sync_x_cookie_prompts: Mutex>, queue: Mutex>, undo: Mutex>, queue_path: PathBuf, @@ -385,7 +366,6 @@ async fn main() -> Result<()> { download_pickers: Mutex::new(HashMap::new()), download_link_prompts: Mutex::new(HashMap::new()), finish_title_prompts: Mutex::new(HashMap::new()), - sync_x_cookie_prompts: Mutex::new(HashMap::new()), queue: Mutex::new(load_queue(&queue_path)?), undo: Mutex::new(undo), queue_path, @@ -515,32 +495,6 @@ async fn handle_message( return Ok(()); } - let mut expired_sync_x_prompt: Option = None; - let pending_sync_x_prompt = { - let mut prompts = state.sync_x_cookie_prompts.lock().await; - if let Some(prompt) = prompts.remove(&msg.chat.id.0) { - if prompt.expires_at > now_ts() { - Some(prompt) - } else { - expired_sync_x_prompt = Some(prompt); - None - } - } else { - None - } - }; - - if let Some(prompt) = expired_sync_x_prompt { - let _ = bot - .delete_message(msg.chat.id, prompt.prompt_message_id) - .await; - } - - if let Some(prompt) = pending_sync_x_prompt { - handle_sync_x_cookie_response(&bot, msg.chat.id, msg.id, &state, &text, prompt).await?; - return Ok(()); - } - if let Some(cmd) = parse_command(&text) { let rest = text .splitn(2, |c: char| c.is_whitespace()) @@ -549,7 +503,7 @@ async fn handle_message( .trim(); match cmd { "start" | "help" => { - let help = "Send any text to save it. Commands: /add , /list, /search , /download [url], /undos, /reset_peeked, /pull, /pull theirs, /push, /sync, /sync_x. Use --- to split a message into multiple items. In list views, use buttons for Mark Finished, Add Resource, Delete, Random. Quick actions: reply with del/delete to remove the current item, or send norm to normalize links."; + let help = "Send any text to save it. Commands: /add , /list, /search , /download [url], /undos, /reset_peeked, /pull, /pull theirs, /push, /sync. Use --- to split a message into multiple items. In list views, use buttons for Mark Finished, Add Resource, Delete, Random. Quick actions: reply with del/delete to remove the current item, or send norm to normalize links."; bot.send_message(msg.chat.id, help).await?; return Ok(()); } @@ -605,11 +559,6 @@ async fn handle_message( let _ = bot.delete_message(msg.chat.id, msg.id).await; return Ok(()); } - "sync_x" => { - handle_sync_x_command(bot.clone(), msg.clone(), state).await?; - let _ = bot.delete_message(msg.chat.id, msg.id).await; - return Ok(()); - } _ => { // Unknown command, fall through as text. } @@ -1201,86 +1150,6 @@ async fn handle_sync_command( Ok(()) } -async fn handle_sync_x_command( - bot: Bot, - msg: Message, - state: std::sync::Arc, -) -> Result<()> { - if state.config.sync_x.is_none() { - send_error( - &bot, - msg.chat.id, - "sync_x not configured. Set settings.sync_x.source_project_path (and optionally settings.sync_x.python_bin/work_dir).", - ) - .await?; - return Ok(()); - } - - let prompt_text = "Paste the Cloudflare cookie header string from x.com (must include auth_token and ct0)."; - let sent = bot.send_message(msg.chat.id, prompt_text).await?; - state.sync_x_cookie_prompts.lock().await.insert( - msg.chat.id.0, - SyncXCookiePrompt { - prompt_message_id: sent.id, - expires_at: now_ts() + SYNC_X_PROMPT_TTL_SECS, - }, - ); - Ok(()) -} - -async fn handle_sync_x_cookie_response( - bot: &Bot, - chat_id: ChatId, - message_id: MessageId, - state: &std::sync::Arc, - text: &str, - prompt: SyncXCookiePrompt, -) -> Result<()> { - let cookie_header = text.trim(); - if cookie_header.is_empty() { - send_error(bot, chat_id, "Cookie header is empty. Paste the full header string.").await?; - state.sync_x_cookie_prompts.lock().await.insert( - chat_id.0, - SyncXCookiePrompt { - prompt_message_id: prompt.prompt_message_id, - expires_at: now_ts() + SYNC_X_PROMPT_TTL_SECS, - }, - ); - let _ = bot.delete_message(chat_id, message_id).await; - return Ok(()); - } - - let _ = bot.delete_message(chat_id, prompt.prompt_message_id).await; - let _ = bot.delete_message(chat_id, message_id).await; - - let status_msg = bot.send_message(chat_id, "Syncing X bookmarks...").await?; - let config = state.config.clone(); - let cookie_header = cookie_header.to_string(); - let outcome = tokio::task::spawn_blocking(move || run_sync_x(&config, &cookie_header)) - .await - .context("sync_x task failed")?; - let _ = bot.delete_message(chat_id, status_msg.id).await; - - match outcome { - Ok(sync_outcome) => { - if sync_outcome.extracted_count == 0 { - send_ephemeral(bot, chat_id, "No X bookmarks found.", ACK_TTL_SECS).await?; - } else { - let text = format!( - "X sync complete: extracted {}, added {}, skipped {} duplicates.", - sync_outcome.extracted_count, sync_outcome.added_count, sync_outcome.duplicate_count - ); - bot.send_message(chat_id, text).await?; - } - } - Err(err) => { - send_error(bot, chat_id, &format!("sync_x failed: {}", err)).await?; - } - } - - Ok(()) -} - async fn handle_undos_command( bot: Bot, msg: Message, @@ -2983,13 +2852,6 @@ enum SyncOutcome { Synced, } -#[derive(Debug)] -struct SyncXOutcome { - extracted_count: usize, - added_count: usize, - duplicate_count: usize, -} - async fn queue_op(state: &std::sync::Arc, op: QueuedOp) -> Result<()> { let mut queue = state.queue.lock().await; queue.push(op); @@ -3293,233 +3155,6 @@ fn run_sync(sync: &SyncConfig) -> Result { } } -fn run_sync_x(config: &Config, cookie_header: &str) -> Result { - let sync_x = config - .sync_x - .as_ref() - .ok_or_else(|| anyhow!("sync_x is not configured."))?; - - let source_project = &sync_x.source_project_path; - if !source_project.exists() { - return Err(anyhow!( - "sync_x source project path not found: {}", - source_project.display() - )); - } - if !source_project.is_dir() { - return Err(anyhow!( - "sync_x source project path is not a directory: {}", - source_project.display() - )); - } - - let work_dir = sync_x - .work_dir - .clone() - .unwrap_or_else(|| config.data_dir.join("sync-x")); - prepare_sync_x_workspace(source_project, &work_dir)?; - - let python_bin = resolve_sync_x_python_bin(sync_x); - let creds_path = work_dir.join("creds.txt"); - let bookmarks_path = work_dir.join("bookmarks.txt"); - let _ = fs::remove_file(&creds_path); - let _ = fs::remove_file(&bookmarks_path); - - run_python_script( - &python_bin, - &work_dir, - "isolate_cookies.py", - &[], - Some(cookie_header), - )?; - run_python_script(&python_bin, &work_dir, "main.py", &["--mode", "a"], None)?; - - let urls = if bookmarks_path.exists() { - read_sync_x_urls(&bookmarks_path)? - } else { - Vec::new() - }; - let (added_count, duplicate_count) = prepend_urls_to_read_later_sync(&config.read_later_path, &urls)?; - - let _ = fs::remove_file(&bookmarks_path); - let _ = fs::remove_file(&creds_path); - - Ok(SyncXOutcome { - extracted_count: urls.len(), - added_count, - duplicate_count, - }) -} - -fn resolve_sync_x_python_bin(sync_x: &SyncXConfig) -> PathBuf { - if let Some(path) = &sync_x.python_bin { - return path.clone(); - } - let venv_python3 = sync_x.source_project_path.join(".venv/bin/python3"); - if venv_python3.exists() { - return venv_python3; - } - let venv_python = sync_x.source_project_path.join(".venv/bin/python"); - if venv_python.exists() { - return venv_python; - } - PathBuf::from("python3") -} - -fn prepare_sync_x_workspace(source_project: &Path, work_dir: &Path) -> Result<()> { - fs::create_dir_all(work_dir) - .with_context(|| format!("create sync_x work dir {}", work_dir.display()))?; - - for file in [ - "main.py", - "isolate_cookies.py", - "requirements.txt", - "README.md", - "LICENSE", - ] { - let src = source_project.join(file); - let dest = work_dir.join(file); - if !src.exists() { - if matches!(file, "main.py" | "isolate_cookies.py") { - return Err(anyhow!( - "sync_x source is missing required file: {}", - src.display() - )); - } - continue; - } - fs::copy(&src, &dest) - .with_context(|| format!("copy {} to {}", src.display(), dest.display()))?; - } - - Ok(()) -} - -fn run_python_script( - python_bin: &Path, - work_dir: &Path, - script: &str, - args: &[&str], - stdin_input: Option<&str>, -) -> Result<()> { - let mut cmd = Command::new(python_bin); - cmd.current_dir(work_dir) - .arg(script) - .args(args) - .stdout(Stdio::piped()) - .stderr(Stdio::piped()); - - if stdin_input.is_some() { - cmd.stdin(Stdio::piped()); - } - - let mut child = cmd - .spawn() - .with_context(|| format!("run {} {}", python_bin.display(), script))?; - if let Some(input) = stdin_input { - if let Some(mut stdin) = child.stdin.take() { - stdin - .write_all(input.as_bytes()) - .context("write stdin to python script")?; - if !input.ends_with('\n') { - stdin - .write_all(b"\n") - .context("write newline to python script")?; - } - } - } - - let output = child.wait_with_output().context("wait for python script")?; - if !output.status.success() { - let stdout = String::from_utf8_lossy(&output.stdout).to_string(); - let stderr = String::from_utf8_lossy(&output.stderr).to_string(); - let tail = summarize_process_output(&stdout, &stderr); - return Err(anyhow!( - "{} {} failed (status {}):\n{}", - python_bin.display(), - script, - output.status, - tail - )); - } - Ok(()) -} - -fn summarize_process_output(stdout: &str, stderr: &str) -> String { - let stderr_trimmed = stderr.trim(); - if !stderr_trimmed.is_empty() { - return trim_tail(stderr_trimmed, 1200); - } - let stdout_trimmed = stdout.trim(); - if !stdout_trimmed.is_empty() { - return trim_tail(stdout_trimmed, 1200); - } - "No output captured.".to_string() -} - -fn trim_tail(text: &str, max_chars: usize) -> String { - if text.len() <= max_chars { - return text.to_string(); - } - let mut cutoff = 0usize; - for (idx, _) in text.char_indices() { - if idx >= text.len().saturating_sub(max_chars) { - cutoff = idx; - break; - } - } - format!("...{}", &text[cutoff..]) -} - -fn read_sync_x_urls(path: &Path) -> Result> { - let contents = - fs::read_to_string(path).with_context(|| format!("read bookmarks file {}", path.display()))?; - let mut seen = HashSet::new(); - let mut urls = Vec::new(); - for line in contents.lines() { - let trimmed = line.trim(); - if trimmed.is_empty() { - continue; - } - if !(trimmed.starts_with("http://") || trimmed.starts_with("https://")) { - continue; - } - if seen.insert(trimmed.to_string()) { - urls.push(trimmed.to_string()); - } - } - Ok(urls) -} - -fn prepend_urls_to_read_later_sync(path: &Path, urls: &[String]) -> Result<(usize, usize)> { - let (preamble, mut entries) = read_entries(path)?; - let mut existing = HashSet::new(); - for entry in &entries { - existing.insert(entry.block_string()); - } - - let mut new_entries = Vec::new(); - let mut duplicate_count = 0usize; - for url in urls { - let entry = EntryBlock::from_text(url); - let block = entry.block_string(); - if existing.insert(block) { - new_entries.push(entry); - } else { - duplicate_count += 1; - } - } - - if !new_entries.is_empty() { - for entry in new_entries.iter().rev() { - entries.insert(0, entry.clone()); - } - write_entries(path, &preamble, &entries)?; - } - - Ok((new_entries.len(), duplicate_count)) -} - struct GitOutput { status: std::process::ExitStatus, stdout: String, @@ -4908,17 +4543,6 @@ fn load_config(path: &Path) -> Result { .unwrap_or_else(|| Path::new(".")) .join("Misc/images_misc"); let media_dir = config_file.media_dir.unwrap_or(default_media_dir); - let sync_x = config_file.sync_x.map(|sync_x| SyncXConfig { - source_project_path: resolve_user_id_path(&sync_x.source_project_path, config_dir), - work_dir: sync_x - .work_dir - .as_ref() - .map(|p| resolve_user_id_path(p, config_dir)), - python_bin: sync_x - .python_bin - .as_ref() - .map(|p| resolve_user_id_path(p, config_dir)), - }); Ok(Config { token: config_file.token, user_id, @@ -4929,7 +4553,6 @@ fn load_config(path: &Path) -> Result { data_dir: config_file.data_dir, retry_interval_seconds: config_file.retry_interval_seconds, sync: config_file.sync, - sync_x, }) } @@ -5500,7 +5123,6 @@ mod tests { data_dir: PathBuf::from("/tmp/data"), retry_interval_seconds: None, sync: None, - sync_x: None, } } @@ -5842,53 +5464,4 @@ mod tests { }; assert!(is_push_up_to_date(&output)); } - - #[test] - fn read_sync_x_urls_keeps_unique_http_lines() { - let temp = TempDir::new().unwrap(); - let path = temp.path().join("bookmarks.txt"); - fs::write( - &path, - "https://a.example\n\nnot-a-url\nhttps://b.example\nhttps://a.example\n", - ) - .unwrap(); - let urls = read_sync_x_urls(&path).unwrap(); - assert_eq!( - urls, - vec![ - "https://a.example".to_string(), - "https://b.example".to_string() - ] - ); - } - - #[test] - fn prepend_urls_to_read_later_sync_preserves_input_order() { - let temp = TempDir::new().unwrap(); - let path = temp.path().join("read-later.md"); - fs::write(&path, "- https://already.example\n").unwrap(); - let urls = vec![ - "https://one.example".to_string(), - "https://two.example".to_string(), - "https://already.example".to_string(), - ]; - - let (added, duplicates) = prepend_urls_to_read_later_sync(&path, &urls).unwrap(); - assert_eq!(added, 2); - assert_eq!(duplicates, 1); - - let (_, entries) = read_entries(&path).unwrap(); - let blocks = entries - .iter() - .map(|entry| entry.block_string()) - .collect::>(); - assert_eq!( - blocks, - vec![ - "- https://one.example".to_string(), - "- https://two.example".to_string(), - "- https://already.example".to_string(), - ] - ); - } } diff --git a/vendor/extract-x-bookmarks/LICENSE b/vendor/extract-x-bookmarks/LICENSE deleted file mode 100644 index ff761d6..0000000 --- a/vendor/extract-x-bookmarks/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2025-present thegeneralist01 - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/vendor/extract-x-bookmarks/README.md b/vendor/extract-x-bookmarks/README.md deleted file mode 100644 index 867ceb1..0000000 --- a/vendor/extract-x-bookmarks/README.md +++ /dev/null @@ -1,46 +0,0 @@ -# extract-x-bookmarks -Written by ChatGPT, not me. - -[Works fine](https://x.com/thegeneralist01/status/2003819489989926932). - -## Requirements -- Python -- Dependencies in `requirements.txt` - -## Usage -- Create a virtual environment if you want (`uv venv .venv` or whatever), then activate it. (This is absolutely optional). - -- Install dependencies: -```bash -pip install -r requirements.txt -``` - -- Now, to get the **Cloudflare cookies,** this is quickest way I use: - - Download the [Cookie-Editor extension](https://cookie-editor.com/). - - Open your browser and go to [Twitter](https://x.com). - - Open the Cookie-Editor extension, press the export button (bottom right) and export as Header String. (It will copy it to the clipboard). - - Open the terminal, and run (the python file): - ```bash - python isolate_cookies.py - ``` - - Paste the clipboard content. - - It will then put the two needed cookies into `creds.txt`, which the script will use. - - **If you want to do that without installing the extension,** the `creds.txt` file will have the following format: - ``` - auth_token=blablabla;ct0=blablabla - ``` - -- A few things to know before running the script: - - It will create a `bookmarks.txt` file with the URLs of your bookmarks. - - The script reads all paginated bookmark responses and, by default, keeps running until there are no bookmarks left. - - Write mode defaults to append (`a`) so new runs continue the timeline in order (newest first to oldest). - - If needed, you can still choose interactively with `python main.py --mode ask`. - - It will take some time in the end to **unbookmark** the fetched bookmarks. Each time 10 new bookmarks are unbookmarked, it will print a message. - -- Run the script until you have all your bookmarks extracted: -```bash -python main.py -``` - -## License -Licensed under the [MIT License](LICENSE). diff --git a/vendor/extract-x-bookmarks/isolate_cookies.py b/vendor/extract-x-bookmarks/isolate_cookies.py deleted file mode 100644 index 5bcefe7..0000000 --- a/vendor/extract-x-bookmarks/isolate_cookies.py +++ /dev/null @@ -1,19 +0,0 @@ -cookie_str = input("Input your cookies in the Header String format: ").strip() - -cookie_dict = {} -for item in cookie_str.split(";"): - part = item.strip() - if not part or "=" not in part: - continue - key, value = part.split("=", 1) - cookie_dict[key.strip()] = value.strip() - -auth_token = cookie_dict.get("auth_token", "") -ct0 = cookie_dict.get("ct0", "") -if not auth_token or not ct0: - raise SystemExit("Missing auth_token or ct0 in the provided cookie header.") - -login_string = f"auth_token={auth_token};ct0={ct0}" - -with open("creds.txt", "w") as file: - file.write(login_string) diff --git a/vendor/extract-x-bookmarks/main.py b/vendor/extract-x-bookmarks/main.py deleted file mode 100644 index 1b18246..0000000 --- a/vendor/extract-x-bookmarks/main.py +++ /dev/null @@ -1,439 +0,0 @@ -import argparse -import time -import os -from twitter.account import Account - -def is_rate_limit_error(error): - """ - Check if an error is a rate limit error (429 Too Many Requests). - - Args: - error: Exception object or error message - - Returns: - True if it's a rate limit error, False otherwise - """ - error_str = str(error).lower() - # Check for common rate limit indicators - rate_limit_indicators = [ - '429', - 'too many requests', - 'rate limit', - 'rate_limit', - 'exceeded', - 'quota', - 'limit exceeded' - ] - return any(indicator in error_str for indicator in rate_limit_indicators) - - -def handle_rate_limit_error(error, retry_count, base_wait_time=60): - """ - Handle rate limit errors with exponential backoff. - - Args: - error: The exception that occurred - retry_count: Number of times we've retried - base_wait_time: Base wait time in seconds (default 60s = 1 minute) - - Returns: - Wait time in seconds before retrying - """ - # Exponential backoff: 1min, 2min, 4min, 8min, etc. - wait_time = base_wait_time * (2 ** retry_count) - # Cap at 15 minutes (900 seconds) - wait_time = min(wait_time, 900) - - print(f"\n ⚠ Rate limit detected (attempt {retry_count + 1})") - print(f" ⏳ Waiting {wait_time}s ({wait_time/60:.1f} minutes) before retry...") - - return wait_time - - -def extract_bookmark_entries_from_response(response_data): - """ - Extract bookmark entries (tweet IDs and user info) from the response. - - Args: - response_data: The response data from account.bookmarks() - - Returns: - List of tuples: [(tweet_id, username), ...] - """ - bookmark_entries = [] - seen_ids = set() - - def add_entry(tweet_id, username): - tid = str(tweet_id).strip() - if not tid or tid in seen_ids: - return - seen_ids.add(tid) - bookmark_entries.append((tid, username)) - - try: - # First, check if response is a simple list of tweet IDs or tweet objects. - payloads = [] - if isinstance(response_data, list): - # Check if it's a list of simple values (tweet IDs) - if len(response_data) > 0 and isinstance(response_data[0], (str, int)): - # Simple list of tweet IDs - for tid in response_data: - add_entry(tid, None) - return bookmark_entries - # Check if it's a list of tweet objects - elif len(response_data) > 0 and isinstance(response_data[0], dict): - # If it has 'id' or 'id_str' field, it might be a simple tweet object - if 'id' in response_data[0] or 'id_str' in response_data[0]: - for item in response_data: - tweet_id = item.get('id_str') or str(item.get('id', '')) - username = item.get('user', {}).get('screen_name') if 'user' in item else None - if tweet_id: - add_entry(tweet_id, username) - return bookmark_entries - - # Otherwise, treat as paginated GraphQL response structure. - payloads = [item for item in response_data if isinstance(item, dict)] - elif isinstance(response_data, dict): - payloads = [response_data] - else: - return bookmark_entries - - for data in payloads: - # Navigate through the nested GraphQL structure (similar to tweets structure). - timeline = data.get('data', {}).get('bookmark_timeline_v2', {}).get('timeline', {}) - if not timeline: - # Try alternative path. - timeline = data.get('data', {}).get('user', {}).get('result', {}).get('timeline_v2', {}).get('timeline', {}) - - instructions = timeline.get('instructions', []) - - for instruction in instructions: - if instruction.get('type') == 'TimelineAddEntries': - entries = instruction.get('entries', []) - for entry in entries: - content = entry.get('content', {}) - # Extract bookmark entries - if content.get('entryType') == 'TimelineTimelineItem': - item_content = content.get('itemContent', {}) - if item_content.get('itemType') == 'TimelineTweet': - tweet_result = item_content.get('tweet_results', {}).get('result', {}) - # Get rest_id (the tweet ID) - tweet_id = tweet_result.get('rest_id') - - # Get username from tweet result - username = None - # Try to get username from user info in tweet - user_info = tweet_result.get('core', {}).get('user_results', {}).get('result', {}) - if user_info: - legacy_user = user_info.get('legacy', {}) - if legacy_user: - username = legacy_user.get('screen_name') - - if tweet_id: - add_entry(tweet_id, username) - - return bookmark_entries - except Exception as e: - print(f" ⚠ Warning: Error extracting bookmark entries: {e}") - return bookmark_entries - - -def extract_all_bookmarks(account, delay_between_requests=2.0): - """ - Extract all bookmarks from the account with proper rate limit handling. - Account.bookmarks() returns all bookmarks in a single call. - - Args: - account: Account instance from twitter.account - delay_between_requests: Delay in seconds between requests (not used for single call, but kept for consistency) - - Returns: - List of tuples: [(tweet_id, username), ...] (newest first) - """ - all_bookmarks = [] - retry_count = 0 - - print("Starting to extract bookmarks...") - print("-" * 50) - - try: - print("Fetching bookmarks...", end=" ") - - # Fetch all bookmarks (single call, no pagination needed) - try: - response_data = account.bookmarks() - retry_count = 0 - - except Exception as e: - error_msg = str(e) - print(f"\n ❌ Error fetching bookmarks: {error_msg}") - - # Check if it's a rate limit error - if is_rate_limit_error(e): - wait_time = handle_rate_limit_error(e, retry_count) - time.sleep(wait_time) - retry_count += 1 - # Retry the request - try: - response_data = account.bookmarks() - retry_count = 0 - except Exception as retry_error: - print(f" ❌ Failed after retry: {retry_error}") - raise - else: - # For non-rate-limit errors, wait a bit and retry once - if retry_count < 2: - wait_time = delay_between_requests * 3 - print(f" ⏳ Waiting {wait_time}s before retry...") - time.sleep(wait_time) - retry_count += 1 - try: - response_data = account.bookmarks() - retry_count = 0 - except Exception as retry_error: - print(f" ❌ Failed after retry: {retry_error}") - raise - else: - print(f" ❌ Max retries reached. Stopping.") - raise - - # Extract bookmark entries from response - all_bookmarks = extract_bookmark_entries_from_response(response_data) - - if all_bookmarks: - print(f"✓ Retrieved {len(all_bookmarks)} bookmarks") - else: - print("⚠ No bookmarks found") - - except KeyboardInterrupt: - print("\n\n⚠ Extraction interrupted by user") - except Exception as e: - print(f"\n\n❌ Error occurred: {str(e)}") - raise - - print(f"\n{'='*80}") - print(f"Bookmark extraction complete!") - print(f" Total bookmarks found: {len(all_bookmarks)}") - print(f"{'='*80}\n") - - return all_bookmarks - - -def save_bookmarks_and_unbookmark( - account, - bookmarks, - output_file="bookmarks.txt", - delay_between_requests=2.0, - write_mode="a", -): - """ - Save bookmark URLs to file (newest first) and unbookmark each one. - - Args: - account: Account instance from twitter.account - bookmarks: List of tuples [(tweet_id, username), ...] - output_file: Output file path - delay_between_requests: Delay in seconds between unbookmark requests - """ - print(f"\nSaving bookmarks to {output_file} and unbookmarking...") - print("-" * 50) - - # Read existing content if file exists - existing_content = "" - if os.path.exists(output_file): - with open(output_file, "r") as f: - existing_content = f.read() - - # Choose whether to prepend or append. - if write_mode not in ['ask', 'p', 'a']: - raise ValueError("write_mode must be one of: ask, p, a") - - if write_mode == "ask": - while True: - choice = input("Prepend (p) or append (a) new bookmarks? [p/a] (default a): ").strip().lower() - if choice == "": - choice = "a" - if choice in ['p', 'a']: - break - print(" ⚠ Invalid choice. Please enter 'p' for prepend or 'a' for append.") - else: - choice = write_mode - - prepend = (choice == 'p') - - # Collect new bookmark URLs (newest first) - new_bookmark_urls = [] - unbookmark_count = 0 - retry_count = 0 - - # Process bookmarks (they should already be in order, newest first) - for tweet_id, username in bookmarks: - # Construct URL - if username: - url = f"https://twitter.com/{username}/status/{tweet_id}" - else: - # Fallback if username not available - url = f"https://twitter.com/i/web/status/{tweet_id}" - - # Add to new bookmarks list - new_bookmark_urls.append(url) - - # Unbookmark the tweet - try: - account.unbookmark(tweet_id) - unbookmark_count += 1 - retry_count = 0 # Reset retry count on success - - if unbookmark_count % 10 == 0: - print(f" ✓ Processed {unbookmark_count}/{len(bookmarks)} bookmarks...") - - except Exception as e: - error_msg = str(e) - print(f"\n ⚠ Error unbookmarking tweet {tweet_id}: {error_msg}") - - # Check if it's a rate limit error - if is_rate_limit_error(e): - wait_time = handle_rate_limit_error(e, retry_count) - time.sleep(wait_time) - retry_count += 1 - # Retry the unbookmark - try: - account.unbookmark(tweet_id) - unbookmark_count += 1 - retry_count = 0 - except Exception as retry_error: - print(f" ❌ Failed to unbookmark {tweet_id} after retry: {retry_error}") - else: - # For other errors, just log and continue - if retry_count < 2: - wait_time = delay_between_requests * 3 - print(f" ⏳ Waiting {wait_time}s before retry...") - time.sleep(wait_time) - retry_count += 1 - try: - account.unbookmark(tweet_id) - unbookmark_count += 1 - retry_count = 0 - except Exception as retry_error: - print(f" ❌ Failed to unbookmark {tweet_id} after retry: {retry_error}") - else: - print(f" ❌ Skipping unbookmark for {tweet_id} after max retries") - - # Rate limiting: wait before next unbookmark request - if delay_between_requests > 0: - time.sleep(delay_between_requests) - - # Write bookmarks based on user's choice - with open(output_file, "w") as f: - if prepend: - # Write new bookmarks first (prepended), then existing content - for url in new_bookmark_urls: - f.write(f"{url}\n") - if existing_content: - f.write(existing_content) - else: - # Write existing content first, then new bookmarks (appended) - if existing_content: - f.write(existing_content) - for url in new_bookmark_urls: - f.write(f"{url}\n") - - print(f"\n{'='*80}") - print(f"Processing complete!") - print(f" Total bookmarks saved: {len(bookmarks)}") - print(f" Total unbookmarked: {unbookmark_count}") - print(f" Output file: {output_file}") - print(f"{'='*80}\n") - return { - "saved_count": len(bookmarks), - "unbookmarked_count": unbookmark_count, - } - - -def parse_args(): - parser = argparse.ArgumentParser(description="Extract and unbookmark X/Twitter bookmarks.") - parser.add_argument("--output-file", default="bookmarks.txt", help="Path to output bookmarks file.") - parser.add_argument( - "--delay-between-requests", - type=float, - default=2.0, - help="Seconds to wait between unbookmark requests.", - ) - parser.add_argument( - "--mode", - choices=["a", "p", "ask"], - default="a", - help="Write mode for bookmark file: append (a), prepend (p), or ask interactively.", - ) - parser.add_argument( - "--single-run", - action="store_true", - help="Run one extraction pass only.", - ) - parser.add_argument( - "--max-runs", - type=int, - default=100, - help="Maximum number of extraction runs when syncing until empty.", - ) - parser.add_argument( - "--delay-between-runs", - type=float, - default=1.0, - help="Seconds to wait between extraction runs.", - ) - return parser.parse_args() - - -if __name__ == "__main__": - args = parse_args() - - # Load cookies - with open("creds.txt", "r") as file: - cookie_str = file.read().strip() - cookie_dict = dict(item.split("=", 1) for item in cookie_str.split(";")) - - # Initialize account - account = Account(cookies=cookie_dict) - - # Configuration - delay_between_requests = args.delay_between_requests - output_file = args.output_file - - total_saved = 0 - total_unbookmarked = 0 - runs = 0 - - while runs < args.max_runs: - runs += 1 - print(f"\nRun {runs}: fetching bookmarks...") - bookmarks = extract_all_bookmarks(account, delay_between_requests=delay_between_requests) - - if not bookmarks: - print("\nNo bookmarks found.") - break - - # Save bookmarks to file and unbookmark them. - stats = save_bookmarks_and_unbookmark( - account, - bookmarks, - output_file=output_file, - delay_between_requests=delay_between_requests, - write_mode=args.mode, - ) - total_saved += stats["saved_count"] - total_unbookmarked += stats["unbookmarked_count"] - print(f"\nSuccessfully processed {len(bookmarks)} bookmarks in run {runs}") - - if args.single_run: - break - if stats["unbookmarked_count"] == 0: - print("No bookmarks were unbookmarked in this run; stopping to avoid an infinite loop.") - break - if runs < args.max_runs and args.delay_between_runs > 0: - time.sleep(args.delay_between_runs) - - if runs >= args.max_runs: - print(f"\nReached max runs ({args.max_runs}) before bookmarks were fully exhausted.") - - print(f"\nDone. Total saved: {total_saved}, total unbookmarked: {total_unbookmarked}") diff --git a/vendor/extract-x-bookmarks/requirements.txt b/vendor/extract-x-bookmarks/requirements.txt deleted file mode 100644 index c76d673..0000000 --- a/vendor/extract-x-bookmarks/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -twitter-api-client == 0.10.22