From 874f3ec570f24e2cbf08f9d72e7d7db73e63053b Mon Sep 17 00:00:00 2001 From: TheGeneralist <180094941+thegeneralist01@users.noreply.github.com> Date: Tue, 17 Feb 2026 20:33:28 +0100 Subject: [PATCH] Add /sync_x command with X bookmarks import flow --- .gitignore | 2 + README.md | 18 + src/main.rs | 431 ++++++++++++++++- vendor/extract-x-bookmarks/LICENSE | 21 + vendor/extract-x-bookmarks/README.md | 46 ++ vendor/extract-x-bookmarks/isolate_cookies.py | 19 + vendor/extract-x-bookmarks/main.py | 439 ++++++++++++++++++ vendor/extract-x-bookmarks/requirements.txt | 1 + 8 files changed, 975 insertions(+), 2 deletions(-) create mode 100644 vendor/extract-x-bookmarks/LICENSE create mode 100644 vendor/extract-x-bookmarks/README.md create mode 100644 vendor/extract-x-bookmarks/isolate_cookies.py create mode 100644 vendor/extract-x-bookmarks/main.py create mode 100644 vendor/extract-x-bookmarks/requirements.txt diff --git a/.gitignore b/.gitignore index ea8c4bf..04ae708 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ /target +__pycache__/ +*.pyc diff --git a/README.md b/README.md index ad1203e..5117bfc 100644 --- a/README.md +++ b/README.md @@ -30,3 +30,21 @@ user_id = "/run/agenix/readlater-user-id" ```toml user_id = { file = "/run/agenix/readlater-user-id" } ``` + +### `sync_x` + +`/sync_x` imports X/Twitter bookmarks into Read Later. + +- The bot prompts for the Cloudflare cookie header string (`auth_token` + `ct0`). +- It runs `isolate_cookies.py`, then `main.py --mode a`. +- Extracted URLs are prepended to Read Later. +- Temporary `creds.txt` / `bookmarks.txt` files are removed after import. 
+ +Config example: + +```toml +[sync_x] +source_project_path = "/Users/thegeneralist/personal/bookkeeper/vendor/extract-x-bookmarks" +work_dir = "/var/lib/readlater-bot/sync-x" +python_bin = "/Users/thegeneralist/personal/extract-x-bookmarks/.venv/bin/python" +``` diff --git a/src/main.rs b/src/main.rs index ac97051..3fee9b8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,7 +3,7 @@ use std::fs; use std::io::Write; use std::os::unix::fs::PermissionsExt; use std::path::{Path, PathBuf}; -use std::process::Command; +use std::process::{Command, Stdio}; use std::time::{Duration, SystemTime, UNIX_EPOCH}; use anyhow::{anyhow, Context, Result}; @@ -26,6 +26,7 @@ const RESOURCE_PROMPT_TTL_SECS: u64 = 5 * 60; const PAGE_SIZE: usize = 3; const DOWNLOAD_PROMPT_TTL_SECS: u64 = 5 * 60; const FINISH_TITLE_PROMPT_TTL_SECS: u64 = 5 * 60; +const SYNC_X_PROMPT_TTL_SECS: u64 = 10 * 60; #[derive(Debug, Clone)] struct Config { @@ -38,6 +39,7 @@ struct Config { data_dir: PathBuf, retry_interval_seconds: Option, sync: Option, + sync_x: Option, } #[derive(Debug, Deserialize, Clone)] @@ -51,6 +53,7 @@ struct ConfigFile { data_dir: PathBuf, retry_interval_seconds: Option, sync: Option, + sync_x: Option, } #[derive(Debug, Deserialize, Clone)] @@ -67,6 +70,15 @@ struct SyncConfig { token_file: PathBuf, } +#[derive(Debug, Deserialize, Clone)] +struct SyncXConfig { + source_project_path: PathBuf, + #[serde(default)] + work_dir: Option, + #[serde(default)] + python_bin: Option, +} + #[derive(Parser, Debug)] struct Args { #[arg(long)] @@ -257,6 +269,12 @@ struct FinishTitlePrompt { expires_at: u64, } +#[derive(Clone, Debug)] +struct SyncXCookiePrompt { + prompt_message_id: MessageId, + expires_at: u64, +} + #[derive(Clone, Debug)] struct UndoSession { chat_id: i64, @@ -319,6 +337,7 @@ struct AppState { download_pickers: Mutex>, download_link_prompts: Mutex>, finish_title_prompts: Mutex>, + sync_x_cookie_prompts: Mutex>, queue: Mutex>, undo: Mutex>, queue_path: PathBuf, @@ -366,6 +385,7 @@ async fn 
main() -> Result<()> {
         download_pickers: Mutex::new(HashMap::new()),
         download_link_prompts: Mutex::new(HashMap::new()),
         finish_title_prompts: Mutex::new(HashMap::new()),
+        sync_x_cookie_prompts: Mutex::new(HashMap::new()),
         queue: Mutex::new(load_queue(&queue_path)?),
         undo: Mutex::new(undo),
         queue_path,
@@ -495,6 +515,32 @@ async fn handle_message(
         return Ok(());
     }
 
+    let mut expired_sync_x_prompt: Option<SyncXCookiePrompt> = None;
+    let pending_sync_x_prompt = {
+        let mut prompts = state.sync_x_cookie_prompts.lock().await;
+        if let Some(prompt) = prompts.remove(&msg.chat.id.0) {
+            if prompt.expires_at > now_ts() {
+                Some(prompt)
+            } else {
+                expired_sync_x_prompt = Some(prompt);
+                None
+            }
+        } else {
+            None
+        }
+    };
+
+    if let Some(prompt) = expired_sync_x_prompt {
+        let _ = bot
+            .delete_message(msg.chat.id, prompt.prompt_message_id)
+            .await;
+    }
+
+    if let Some(prompt) = pending_sync_x_prompt {
+        handle_sync_x_cookie_response(&bot, msg.chat.id, msg.id, &state, &text, prompt).await?;
+        return Ok(());
+    }
+
     if let Some(cmd) = parse_command(&text) {
         let rest = text
             .splitn(2, |c: char| c.is_whitespace())
@@ -503,7 +549,7 @@ async fn handle_message(
             .trim();
         match cmd {
             "start" | "help" => {
-                let help = "Send any text to save it. Commands: /add , /list, /search , /download [url], /undos, /reset_peeked, /pull, /pull theirs, /push, /sync. Use --- to split a message into multiple items. In list views, use buttons for Mark Finished, Add Resource, Delete, Random. Quick actions: reply with del/delete to remove the current item, or send norm to normalize links.";
+                let help = "Send any text to save it. Commands: /add , /list, /search , /download [url], /undos, /reset_peeked, /pull, /pull theirs, /push, /sync, /sync_x. Use --- to split a message into multiple items. In list views, use buttons for Mark Finished, Add Resource, Delete, Random. Quick actions: reply with del/delete to remove the current item, or send norm to normalize links.";
                 bot.send_message(msg.chat.id, help).await?;
                 return Ok(());
             }
@@ -559,6 +605,11 @@ async fn handle_message(
                 let _ = bot.delete_message(msg.chat.id, msg.id).await;
                 return Ok(());
             }
+            "sync_x" => {
+                handle_sync_x_command(bot.clone(), msg.clone(), state).await?;
+                let _ = bot.delete_message(msg.chat.id, msg.id).await;
+                return Ok(());
+            }
             _ => {
                 // Unknown command, fall through as text.
             }
@@ -1150,6 +1201,93 @@ async fn handle_sync_command(
     Ok(())
 }
 
+/// Starts the `/sync_x` flow by asking the chat for the x.com cookie header.
+///
+/// The pending prompt is stored per chat and expires after `SYNC_X_PROMPT_TTL_SECS`.
+async fn handle_sync_x_command(
+    bot: Bot,
+    msg: Message,
+    state: std::sync::Arc<AppState>,
+) -> Result<()> {
+    if state.config.sync_x.is_none() {
+        send_error(
+            &bot,
+            msg.chat.id,
+            "sync_x not configured. Set settings.sync_x.source_project_path (and optionally settings.sync_x.python_bin/work_dir).",
+        )
+        .await?;
+        return Ok(());
+    }
+
+    let prompt_text = "Paste the Cloudflare cookie header string from x.com (must include auth_token and ct0).";
+    let sent = bot.send_message(msg.chat.id, prompt_text).await?;
+    state.sync_x_cookie_prompts.lock().await.insert(
+        msg.chat.id.0,
+        SyncXCookiePrompt {
+            prompt_message_id: sent.id,
+            expires_at: now_ts() + SYNC_X_PROMPT_TTL_SECS,
+        },
+    );
+    Ok(())
+}
+
+/// Handles the reply to the `/sync_x` cookie prompt and runs the import.
+///
+/// The prompt and the user's message (which carries session cookies) are deleted before
+/// the blocking sync starts; the result is reported back to the chat.
+async fn handle_sync_x_cookie_response(
+    bot: &Bot,
+    chat_id: ChatId,
+    message_id: MessageId,
+    state: &std::sync::Arc<AppState>,
+    text: &str,
+    prompt: SyncXCookiePrompt,
+) -> Result<()> {
+    let cookie_header = text.trim();
+    if cookie_header.is_empty() {
+        send_error(bot, chat_id, "Cookie header is empty. Paste the full header string.").await?;
+        state.sync_x_cookie_prompts.lock().await.insert(
+            chat_id.0,
+            SyncXCookiePrompt {
+                prompt_message_id: prompt.prompt_message_id,
+                expires_at: now_ts() + SYNC_X_PROMPT_TTL_SECS,
+            },
+        );
+        let _ = bot.delete_message(chat_id, message_id).await;
+        return Ok(());
+    }
+
+    let _ = bot.delete_message(chat_id, prompt.prompt_message_id).await;
+    let _ = bot.delete_message(chat_id, message_id).await;
+
+    let status_msg = bot.send_message(chat_id, "Syncing X bookmarks...").await?;
+    let config = state.config.clone();
+    let cookie_header = cookie_header.to_string();
+    let joined = tokio::task::spawn_blocking(move || run_sync_x(&config, &cookie_header)).await;
+    // Remove the status message before propagating a join error so it never lingers.
+    let _ = bot.delete_message(chat_id, status_msg.id).await;
+    let outcome = joined.context("sync_x task failed")?;
+
+    match outcome {
+        Ok(sync_outcome) => {
+            if sync_outcome.extracted_count == 0 {
+                send_ephemeral(bot, chat_id, "No X bookmarks found.", ACK_TTL_SECS).await?;
+            } else {
+                let text = format!(
+                    "X sync complete: extracted {}, added {}, skipped {} duplicates.",
+                    sync_outcome.extracted_count, sync_outcome.added_count, sync_outcome.duplicate_count
+                );
+                bot.send_message(chat_id, text).await?;
+            }
+        }
+        Err(err) => {
+            send_error(bot, chat_id, &format!("sync_x failed: {}", err)).await?;
+        }
+    }
+
+    Ok(())
+}
+
 async fn handle_undos_command(
     bot: Bot,
     msg: Message,
@@ -2852,6 +2990,14 @@ enum SyncOutcome {
     Synced,
 }
 
+/// Counts reported to the chat after a `/sync_x` run.
+#[derive(Debug)]
+struct SyncXOutcome {
+    extracted_count: usize,
+    added_count: usize,
+    duplicate_count: usize,
+}
+
 async fn queue_op(state: &std::sync::Arc<AppState>, op: QueuedOp) -> Result<()> {
     let mut queue = state.queue.lock().await;
     queue.push(op);
@@ -3155,6 +3301,268 @@ fn run_sync(sync: &SyncConfig) -> Result<SyncOutcome> {
     }
 }
 
+/// Runs the vendored X bookmark extractor and prepends its URLs to Read Later.
+///
+/// Blocking (spawns Python subprocesses); call it via `spawn_blocking`. `creds.txt` is
+/// removed even when a script fails, and URLs already written to `bookmarks.txt` are still
+/// imported in that case because the extractor has already unbookmarked them on X.
+fn run_sync_x(config: &Config, cookie_header: &str) -> Result<SyncXOutcome> {
+    let sync_x = config
+        .sync_x
+        .as_ref()
+        .ok_or_else(|| anyhow!("sync_x is not configured."))?;
+
+    let source_project = &sync_x.source_project_path;
+    if !source_project.exists() {
+        return Err(anyhow!(
+            "sync_x source project path not found: {}",
+            source_project.display()
+        ));
+    }
+    if !source_project.is_dir() {
+        return Err(anyhow!(
+            "sync_x source project path is not a directory: {}",
+            source_project.display()
+        ));
+    }
+
+    let work_dir = sync_x
+        .work_dir
+        .clone()
+        .unwrap_or_else(|| config.data_dir.join("sync-x"));
+    prepare_sync_x_workspace(source_project, &work_dir)?;
+
+    let python_bin = resolve_sync_x_python_bin(sync_x);
+    let creds_path = work_dir.join("creds.txt");
+    let bookmarks_path = work_dir.join("bookmarks.txt");
+    let _ = fs::remove_file(&creds_path);
+    // A leftover bookmarks.txt holds URLs from a run whose import failed; keep it so the
+    // extractor appends to it and those URLs are imported now instead of being lost.
+
+    let scripts_result = run_python_script(
+        &python_bin,
+        &work_dir,
+        "isolate_cookies.py",
+        &[],
+        Some(cookie_header),
+    )
+    .and_then(|()| run_python_script(&python_bin, &work_dir, "main.py", &["--mode", "a"], None));
+    // The session cookies must never outlive the run, whether or not the scripts succeeded.
+    let _ = fs::remove_file(&creds_path);
+
+    let urls = if bookmarks_path.exists() {
+        read_sync_x_urls(&bookmarks_path)?
+    } else {
+        Vec::new()
+    };
+    let (added_count, duplicate_count) = prepend_urls_to_read_later_sync(&config.read_later_path, &urls)?;
+    // Only discard the extractor output once its URLs are stored in Read Later.
+    let _ = fs::remove_file(&bookmarks_path);
+
+    if let Err(err) = scripts_result {
+        if urls.is_empty() {
+            return Err(err);
+        }
+        return Err(anyhow!(
+            "{:#} ({} URL(s) extracted before the failure were still imported)",
+            err,
+            added_count
+        ));
+    }
+
+    Ok(SyncXOutcome {
+        extracted_count: urls.len(),
+        added_count,
+        duplicate_count,
+    })
+}
+
+/// Picks the Python interpreter: the configured `python_bin`, else the source project's
+/// `.venv` interpreter if one exists, else `python3` from `PATH`.
+fn resolve_sync_x_python_bin(sync_x: &SyncXConfig) -> PathBuf {
+    if let Some(path) = &sync_x.python_bin {
+        return path.clone();
+    }
+    let venv_python3 = sync_x.source_project_path.join(".venv/bin/python3");
+    if venv_python3.exists() {
+        return venv_python3;
+    }
+    let venv_python = sync_x.source_project_path.join(".venv/bin/python");
+    if venv_python.exists() {
+        return venv_python;
+    }
+    PathBuf::from("python3")
+}
+
+/// Creates `work_dir` and copies the extractor files into it from `source_project`.
+///
+/// `main.py` and `isolate_cookies.py` are required; the other files are copied if present.
+fn prepare_sync_x_workspace(source_project: &Path, work_dir: &Path) -> Result<()> {
+    fs::create_dir_all(work_dir)
+        .with_context(|| format!("create sync_x work dir {}", work_dir.display()))?;
+
+    for file in [
+        "main.py",
+        "isolate_cookies.py",
+        "requirements.txt",
+        "README.md",
+        "LICENSE",
+    ] {
+        let src = source_project.join(file);
+        let dest = work_dir.join(file);
+        if !src.exists() {
+            if matches!(file, "main.py" | "isolate_cookies.py") {
+                return Err(anyhow!(
+                    "sync_x source is missing required file: {}",
+                    src.display()
+                ));
+            }
+            continue;
+        }
+        fs::copy(&src, &dest)
+            .with_context(|| format!("copy {} to {}", src.display(), dest.display()))?;
+    }
+
+    Ok(())
+}
+
+/// Runs `script` with `python_bin` inside `work_dir`, optionally feeding `stdin_input`.
+///
+/// A newline is appended to the input when missing so Python's `input()` returns. The
+/// child is always waited on, and a failing exit status is reported (with the tail of its
+/// output) in preference to an error from writing to its stdin.
+fn run_python_script(
+    python_bin: &Path,
+    work_dir: &Path,
+    script: &str,
+    args: &[&str],
+    stdin_input: Option<&str>,
+) -> Result<()> {
+    let mut cmd = Command::new(python_bin);
+    cmd.current_dir(work_dir)
+        .arg(script)
+        .args(args)
+        .stdout(Stdio::piped())
+        .stderr(Stdio::piped());
+
+    if stdin_input.is_some() {
+        cmd.stdin(Stdio::piped());
+    }
+
+    let mut child = cmd
+        .spawn()
+        .with_context(|| format!("run {} {}", python_bin.display(), script))?;
+    // Defer the write error: if the script died early its own output explains why, and
+    // returning here would also leave the child unreaped.
+    let write_result = match (stdin_input, child.stdin.take()) {
+        (Some(input), Some(mut stdin)) => stdin.write_all(input.as_bytes()).and_then(|()| {
+            if input.ends_with('\n') {
+                Ok(())
+            } else {
+                stdin.write_all(b"\n")
+            }
+        }),
+        _ => Ok(()),
+    };
+
+    let output = child.wait_with_output().context("wait for python script")?;
+    if !output.status.success() {
+        let stdout = String::from_utf8_lossy(&output.stdout).to_string();
+        let stderr = String::from_utf8_lossy(&output.stderr).to_string();
+        let tail = summarize_process_output(&stdout, &stderr);
+        return Err(anyhow!(
+            "{} {} failed (status {}):\n{}",
+            python_bin.display(),
+            script,
+            output.status,
+            tail
+        ));
+    }
+    write_result.context("write stdin to python script")?;
+    Ok(())
+}
+
+/// Returns the tail of stderr if it has content, otherwise the tail of stdout.
+fn summarize_process_output(stdout: &str, stderr: &str) -> String {
+    let stderr_trimmed = stderr.trim();
+    if !stderr_trimmed.is_empty() {
+        return trim_tail(stderr_trimmed, 1200);
+    }
+    let stdout_trimmed = stdout.trim();
+    if !stdout_trimmed.is_empty() {
+        return trim_tail(stdout_trimmed, 1200);
+    }
+    "No output captured.".to_string()
+}
+
+/// Returns at most the last `max_chars` bytes of `text`, prefixed with "..." if truncated.
+///
+/// The cut moves forward to the next UTF-8 character boundary so the slice cannot panic.
+fn trim_tail(text: &str, max_chars: usize) -> String {
+    if text.len() <= max_chars {
+        return text.to_string();
+    }
+    let mut cutoff = text.len() - max_chars;
+    while !text.is_char_boundary(cutoff) {
+        cutoff += 1;
+    }
+    format!("...{}", &text[cutoff..])
+}
+
+/// Reads `path` and returns its unique `http://`/`https://` lines in first-seen order.
+fn read_sync_x_urls(path: &Path) -> Result<Vec<String>> {
+    let contents =
+        fs::read_to_string(path).with_context(|| format!("read bookmarks file {}", path.display()))?;
+    let mut seen = HashSet::new();
+    let mut urls = Vec::new();
+    for line in contents.lines() {
+        let trimmed = line.trim();
+        if trimmed.is_empty() {
+            continue;
+        }
+        if !(trimmed.starts_with("http://") || trimmed.starts_with("https://")) {
+            continue;
+        }
+        if seen.insert(trimmed.to_string()) {
+            urls.push(trimmed.to_string());
+        }
+    }
+    Ok(urls)
+}
+
+/// Prepends `urls` that are not yet in the Read Later file at `path`, keeping input order.
+///
+/// Returns `(added, duplicates)`; the file is rewritten only when something was added.
+fn prepend_urls_to_read_later_sync(path: &Path, urls: &[String]) -> Result<(usize, usize)> {
+    let (preamble, mut entries) = read_entries(path)?;
+    let mut existing = HashSet::new();
+    for entry in &entries {
+        existing.insert(entry.block_string());
+    }
+
+    let mut new_entries = Vec::new();
+    let mut duplicate_count = 0usize;
+    for url in urls {
+        let entry = EntryBlock::from_text(url);
+        let block = entry.block_string();
+        if existing.insert(block) {
+            new_entries.push(entry);
+        } else {
+            duplicate_count += 1;
+        }
+    }
+
+    let added_count = new_entries.len();
+    if added_count > 0 {
+        // New entries first (in input order), then the existing ones; a single append
+        // avoids shifting the whole list once per inserted entry.
+        new_entries.append(&mut entries);
+        write_entries(path, &preamble, &new_entries)?;
+    }
+
+    Ok((added_count, duplicate_count))
+}
+
 struct GitOutput {
     status: std::process::ExitStatus,
     stdout: String,
@@ -4543,6 +4951,17 @@ fn load_config(path: &Path) -> Result<Config> {
         .unwrap_or_else(|| Path::new("."))
         .join("Misc/images_misc");
     let media_dir = config_file.media_dir.unwrap_or(default_media_dir);
+    let sync_x = config_file.sync_x.map(|sync_x| SyncXConfig {
+        source_project_path: resolve_user_id_path(&sync_x.source_project_path, config_dir),
+        work_dir: sync_x
+            .work_dir
+            .as_ref()
+            .map(|p| resolve_user_id_path(p, config_dir)),
+        python_bin: sync_x
+            .python_bin
+            .as_ref()
+            .map(|p| resolve_user_id_path(p, config_dir)),
+    });
     Ok(Config {
         token: config_file.token,
         user_id,
@@ -4553,6 +4972,7 @@ fn load_config(path: &Path) -> Result<Config> {
         data_dir: config_file.data_dir,
         retry_interval_seconds: config_file.retry_interval_seconds,
         sync: config_file.sync,
+        sync_x,
     })
 }
 
@@ -5123,6 +5543,7 @@ mod tests {
             data_dir: PathBuf::from("/tmp/data"),
             retry_interval_seconds: None,
             sync: None,
+            sync_x: None,
         }
     }
 
@@ -5464,4 +5885,53 @@ mod tests {
         };
         assert!(is_push_up_to_date(&output));
     }
+
+    #[test]
+    fn read_sync_x_urls_keeps_unique_http_lines() {
+        let temp = TempDir::new().unwrap();
+        let path = temp.path().join("bookmarks.txt");
+        fs::write(
+            &path,
+            "https://a.example\n\nnot-a-url\nhttps://b.example\nhttps://a.example\n",
+        )
+        .unwrap();
+        let urls = read_sync_x_urls(&path).unwrap();
+        assert_eq!(
+            urls,
+            vec![
+                "https://a.example".to_string(),
+                "https://b.example".to_string()
+            ]
+        );
+    }
+
+    #[test]
+    fn prepend_urls_to_read_later_sync_preserves_input_order() {
+        let temp = TempDir::new().unwrap();
+        let path = temp.path().join("read-later.md");
+        fs::write(&path, "- https://already.example\n").unwrap();
+        let urls = vec![
+            "https://one.example".to_string(),
+            "https://two.example".to_string(),
+            "https://already.example".to_string(),
+        ];
+
+        let (added, duplicates) = prepend_urls_to_read_later_sync(&path, &urls).unwrap();
+        assert_eq!(added, 2);
+        assert_eq!(duplicates, 1);
+
+        let (_, entries) = read_entries(&path).unwrap();
+        let blocks = entries
+            .iter()
+            .map(|entry| entry.block_string())
+            .collect::<Vec<_>>();
+        assert_eq!(
+            blocks,
+            vec![
+                "- https://one.example".to_string(),
+                "- https://two.example".to_string(),
+                "- https://already.example".to_string(),
+            ]
+        );
+    }
 }
diff --git a/vendor/extract-x-bookmarks/LICENSE b/vendor/extract-x-bookmarks/LICENSE
new file mode 100644
index 0000000..ff761d6
--- /dev/null
+++ b/vendor/extract-x-bookmarks/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c)
2025-present thegeneralist01 + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/vendor/extract-x-bookmarks/README.md b/vendor/extract-x-bookmarks/README.md new file mode 100644 index 0000000..3e2d833 --- /dev/null +++ b/vendor/extract-x-bookmarks/README.md @@ -0,0 +1,46 @@ +# extract-x-bookmarks +Written by ChatGPT, not me. + +[Works fine](https://x.com/thegeneralist01/status/2003819489989926932). + +## Requirements +- Python +- Dependencies in `requirements.txt` + +## Usage +- Create a virtual environment if you want (`uv venv .venv` or whatever), then activate it. (This is absolutely optional). + +- Install dependencies: +```bash +pip install -r requirements.txt +``` + +- Now, to get the **Cloudflare cookies,** this is quickest way I use: + - Download the [Cookie-Editor extension](https://cookie-editor.com/). + - Open your browser and go to [Twitter](https://x.com). + - Open the Cookie-Editor extension, press the export button (bottom right) and export as Header String. 
(It will copy it to the clipboard). + - Open the terminal, and run (the python file): + ```bash + python isolate_cookies.py + ``` + - Paste the clipboard content. + - It will then put the two needed cookies into `creds.txt`, which the script will use. + - **If you want to do that without installing the extension,** the `creds.txt` file will have the following format: + ``` + auth_token=blablabla;ct0=blablabla + ``` + +- A few things to know before running the script: + - It will create a `bookmarks.txt` file with the URLs of your bookmarks. + - The script fetches about 90 bookmarks per run. That means you might want to continually run it until you have no bookmarks left. + - A run writes (appends, really) URLs in descending order (newest first). + - It might ask you whether to prepend or append the URLs - whether a new run should add URLs to the start or end of the file. **Generally, for a linear timeline, you want to append,** so: `a`. + - It will take some time in the end to **unbookmark** the fetched bookmarks. Each time 10 new bookmarks are unbookmarked, it will print a message. + +- Run the script until you have all your bookmarks extracted: +```bash +python main.py +``` + +## License +Licensed under the [MIT License](LICENSE). 
diff --git a/vendor/extract-x-bookmarks/isolate_cookies.py b/vendor/extract-x-bookmarks/isolate_cookies.py
new file mode 100644
index 0000000..5bcefe7
--- /dev/null
+++ b/vendor/extract-x-bookmarks/isolate_cookies.py
@@ -0,0 +1,24 @@
+"""Extract the `auth_token` and `ct0` cookies from a header string into creds.txt."""
+import os
+
+cookie_str = input("Input your cookies in the Header String format: ").strip()
+
+cookie_dict = {}
+for item in cookie_str.split(";"):
+    part = item.strip()
+    if not part or "=" not in part:
+        continue
+    key, value = part.split("=", 1)
+    cookie_dict[key.strip()] = value.strip()
+
+auth_token = cookie_dict.get("auth_token", "")
+ct0 = cookie_dict.get("ct0", "")
+if not auth_token or not ct0:
+    raise SystemExit("Missing auth_token or ct0 in the provided cookie header.")
+
+login_string = f"auth_token={auth_token};ct0={ct0}"
+
+# creds.txt holds live session cookies: create it readable by the owner only.
+fd = os.open("creds.txt", os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
+with os.fdopen(fd, "w") as file:
+    file.write(login_string)
diff --git a/vendor/extract-x-bookmarks/main.py b/vendor/extract-x-bookmarks/main.py
new file mode 100644
index 0000000..1b18246
--- /dev/null
+++ b/vendor/extract-x-bookmarks/main.py
@@ -0,0 +1,457 @@
+import argparse
+import time
+import os
+from twitter.account import Account
+
+def is_rate_limit_error(error):
+    """
+    Check if an error is a rate limit error (429 Too Many Requests).
+
+    Args:
+        error: Exception object or error message
+
+    Returns:
+        True if it's a rate limit error, False otherwise
+    """
+    error_str = str(error).lower()
+    # Check for common rate limit indicators
+    rate_limit_indicators = [
+        '429',
+        'too many requests',
+        'rate limit',
+        'rate_limit',
+        'exceeded',
+        'quota',
+        'limit exceeded'
+    ]
+    return any(indicator in error_str for indicator in rate_limit_indicators)
+
+
+def handle_rate_limit_error(error, retry_count, base_wait_time=60):
+    """
+    Handle rate limit errors with exponential backoff.
+
+    Args:
+        error: The exception that occurred
+        retry_count: Number of times we've retried
+        base_wait_time: Base wait time in seconds (default 60s = 1 minute)
+
+    Returns:
+        Wait time in seconds before retrying
+    """
+    # Exponential backoff: 1min, 2min, 4min, 8min, etc.
+    wait_time = base_wait_time * (2 ** retry_count)
+    # Cap at 15 minutes (900 seconds)
+    wait_time = min(wait_time, 900)
+
+    print(f"\n ⚠ Rate limit detected (attempt {retry_count + 1})")
+    print(f" ⏳ Waiting {wait_time}s ({wait_time/60:.1f} minutes) before retry...")
+
+    return wait_time
+
+
+def extract_bookmark_entries_from_response(response_data):
+    """
+    Extract bookmark entries (tweet IDs and user info) from the response.
+
+    Args:
+        response_data: The response data from account.bookmarks()
+
+    Returns:
+        List of tuples: [(tweet_id, username), ...]
+    """
+    bookmark_entries = []
+    seen_ids = set()
+
+    def add_entry(tweet_id, username):
+        tid = str(tweet_id).strip()
+        if not tid or tid in seen_ids:
+            return
+        seen_ids.add(tid)
+        bookmark_entries.append((tid, username))
+
+    try:
+        # First, check if response is a simple list of tweet IDs or tweet objects.
+        payloads = []
+        if isinstance(response_data, list):
+            # Check if it's a list of simple values (tweet IDs)
+            if len(response_data) > 0 and isinstance(response_data[0], (str, int)):
+                # Simple list of tweet IDs
+                for tid in response_data:
+                    add_entry(tid, None)
+                return bookmark_entries
+            # Check if it's a list of tweet objects
+            elif len(response_data) > 0 and isinstance(response_data[0], dict):
+                # If it has 'id' or 'id_str' field, it might be a simple tweet object
+                if 'id' in response_data[0] or 'id_str' in response_data[0]:
+                    for item in response_data:
+                        tweet_id = item.get('id_str') or str(item.get('id', ''))
+                        username = item.get('user', {}).get('screen_name') if 'user' in item else None
+                        if tweet_id:
+                            add_entry(tweet_id, username)
+                    return bookmark_entries
+
+            # Otherwise, treat as paginated GraphQL response structure.
+            payloads = [item for item in response_data if isinstance(item, dict)]
+        elif isinstance(response_data, dict):
+            payloads = [response_data]
+        else:
+            return bookmark_entries
+
+        for data in payloads:
+            # Navigate through the nested GraphQL structure (similar to tweets structure).
+            timeline = data.get('data', {}).get('bookmark_timeline_v2', {}).get('timeline', {})
+            if not timeline:
+                # Try alternative path.
+                timeline = data.get('data', {}).get('user', {}).get('result', {}).get('timeline_v2', {}).get('timeline', {})
+
+            instructions = timeline.get('instructions', [])
+
+            for instruction in instructions:
+                if instruction.get('type') == 'TimelineAddEntries':
+                    entries = instruction.get('entries', [])
+                    for entry in entries:
+                        content = entry.get('content', {})
+                        # Extract bookmark entries
+                        if content.get('entryType') == 'TimelineTimelineItem':
+                            item_content = content.get('itemContent', {})
+                            if item_content.get('itemType') == 'TimelineTweet':
+                                tweet_result = item_content.get('tweet_results', {}).get('result', {})
+                                # Get rest_id (the tweet ID)
+                                tweet_id = tweet_result.get('rest_id')
+
+                                # Get username from tweet result
+                                username = None
+                                # Try to get username from user info in tweet
+                                user_info = tweet_result.get('core', {}).get('user_results', {}).get('result', {})
+                                if user_info:
+                                    legacy_user = user_info.get('legacy', {})
+                                    if legacy_user:
+                                        username = legacy_user.get('screen_name')
+
+                                if tweet_id:
+                                    add_entry(tweet_id, username)
+
+        return bookmark_entries
+    except Exception as e:
+        print(f" ⚠ Warning: Error extracting bookmark entries: {e}")
+        return bookmark_entries
+
+
+def extract_all_bookmarks(account, delay_between_requests=2.0):
+    """
+    Extract all bookmarks from the account with proper rate limit handling.
+    Account.bookmarks() returns all bookmarks in a single call.
+
+    Args:
+        account: Account instance from twitter.account
+        delay_between_requests: Delay in seconds between requests (not used for single call, but kept for consistency)
+
+    Returns:
+        List of tuples: [(tweet_id, username), ...] (newest first)
+    """
+    all_bookmarks = []
+    retry_count = 0
+
+    print("Starting to extract bookmarks...")
+    print("-" * 50)
+
+    try:
+        print("Fetching bookmarks...", end=" ")
+
+        # Fetch all bookmarks (single call, no pagination needed)
+        try:
+            response_data = account.bookmarks()
+            retry_count = 0
+
+        except Exception as e:
+            error_msg = str(e)
+            print(f"\n ❌ Error fetching bookmarks: {error_msg}")
+
+            # Check if it's a rate limit error
+            if is_rate_limit_error(e):
+                wait_time = handle_rate_limit_error(e, retry_count)
+                time.sleep(wait_time)
+                retry_count += 1
+                # Retry the request
+                try:
+                    response_data = account.bookmarks()
+                    retry_count = 0
+                except Exception as retry_error:
+                    print(f" ❌ Failed after retry: {retry_error}")
+                    raise
+            else:
+                # For non-rate-limit errors, wait a bit and retry once
+                if retry_count < 2:
+                    wait_time = delay_between_requests * 3
+                    print(f" ⏳ Waiting {wait_time}s before retry...")
+                    time.sleep(wait_time)
+                    retry_count += 1
+                    try:
+                        response_data = account.bookmarks()
+                        retry_count = 0
+                    except Exception as retry_error:
+                        print(f" ❌ Failed after retry: {retry_error}")
+                        raise
+                else:
+                    print(f" ❌ Max retries reached. Stopping.")
+                    raise
+
+        # Extract bookmark entries from response
+        all_bookmarks = extract_bookmark_entries_from_response(response_data)
+
+        if all_bookmarks:
+            print(f"✓ Retrieved {len(all_bookmarks)} bookmarks")
+        else:
+            print("⚠ No bookmarks found")
+
+    except KeyboardInterrupt:
+        print("\n\n⚠ Extraction interrupted by user")
+    except Exception as e:
+        print(f"\n\n❌ Error occurred: {str(e)}")
+        raise
+
+    print(f"\n{'='*80}")
+    print(f"Bookmark extraction complete!")
+    print(f" Total bookmarks found: {len(all_bookmarks)}")
+    print(f"{'='*80}\n")
+
+    return all_bookmarks
+
+
+def save_bookmarks_and_unbookmark(
+    account,
+    bookmarks,
+    output_file="bookmarks.txt",
+    delay_between_requests=2.0,
+    write_mode="a",
+):
+    """
+    Save bookmark URLs to file (newest first) and unbookmark each one.
+
+    The URLs are written before any unbookmark request is sent, so an interruption
+    mid-run cannot lose a bookmark that was already removed from X.
+
+    Args:
+        account: Account instance from twitter.account
+        bookmarks: List of tuples [(tweet_id, username), ...]
+        output_file: Output file path
+        delay_between_requests: Delay in seconds between unbookmark requests
+        write_mode: 'a' to append, 'p' to prepend, 'ask' to prompt interactively
+
+    Returns:
+        Dict with 'saved_count' and 'unbookmarked_count'
+    """
+    print(f"\nSaving bookmarks to {output_file} and unbookmarking...")
+    print("-" * 50)
+
+    # Read existing content if file exists
+    existing_content = ""
+    if os.path.exists(output_file):
+        with open(output_file, "r") as f:
+            existing_content = f.read()
+
+    # Choose whether to prepend or append.
+    if write_mode not in ['ask', 'p', 'a']:
+        raise ValueError("write_mode must be one of: ask, p, a")
+
+    if write_mode == "ask":
+        while True:
+            choice = input("Prepend (p) or append (a) new bookmarks? [p/a] (default a): ").strip().lower()
+            if choice == "":
+                choice = "a"
+            if choice in ['p', 'a']:
+                break
+            print(" ⚠ Invalid choice. Please enter 'p' for prepend or 'a' for append.")
+    else:
+        choice = write_mode
+
+    prepend = (choice == 'p')
+
+    # Collect new bookmark URLs (newest first)
+    new_bookmark_urls = []
+    for tweet_id, username in bookmarks:
+        # Construct URL
+        if username:
+            url = f"https://twitter.com/{username}/status/{tweet_id}"
+        else:
+            # Fallback if username not available
+            url = f"https://twitter.com/i/web/status/{tweet_id}"
+        new_bookmark_urls.append(url)
+
+    # Write the URLs based on user's choice BEFORE unbookmarking anything: once a tweet
+    # is unbookmarked, this file is the only remaining record of it.
+    with open(output_file, "w") as f:
+        if prepend:
+            # Write new bookmarks first (prepended), then existing content
+            for url in new_bookmark_urls:
+                f.write(f"{url}\n")
+            if existing_content:
+                f.write(existing_content)
+        else:
+            # Write existing content first, then new bookmarks (appended)
+            if existing_content:
+                f.write(existing_content)
+                # Keep the first new URL on its own line if the file lacked a final newline.
+                if not existing_content.endswith("\n"):
+                    f.write("\n")
+            for url in new_bookmark_urls:
+                f.write(f"{url}\n")
+
+    unbookmark_count = 0
+    retry_count = 0
+
+    # Unbookmark the saved tweets (they should already be in order, newest first)
+    for tweet_id, _username in bookmarks:
+        try:
+            account.unbookmark(tweet_id)
+            unbookmark_count += 1
+            retry_count = 0  # Reset retry count on success
+
+            if unbookmark_count % 10 == 0:
+                print(f" ✓ Processed {unbookmark_count}/{len(bookmarks)} bookmarks...")
+
+        except Exception as e:
+            error_msg = str(e)
+            print(f"\n ⚠ Error unbookmarking tweet {tweet_id}: {error_msg}")
+
+            # Check if it's a rate limit error
+            if is_rate_limit_error(e):
+                wait_time = handle_rate_limit_error(e, retry_count)
+                time.sleep(wait_time)
+                retry_count += 1
+                # Retry the unbookmark
+                try:
+                    account.unbookmark(tweet_id)
+                    unbookmark_count += 1
+                    retry_count = 0
+                except Exception as retry_error:
+                    print(f" ❌ Failed to unbookmark {tweet_id} after retry: {retry_error}")
+            else:
+                # For other errors, just log and continue
+                if retry_count < 2:
+                    wait_time = delay_between_requests * 3
+                    print(f" ⏳ Waiting {wait_time}s before retry...")
+                    time.sleep(wait_time)
+                    retry_count += 1
+                    try:
+                        account.unbookmark(tweet_id)
+                        unbookmark_count += 1
+                        retry_count = 0
+                    except Exception as retry_error:
+                        print(f" ❌ Failed to unbookmark {tweet_id} after retry: {retry_error}")
+                else:
+                    print(f" ❌ Skipping unbookmark for {tweet_id} after max retries")
+
+        # Rate limiting: wait before next unbookmark request
+        if delay_between_requests > 0:
+            time.sleep(delay_between_requests)
+
+    print(f"\n{'='*80}")
+    print(f"Processing complete!")
+    print(f" Total bookmarks saved: {len(bookmarks)}")
+    print(f" Total unbookmarked: {unbookmark_count}")
+    print(f" Output file: {output_file}")
+    print(f"{'='*80}\n")
+    return {
+        "saved_count": len(bookmarks),
+        "unbookmarked_count": unbookmark_count,
+    }
+
+
+def parse_args():
+    """Parse the command-line options of the extractor."""
+    parser = argparse.ArgumentParser(description="Extract and unbookmark X/Twitter bookmarks.")
+    parser.add_argument("--output-file", default="bookmarks.txt", help="Path to output bookmarks file.")
+    parser.add_argument(
+        "--delay-between-requests",
+        type=float,
+        default=2.0,
+        help="Seconds to wait between unbookmark requests.",
+    )
+    parser.add_argument(
+        "--mode",
+        choices=["a", "p", "ask"],
+        default="a",
+        help="Write mode for bookmark file: append (a), prepend (p), or ask interactively.",
+    )
+    parser.add_argument(
+        "--single-run",
+        action="store_true",
+        help="Run one extraction pass only.",
+    )
+    parser.add_argument(
+        "--max-runs",
+        type=int,
+        default=100,
+        help="Maximum number of extraction runs when syncing until empty.",
+    )
+    parser.add_argument(
+        "--delay-between-runs",
+        type=float,
+        default=1.0,
+        help="Seconds to wait between extraction runs.",
+    )
+    return parser.parse_args()
+
+
+if __name__ == "__main__":
+    args = parse_args()
+
+    # Load cookies
+    with open("creds.txt", "r") as file:
+        cookie_str = file.read().strip()
+    # Tolerate whitespace, empty segments and a trailing ';' in a hand-written creds.txt.
+    cookie_dict = {}
+    for item in cookie_str.split(";"):
+        key, sep, value = item.strip().partition("=")
+        if sep and key.strip():
+            cookie_dict[key.strip()] = value.strip()
+    if not cookie_dict.get("auth_token") or not cookie_dict.get("ct0"):
+        raise SystemExit("creds.txt must contain both auth_token and ct0 cookies.")
+
+    # Initialize account
+    account = Account(cookies=cookie_dict)
+
+    # Configuration
+    delay_between_requests = args.delay_between_requests
+    output_file = args.output_file
+
+    total_saved = 0
+    total_unbookmarked = 0
+    runs = 0
+
+    while runs < args.max_runs:
+        runs += 1
+        print(f"\nRun {runs}: fetching bookmarks...")
+        bookmarks = extract_all_bookmarks(account, delay_between_requests=delay_between_requests)
+
+        if not bookmarks:
+            print("\nNo bookmarks found.")
+            break
+
+        # Save bookmarks to file and unbookmark them.
+        stats = save_bookmarks_and_unbookmark(
+            account,
+            bookmarks,
+            output_file=output_file,
+            delay_between_requests=delay_between_requests,
+            write_mode=args.mode,
+        )
+        total_saved += stats["saved_count"]
+        total_unbookmarked += stats["unbookmarked_count"]
+        print(f"\nSuccessfully processed {len(bookmarks)} bookmarks in run {runs}")
+
+        if args.single_run:
+            break
+        if stats["unbookmarked_count"] == 0:
+            print("No bookmarks were unbookmarked in this run; stopping to avoid an infinite loop.")
+            break
+        if runs < args.max_runs and args.delay_between_runs > 0:
+            time.sleep(args.delay_between_runs)
+    else:
+        # Reached only when the loop ran out of runs without a `break`, i.e. the run budget
+        # was used up while bookmarks were still being returned.
+        print(f"\nReached max runs ({args.max_runs}) before bookmarks were fully exhausted.")
+
+    print(f"\nDone. Total saved: {total_saved}, total unbookmarked: {total_unbookmarked}")
diff --git a/vendor/extract-x-bookmarks/requirements.txt b/vendor/extract-x-bookmarks/requirements.txt
new file mode 100644
index 0000000..c76d673
--- /dev/null
+++ b/vendor/extract-x-bookmarks/requirements.txt
@@ -0,0 +1 @@
+twitter-api-client == 0.10.22