Add /sync_x command with X bookmarks import flow
This commit is contained in:
parent
79073b7a2c
commit
874f3ec570
8 changed files with 975 additions and 2 deletions
2
.gitignore
vendored
2
.gitignore
vendored
|
|
@ -1 +1,3 @@
|
||||||
/target
|
/target
|
||||||
|
__pycache__/
|
||||||
|
*.pyc
|
||||||
|
|
|
||||||
18
README.md
18
README.md
|
|
@ -30,3 +30,21 @@ user_id = "/run/agenix/readlater-user-id"
|
||||||
```toml
|
```toml
|
||||||
user_id = { file = "/run/agenix/readlater-user-id" }
|
user_id = { file = "/run/agenix/readlater-user-id" }
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### `sync_x`
|
||||||
|
|
||||||
|
`/sync_x` imports X/Twitter bookmarks into Read Later.
|
||||||
|
|
||||||
|
- The bot prompts for the Cloudflare cookie header string (`auth_token` + `ct0`).
|
||||||
|
- It runs `isolate_cookies.py`, then `main.py --mode a`.
|
||||||
|
- Extracted URLs are prepended to Read Later.
|
||||||
|
- Temporary `creds.txt` / `bookmarks.txt` files are removed after import.
|
||||||
|
|
||||||
|
Config example:
|
||||||
|
|
||||||
|
```toml
|
||||||
|
[sync_x]
|
||||||
|
source_project_path = "/Users/thegeneralist/personal/bookkeeper/vendor/extract-x-bookmarks"
|
||||||
|
work_dir = "/var/lib/readlater-bot/sync-x"
|
||||||
|
python_bin = "/Users/thegeneralist/personal/extract-x-bookmarks/.venv/bin/python"
|
||||||
|
```
|
||||||
|
|
|
||||||
431
src/main.rs
431
src/main.rs
|
|
@ -3,7 +3,7 @@ use std::fs;
|
||||||
use std::io::Write;
|
use std::io::Write;
|
||||||
use std::os::unix::fs::PermissionsExt;
|
use std::os::unix::fs::PermissionsExt;
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
use std::process::Command;
|
use std::process::{Command, Stdio};
|
||||||
use std::time::{Duration, SystemTime, UNIX_EPOCH};
|
use std::time::{Duration, SystemTime, UNIX_EPOCH};
|
||||||
|
|
||||||
use anyhow::{anyhow, Context, Result};
|
use anyhow::{anyhow, Context, Result};
|
||||||
|
|
@ -26,6 +26,7 @@ const RESOURCE_PROMPT_TTL_SECS: u64 = 5 * 60;
|
||||||
const PAGE_SIZE: usize = 3;
|
const PAGE_SIZE: usize = 3;
|
||||||
const DOWNLOAD_PROMPT_TTL_SECS: u64 = 5 * 60;
|
const DOWNLOAD_PROMPT_TTL_SECS: u64 = 5 * 60;
|
||||||
const FINISH_TITLE_PROMPT_TTL_SECS: u64 = 5 * 60;
|
const FINISH_TITLE_PROMPT_TTL_SECS: u64 = 5 * 60;
|
||||||
|
const SYNC_X_PROMPT_TTL_SECS: u64 = 10 * 60;
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
struct Config {
|
struct Config {
|
||||||
|
|
@ -38,6 +39,7 @@ struct Config {
|
||||||
data_dir: PathBuf,
|
data_dir: PathBuf,
|
||||||
retry_interval_seconds: Option<u64>,
|
retry_interval_seconds: Option<u64>,
|
||||||
sync: Option<SyncConfig>,
|
sync: Option<SyncConfig>,
|
||||||
|
sync_x: Option<SyncXConfig>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Deserialize, Clone)]
|
#[derive(Debug, Deserialize, Clone)]
|
||||||
|
|
@ -51,6 +53,7 @@ struct ConfigFile {
|
||||||
data_dir: PathBuf,
|
data_dir: PathBuf,
|
||||||
retry_interval_seconds: Option<u64>,
|
retry_interval_seconds: Option<u64>,
|
||||||
sync: Option<SyncConfig>,
|
sync: Option<SyncConfig>,
|
||||||
|
sync_x: Option<SyncXConfig>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Deserialize, Clone)]
|
#[derive(Debug, Deserialize, Clone)]
|
||||||
|
|
@ -67,6 +70,15 @@ struct SyncConfig {
|
||||||
token_file: PathBuf,
|
token_file: PathBuf,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Settings for the `/sync_x` X-bookmarks import flow.
#[derive(Debug, Deserialize, Clone)]
struct SyncXConfig {
    // Checkout of the extract-x-bookmarks project; its scripts are copied
    // into the work dir and executed from there.
    source_project_path: PathBuf,
    // Scratch directory for creds.txt / bookmarks.txt; defaults to
    // `<data_dir>/sync-x` when absent (see run_sync_x).
    #[serde(default)]
    work_dir: Option<PathBuf>,
    // Explicit Python interpreter; when absent the source project's .venv
    // interpreter is tried, then `python3` on PATH (see resolve_sync_x_python_bin).
    #[serde(default)]
    python_bin: Option<PathBuf>,
}
|
||||||
|
|
||||||
#[derive(Parser, Debug)]
|
#[derive(Parser, Debug)]
|
||||||
struct Args {
|
struct Args {
|
||||||
#[arg(long)]
|
#[arg(long)]
|
||||||
|
|
@ -257,6 +269,12 @@ struct FinishTitlePrompt {
|
||||||
expires_at: u64,
|
expires_at: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Pending `/sync_x` prompt: the next message from the chat is treated as
/// the cookie-header reply until `expires_at`.
#[derive(Clone, Debug)]
struct SyncXCookiePrompt {
    // The bot message asking for the cookie header; deleted once handled.
    prompt_message_id: MessageId,
    // Unix timestamp after which the prompt is discarded.
    expires_at: u64,
}
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
struct UndoSession {
|
struct UndoSession {
|
||||||
chat_id: i64,
|
chat_id: i64,
|
||||||
|
|
@ -319,6 +337,7 @@ struct AppState {
|
||||||
download_pickers: Mutex<HashMap<String, DownloadPickerState>>,
|
download_pickers: Mutex<HashMap<String, DownloadPickerState>>,
|
||||||
download_link_prompts: Mutex<HashMap<i64, DownloadLinkPrompt>>,
|
download_link_prompts: Mutex<HashMap<i64, DownloadLinkPrompt>>,
|
||||||
finish_title_prompts: Mutex<HashMap<i64, FinishTitlePrompt>>,
|
finish_title_prompts: Mutex<HashMap<i64, FinishTitlePrompt>>,
|
||||||
|
sync_x_cookie_prompts: Mutex<HashMap<i64, SyncXCookiePrompt>>,
|
||||||
queue: Mutex<Vec<QueuedOp>>,
|
queue: Mutex<Vec<QueuedOp>>,
|
||||||
undo: Mutex<Vec<UndoRecord>>,
|
undo: Mutex<Vec<UndoRecord>>,
|
||||||
queue_path: PathBuf,
|
queue_path: PathBuf,
|
||||||
|
|
@ -366,6 +385,7 @@ async fn main() -> Result<()> {
|
||||||
download_pickers: Mutex::new(HashMap::new()),
|
download_pickers: Mutex::new(HashMap::new()),
|
||||||
download_link_prompts: Mutex::new(HashMap::new()),
|
download_link_prompts: Mutex::new(HashMap::new()),
|
||||||
finish_title_prompts: Mutex::new(HashMap::new()),
|
finish_title_prompts: Mutex::new(HashMap::new()),
|
||||||
|
sync_x_cookie_prompts: Mutex::new(HashMap::new()),
|
||||||
queue: Mutex::new(load_queue(&queue_path)?),
|
queue: Mutex::new(load_queue(&queue_path)?),
|
||||||
undo: Mutex::new(undo),
|
undo: Mutex::new(undo),
|
||||||
queue_path,
|
queue_path,
|
||||||
|
|
@ -495,6 +515,32 @@ async fn handle_message(
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let mut expired_sync_x_prompt: Option<SyncXCookiePrompt> = None;
|
||||||
|
let pending_sync_x_prompt = {
|
||||||
|
let mut prompts = state.sync_x_cookie_prompts.lock().await;
|
||||||
|
if let Some(prompt) = prompts.remove(&msg.chat.id.0) {
|
||||||
|
if prompt.expires_at > now_ts() {
|
||||||
|
Some(prompt)
|
||||||
|
} else {
|
||||||
|
expired_sync_x_prompt = Some(prompt);
|
||||||
|
None
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
if let Some(prompt) = expired_sync_x_prompt {
|
||||||
|
let _ = bot
|
||||||
|
.delete_message(msg.chat.id, prompt.prompt_message_id)
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(prompt) = pending_sync_x_prompt {
|
||||||
|
handle_sync_x_cookie_response(&bot, msg.chat.id, msg.id, &state, &text, prompt).await?;
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
if let Some(cmd) = parse_command(&text) {
|
if let Some(cmd) = parse_command(&text) {
|
||||||
let rest = text
|
let rest = text
|
||||||
.splitn(2, |c: char| c.is_whitespace())
|
.splitn(2, |c: char| c.is_whitespace())
|
||||||
|
|
@ -503,7 +549,7 @@ async fn handle_message(
|
||||||
.trim();
|
.trim();
|
||||||
match cmd {
|
match cmd {
|
||||||
"start" | "help" => {
|
"start" | "help" => {
|
||||||
let help = "Send any text to save it. Commands: /add <text>, /list, /search <query>, /download [url], /undos, /reset_peeked, /pull, /pull theirs, /push, /sync. Use --- to split a message into multiple items. In list views, use buttons for Mark Finished, Add Resource, Delete, Random. Quick actions: reply with del/delete to remove the current item, or send norm to normalize links.";
|
let help = "Send any text to save it. Commands: /add <text>, /list, /search <query>, /download [url], /undos, /reset_peeked, /pull, /pull theirs, /push, /sync, /sync_x. Use --- to split a message into multiple items. In list views, use buttons for Mark Finished, Add Resource, Delete, Random. Quick actions: reply with del/delete to remove the current item, or send norm to normalize links.";
|
||||||
bot.send_message(msg.chat.id, help).await?;
|
bot.send_message(msg.chat.id, help).await?;
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
|
@ -559,6 +605,11 @@ async fn handle_message(
|
||||||
let _ = bot.delete_message(msg.chat.id, msg.id).await;
|
let _ = bot.delete_message(msg.chat.id, msg.id).await;
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
"sync_x" => {
|
||||||
|
handle_sync_x_command(bot.clone(), msg.clone(), state).await?;
|
||||||
|
let _ = bot.delete_message(msg.chat.id, msg.id).await;
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
_ => {
|
_ => {
|
||||||
// Unknown command, fall through as text.
|
// Unknown command, fall through as text.
|
||||||
}
|
}
|
||||||
|
|
@ -1150,6 +1201,86 @@ async fn handle_sync_command(
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn handle_sync_x_command(
|
||||||
|
bot: Bot,
|
||||||
|
msg: Message,
|
||||||
|
state: std::sync::Arc<AppState>,
|
||||||
|
) -> Result<()> {
|
||||||
|
if state.config.sync_x.is_none() {
|
||||||
|
send_error(
|
||||||
|
&bot,
|
||||||
|
msg.chat.id,
|
||||||
|
"sync_x not configured. Set settings.sync_x.source_project_path (and optionally settings.sync_x.python_bin/work_dir).",
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
let prompt_text = "Paste the Cloudflare cookie header string from x.com (must include auth_token and ct0).";
|
||||||
|
let sent = bot.send_message(msg.chat.id, prompt_text).await?;
|
||||||
|
state.sync_x_cookie_prompts.lock().await.insert(
|
||||||
|
msg.chat.id.0,
|
||||||
|
SyncXCookiePrompt {
|
||||||
|
prompt_message_id: sent.id,
|
||||||
|
expires_at: now_ts() + SYNC_X_PROMPT_TTL_SECS,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn handle_sync_x_cookie_response(
|
||||||
|
bot: &Bot,
|
||||||
|
chat_id: ChatId,
|
||||||
|
message_id: MessageId,
|
||||||
|
state: &std::sync::Arc<AppState>,
|
||||||
|
text: &str,
|
||||||
|
prompt: SyncXCookiePrompt,
|
||||||
|
) -> Result<()> {
|
||||||
|
let cookie_header = text.trim();
|
||||||
|
if cookie_header.is_empty() {
|
||||||
|
send_error(bot, chat_id, "Cookie header is empty. Paste the full header string.").await?;
|
||||||
|
state.sync_x_cookie_prompts.lock().await.insert(
|
||||||
|
chat_id.0,
|
||||||
|
SyncXCookiePrompt {
|
||||||
|
prompt_message_id: prompt.prompt_message_id,
|
||||||
|
expires_at: now_ts() + SYNC_X_PROMPT_TTL_SECS,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
let _ = bot.delete_message(chat_id, message_id).await;
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
let _ = bot.delete_message(chat_id, prompt.prompt_message_id).await;
|
||||||
|
let _ = bot.delete_message(chat_id, message_id).await;
|
||||||
|
|
||||||
|
let status_msg = bot.send_message(chat_id, "Syncing X bookmarks...").await?;
|
||||||
|
let config = state.config.clone();
|
||||||
|
let cookie_header = cookie_header.to_string();
|
||||||
|
let outcome = tokio::task::spawn_blocking(move || run_sync_x(&config, &cookie_header))
|
||||||
|
.await
|
||||||
|
.context("sync_x task failed")?;
|
||||||
|
let _ = bot.delete_message(chat_id, status_msg.id).await;
|
||||||
|
|
||||||
|
match outcome {
|
||||||
|
Ok(sync_outcome) => {
|
||||||
|
if sync_outcome.extracted_count == 0 {
|
||||||
|
send_ephemeral(bot, chat_id, "No X bookmarks found.", ACK_TTL_SECS).await?;
|
||||||
|
} else {
|
||||||
|
let text = format!(
|
||||||
|
"X sync complete: extracted {}, added {}, skipped {} duplicates.",
|
||||||
|
sync_outcome.extracted_count, sync_outcome.added_count, sync_outcome.duplicate_count
|
||||||
|
);
|
||||||
|
bot.send_message(chat_id, text).await?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(err) => {
|
||||||
|
send_error(bot, chat_id, &format!("sync_x failed: {}", err)).await?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
async fn handle_undos_command(
|
async fn handle_undos_command(
|
||||||
bot: Bot,
|
bot: Bot,
|
||||||
msg: Message,
|
msg: Message,
|
||||||
|
|
@ -2852,6 +2983,13 @@ enum SyncOutcome {
|
||||||
Synced,
|
Synced,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Summary of one completed X bookmarks import run.
#[derive(Debug)]
struct SyncXOutcome {
    // URLs extracted from the bookmarks file produced by the Python tooling.
    extracted_count: usize,
    // URLs newly prepended to the Read Later list.
    added_count: usize,
    // URLs skipped because an identical entry already existed.
    duplicate_count: usize,
}
|
||||||
|
|
||||||
async fn queue_op(state: &std::sync::Arc<AppState>, op: QueuedOp) -> Result<()> {
|
async fn queue_op(state: &std::sync::Arc<AppState>, op: QueuedOp) -> Result<()> {
|
||||||
let mut queue = state.queue.lock().await;
|
let mut queue = state.queue.lock().await;
|
||||||
queue.push(op);
|
queue.push(op);
|
||||||
|
|
@ -3155,6 +3293,233 @@ fn run_sync(sync: &SyncConfig) -> Result<SyncOutcome> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn run_sync_x(config: &Config, cookie_header: &str) -> Result<SyncXOutcome> {
|
||||||
|
let sync_x = config
|
||||||
|
.sync_x
|
||||||
|
.as_ref()
|
||||||
|
.ok_or_else(|| anyhow!("sync_x is not configured."))?;
|
||||||
|
|
||||||
|
let source_project = &sync_x.source_project_path;
|
||||||
|
if !source_project.exists() {
|
||||||
|
return Err(anyhow!(
|
||||||
|
"sync_x source project path not found: {}",
|
||||||
|
source_project.display()
|
||||||
|
));
|
||||||
|
}
|
||||||
|
if !source_project.is_dir() {
|
||||||
|
return Err(anyhow!(
|
||||||
|
"sync_x source project path is not a directory: {}",
|
||||||
|
source_project.display()
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
let work_dir = sync_x
|
||||||
|
.work_dir
|
||||||
|
.clone()
|
||||||
|
.unwrap_or_else(|| config.data_dir.join("sync-x"));
|
||||||
|
prepare_sync_x_workspace(source_project, &work_dir)?;
|
||||||
|
|
||||||
|
let python_bin = resolve_sync_x_python_bin(sync_x);
|
||||||
|
let creds_path = work_dir.join("creds.txt");
|
||||||
|
let bookmarks_path = work_dir.join("bookmarks.txt");
|
||||||
|
let _ = fs::remove_file(&creds_path);
|
||||||
|
let _ = fs::remove_file(&bookmarks_path);
|
||||||
|
|
||||||
|
run_python_script(
|
||||||
|
&python_bin,
|
||||||
|
&work_dir,
|
||||||
|
"isolate_cookies.py",
|
||||||
|
&[],
|
||||||
|
Some(cookie_header),
|
||||||
|
)?;
|
||||||
|
run_python_script(&python_bin, &work_dir, "main.py", &["--mode", "a"], None)?;
|
||||||
|
|
||||||
|
let urls = if bookmarks_path.exists() {
|
||||||
|
read_sync_x_urls(&bookmarks_path)?
|
||||||
|
} else {
|
||||||
|
Vec::new()
|
||||||
|
};
|
||||||
|
let (added_count, duplicate_count) = prepend_urls_to_read_later_sync(&config.read_later_path, &urls)?;
|
||||||
|
|
||||||
|
let _ = fs::remove_file(&bookmarks_path);
|
||||||
|
let _ = fs::remove_file(&creds_path);
|
||||||
|
|
||||||
|
Ok(SyncXOutcome {
|
||||||
|
extracted_count: urls.len(),
|
||||||
|
added_count,
|
||||||
|
duplicate_count,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn resolve_sync_x_python_bin(sync_x: &SyncXConfig) -> PathBuf {
|
||||||
|
if let Some(path) = &sync_x.python_bin {
|
||||||
|
return path.clone();
|
||||||
|
}
|
||||||
|
let venv_python3 = sync_x.source_project_path.join(".venv/bin/python3");
|
||||||
|
if venv_python3.exists() {
|
||||||
|
return venv_python3;
|
||||||
|
}
|
||||||
|
let venv_python = sync_x.source_project_path.join(".venv/bin/python");
|
||||||
|
if venv_python.exists() {
|
||||||
|
return venv_python;
|
||||||
|
}
|
||||||
|
PathBuf::from("python3")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn prepare_sync_x_workspace(source_project: &Path, work_dir: &Path) -> Result<()> {
|
||||||
|
fs::create_dir_all(work_dir)
|
||||||
|
.with_context(|| format!("create sync_x work dir {}", work_dir.display()))?;
|
||||||
|
|
||||||
|
for file in [
|
||||||
|
"main.py",
|
||||||
|
"isolate_cookies.py",
|
||||||
|
"requirements.txt",
|
||||||
|
"README.md",
|
||||||
|
"LICENSE",
|
||||||
|
] {
|
||||||
|
let src = source_project.join(file);
|
||||||
|
let dest = work_dir.join(file);
|
||||||
|
if !src.exists() {
|
||||||
|
if matches!(file, "main.py" | "isolate_cookies.py") {
|
||||||
|
return Err(anyhow!(
|
||||||
|
"sync_x source is missing required file: {}",
|
||||||
|
src.display()
|
||||||
|
));
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
fs::copy(&src, &dest)
|
||||||
|
.with_context(|| format!("copy {} to {}", src.display(), dest.display()))?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run_python_script(
|
||||||
|
python_bin: &Path,
|
||||||
|
work_dir: &Path,
|
||||||
|
script: &str,
|
||||||
|
args: &[&str],
|
||||||
|
stdin_input: Option<&str>,
|
||||||
|
) -> Result<()> {
|
||||||
|
let mut cmd = Command::new(python_bin);
|
||||||
|
cmd.current_dir(work_dir)
|
||||||
|
.arg(script)
|
||||||
|
.args(args)
|
||||||
|
.stdout(Stdio::piped())
|
||||||
|
.stderr(Stdio::piped());
|
||||||
|
|
||||||
|
if stdin_input.is_some() {
|
||||||
|
cmd.stdin(Stdio::piped());
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut child = cmd
|
||||||
|
.spawn()
|
||||||
|
.with_context(|| format!("run {} {}", python_bin.display(), script))?;
|
||||||
|
if let Some(input) = stdin_input {
|
||||||
|
if let Some(mut stdin) = child.stdin.take() {
|
||||||
|
stdin
|
||||||
|
.write_all(input.as_bytes())
|
||||||
|
.context("write stdin to python script")?;
|
||||||
|
if !input.ends_with('\n') {
|
||||||
|
stdin
|
||||||
|
.write_all(b"\n")
|
||||||
|
.context("write newline to python script")?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let output = child.wait_with_output().context("wait for python script")?;
|
||||||
|
if !output.status.success() {
|
||||||
|
let stdout = String::from_utf8_lossy(&output.stdout).to_string();
|
||||||
|
let stderr = String::from_utf8_lossy(&output.stderr).to_string();
|
||||||
|
let tail = summarize_process_output(&stdout, &stderr);
|
||||||
|
return Err(anyhow!(
|
||||||
|
"{} {} failed (status {}):\n{}",
|
||||||
|
python_bin.display(),
|
||||||
|
script,
|
||||||
|
output.status,
|
||||||
|
tail
|
||||||
|
));
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn summarize_process_output(stdout: &str, stderr: &str) -> String {
|
||||||
|
let stderr_trimmed = stderr.trim();
|
||||||
|
if !stderr_trimmed.is_empty() {
|
||||||
|
return trim_tail(stderr_trimmed, 1200);
|
||||||
|
}
|
||||||
|
let stdout_trimmed = stdout.trim();
|
||||||
|
if !stdout_trimmed.is_empty() {
|
||||||
|
return trim_tail(stdout_trimmed, 1200);
|
||||||
|
}
|
||||||
|
"No output captured.".to_string()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns `text` unchanged when it fits within `max_chars` bytes; otherwise
/// the last `max_chars` bytes, rounded down to a valid UTF-8 boundary, with a
/// `"..."` prefix — keeping the tail of the output, where errors usually are.
///
/// Fix over the original: when no char boundary at or past the threshold
/// exists (e.g. `max_chars == 0`, or the threshold falls inside the final
/// multi-byte char), the old loop left `cutoff` at 0 and returned the whole
/// text uncapped; we now fall back to an empty tail instead.
fn trim_tail(text: &str, max_chars: usize) -> String {
    if text.len() <= max_chars {
        return text.to_string();
    }
    let threshold = text.len() - max_chars;
    // First char boundary at or past the threshold keeps the slice valid UTF-8.
    let cutoff = text
        .char_indices()
        .map(|(idx, _)| idx)
        .find(|&idx| idx >= threshold)
        .unwrap_or(text.len());
    format!("...{}", &text[cutoff..])
}
|
||||||
|
|
||||||
|
fn read_sync_x_urls(path: &Path) -> Result<Vec<String>> {
|
||||||
|
let contents =
|
||||||
|
fs::read_to_string(path).with_context(|| format!("read bookmarks file {}", path.display()))?;
|
||||||
|
let mut seen = HashSet::new();
|
||||||
|
let mut urls = Vec::new();
|
||||||
|
for line in contents.lines() {
|
||||||
|
let trimmed = line.trim();
|
||||||
|
if trimmed.is_empty() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if !(trimmed.starts_with("http://") || trimmed.starts_with("https://")) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if seen.insert(trimmed.to_string()) {
|
||||||
|
urls.push(trimmed.to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(urls)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn prepend_urls_to_read_later_sync(path: &Path, urls: &[String]) -> Result<(usize, usize)> {
|
||||||
|
let (preamble, mut entries) = read_entries(path)?;
|
||||||
|
let mut existing = HashSet::new();
|
||||||
|
for entry in &entries {
|
||||||
|
existing.insert(entry.block_string());
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut new_entries = Vec::new();
|
||||||
|
let mut duplicate_count = 0usize;
|
||||||
|
for url in urls {
|
||||||
|
let entry = EntryBlock::from_text(url);
|
||||||
|
let block = entry.block_string();
|
||||||
|
if existing.insert(block) {
|
||||||
|
new_entries.push(entry);
|
||||||
|
} else {
|
||||||
|
duplicate_count += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !new_entries.is_empty() {
|
||||||
|
for entry in new_entries.iter().rev() {
|
||||||
|
entries.insert(0, entry.clone());
|
||||||
|
}
|
||||||
|
write_entries(path, &preamble, &entries)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok((new_entries.len(), duplicate_count))
|
||||||
|
}
|
||||||
|
|
||||||
struct GitOutput {
|
struct GitOutput {
|
||||||
status: std::process::ExitStatus,
|
status: std::process::ExitStatus,
|
||||||
stdout: String,
|
stdout: String,
|
||||||
|
|
@ -4543,6 +4908,17 @@ fn load_config(path: &Path) -> Result<Config> {
|
||||||
.unwrap_or_else(|| Path::new("."))
|
.unwrap_or_else(|| Path::new("."))
|
||||||
.join("Misc/images_misc");
|
.join("Misc/images_misc");
|
||||||
let media_dir = config_file.media_dir.unwrap_or(default_media_dir);
|
let media_dir = config_file.media_dir.unwrap_or(default_media_dir);
|
||||||
|
let sync_x = config_file.sync_x.map(|sync_x| SyncXConfig {
|
||||||
|
source_project_path: resolve_user_id_path(&sync_x.source_project_path, config_dir),
|
||||||
|
work_dir: sync_x
|
||||||
|
.work_dir
|
||||||
|
.as_ref()
|
||||||
|
.map(|p| resolve_user_id_path(p, config_dir)),
|
||||||
|
python_bin: sync_x
|
||||||
|
.python_bin
|
||||||
|
.as_ref()
|
||||||
|
.map(|p| resolve_user_id_path(p, config_dir)),
|
||||||
|
});
|
||||||
Ok(Config {
|
Ok(Config {
|
||||||
token: config_file.token,
|
token: config_file.token,
|
||||||
user_id,
|
user_id,
|
||||||
|
|
@ -4553,6 +4929,7 @@ fn load_config(path: &Path) -> Result<Config> {
|
||||||
data_dir: config_file.data_dir,
|
data_dir: config_file.data_dir,
|
||||||
retry_interval_seconds: config_file.retry_interval_seconds,
|
retry_interval_seconds: config_file.retry_interval_seconds,
|
||||||
sync: config_file.sync,
|
sync: config_file.sync,
|
||||||
|
sync_x,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -5123,6 +5500,7 @@ mod tests {
|
||||||
data_dir: PathBuf::from("/tmp/data"),
|
data_dir: PathBuf::from("/tmp/data"),
|
||||||
retry_interval_seconds: None,
|
retry_interval_seconds: None,
|
||||||
sync: None,
|
sync: None,
|
||||||
|
sync_x: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -5464,4 +5842,53 @@ mod tests {
|
||||||
};
|
};
|
||||||
assert!(is_push_up_to_date(&output));
|
assert!(is_push_up_to_date(&output));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Non-URL and repeated lines must be dropped while file order is preserved.
#[test]
fn read_sync_x_urls_keeps_unique_http_lines() {
    let temp = TempDir::new().unwrap();
    let path = temp.path().join("bookmarks.txt");
    let contents = "https://a.example\n\nnot-a-url\nhttps://b.example\nhttps://a.example\n";
    fs::write(&path, contents).unwrap();

    let expected = vec![
        "https://a.example".to_string(),
        "https://b.example".to_string(),
    ];
    assert_eq!(read_sync_x_urls(&path).unwrap(), expected);
}
|
||||||
|
|
||||||
|
/// New URLs land at the top in input order; pre-existing ones only count as
/// duplicates and do not move.
#[test]
fn prepend_urls_to_read_later_sync_preserves_input_order() {
    let temp = TempDir::new().unwrap();
    let path = temp.path().join("read-later.md");
    fs::write(&path, "- https://already.example\n").unwrap();

    let urls: Vec<String> = ["https://one.example", "https://two.example", "https://already.example"]
        .iter()
        .map(|s| s.to_string())
        .collect();

    let (added, duplicates) = prepend_urls_to_read_later_sync(&path, &urls).unwrap();
    assert_eq!((added, duplicates), (2, 1));

    let (_, entries) = read_entries(&path).unwrap();
    let blocks: Vec<String> = entries.iter().map(|entry| entry.block_string()).collect();
    let expected = vec![
        "- https://one.example".to_string(),
        "- https://two.example".to_string(),
        "- https://already.example".to_string(),
    ];
    assert_eq!(blocks, expected);
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
21
vendor/extract-x-bookmarks/LICENSE
vendored
Normal file
21
vendor/extract-x-bookmarks/LICENSE
vendored
Normal file
|
|
@ -0,0 +1,21 @@
|
||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2025-present thegeneralist01
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
46
vendor/extract-x-bookmarks/README.md
vendored
Normal file
46
vendor/extract-x-bookmarks/README.md
vendored
Normal file
|
|
@ -0,0 +1,46 @@
|
||||||
|
# extract-x-bookmarks
|
||||||
|
Written by ChatGPT, not me.
|
||||||
|
|
||||||
|
[Works fine](https://x.com/thegeneralist01/status/2003819489989926932).
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
- Python
|
||||||
|
- Dependencies in `requirements.txt`
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
- Create a virtual environment if you want (`uv venv .venv` or whatever), then activate it. (This is absolutely optional).
|
||||||
|
|
||||||
|
- Install dependencies:
|
||||||
|
```bash
|
||||||
|
pip install -r requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
- Now, to get the **Cloudflare cookies,** this is quickest way I use:
|
||||||
|
- Download the [Cookie-Editor extension](https://cookie-editor.com/).
|
||||||
|
- Open your browser and go to [Twitter](https://x.com).
|
||||||
|
- Open the Cookie-Editor extension, press the export button (bottom right) and export as Header String. (It will copy it to the clipboard).
|
||||||
|
- Open the terminal, and run (the python file):
|
||||||
|
```bash
|
||||||
|
python isolate_cookies.py
|
||||||
|
```
|
||||||
|
- Paste the clipboard content.
|
||||||
|
- It will then put the two needed cookies into `creds.txt`, which the script will use.
|
||||||
|
- **If you want to do that without installing the extension,** the `creds.txt` file will have the following format:
|
||||||
|
```
|
||||||
|
auth_token=blablabla;ct0=blablabla
|
||||||
|
```
|
||||||
|
|
||||||
|
- A few things to know before running the script:
|
||||||
|
- It will create a `bookmarks.txt` file with the URLs of your bookmarks.
|
||||||
|
- The script fetches about 90 bookmarks per run. That means you might want to continually run it until you have no cookies left.
|
||||||
|
- A run writes (appends, really) URLs in a descending order (newest first).
|
||||||
|
- It might ask you whether to prepend or append the URLs - whether a new run should add URLs to the start or end of the file. **Generally, for a linear timeline, you want to append,** so: `a`.
|
||||||
|
- It will take some time in the end to **unbookmark** the fetched bookmarks. Each time 10 new bookmarks are unbookmarked, it will print a message.
|
||||||
|
|
||||||
|
- Run the script until you have all your bookmarks extracted:
|
||||||
|
```bash
|
||||||
|
python main.py
|
||||||
|
```
|
||||||
|
|
||||||
|
## License
|
||||||
|
Licensed under the [MIT License](LICENSE).
|
||||||
19
vendor/extract-x-bookmarks/isolate_cookies.py
vendored
Normal file
19
vendor/extract-x-bookmarks/isolate_cookies.py
vendored
Normal file
|
|
@ -0,0 +1,19 @@
|
||||||
|
# Reads a browser "Header String" cookie export from stdin, pulls out the two
# cookies the scraper needs (auth_token, ct0), and writes them to creds.txt
# in the `auth_token=...;ct0=...` format main.py expects.
raw = input("Input your cookies in the Header String format: ").strip()

cookies = {}
for chunk in raw.split(";"):
    entry = chunk.strip()
    if entry and "=" in entry:
        name, _, value = entry.partition("=")
        cookies[name.strip()] = value.strip()

auth_token = cookies.get("auth_token", "")
ct0 = cookies.get("ct0", "")
if not (auth_token and ct0):
    raise SystemExit("Missing auth_token or ct0 in the provided cookie header.")

with open("creds.txt", "w") as out:
    out.write(f"auth_token={auth_token};ct0={ct0}")
|
||||||
439
vendor/extract-x-bookmarks/main.py
vendored
Normal file
439
vendor/extract-x-bookmarks/main.py
vendored
Normal file
|
|
@ -0,0 +1,439 @@
|
||||||
|
import argparse
|
||||||
|
import time
|
||||||
|
import os
|
||||||
|
from twitter.account import Account
|
||||||
|
|
||||||
|
def is_rate_limit_error(error):
    """Return True when ``error`` looks like an HTTP 429 / rate-limit failure.

    Matches against the lower-cased string form of the error, so it accepts
    exception objects as well as plain message strings.
    """
    message = str(error).lower()
    indicators = (
        '429',
        'too many requests',
        'rate limit',
        'rate_limit',
        'exceeded',
        'quota',
        'limit exceeded',
    )
    return any(marker in message for marker in indicators)
|
||||||
|
|
||||||
|
|
||||||
|
def handle_rate_limit_error(error, retry_count, base_wait_time=60):
    """Compute the exponential-backoff delay for a rate-limit retry.

    Doubles ``base_wait_time`` per retry (1min, 2min, 4min, ...), capped at
    15 minutes (900s), prints a progress note, and returns the delay in
    seconds. ``error`` is accepted for symmetry with the caller but unused.
    """
    wait_time = min(base_wait_time * (2 ** retry_count), 900)

    print(f"\n ⚠ Rate limit detected (attempt {retry_count + 1})")
    print(f" ⏳ Waiting {wait_time}s ({wait_time/60:.1f} minutes) before retry...")

    return wait_time
|
||||||
|
|
||||||
|
|
||||||
|
def extract_bookmark_entries_from_response(response_data):
    """
    Pull bookmark entries (tweet IDs plus author usernames) out of the
    response returned by account.bookmarks().

    Three response shapes are supported: a flat list of tweet IDs, a list
    of legacy tweet dicts, or the nested paginated GraphQL timeline
    structure (one page or a list of pages).

    Args:
        response_data: The response data from account.bookmarks()

    Returns:
        List of tuples: [(tweet_id, username), ...] — deduplicated,
        in order of first appearance; username may be None.
    """
    results = []
    seen = set()

    def record(raw_id, screen_name):
        # Normalise the ID to a stripped string; drop blanks and duplicates.
        tid = str(raw_id).strip()
        if tid and tid not in seen:
            seen.add(tid)
            results.append((tid, screen_name))

    try:
        payloads = []
        if isinstance(response_data, list):
            if len(response_data) > 0 and isinstance(response_data[0], (str, int)):
                # Shape 1: a bare list of tweet IDs.
                for raw in response_data:
                    record(raw, None)
                return results
            elif len(response_data) > 0 and isinstance(response_data[0], dict):
                if 'id' in response_data[0] or 'id_str' in response_data[0]:
                    # Shape 2: a list of legacy tweet objects.
                    for tweet in response_data:
                        tid = tweet.get('id_str') or str(tweet.get('id', ''))
                        author = tweet.get('user', {}).get('screen_name') if 'user' in tweet else None
                        if tid:
                            record(tid, author)
                    return results
                # Shape 3: a list of paginated GraphQL pages.
                payloads = [page for page in response_data if isinstance(page, dict)]
        elif isinstance(response_data, dict):
            payloads = [response_data]
        else:
            # Unrecognised payload type — nothing to extract.
            return results

        for page in payloads:
            # Walk the nested GraphQL timeline structure.
            timeline = page.get('data', {}).get('bookmark_timeline_v2', {}).get('timeline', {})
            if not timeline:
                # Alternative nesting used by some responses.
                timeline = (
                    page.get('data', {})
                    .get('user', {})
                    .get('result', {})
                    .get('timeline_v2', {})
                    .get('timeline', {})
                )

            for instruction in timeline.get('instructions', []):
                if instruction.get('type') != 'TimelineAddEntries':
                    continue
                for entry in instruction.get('entries', []):
                    content = entry.get('content', {})
                    if content.get('entryType') != 'TimelineTimelineItem':
                        continue
                    item_content = content.get('itemContent', {})
                    if item_content.get('itemType') != 'TimelineTweet':
                        continue

                    tweet_result = item_content.get('tweet_results', {}).get('result', {})
                    tid = tweet_result.get('rest_id')

                    # Author's screen name lives under core.user_results.result.legacy.
                    author = None
                    user_info = tweet_result.get('core', {}).get('user_results', {}).get('result', {})
                    if user_info:
                        legacy = user_info.get('legacy', {})
                        if legacy:
                            author = legacy.get('screen_name')

                    if tid:
                        record(tid, author)

        return results
    except Exception as e:
        # Best-effort: report and return whatever was extracted so far.
        print(f" ⚠ Warning: Error extracting bookmark entries: {e}")
        return results
|
||||||
|
|
||||||
|
|
||||||
|
def extract_all_bookmarks(account, delay_between_requests=2.0):
    """
    Extract all bookmarks from the account with rate-limit handling.

    account.bookmarks() is called once (no pagination loop here); on failure
    the call is retried at most once — with exponential backoff for rate
    limit errors, or a short fixed wait for other errors.

    Args:
        account: Account instance from twitter.account
        delay_between_requests: Delay in seconds between requests (only used
            to size the retry wait here; kept for consistency with callers)

    Returns:
        List of tuples: [(tweet_id, username), ...] (newest first).
        Returns whatever was gathered so far (possibly []) on Ctrl-C.
    """
    all_bookmarks = []
    retry_count = 0

    print("Starting to extract bookmarks...")
    print("-" * 50)

    try:
        print("Fetching bookmarks...", end=" ")

        # Fetch all bookmarks (single call, no pagination needed).
        try:
            response_data = account.bookmarks()
            retry_count = 0
        except Exception as e:
            error_msg = str(e)
            print(f"\n ❌ Error fetching bookmarks: {error_msg}")

            # Rate limit errors get an exponential-backoff wait, then one retry.
            if is_rate_limit_error(e):
                wait_time = handle_rate_limit_error(e, retry_count)
                time.sleep(wait_time)
                retry_count += 1
                # Retry the request once; give up (re-raise) if it fails again.
                try:
                    response_data = account.bookmarks()
                    retry_count = 0
                except Exception as retry_error:
                    print(f" ❌ Failed after retry: {retry_error}")
                    raise
            else:
                # Non-rate-limit errors: wait a short fixed time and retry once.
                # NOTE(review): retry_count is 0 on the first failure, so the
                # "max retries" else-branch below is only reachable if callers
                # ever pass in accumulated state — confirm intent.
                if retry_count < 2:
                    wait_time = delay_between_requests * 3
                    print(f" ⏳ Waiting {wait_time}s before retry...")
                    time.sleep(wait_time)
                    retry_count += 1
                    try:
                        response_data = account.bookmarks()
                        retry_count = 0
                    except Exception as retry_error:
                        print(f" ❌ Failed after retry: {retry_error}")
                        raise
                else:
                    print(f" ❌ Max retries reached. Stopping.")
                    raise

        # Parse tweet IDs / usernames out of whatever shape the API returned.
        all_bookmarks = extract_bookmark_entries_from_response(response_data)

        if all_bookmarks:
            print(f"✓ Retrieved {len(all_bookmarks)} bookmarks")
        else:
            print("⚠ No bookmarks found")

    except KeyboardInterrupt:
        # User aborted: fall through and return what we have (likely []).
        print("\n\n⚠ Extraction interrupted by user")
    except Exception as e:
        # Any other error (including failed retries above) is fatal here.
        print(f"\n\n❌ Error occurred: {str(e)}")
        raise

    print(f"\n{'='*80}")
    print(f"Bookmark extraction complete!")
    print(f" Total bookmarks found: {len(all_bookmarks)}")
    print(f"{'='*80}\n")

    return all_bookmarks
|
||||||
|
|
||||||
|
|
||||||
|
def save_bookmarks_and_unbookmark(
    account,
    bookmarks,
    output_file="bookmarks.txt",
    delay_between_requests=2.0,
    write_mode="a",
):
    """
    Save bookmark URLs to a file (newest first) and unbookmark each one.

    The output file is fully rewritten: existing content is read first, then
    new URLs are either prepended or appended to it according to write_mode.
    Every tweet is written to the file even if its unbookmark call fails.

    Args:
        account: Account instance from twitter.account
        bookmarks: List of tuples [(tweet_id, username), ...]
        output_file: Output file path
        delay_between_requests: Delay in seconds between unbookmark requests
        write_mode: 'p' to prepend, 'a' to append, or 'ask' to prompt
            interactively on stdin (default on empty input is append).

    Returns:
        Dict with 'saved_count' (all bookmarks written) and
        'unbookmarked_count' (successful unbookmark calls).

    Raises:
        ValueError: If write_mode is not one of 'ask', 'p', 'a'.
    """
    print(f"\nSaving bookmarks to {output_file} and unbookmarking...")
    print("-" * 50)

    # Read existing content up front so the rewrite below can preserve it.
    existing_content = ""
    if os.path.exists(output_file):
        with open(output_file, "r") as f:
            existing_content = f.read()

    # Choose whether to prepend or append.
    if write_mode not in ['ask', 'p', 'a']:
        raise ValueError("write_mode must be one of: ask, p, a")

    if write_mode == "ask":
        # Interactive mode: keep prompting until a valid choice is given.
        while True:
            choice = input("Prepend (p) or append (a) new bookmarks? [p/a] (default a): ").strip().lower()
            if choice == "":
                choice = "a"
            if choice in ['p', 'a']:
                break
            print(" ⚠ Invalid choice. Please enter 'p' for prepend or 'a' for append.")
    else:
        choice = write_mode

    prepend = (choice == 'p')

    # Collect new bookmark URLs (newest first).
    new_bookmark_urls = []
    unbookmark_count = 0
    retry_count = 0

    # Process bookmarks (they should already be in order, newest first).
    for tweet_id, username in bookmarks:
        # Construct the canonical tweet URL.
        if username:
            url = f"https://twitter.com/{username}/status/{tweet_id}"
        else:
            # Fallback permalink form when the author's handle is unknown.
            url = f"https://twitter.com/i/web/status/{tweet_id}"

        # Record the URL unconditionally — saving is independent of the
        # unbookmark call succeeding below.
        new_bookmark_urls.append(url)

        # Unbookmark the tweet, retrying once on failure.
        try:
            account.unbookmark(tweet_id)
            unbookmark_count += 1
            retry_count = 0  # Reset retry count on success

            # Progress indicator every 10 processed bookmarks.
            if unbookmark_count % 10 == 0:
                print(f" ✓ Processed {unbookmark_count}/{len(bookmarks)} bookmarks...")

        except Exception as e:
            error_msg = str(e)
            print(f"\n ⚠ Error unbookmarking tweet {tweet_id}: {error_msg}")

            # Rate limit errors: exponential-backoff wait, then one retry.
            if is_rate_limit_error(e):
                wait_time = handle_rate_limit_error(e, retry_count)
                time.sleep(wait_time)
                retry_count += 1
                # Retry the unbookmark; a second failure is logged, not fatal.
                try:
                    account.unbookmark(tweet_id)
                    unbookmark_count += 1
                    retry_count = 0
                except Exception as retry_error:
                    print(f" ❌ Failed to unbookmark {tweet_id} after retry: {retry_error}")
            else:
                # Other errors: short fixed wait and one retry, unless two
                # consecutive items already failed (retry_count carries over
                # between loop iterations on failure).
                if retry_count < 2:
                    wait_time = delay_between_requests * 3
                    print(f" ⏳ Waiting {wait_time}s before retry...")
                    time.sleep(wait_time)
                    retry_count += 1
                    try:
                        account.unbookmark(tweet_id)
                        unbookmark_count += 1
                        retry_count = 0
                    except Exception as retry_error:
                        print(f" ❌ Failed to unbookmark {tweet_id} after retry: {retry_error}")
                else:
                    print(f" ❌ Skipping unbookmark for {tweet_id} after max retries")

        # Rate limiting: wait before the next unbookmark request
        # (also sleeps after the final item).
        if delay_between_requests > 0:
            time.sleep(delay_between_requests)

    # Rewrite the whole file with new URLs placed per the user's choice.
    with open(output_file, "w") as f:
        if prepend:
            # Write new bookmarks first (prepended), then existing content.
            for url in new_bookmark_urls:
                f.write(f"{url}\n")
            if existing_content:
                f.write(existing_content)
        else:
            # Write existing content first, then new bookmarks (appended).
            if existing_content:
                f.write(existing_content)
            for url in new_bookmark_urls:
                f.write(f"{url}\n")

    print(f"\n{'='*80}")
    print(f"Processing complete!")
    print(f" Total bookmarks saved: {len(bookmarks)}")
    print(f" Total unbookmarked: {unbookmark_count}")
    print(f" Output file: {output_file}")
    print(f"{'='*80}\n")
    return {
        "saved_count": len(bookmarks),
        "unbookmarked_count": unbookmark_count,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args():
    """
    Build and evaluate the command-line interface for the extractor.

    Returns:
        argparse.Namespace with output_file, delay_between_requests, mode,
        single_run, max_runs, and delay_between_runs attributes.
    """
    cli = argparse.ArgumentParser(description="Extract and unbookmark X/Twitter bookmarks.")

    # Where extracted URLs are written.
    cli.add_argument("--output-file", default="bookmarks.txt", help="Path to output bookmarks file.")

    # Pacing between individual unbookmark calls.
    cli.add_argument(
        "--delay-between-requests",
        type=float,
        default=2.0,
        help="Seconds to wait between unbookmark requests.",
    )

    # How new URLs are merged into an existing output file.
    cli.add_argument(
        "--mode",
        choices=["a", "p", "ask"],
        default="a",
        help="Write mode for bookmark file: append (a), prepend (p), or ask interactively.",
    )

    # Loop control: one pass only, or sync until empty (bounded by --max-runs).
    cli.add_argument(
        "--single-run",
        action="store_true",
        help="Run one extraction pass only.",
    )
    cli.add_argument(
        "--max-runs",
        type=int,
        default=100,
        help="Maximum number of extraction runs when syncing until empty.",
    )
    cli.add_argument(
        "--delay-between-runs",
        type=float,
        default=1.0,
        help="Seconds to wait between extraction runs.",
    )

    return cli.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    args = parse_args()

    # Load the raw cookie header string ("auth_token=...; ct0=...") saved by
    # the credential-isolation step.
    with open("creds.txt", "r") as file:
        cookie_str = file.read().strip()
    # Split the header into a dict. Strip each pair so that the standard
    # "name=value; name=value" separator (note the space after ';') does not
    # leave leading whitespace in cookie names, and skip empty fragments so a
    # trailing ';' does not crash dict() with a one-element pair.
    cookie_dict = dict(
        item.strip().split("=", 1)
        for item in cookie_str.split(";")
        if item.strip()
    )

    # Initialize the authenticated account client from the cookies.
    account = Account(cookies=cookie_dict)

    # Configuration
    delay_between_requests = args.delay_between_requests
    output_file = args.output_file

    total_saved = 0
    total_unbookmarked = 0
    runs = 0

    # Keep extracting until no bookmarks remain (each run unbookmarks what it
    # saves), a run makes no progress, --single-run is set, or we hit the cap.
    while runs < args.max_runs:
        runs += 1
        print(f"\nRun {runs}: fetching bookmarks...")
        bookmarks = extract_all_bookmarks(account, delay_between_requests=delay_between_requests)

        if not bookmarks:
            print("\nNo bookmarks found.")
            break

        # Save bookmarks to file and unbookmark them.
        stats = save_bookmarks_and_unbookmark(
            account,
            bookmarks,
            output_file=output_file,
            delay_between_requests=delay_between_requests,
            write_mode=args.mode,
        )
        total_saved += stats["saved_count"]
        total_unbookmarked += stats["unbookmarked_count"]
        print(f"\nSuccessfully processed {len(bookmarks)} bookmarks in run {runs}")

        if args.single_run:
            break
        # If nothing was unbookmarked, the same items would be re-fetched
        # forever; bail out instead.
        if stats["unbookmarked_count"] == 0:
            print("No bookmarks were unbookmarked in this run; stopping to avoid an infinite loop.")
            break
        if runs < args.max_runs and args.delay_between_runs > 0:
            time.sleep(args.delay_between_runs)

    if runs >= args.max_runs:
        print(f"\nReached max runs ({args.max_runs}) before bookmarks were fully exhausted.")

    print(f"\nDone. Total saved: {total_saved}, total unbookmarked: {total_unbookmarked}")
|
||||||
1
vendor/extract-x-bookmarks/requirements.txt
vendored
Normal file
1
vendor/extract-x-bookmarks/requirements.txt
vendored
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
twitter-api-client == 0.10.22
|
||||||
Loading…
Add table
Add a link
Reference in a new issue