Add /sync_x command with X bookmarks import flow
This commit is contained in:
parent
79073b7a2c
commit
874f3ec570
8 changed files with 975 additions and 2 deletions
2
.gitignore
vendored
2
.gitignore
vendored
|
|
@ -1 +1,3 @@
|
||||||
/target
|
/target
|
||||||
|
__pycache__/
|
||||||
|
*.pyc
|
||||||
|
|
|
||||||
18
README.md
18
README.md
|
|
@ -30,3 +30,21 @@ user_id = "/run/agenix/readlater-user-id"
|
||||||
```toml
|
```toml
|
||||||
user_id = { file = "/run/agenix/readlater-user-id" }
|
user_id = { file = "/run/agenix/readlater-user-id" }
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### `sync_x`
|
||||||
|
|
||||||
|
`/sync_x` imports X/Twitter bookmarks into Read Later.
|
||||||
|
|
||||||
|
- The bot prompts for the Cloudflare cookie header string (`auth_token` + `ct0`).
|
||||||
|
- It runs `isolate_cookies.py`, then `main.py --mode a`.
|
||||||
|
- Extracted URLs are prepended to Read Later.
|
||||||
|
- Temporary `creds.txt` / `bookmarks.txt` files are removed after import.
|
||||||
|
|
||||||
|
Config example:
|
||||||
|
|
||||||
|
```toml
|
||||||
|
[sync_x]
|
||||||
|
source_project_path = "/Users/thegeneralist/personal/bookkeeper/vendor/extract-x-bookmarks"
|
||||||
|
work_dir = "/var/lib/readlater-bot/sync-x"
|
||||||
|
python_bin = "/Users/thegeneralist/personal/extract-x-bookmarks/.venv/bin/python"
|
||||||
|
```
|
||||||
|
|
|
||||||
431
src/main.rs
431
src/main.rs
|
|
@ -3,7 +3,7 @@ use std::fs;
|
||||||
use std::io::Write;
|
use std::io::Write;
|
||||||
use std::os::unix::fs::PermissionsExt;
|
use std::os::unix::fs::PermissionsExt;
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
use std::process::Command;
|
use std::process::{Command, Stdio};
|
||||||
use std::time::{Duration, SystemTime, UNIX_EPOCH};
|
use std::time::{Duration, SystemTime, UNIX_EPOCH};
|
||||||
|
|
||||||
use anyhow::{anyhow, Context, Result};
|
use anyhow::{anyhow, Context, Result};
|
||||||
|
|
@ -26,6 +26,7 @@ const RESOURCE_PROMPT_TTL_SECS: u64 = 5 * 60;
|
||||||
const PAGE_SIZE: usize = 3;
|
const PAGE_SIZE: usize = 3;
|
||||||
const DOWNLOAD_PROMPT_TTL_SECS: u64 = 5 * 60;
|
const DOWNLOAD_PROMPT_TTL_SECS: u64 = 5 * 60;
|
||||||
const FINISH_TITLE_PROMPT_TTL_SECS: u64 = 5 * 60;
|
const FINISH_TITLE_PROMPT_TTL_SECS: u64 = 5 * 60;
|
||||||
|
const SYNC_X_PROMPT_TTL_SECS: u64 = 10 * 60;
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
struct Config {
|
struct Config {
|
||||||
|
|
@ -38,6 +39,7 @@ struct Config {
|
||||||
data_dir: PathBuf,
|
data_dir: PathBuf,
|
||||||
retry_interval_seconds: Option<u64>,
|
retry_interval_seconds: Option<u64>,
|
||||||
sync: Option<SyncConfig>,
|
sync: Option<SyncConfig>,
|
||||||
|
sync_x: Option<SyncXConfig>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Deserialize, Clone)]
|
#[derive(Debug, Deserialize, Clone)]
|
||||||
|
|
@ -51,6 +53,7 @@ struct ConfigFile {
|
||||||
data_dir: PathBuf,
|
data_dir: PathBuf,
|
||||||
retry_interval_seconds: Option<u64>,
|
retry_interval_seconds: Option<u64>,
|
||||||
sync: Option<SyncConfig>,
|
sync: Option<SyncConfig>,
|
||||||
|
sync_x: Option<SyncXConfig>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Deserialize, Clone)]
|
#[derive(Debug, Deserialize, Clone)]
|
||||||
|
|
@ -67,6 +70,15 @@ struct SyncConfig {
|
||||||
token_file: PathBuf,
|
token_file: PathBuf,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Settings for the `/sync_x` X-bookmarks import flow.
#[derive(Debug, Deserialize, Clone)]
struct SyncXConfig {
    // Checkout of the extract-x-bookmarks project; its scripts are copied
    // into the work dir and executed from there.
    source_project_path: PathBuf,
    // Scratch directory for creds.txt / bookmarks.txt; defaults to
    // `<data_dir>/sync-x` when absent (see run_sync_x).
    #[serde(default)]
    work_dir: Option<PathBuf>,
    // Explicit Python interpreter; when absent the source project's .venv
    // interpreter is tried, then `python3` on PATH (see resolve_sync_x_python_bin).
    #[serde(default)]
    python_bin: Option<PathBuf>,
}
|
||||||
|
|
||||||
#[derive(Parser, Debug)]
|
#[derive(Parser, Debug)]
|
||||||
struct Args {
|
struct Args {
|
||||||
#[arg(long)]
|
#[arg(long)]
|
||||||
|
|
@ -257,6 +269,12 @@ struct FinishTitlePrompt {
|
||||||
expires_at: u64,
|
expires_at: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Pending `/sync_x` prompt: the next message from the chat is treated as
/// the cookie-header reply until `expires_at`.
#[derive(Clone, Debug)]
struct SyncXCookiePrompt {
    // The bot message asking for the cookie header; deleted once handled.
    prompt_message_id: MessageId,
    // Unix timestamp after which the prompt is discarded.
    expires_at: u64,
}
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
struct UndoSession {
|
struct UndoSession {
|
||||||
chat_id: i64,
|
chat_id: i64,
|
||||||
|
|
@ -319,6 +337,7 @@ struct AppState {
|
||||||
download_pickers: Mutex<HashMap<String, DownloadPickerState>>,
|
download_pickers: Mutex<HashMap<String, DownloadPickerState>>,
|
||||||
download_link_prompts: Mutex<HashMap<i64, DownloadLinkPrompt>>,
|
download_link_prompts: Mutex<HashMap<i64, DownloadLinkPrompt>>,
|
||||||
finish_title_prompts: Mutex<HashMap<i64, FinishTitlePrompt>>,
|
finish_title_prompts: Mutex<HashMap<i64, FinishTitlePrompt>>,
|
||||||
|
sync_x_cookie_prompts: Mutex<HashMap<i64, SyncXCookiePrompt>>,
|
||||||
queue: Mutex<Vec<QueuedOp>>,
|
queue: Mutex<Vec<QueuedOp>>,
|
||||||
undo: Mutex<Vec<UndoRecord>>,
|
undo: Mutex<Vec<UndoRecord>>,
|
||||||
queue_path: PathBuf,
|
queue_path: PathBuf,
|
||||||
|
|
@ -366,6 +385,7 @@ async fn main() -> Result<()> {
|
||||||
download_pickers: Mutex::new(HashMap::new()),
|
download_pickers: Mutex::new(HashMap::new()),
|
||||||
download_link_prompts: Mutex::new(HashMap::new()),
|
download_link_prompts: Mutex::new(HashMap::new()),
|
||||||
finish_title_prompts: Mutex::new(HashMap::new()),
|
finish_title_prompts: Mutex::new(HashMap::new()),
|
||||||
|
sync_x_cookie_prompts: Mutex::new(HashMap::new()),
|
||||||
queue: Mutex::new(load_queue(&queue_path)?),
|
queue: Mutex::new(load_queue(&queue_path)?),
|
||||||
undo: Mutex::new(undo),
|
undo: Mutex::new(undo),
|
||||||
queue_path,
|
queue_path,
|
||||||
|
|
@ -495,6 +515,32 @@ async fn handle_message(
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let mut expired_sync_x_prompt: Option<SyncXCookiePrompt> = None;
|
||||||
|
let pending_sync_x_prompt = {
|
||||||
|
let mut prompts = state.sync_x_cookie_prompts.lock().await;
|
||||||
|
if let Some(prompt) = prompts.remove(&msg.chat.id.0) {
|
||||||
|
if prompt.expires_at > now_ts() {
|
||||||
|
Some(prompt)
|
||||||
|
} else {
|
||||||
|
expired_sync_x_prompt = Some(prompt);
|
||||||
|
None
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
if let Some(prompt) = expired_sync_x_prompt {
|
||||||
|
let _ = bot
|
||||||
|
.delete_message(msg.chat.id, prompt.prompt_message_id)
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(prompt) = pending_sync_x_prompt {
|
||||||
|
handle_sync_x_cookie_response(&bot, msg.chat.id, msg.id, &state, &text, prompt).await?;
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
if let Some(cmd) = parse_command(&text) {
|
if let Some(cmd) = parse_command(&text) {
|
||||||
let rest = text
|
let rest = text
|
||||||
.splitn(2, |c: char| c.is_whitespace())
|
.splitn(2, |c: char| c.is_whitespace())
|
||||||
|
|
@ -503,7 +549,7 @@ async fn handle_message(
|
||||||
.trim();
|
.trim();
|
||||||
match cmd {
|
match cmd {
|
||||||
"start" | "help" => {
|
"start" | "help" => {
|
||||||
let help = "Send any text to save it. Commands: /add <text>, /list, /search <query>, /download [url], /undos, /reset_peeked, /pull, /pull theirs, /push, /sync. Use --- to split a message into multiple items. In list views, use buttons for Mark Finished, Add Resource, Delete, Random. Quick actions: reply with del/delete to remove the current item, or send norm to normalize links.";
|
let help = "Send any text to save it. Commands: /add <text>, /list, /search <query>, /download [url], /undos, /reset_peeked, /pull, /pull theirs, /push, /sync, /sync_x. Use --- to split a message into multiple items. In list views, use buttons for Mark Finished, Add Resource, Delete, Random. Quick actions: reply with del/delete to remove the current item, or send norm to normalize links.";
|
||||||
bot.send_message(msg.chat.id, help).await?;
|
bot.send_message(msg.chat.id, help).await?;
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
|
@ -559,6 +605,11 @@ async fn handle_message(
|
||||||
let _ = bot.delete_message(msg.chat.id, msg.id).await;
|
let _ = bot.delete_message(msg.chat.id, msg.id).await;
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
"sync_x" => {
|
||||||
|
handle_sync_x_command(bot.clone(), msg.clone(), state).await?;
|
||||||
|
let _ = bot.delete_message(msg.chat.id, msg.id).await;
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
_ => {
|
_ => {
|
||||||
// Unknown command, fall through as text.
|
// Unknown command, fall through as text.
|
||||||
}
|
}
|
||||||
|
|
@ -1150,6 +1201,86 @@ async fn handle_sync_command(
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn handle_sync_x_command(
|
||||||
|
bot: Bot,
|
||||||
|
msg: Message,
|
||||||
|
state: std::sync::Arc<AppState>,
|
||||||
|
) -> Result<()> {
|
||||||
|
if state.config.sync_x.is_none() {
|
||||||
|
send_error(
|
||||||
|
&bot,
|
||||||
|
msg.chat.id,
|
||||||
|
"sync_x not configured. Set settings.sync_x.source_project_path (and optionally settings.sync_x.python_bin/work_dir).",
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
let prompt_text = "Paste the Cloudflare cookie header string from x.com (must include auth_token and ct0).";
|
||||||
|
let sent = bot.send_message(msg.chat.id, prompt_text).await?;
|
||||||
|
state.sync_x_cookie_prompts.lock().await.insert(
|
||||||
|
msg.chat.id.0,
|
||||||
|
SyncXCookiePrompt {
|
||||||
|
prompt_message_id: sent.id,
|
||||||
|
expires_at: now_ts() + SYNC_X_PROMPT_TTL_SECS,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn handle_sync_x_cookie_response(
|
||||||
|
bot: &Bot,
|
||||||
|
chat_id: ChatId,
|
||||||
|
message_id: MessageId,
|
||||||
|
state: &std::sync::Arc<AppState>,
|
||||||
|
text: &str,
|
||||||
|
prompt: SyncXCookiePrompt,
|
||||||
|
) -> Result<()> {
|
||||||
|
let cookie_header = text.trim();
|
||||||
|
if cookie_header.is_empty() {
|
||||||
|
send_error(bot, chat_id, "Cookie header is empty. Paste the full header string.").await?;
|
||||||
|
state.sync_x_cookie_prompts.lock().await.insert(
|
||||||
|
chat_id.0,
|
||||||
|
SyncXCookiePrompt {
|
||||||
|
prompt_message_id: prompt.prompt_message_id,
|
||||||
|
expires_at: now_ts() + SYNC_X_PROMPT_TTL_SECS,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
let _ = bot.delete_message(chat_id, message_id).await;
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
let _ = bot.delete_message(chat_id, prompt.prompt_message_id).await;
|
||||||
|
let _ = bot.delete_message(chat_id, message_id).await;
|
||||||
|
|
||||||
|
let status_msg = bot.send_message(chat_id, "Syncing X bookmarks...").await?;
|
||||||
|
let config = state.config.clone();
|
||||||
|
let cookie_header = cookie_header.to_string();
|
||||||
|
let outcome = tokio::task::spawn_blocking(move || run_sync_x(&config, &cookie_header))
|
||||||
|
.await
|
||||||
|
.context("sync_x task failed")?;
|
||||||
|
let _ = bot.delete_message(chat_id, status_msg.id).await;
|
||||||
|
|
||||||
|
match outcome {
|
||||||
|
Ok(sync_outcome) => {
|
||||||
|
if sync_outcome.extracted_count == 0 {
|
||||||
|
send_ephemeral(bot, chat_id, "No X bookmarks found.", ACK_TTL_SECS).await?;
|
||||||
|
} else {
|
||||||
|
let text = format!(
|
||||||
|
"X sync complete: extracted {}, added {}, skipped {} duplicates.",
|
||||||
|
sync_outcome.extracted_count, sync_outcome.added_count, sync_outcome.duplicate_count
|
||||||
|
);
|
||||||
|
bot.send_message(chat_id, text).await?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(err) => {
|
||||||
|
send_error(bot, chat_id, &format!("sync_x failed: {}", err)).await?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
async fn handle_undos_command(
|
async fn handle_undos_command(
|
||||||
bot: Bot,
|
bot: Bot,
|
||||||
msg: Message,
|
msg: Message,
|
||||||
|
|
@ -2852,6 +2983,13 @@ enum SyncOutcome {
|
||||||
Synced,
|
Synced,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Summary of one completed X bookmarks import run.
#[derive(Debug)]
struct SyncXOutcome {
    // URLs extracted from the bookmarks file produced by the Python tooling.
    extracted_count: usize,
    // URLs newly prepended to the Read Later list.
    added_count: usize,
    // URLs skipped because an identical entry already existed.
    duplicate_count: usize,
}
|
||||||
|
|
||||||
async fn queue_op(state: &std::sync::Arc<AppState>, op: QueuedOp) -> Result<()> {
|
async fn queue_op(state: &std::sync::Arc<AppState>, op: QueuedOp) -> Result<()> {
|
||||||
let mut queue = state.queue.lock().await;
|
let mut queue = state.queue.lock().await;
|
||||||
queue.push(op);
|
queue.push(op);
|
||||||
|
|
@ -3155,6 +3293,233 @@ fn run_sync(sync: &SyncConfig) -> Result<SyncOutcome> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn run_sync_x(config: &Config, cookie_header: &str) -> Result<SyncXOutcome> {
|
||||||
|
let sync_x = config
|
||||||
|
.sync_x
|
||||||
|
.as_ref()
|
||||||
|
.ok_or_else(|| anyhow!("sync_x is not configured."))?;
|
||||||
|
|
||||||
|
let source_project = &sync_x.source_project_path;
|
||||||
|
if !source_project.exists() {
|
||||||
|
return Err(anyhow!(
|
||||||
|
"sync_x source project path not found: {}",
|
||||||
|
source_project.display()
|
||||||
|
));
|
||||||
|
}
|
||||||
|
if !source_project.is_dir() {
|
||||||
|
return Err(anyhow!(
|
||||||
|
"sync_x source project path is not a directory: {}",
|
||||||
|
source_project.display()
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
let work_dir = sync_x
|
||||||
|
.work_dir
|
||||||
|
.clone()
|
||||||
|
.unwrap_or_else(|| config.data_dir.join("sync-x"));
|
||||||
|
prepare_sync_x_workspace(source_project, &work_dir)?;
|
||||||
|
|
||||||
|
let python_bin = resolve_sync_x_python_bin(sync_x);
|
||||||
|
let creds_path = work_dir.join("creds.txt");
|
||||||
|
let bookmarks_path = work_dir.join("bookmarks.txt");
|
||||||
|
let _ = fs::remove_file(&creds_path);
|
||||||
|
let _ = fs::remove_file(&bookmarks_path);
|
||||||
|
|
||||||
|
run_python_script(
|
||||||
|
&python_bin,
|
||||||
|
&work_dir,
|
||||||
|
"isolate_cookies.py",
|
||||||
|
&[],
|
||||||
|
Some(cookie_header),
|
||||||
|
)?;
|
||||||
|
run_python_script(&python_bin, &work_dir, "main.py", &["--mode", "a"], None)?;
|
||||||
|
|
||||||
|
let urls = if bookmarks_path.exists() {
|
||||||
|
read_sync_x_urls(&bookmarks_path)?
|
||||||
|
} else {
|
||||||
|
Vec::new()
|
||||||
|
};
|
||||||
|
let (added_count, duplicate_count) = prepend_urls_to_read_later_sync(&config.read_later_path, &urls)?;
|
||||||
|
|
||||||
|
let _ = fs::remove_file(&bookmarks_path);
|
||||||
|
let _ = fs::remove_file(&creds_path);
|
||||||
|
|
||||||
|
Ok(SyncXOutcome {
|
||||||
|
extracted_count: urls.len(),
|
||||||
|
added_count,
|
||||||
|
duplicate_count,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn resolve_sync_x_python_bin(sync_x: &SyncXConfig) -> PathBuf {
|
||||||
|
if let Some(path) = &sync_x.python_bin {
|
||||||
|
return path.clone();
|
||||||
|
}
|
||||||
|
let venv_python3 = sync_x.source_project_path.join(".venv/bin/python3");
|
||||||
|
if venv_python3.exists() {
|
||||||
|
return venv_python3;
|
||||||
|
}
|
||||||
|
let venv_python = sync_x.source_project_path.join(".venv/bin/python");
|
||||||
|
if venv_python.exists() {
|
||||||
|
return venv_python;
|
||||||
|
}
|
||||||
|
PathBuf::from("python3")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn prepare_sync_x_workspace(source_project: &Path, work_dir: &Path) -> Result<()> {
|
||||||
|
fs::create_dir_all(work_dir)
|
||||||
|
.with_context(|| format!("create sync_x work dir {}", work_dir.display()))?;
|
||||||
|
|
||||||
|
for file in [
|
||||||
|
"main.py",
|
||||||
|
"isolate_cookies.py",
|
||||||
|
"requirements.txt",
|
||||||
|
"README.md",
|
||||||
|
"LICENSE",
|
||||||
|
] {
|
||||||
|
let src = source_project.join(file);
|
||||||
|
let dest = work_dir.join(file);
|
||||||
|
if !src.exists() {
|
||||||
|
if matches!(file, "main.py" | "isolate_cookies.py") {
|
||||||
|
return Err(anyhow!(
|
||||||
|
"sync_x source is missing required file: {}",
|
||||||
|
src.display()
|
||||||
|
));
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
fs::copy(&src, &dest)
|
||||||
|
.with_context(|| format!("copy {} to {}", src.display(), dest.display()))?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run_python_script(
|
||||||
|
python_bin: &Path,
|
||||||
|
work_dir: &Path,
|
||||||
|
script: &str,
|
||||||
|
args: &[&str],
|
||||||
|
stdin_input: Option<&str>,
|
||||||
|
) -> Result<()> {
|
||||||
|
let mut cmd = Command::new(python_bin);
|
||||||
|
cmd.current_dir(work_dir)
|
||||||
|
.arg(script)
|
||||||
|
.args(args)
|
||||||
|
.stdout(Stdio::piped())
|
||||||
|
.stderr(Stdio::piped());
|
||||||
|
|
||||||
|
if stdin_input.is_some() {
|
||||||
|
cmd.stdin(Stdio::piped());
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut child = cmd
|
||||||
|
.spawn()
|
||||||
|
.with_context(|| format!("run {} {}", python_bin.display(), script))?;
|
||||||
|
if let Some(input) = stdin_input {
|
||||||
|
if let Some(mut stdin) = child.stdin.take() {
|
||||||
|
stdin
|
||||||
|
.write_all(input.as_bytes())
|
||||||
|
.context("write stdin to python script")?;
|
||||||
|
if !input.ends_with('\n') {
|
||||||
|
stdin
|
||||||
|
.write_all(b"\n")
|
||||||
|
.context("write newline to python script")?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let output = child.wait_with_output().context("wait for python script")?;
|
||||||
|
if !output.status.success() {
|
||||||
|
let stdout = String::from_utf8_lossy(&output.stdout).to_string();
|
||||||
|
let stderr = String::from_utf8_lossy(&output.stderr).to_string();
|
||||||
|
let tail = summarize_process_output(&stdout, &stderr);
|
||||||
|
return Err(anyhow!(
|
||||||
|
"{} {} failed (status {}):\n{}",
|
||||||
|
python_bin.display(),
|
||||||
|
script,
|
||||||
|
output.status,
|
||||||
|
tail
|
||||||
|
));
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn summarize_process_output(stdout: &str, stderr: &str) -> String {
|
||||||
|
let stderr_trimmed = stderr.trim();
|
||||||
|
if !stderr_trimmed.is_empty() {
|
||||||
|
return trim_tail(stderr_trimmed, 1200);
|
||||||
|
}
|
||||||
|
let stdout_trimmed = stdout.trim();
|
||||||
|
if !stdout_trimmed.is_empty() {
|
||||||
|
return trim_tail(stdout_trimmed, 1200);
|
||||||
|
}
|
||||||
|
"No output captured.".to_string()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns `text` unchanged when it fits within `max_chars` bytes; otherwise
/// the last `max_chars` bytes, rounded down to a valid UTF-8 boundary, with a
/// `"..."` prefix — keeping the tail of the output, where errors usually are.
///
/// Fix over the original: when no char boundary at or past the threshold
/// exists (e.g. `max_chars == 0`, or the threshold falls inside the final
/// multi-byte char), the old loop left `cutoff` at 0 and returned the whole
/// text uncapped; we now fall back to an empty tail instead.
fn trim_tail(text: &str, max_chars: usize) -> String {
    if text.len() <= max_chars {
        return text.to_string();
    }
    let threshold = text.len() - max_chars;
    // First char boundary at or past the threshold keeps the slice valid UTF-8.
    let cutoff = text
        .char_indices()
        .map(|(idx, _)| idx)
        .find(|&idx| idx >= threshold)
        .unwrap_or(text.len());
    format!("...{}", &text[cutoff..])
}
|
||||||
|
|
||||||
|
fn read_sync_x_urls(path: &Path) -> Result<Vec<String>> {
|
||||||
|
let contents =
|
||||||
|
fs::read_to_string(path).with_context(|| format!("read bookmarks file {}", path.display()))?;
|
||||||
|
let mut seen = HashSet::new();
|
||||||
|
let mut urls = Vec::new();
|
||||||
|
for line in contents.lines() {
|
||||||
|
let trimmed = line.trim();
|
||||||
|
if trimmed.is_empty() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if !(trimmed.starts_with("http://") || trimmed.starts_with("https://")) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if seen.insert(trimmed.to_string()) {
|
||||||
|
urls.push(trimmed.to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(urls)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn prepend_urls_to_read_later_sync(path: &Path, urls: &[String]) -> Result<(usize, usize)> {
|
||||||
|
let (preamble, mut entries) = read_entries(path)?;
|
||||||
|
let mut existing = HashSet::new();
|
||||||
|
for entry in &entries {
|
||||||
|
existing.insert(entry.block_string());
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut new_entries = Vec::new();
|
||||||
|
let mut duplicate_count = 0usize;
|
||||||
|
for url in urls {
|
||||||
|
let entry = EntryBlock::from_text(url);
|
||||||
|
let block = entry.block_string();
|
||||||
|
if existing.insert(block) {
|
||||||
|
new_entries.push(entry);
|
||||||
|
} else {
|
||||||
|
duplicate_count += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !new_entries.is_empty() {
|
||||||
|
for entry in new_entries.iter().rev() {
|
||||||
|
entries.insert(0, entry.clone());
|
||||||
|
}
|
||||||
|
write_entries(path, &preamble, &entries)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok((new_entries.len(), duplicate_count))
|
||||||
|
}
|
||||||
|
|
||||||
struct GitOutput {
|
struct GitOutput {
|
||||||
status: std::process::ExitStatus,
|
status: std::process::ExitStatus,
|
||||||
stdout: String,
|
stdout: String,
|
||||||
|
|
@ -4543,6 +4908,17 @@ fn load_config(path: &Path) -> Result<Config> {
|
||||||
.unwrap_or_else(|| Path::new("."))
|
.unwrap_or_else(|| Path::new("."))
|
||||||
.join("Misc/images_misc");
|
.join("Misc/images_misc");
|
||||||
let media_dir = config_file.media_dir.unwrap_or(default_media_dir);
|
let media_dir = config_file.media_dir.unwrap_or(default_media_dir);
|
||||||
|
let sync_x = config_file.sync_x.map(|sync_x| SyncXConfig {
|
||||||
|
source_project_path: resolve_user_id_path(&sync_x.source_project_path, config_dir),
|
||||||
|
work_dir: sync_x
|
||||||
|
.work_dir
|
||||||
|
.as_ref()
|
||||||
|
.map(|p| resolve_user_id_path(p, config_dir)),
|
||||||
|
python_bin: sync_x
|
||||||
|
.python_bin
|
||||||
|
.as_ref()
|
||||||
|
.map(|p| resolve_user_id_path(p, config_dir)),
|
||||||
|
});
|
||||||
Ok(Config {
|
Ok(Config {
|
||||||
token: config_file.token,
|
token: config_file.token,
|
||||||
user_id,
|
user_id,
|
||||||
|
|
@ -4553,6 +4929,7 @@ fn load_config(path: &Path) -> Result<Config> {
|
||||||
data_dir: config_file.data_dir,
|
data_dir: config_file.data_dir,
|
||||||
retry_interval_seconds: config_file.retry_interval_seconds,
|
retry_interval_seconds: config_file.retry_interval_seconds,
|
||||||
sync: config_file.sync,
|
sync: config_file.sync,
|
||||||
|
sync_x,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -5123,6 +5500,7 @@ mod tests {
|
||||||
data_dir: PathBuf::from("/tmp/data"),
|
data_dir: PathBuf::from("/tmp/data"),
|
||||||
retry_interval_seconds: None,
|
retry_interval_seconds: None,
|
||||||
sync: None,
|
sync: None,
|
||||||
|
sync_x: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -5464,4 +5842,53 @@ mod tests {
|
||||||
};
|
};
|
||||||
assert!(is_push_up_to_date(&output));
|
assert!(is_push_up_to_date(&output));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Non-URL and repeated lines must be dropped while file order is preserved.
#[test]
fn read_sync_x_urls_keeps_unique_http_lines() {
    let temp = TempDir::new().unwrap();
    let path = temp.path().join("bookmarks.txt");
    let contents = "https://a.example\n\nnot-a-url\nhttps://b.example\nhttps://a.example\n";
    fs::write(&path, contents).unwrap();

    let expected = vec![
        "https://a.example".to_string(),
        "https://b.example".to_string(),
    ];
    assert_eq!(read_sync_x_urls(&path).unwrap(), expected);
}
|
||||||
|
|
||||||
|
/// New URLs land at the top in input order; pre-existing ones only count as
/// duplicates and do not move.
#[test]
fn prepend_urls_to_read_later_sync_preserves_input_order() {
    let temp = TempDir::new().unwrap();
    let path = temp.path().join("read-later.md");
    fs::write(&path, "- https://already.example\n").unwrap();

    let urls: Vec<String> = ["https://one.example", "https://two.example", "https://already.example"]
        .iter()
        .map(|s| s.to_string())
        .collect();

    let (added, duplicates) = prepend_urls_to_read_later_sync(&path, &urls).unwrap();
    assert_eq!((added, duplicates), (2, 1));

    let (_, entries) = read_entries(&path).unwrap();
    let blocks: Vec<String> = entries.iter().map(|entry| entry.block_string()).collect();
    let expected = vec![
        "- https://one.example".to_string(),
        "- https://two.example".to_string(),
        "- https://already.example".to_string(),
    ];
    assert_eq!(blocks, expected);
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
21
vendor/extract-x-bookmarks/LICENSE
vendored
Normal file
21
vendor/extract-x-bookmarks/LICENSE
vendored
Normal file
|
|
@ -0,0 +1,21 @@
|
||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2025-present thegeneralist01
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
46
vendor/extract-x-bookmarks/README.md
vendored
Normal file
46
vendor/extract-x-bookmarks/README.md
vendored
Normal file
|
|
@ -0,0 +1,46 @@
|
||||||
|
# extract-x-bookmarks
|
||||||
|
Written by ChatGPT, not me.
|
||||||
|
|
||||||
|
[Works fine](https://x.com/thegeneralist01/status/2003819489989926932).
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
- Python
|
||||||
|
- Dependencies in `requirements.txt`
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
- Create a virtual environment if you want (`uv venv .venv` or whatever), then activate it. (This is absolutely optional).
|
||||||
|
|
||||||
|
- Install dependencies:
|
||||||
|
```bash
|
||||||
|
pip install -r requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
- Now, to get the **Cloudflare cookies,** this is quickest way I use:
|
||||||
|
- Download the [Cookie-Editor extension](https://cookie-editor.com/).
|
||||||
|
- Open your browser and go to [Twitter](https://x.com).
|
||||||
|
- Open the Cookie-Editor extension, press the export button (bottom right) and export as Header String. (It will copy it to the clipboard).
|
||||||
|
- Open the terminal, and run (the python file):
|
||||||
|
```bash
|
||||||
|
python isolate_cookies.py
|
||||||
|
```
|
||||||
|
- Paste the clipboard content.
|
||||||
|
- It will then put the two needed cookies into `creds.txt`, which the script will use.
|
||||||
|
- **If you want to do that without installing the extension,** the `creds.txt` file will have the following format:
|
||||||
|
```
|
||||||
|
auth_token=blablabla;ct0=blablabla
|
||||||
|
```
|
||||||
|
|
||||||
|
- A few things to know before running the script:
|
||||||
|
- It will create a `bookmarks.txt` file with the URLs of your bookmarks.
|
||||||
|
- The script fetches about 90 bookmarks per run. That means you might want to continually run it until you have no cookies left.
|
||||||
|
- A run writes (appends, really) URLs in a descending order (newest first).
|
||||||
|
- It might ask you whether to prepend or append the URLs - whether a new run should add URLs to the start or end of the file. **Generally, for a linear timeline, you want to append,** so: `a`.
|
||||||
|
- It will take some time in the end to **unbookmark** the fetched bookmarks. Each time 10 new bookmarks are unbookmarked, it will print a message.
|
||||||
|
|
||||||
|
- Run the script until you have all your bookmarks extracted:
|
||||||
|
```bash
|
||||||
|
python main.py
|
||||||
|
```
|
||||||
|
|
||||||
|
## License
|
||||||
|
Licensed under the [MIT License](LICENSE).
|
||||||
19
vendor/extract-x-bookmarks/isolate_cookies.py
vendored
Normal file
19
vendor/extract-x-bookmarks/isolate_cookies.py
vendored
Normal file
|
|
@ -0,0 +1,19 @@
|
||||||
|
# Reads a browser "Header String" cookie export from stdin, pulls out the two
# cookies the scraper needs (auth_token, ct0), and writes them to creds.txt
# in the `auth_token=...;ct0=...` format main.py expects.
raw = input("Input your cookies in the Header String format: ").strip()

cookies = {}
for chunk in raw.split(";"):
    entry = chunk.strip()
    if entry and "=" in entry:
        name, _, value = entry.partition("=")
        cookies[name.strip()] = value.strip()

auth_token = cookies.get("auth_token", "")
ct0 = cookies.get("ct0", "")
if not (auth_token and ct0):
    raise SystemExit("Missing auth_token or ct0 in the provided cookie header.")

with open("creds.txt", "w") as out:
    out.write(f"auth_token={auth_token};ct0={ct0}")
|
||||||
439
vendor/extract-x-bookmarks/main.py
vendored
Normal file
439
vendor/extract-x-bookmarks/main.py
vendored
Normal file
|
|
@ -0,0 +1,439 @@
|
||||||
|
import argparse
|
||||||
|
import time
|
||||||
|
import os
|
||||||
|
from twitter.account import Account
|
||||||
|
|
||||||
|
def is_rate_limit_error(error):
    """Return True when ``error`` looks like an HTTP 429 / rate-limit failure.

    Matches against the lower-cased string form of the error, so it accepts
    exception objects as well as plain message strings.
    """
    message = str(error).lower()
    indicators = (
        '429',
        'too many requests',
        'rate limit',
        'rate_limit',
        'exceeded',
        'quota',
        'limit exceeded',
    )
    return any(marker in message for marker in indicators)
|
||||||
|
|
||||||
|
|
||||||
|
def handle_rate_limit_error(error, retry_count, base_wait_time=60):
    """Compute the exponential-backoff delay for a rate-limit retry.

    Doubles ``base_wait_time`` per retry (1min, 2min, 4min, ...), capped at
    15 minutes (900s), prints a progress note, and returns the delay in
    seconds. ``error`` is accepted for symmetry with the caller but unused.
    """
    wait_time = min(base_wait_time * (2 ** retry_count), 900)

    print(f"\n ⚠ Rate limit detected (attempt {retry_count + 1})")
    print(f" ⏳ Waiting {wait_time}s ({wait_time/60:.1f} minutes) before retry...")

    return wait_time
|
||||||
|
|
||||||
|
|
||||||
|
def extract_bookmark_entries_from_response(response_data):
    """
    Pull bookmark entries (tweet IDs plus author usernames) out of the
    response returned by account.bookmarks().

    Three response shapes are supported: a flat list of tweet IDs, a list
    of legacy tweet dicts, or the nested paginated GraphQL timeline
    structure (one page or a list of pages).

    Args:
        response_data: The response data from account.bookmarks()

    Returns:
        List of tuples: [(tweet_id, username), ...] — deduplicated,
        in order of first appearance; username may be None.
    """
    results = []
    seen = set()

    def record(raw_id, screen_name):
        # Normalise the ID to a stripped string; drop blanks and duplicates.
        tid = str(raw_id).strip()
        if tid and tid not in seen:
            seen.add(tid)
            results.append((tid, screen_name))

    try:
        payloads = []
        if isinstance(response_data, list):
            if len(response_data) > 0 and isinstance(response_data[0], (str, int)):
                # Shape 1: a bare list of tweet IDs.
                for raw in response_data:
                    record(raw, None)
                return results
            elif len(response_data) > 0 and isinstance(response_data[0], dict):
                if 'id' in response_data[0] or 'id_str' in response_data[0]:
                    # Shape 2: a list of legacy tweet objects.
                    for tweet in response_data:
                        tid = tweet.get('id_str') or str(tweet.get('id', ''))
                        author = tweet.get('user', {}).get('screen_name') if 'user' in tweet else None
                        if tid:
                            record(tid, author)
                    return results
                # Shape 3: a list of paginated GraphQL pages.
                payloads = [page for page in response_data if isinstance(page, dict)]
        elif isinstance(response_data, dict):
            payloads = [response_data]
        else:
            # Unrecognised payload type — nothing to extract.
            return results

        for page in payloads:
            # Walk the nested GraphQL timeline structure.
            timeline = page.get('data', {}).get('bookmark_timeline_v2', {}).get('timeline', {})
            if not timeline:
                # Alternative nesting used by some responses.
                timeline = (
                    page.get('data', {})
                    .get('user', {})
                    .get('result', {})
                    .get('timeline_v2', {})
                    .get('timeline', {})
                )

            for instruction in timeline.get('instructions', []):
                if instruction.get('type') != 'TimelineAddEntries':
                    continue
                for entry in instruction.get('entries', []):
                    content = entry.get('content', {})
                    if content.get('entryType') != 'TimelineTimelineItem':
                        continue
                    item_content = content.get('itemContent', {})
                    if item_content.get('itemType') != 'TimelineTweet':
                        continue

                    tweet_result = item_content.get('tweet_results', {}).get('result', {})
                    tid = tweet_result.get('rest_id')

                    # Author's screen name lives under core.user_results.result.legacy.
                    author = None
                    user_info = tweet_result.get('core', {}).get('user_results', {}).get('result', {})
                    if user_info:
                        legacy = user_info.get('legacy', {})
                        if legacy:
                            author = legacy.get('screen_name')

                    if tid:
                        record(tid, author)

        return results
    except Exception as e:
        # Best-effort: report and return whatever was extracted so far.
        print(f" ⚠ Warning: Error extracting bookmark entries: {e}")
        return results
|
||||||
|
|
||||||
|
|
||||||
|
def extract_all_bookmarks(account, delay_between_requests=2.0):
    """
    Extract all bookmarks from the account with rate-limit handling.

    account.bookmarks() is called once (no pagination loop here); on failure
    the call is retried at most once — with exponential backoff for rate
    limit errors, or a short fixed wait for other errors.

    Args:
        account: Account instance from twitter.account
        delay_between_requests: Delay in seconds between requests (only used
            to size the retry wait here; kept for consistency with callers)

    Returns:
        List of tuples: [(tweet_id, username), ...] (newest first).
        Returns whatever was gathered so far (possibly []) on Ctrl-C.
    """
    all_bookmarks = []
    retry_count = 0

    print("Starting to extract bookmarks...")
    print("-" * 50)

    try:
        print("Fetching bookmarks...", end=" ")

        # Fetch all bookmarks (single call, no pagination needed).
        try:
            response_data = account.bookmarks()
            retry_count = 0
        except Exception as e:
            error_msg = str(e)
            print(f"\n ❌ Error fetching bookmarks: {error_msg}")

            # Rate limit errors get an exponential-backoff wait, then one retry.
            if is_rate_limit_error(e):
                wait_time = handle_rate_limit_error(e, retry_count)
                time.sleep(wait_time)
                retry_count += 1
                # Retry the request once; give up (re-raise) if it fails again.
                try:
                    response_data = account.bookmarks()
                    retry_count = 0
                except Exception as retry_error:
                    print(f" ❌ Failed after retry: {retry_error}")
                    raise
            else:
                # Non-rate-limit errors: wait a short fixed time and retry once.
                # NOTE(review): retry_count is 0 on the first failure, so the
                # "max retries" else-branch below is only reachable if callers
                # ever pass in accumulated state — confirm intent.
                if retry_count < 2:
                    wait_time = delay_between_requests * 3
                    print(f" ⏳ Waiting {wait_time}s before retry...")
                    time.sleep(wait_time)
                    retry_count += 1
                    try:
                        response_data = account.bookmarks()
                        retry_count = 0
                    except Exception as retry_error:
                        print(f" ❌ Failed after retry: {retry_error}")
                        raise
                else:
                    print(f" ❌ Max retries reached. Stopping.")
                    raise

        # Parse tweet IDs / usernames out of whatever shape the API returned.
        all_bookmarks = extract_bookmark_entries_from_response(response_data)

        if all_bookmarks:
            print(f"✓ Retrieved {len(all_bookmarks)} bookmarks")
        else:
            print("⚠ No bookmarks found")

    except KeyboardInterrupt:
        # User aborted: fall through and return what we have (likely []).
        print("\n\n⚠ Extraction interrupted by user")
    except Exception as e:
        # Any other error (including failed retries above) is fatal here.
        print(f"\n\n❌ Error occurred: {str(e)}")
        raise

    print(f"\n{'='*80}")
    print(f"Bookmark extraction complete!")
    print(f" Total bookmarks found: {len(all_bookmarks)}")
    print(f"{'='*80}\n")

    return all_bookmarks
|
||||||
|
|
||||||
|
|
||||||
|
def save_bookmarks_and_unbookmark(
    account,
    bookmarks,
    output_file="bookmarks.txt",
    delay_between_requests=2.0,
    write_mode="a",
):
    """
    Save bookmark URLs to a file (newest first) and unbookmark each one.

    The output file is fully rewritten: existing content is read first, then
    new URLs are either prepended or appended to it according to write_mode.
    Every tweet is written to the file even if its unbookmark call fails.

    Args:
        account: Account instance from twitter.account
        bookmarks: List of tuples [(tweet_id, username), ...]
        output_file: Output file path
        delay_between_requests: Delay in seconds between unbookmark requests
        write_mode: 'p' to prepend, 'a' to append, or 'ask' to prompt
            interactively on stdin (default on empty input is append).

    Returns:
        Dict with 'saved_count' (all bookmarks written) and
        'unbookmarked_count' (successful unbookmark calls).

    Raises:
        ValueError: If write_mode is not one of 'ask', 'p', 'a'.
    """
    print(f"\nSaving bookmarks to {output_file} and unbookmarking...")
    print("-" * 50)

    # Read existing content up front so the rewrite below can preserve it.
    existing_content = ""
    if os.path.exists(output_file):
        with open(output_file, "r") as f:
            existing_content = f.read()

    # Choose whether to prepend or append.
    if write_mode not in ['ask', 'p', 'a']:
        raise ValueError("write_mode must be one of: ask, p, a")

    if write_mode == "ask":
        # Interactive mode: keep prompting until a valid choice is given.
        while True:
            choice = input("Prepend (p) or append (a) new bookmarks? [p/a] (default a): ").strip().lower()
            if choice == "":
                choice = "a"
            if choice in ['p', 'a']:
                break
            print(" ⚠ Invalid choice. Please enter 'p' for prepend or 'a' for append.")
    else:
        choice = write_mode

    prepend = (choice == 'p')

    # Collect new bookmark URLs (newest first).
    new_bookmark_urls = []
    unbookmark_count = 0
    retry_count = 0

    # Process bookmarks (they should already be in order, newest first).
    for tweet_id, username in bookmarks:
        # Construct the canonical tweet URL.
        if username:
            url = f"https://twitter.com/{username}/status/{tweet_id}"
        else:
            # Fallback permalink form when the author's handle is unknown.
            url = f"https://twitter.com/i/web/status/{tweet_id}"

        # Record the URL unconditionally — saving is independent of the
        # unbookmark call succeeding below.
        new_bookmark_urls.append(url)

        # Unbookmark the tweet, retrying once on failure.
        try:
            account.unbookmark(tweet_id)
            unbookmark_count += 1
            retry_count = 0  # Reset retry count on success

            # Progress indicator every 10 processed bookmarks.
            if unbookmark_count % 10 == 0:
                print(f" ✓ Processed {unbookmark_count}/{len(bookmarks)} bookmarks...")

        except Exception as e:
            error_msg = str(e)
            print(f"\n ⚠ Error unbookmarking tweet {tweet_id}: {error_msg}")

            # Rate limit errors: exponential-backoff wait, then one retry.
            if is_rate_limit_error(e):
                wait_time = handle_rate_limit_error(e, retry_count)
                time.sleep(wait_time)
                retry_count += 1
                # Retry the unbookmark; a second failure is logged, not fatal.
                try:
                    account.unbookmark(tweet_id)
                    unbookmark_count += 1
                    retry_count = 0
                except Exception as retry_error:
                    print(f" ❌ Failed to unbookmark {tweet_id} after retry: {retry_error}")
            else:
                # Other errors: short fixed wait and one retry, unless two
                # consecutive items already failed (retry_count carries over
                # between loop iterations on failure).
                if retry_count < 2:
                    wait_time = delay_between_requests * 3
                    print(f" ⏳ Waiting {wait_time}s before retry...")
                    time.sleep(wait_time)
                    retry_count += 1
                    try:
                        account.unbookmark(tweet_id)
                        unbookmark_count += 1
                        retry_count = 0
                    except Exception as retry_error:
                        print(f" ❌ Failed to unbookmark {tweet_id} after retry: {retry_error}")
                else:
                    print(f" ❌ Skipping unbookmark for {tweet_id} after max retries")

        # Rate limiting: wait before the next unbookmark request
        # (also sleeps after the final item).
        if delay_between_requests > 0:
            time.sleep(delay_between_requests)

    # Rewrite the whole file with new URLs placed per the user's choice.
    with open(output_file, "w") as f:
        if prepend:
            # Write new bookmarks first (prepended), then existing content.
            for url in new_bookmark_urls:
                f.write(f"{url}\n")
            if existing_content:
                f.write(existing_content)
        else:
            # Write existing content first, then new bookmarks (appended).
            if existing_content:
                f.write(existing_content)
            for url in new_bookmark_urls:
                f.write(f"{url}\n")

    print(f"\n{'='*80}")
    print(f"Processing complete!")
    print(f" Total bookmarks saved: {len(bookmarks)}")
    print(f" Total unbookmarked: {unbookmark_count}")
    print(f" Output file: {output_file}")
    print(f"{'='*80}\n")
    return {
        "saved_count": len(bookmarks),
        "unbookmarked_count": unbookmark_count,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args():
    """
    Build and evaluate the command-line interface for the extractor.

    Returns:
        argparse.Namespace with output_file, delay_between_requests, mode,
        single_run, max_runs, and delay_between_runs attributes.
    """
    cli = argparse.ArgumentParser(description="Extract and unbookmark X/Twitter bookmarks.")

    # Where extracted URLs are written.
    cli.add_argument("--output-file", default="bookmarks.txt", help="Path to output bookmarks file.")

    # Pacing between individual unbookmark calls.
    cli.add_argument(
        "--delay-between-requests",
        type=float,
        default=2.0,
        help="Seconds to wait between unbookmark requests.",
    )

    # How new URLs are merged into an existing output file.
    cli.add_argument(
        "--mode",
        choices=["a", "p", "ask"],
        default="a",
        help="Write mode for bookmark file: append (a), prepend (p), or ask interactively.",
    )

    # Loop control: one pass only, or sync until empty (bounded by --max-runs).
    cli.add_argument(
        "--single-run",
        action="store_true",
        help="Run one extraction pass only.",
    )
    cli.add_argument(
        "--max-runs",
        type=int,
        default=100,
        help="Maximum number of extraction runs when syncing until empty.",
    )
    cli.add_argument(
        "--delay-between-runs",
        type=float,
        default=1.0,
        help="Seconds to wait between extraction runs.",
    )

    return cli.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    args = parse_args()

    # Load the raw cookie header string ("auth_token=...; ct0=...") saved by
    # the credential-isolation step.
    with open("creds.txt", "r") as file:
        cookie_str = file.read().strip()
    # Split the header into a dict. Strip each pair so that the standard
    # "name=value; name=value" separator (note the space after ';') does not
    # leave leading whitespace in cookie names, and skip empty fragments so a
    # trailing ';' does not crash dict() with a one-element pair.
    cookie_dict = dict(
        item.strip().split("=", 1)
        for item in cookie_str.split(";")
        if item.strip()
    )

    # Initialize the authenticated account client from the cookies.
    account = Account(cookies=cookie_dict)

    # Configuration
    delay_between_requests = args.delay_between_requests
    output_file = args.output_file

    total_saved = 0
    total_unbookmarked = 0
    runs = 0

    # Keep extracting until no bookmarks remain (each run unbookmarks what it
    # saves), a run makes no progress, --single-run is set, or we hit the cap.
    while runs < args.max_runs:
        runs += 1
        print(f"\nRun {runs}: fetching bookmarks...")
        bookmarks = extract_all_bookmarks(account, delay_between_requests=delay_between_requests)

        if not bookmarks:
            print("\nNo bookmarks found.")
            break

        # Save bookmarks to file and unbookmark them.
        stats = save_bookmarks_and_unbookmark(
            account,
            bookmarks,
            output_file=output_file,
            delay_between_requests=delay_between_requests,
            write_mode=args.mode,
        )
        total_saved += stats["saved_count"]
        total_unbookmarked += stats["unbookmarked_count"]
        print(f"\nSuccessfully processed {len(bookmarks)} bookmarks in run {runs}")

        if args.single_run:
            break
        # If nothing was unbookmarked, the same items would be re-fetched
        # forever; bail out instead.
        if stats["unbookmarked_count"] == 0:
            print("No bookmarks were unbookmarked in this run; stopping to avoid an infinite loop.")
            break
        if runs < args.max_runs and args.delay_between_runs > 0:
            time.sleep(args.delay_between_runs)

    if runs >= args.max_runs:
        print(f"\nReached max runs ({args.max_runs}) before bookmarks were fully exhausted.")

    print(f"\nDone. Total saved: {total_saved}, total unbookmarked: {total_unbookmarked}")
|
||||||
1
vendor/extract-x-bookmarks/requirements.txt
vendored
Normal file
1
vendor/extract-x-bookmarks/requirements.txt
vendored
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
twitter-api-client == 0.10.22
|
||||||
Loading…
Add table
Add a link
Reference in a new issue