mirror of
https://github.com/thegeneralist01/extract-x-bookmarks
synced 2026-03-07 11:59:55 +01:00
first commit
This commit is contained in:
commit
8a699113a9
6 changed files with 437 additions and 0 deletions
7
.gitignore
vendored
Normal file
7
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,7 @@
|
||||||
|
*
|
||||||
|
!.gitignore
|
||||||
|
!LICENSE
|
||||||
|
!README.md
|
||||||
|
!isolate_cookies.py
|
||||||
|
!main.py
|
||||||
|
!requirements.txt
|
||||||
21
LICENSE
Normal file
21
LICENSE
Normal file
|
|
@ -0,0 +1,21 @@
|
||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2025-present thegeneralist01
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
46
README.md
Normal file
46
README.md
Normal file
|
|
@ -0,0 +1,46 @@
|
||||||
|
# extract-x-bookmarks
|
||||||
|
Written by ChatGPT, not me.
|
||||||
|
|
||||||
|
[Works fine](https://x.com/thegeneralist01/status/2003819489989926932).
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
- Python
|
||||||
|
- Dependencies in `requirements.txt`
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
- Create a virtual environment if you want (`uv venv .venv` or whatever), then activate it. (This is absolutely optional).
|
||||||
|
|
||||||
|
- Install dependencies:
|
||||||
|
```bash
|
||||||
|
pip install -r requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
- Now, to get the **Cloudflare cookies,** this is the quickest way I use:
|
||||||
|
- Download the [Cookie-Editor extension](https://cookie-editor.com/).
|
||||||
|
- Open your browser and go to [Twitter](https://x.com).
|
||||||
|
- Open the Cookie-Editor extension, press the export button (bottom right) and export as Header String. (It will copy it to the clipboard).
|
||||||
|
- Open the terminal, and run (the python file):
|
||||||
|
```bash
|
||||||
|
python isolate_cookies.py
|
||||||
|
```
|
||||||
|
- Paste the clipboard content.
|
||||||
|
- It will then put the two needed cookies into `creds.txt`, which the script will use.
|
||||||
|
- **If you want to do that without installing the extension,** the `creds.txt` file will have the following format:
|
||||||
|
```
|
||||||
|
auth_token=blablabla;ct0=blablabla
|
||||||
|
```
|
||||||
|
|
||||||
|
- A few things to know before running the script:
|
||||||
|
- It will create a `bookmarks.txt` file with the URLs of your bookmarks.
|
||||||
|
- The script fetches about 90 bookmarks per run. That means you might want to continually run it until you have no cookies left.
|
||||||
|
- A run writes (appends, really) URLs in a descending order (newest first).
|
||||||
|
- It might ask you whether to prepend or append the URLs - whether a new run should add URLs to the start or end of the file. **Generally, for a linear timeline, you want to append,** so: `a`.
|
||||||
|
- It will take some time in the end to **unbookmark** the fetched bookmarks. Each time 10 new bookmarks are unbookmarked, it will print a message.
|
||||||
|
|
||||||
|
- To run the script:
|
||||||
|
```bash
|
||||||
|
python main.py
|
||||||
|
```
|
||||||
|
|
||||||
|
## License
|
||||||
|
Licensed under the [MIT License](LICENSE).
|
||||||
13
isolate_cookies.py
Normal file
13
isolate_cookies.py
Normal file
|
|
@ -0,0 +1,13 @@
|
||||||
|
def build_login_string(cookie_str):
    """Extract the two Twitter auth cookies from a Header String export.

    Args:
        cookie_str: Cookies in the Cookie-Editor "Header String" format,
            e.g. "guest_id=1; auth_token=xyz; ct0=abc".

    Returns:
        The "auth_token=...;ct0=..." line that main.py reads from creds.txt.

    Raises:
        KeyError: If auth_token or ct0 is missing from the input.
    """
    # Header String entries are "; "-separated, so every key after the
    # first carries a leading space; strip keys/values so lookups like
    # cookie_dict['auth_token'] work regardless of position.
    cookie_dict = {}
    for item in cookie_str.split(";"):
        key, _, value = item.partition("=")
        cookie_dict[key.strip()] = value.strip()

    auth_token = cookie_dict['auth_token']
    ct0 = cookie_dict['ct0']
    return f"auth_token={auth_token};ct0={ct0}"


if __name__ == "__main__":
    # Interactive use: paste the exported header string, write creds.txt.
    cookie_str = input("Input your cookies in the Header String format: ")
    with open("creds.txt", "w") as file:
        file.write(build_login_string(cookie_str))
|
||||||
|
|
||||||
349
main.py
Normal file
349
main.py
Normal file
|
|
@ -0,0 +1,349 @@
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
import os
|
||||||
|
from twitter.account import Account
|
||||||
|
|
||||||
|
def is_rate_limit_error(error):
    """
    Check if an error is a rate limit error (429 Too Many Requests).

    Args:
        error: Exception object or error message

    Returns:
        True if it's a rate limit error, False otherwise
    """
    # The twitter client surfaces 429s as plain exception text, so match
    # on common substrings of the stringified error instead of a status
    # attribute.
    message = str(error).lower()
    markers = (
        '429',
        'too many requests',
        'rate limit',
        'rate_limit',
        'exceeded',
        'quota',
        'limit exceeded',
    )
    for marker in markers:
        if marker in message:
            return True
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def handle_rate_limit_error(error, retry_count, base_wait_time=60):
    """
    Handle rate limit errors with exponential backoff.

    Args:
        error: The exception that occurred
        retry_count: Number of times we've retried
        base_wait_time: Base wait time in seconds (default 60s = 1 minute)

    Returns:
        Wait time in seconds before retrying
    """
    # Exponential backoff (1min, 2min, 4min, ...) capped at 15 minutes.
    wait_time = min(base_wait_time * (2 ** retry_count), 900)

    print(f"\n ⚠ Rate limit detected (attempt {retry_count + 1})")
    print(f" ⏳ Waiting {wait_time}s ({wait_time/60:.1f} minutes) before retry...")

    return wait_time
|
||||||
|
|
||||||
|
|
||||||
|
def extract_bookmark_entries_from_response(response_data):
    """
    Extract bookmark entries (tweet IDs and user info) from the response.

    Args:
        response_data: The response data from account.bookmarks()

    Returns:
        List of tuples: [(tweet_id, username), ...]
    """
    bookmark_entries = []
    try:
        if isinstance(response_data, list):
            head = response_data[0] if response_data else None
            # Shape 1: a bare list of tweet IDs (strings or ints).
            if isinstance(head, (str, int)):
                return [(str(tid), None) for tid in response_data]
            # Shape 2: a list of plain tweet objects with 'id'/'id_str'.
            if isinstance(head, dict) and ('id' in head or 'id_str' in head):
                pairs = []
                for obj in response_data:
                    tid = obj.get('id_str') or str(obj.get('id', ''))
                    who = obj.get('user', {}).get('screen_name') if 'user' in obj else None
                    if tid:
                        pairs.append((tid, who))
                return pairs
            # Shape 3: a GraphQL envelope wrapped in a one-element list.
            data = head if head is not None else {}
        elif isinstance(response_data, dict):
            data = response_data
        else:
            # Unknown payload type — nothing to extract.
            return bookmark_entries

        # Walk the nested GraphQL timeline structure.
        payload = data.get('data', {})
        timeline = payload.get('bookmark_timeline_v2', {}).get('timeline', {})
        if not timeline:
            # Alternative path used by the user/timeline_v2 response shape.
            timeline = (payload.get('user', {}).get('result', {})
                        .get('timeline_v2', {}).get('timeline', {}))

        for instruction in timeline.get('instructions', []):
            if instruction.get('type') != 'TimelineAddEntries':
                continue
            for entry in instruction.get('entries', []):
                content = entry.get('content', {})
                if content.get('entryType') != 'TimelineTimelineItem':
                    continue
                item_content = content.get('itemContent', {})
                if item_content.get('itemType') != 'TimelineTweet':
                    continue

                tweet_result = item_content.get('tweet_results', {}).get('result', {})
                tweet_id = tweet_result.get('rest_id')

                # Username, when present, lives under
                # core.user_results.result.legacy.screen_name.
                username = None
                user_info = tweet_result.get('core', {}).get('user_results', {}).get('result', {})
                if user_info:
                    legacy_user = user_info.get('legacy', {})
                    if legacy_user:
                        username = legacy_user.get('screen_name')

                if tweet_id:
                    bookmark_entries.append((str(tweet_id), username))

        return bookmark_entries
    except Exception as e:
        # Best-effort: return whatever was collected before the failure.
        print(f" ⚠ Warning: Error extracting bookmark entries: {e}")
        return bookmark_entries
|
||||||
|
|
||||||
|
|
||||||
|
def extract_all_bookmarks(account, delay_between_requests=2.0):
    """
    Extract all bookmarks from the account with proper rate limit handling.
    Account.bookmarks() returns all bookmarks in a single call.

    Args:
        account: Account instance from twitter.account
        delay_between_requests: Delay in seconds between requests (not used for single call, but kept for consistency)

    Returns:
        List of tuples: [(tweet_id, username), ...] (newest first)

    Raises:
        Exception: Re-raises the underlying fetch error once the single
            retry (rate-limit or generic) also fails.
    """
    all_bookmarks = []
    retry_count = 0

    print("Starting to extract bookmarks...")
    print("-" * 50)

    try:
        print("Fetching bookmarks...", end=" ")

        # Fetch all bookmarks (single call, no pagination needed)
        try:
            response_data = account.bookmarks()
            retry_count = 0
        except Exception as e:
            error_msg = str(e)
            print(f"\n ❌ Error fetching bookmarks: {error_msg}")

            # Check if it's a rate limit error
            if is_rate_limit_error(e):
                # Back off (exponential, capped) then retry exactly once;
                # a second failure propagates to the outer handler.
                wait_time = handle_rate_limit_error(e, retry_count)
                time.sleep(wait_time)
                retry_count += 1
                # Retry the request
                try:
                    response_data = account.bookmarks()
                    retry_count = 0
                except Exception as retry_error:
                    print(f" ❌ Failed after retry: {retry_error}")
                    raise
            else:
                # For non-rate-limit errors, wait a bit and retry once
                # NOTE(review): retry_count is always 0 on the first fetch
                # failure, so this branch is effectively always taken; the
                # else below is unreachable for a single call.
                if retry_count < 2:
                    wait_time = delay_between_requests * 3
                    print(f" ⏳ Waiting {wait_time}s before retry...")
                    time.sleep(wait_time)
                    retry_count += 1
                    try:
                        response_data = account.bookmarks()
                        retry_count = 0
                    except Exception as retry_error:
                        print(f" ❌ Failed after retry: {retry_error}")
                        raise
                else:
                    print(f" ❌ Max retries reached. Stopping.")
                    raise

        # Extract bookmark entries from response
        all_bookmarks = extract_bookmark_entries_from_response(response_data)

        if all_bookmarks:
            print(f"✓ Retrieved {len(all_bookmarks)} bookmarks")
        else:
            print("⚠ No bookmarks found")

    except KeyboardInterrupt:
        # Ctrl-C is treated as a soft stop: fall through and return
        # whatever was collected (possibly nothing).
        print("\n\n⚠ Extraction interrupted by user")
    except Exception as e:
        print(f"\n\n❌ Error occurred: {str(e)}")
        raise

    print(f"\n{'='*80}")
    print(f"Bookmark extraction complete!")
    print(f" Total bookmarks found: {len(all_bookmarks)}")
    print(f"{'='*80}\n")

    return all_bookmarks
|
||||||
|
|
||||||
|
|
||||||
|
def save_bookmarks_and_unbookmark(account, bookmarks, output_file="bookmarks.txt", delay_between_requests=2.0):
    """
    Save bookmark URLs to file (newest first) and unbookmark each one.

    Interactive: prompts on stdin whether new URLs go before or after any
    existing file content, then rewrites output_file in full. Unbookmark
    failures are logged and skipped after one retry; they never abort the
    run, so the file is always written.

    Args:
        account: Account instance from twitter.account
        bookmarks: List of tuples [(tweet_id, username), ...]
        output_file: Output file path
        delay_between_requests: Delay in seconds between unbookmark requests
    """
    print(f"\nSaving bookmarks to {output_file} and unbookmarking...")
    print("-" * 50)

    # Read existing content if file exists
    existing_content = ""
    if os.path.exists(output_file):
        with open(output_file, "r") as f:
            existing_content = f.read()

    # Ask user whether to prepend or append
    while True:
        choice = input("Prepend (p) or append (a) new bookmarks? [p/a]: ").strip().lower()
        if choice in ['p', 'a']:
            break
        print(" ⚠ Invalid choice. Please enter 'p' for prepend or 'a' for append.")

    prepend = (choice == 'p')

    # Collect new bookmark URLs (newest first)
    new_bookmark_urls = []
    unbookmark_count = 0
    retry_count = 0

    # Process bookmarks (they should already be in order, newest first)
    for tweet_id, username in bookmarks:
        # Construct URL
        if username:
            url = f"https://twitter.com/{username}/status/{tweet_id}"
        else:
            # Fallback if username not available
            url = f"https://twitter.com/i/web/status/{tweet_id}"

        # Add to new bookmarks list
        # NOTE(review): the URL is recorded even if the unbookmark call
        # below fails, so the saved list is the source of truth.
        new_bookmark_urls.append(url)

        # Unbookmark the tweet
        try:
            account.unbookmark(tweet_id)
            unbookmark_count += 1
            retry_count = 0  # Reset retry count on success

            if unbookmark_count % 10 == 0:
                print(f" ✓ Processed {unbookmark_count}/{len(bookmarks)} bookmarks...")

        except Exception as e:
            error_msg = str(e)
            print(f"\n ⚠ Error unbookmarking tweet {tweet_id}: {error_msg}")

            # Check if it's a rate limit error
            if is_rate_limit_error(e):
                # Back off (exponential, capped) and retry this one tweet
                # once; a second failure just logs and moves on.
                wait_time = handle_rate_limit_error(e, retry_count)
                time.sleep(wait_time)
                retry_count += 1
                # Retry the unbookmark
                try:
                    account.unbookmark(tweet_id)
                    unbookmark_count += 1
                    retry_count = 0
                except Exception as retry_error:
                    print(f" ❌ Failed to unbookmark {tweet_id} after retry: {retry_error}")
            else:
                # For other errors, just log and continue
                if retry_count < 2:
                    wait_time = delay_between_requests * 3
                    print(f" ⏳ Waiting {wait_time}s before retry...")
                    time.sleep(wait_time)
                    retry_count += 1
                    try:
                        account.unbookmark(tweet_id)
                        unbookmark_count += 1
                        retry_count = 0
                    except Exception as retry_error:
                        print(f" ❌ Failed to unbookmark {tweet_id} after retry: {retry_error}")
                else:
                    print(f" ❌ Skipping unbookmark for {tweet_id} after max retries")

        # Rate limiting: wait before next unbookmark request
        if delay_between_requests > 0:
            time.sleep(delay_between_requests)

    # Write bookmarks based on user's choice
    # (mode "w": the whole file is rewritten, merging old + new content)
    with open(output_file, "w") as f:
        if prepend:
            # Write new bookmarks first (prepended), then existing content
            for url in new_bookmark_urls:
                f.write(f"{url}\n")
            if existing_content:
                f.write(existing_content)
        else:
            # Write existing content first, then new bookmarks (appended)
            if existing_content:
                f.write(existing_content)
            for url in new_bookmark_urls:
                f.write(f"{url}\n")

    print(f"\n{'='*80}")
    print(f"Processing complete!")
    print(f" Total bookmarks saved: {len(bookmarks)}")
    print(f" Total unbookmarked: {unbookmark_count}")
    print(f" Output file: {output_file}")
    print(f"{'='*80}\n")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Load cookies from creds.txt (produced by isolate_cookies.py),
    # format: "auth_token=...;ct0=...".
    with open("creds.txt", "r") as file:
        cookie_str = file.read().strip()
        cookie_dict = dict(item.split("=", 1) for item in cookie_str.split(";"))

    # Initialize account
    account = Account(cookies=cookie_dict)

    # Configuration
    DELAY_BETWEEN_REQUESTS = 2.0  # Seconds to wait between requests
    OUTPUT_FILE = "bookmarks.txt"

    # Extract all bookmarks
    bookmarks = extract_all_bookmarks(account, delay_between_requests=DELAY_BETWEEN_REQUESTS)

    if bookmarks:
        # Save bookmarks to file and unbookmark them
        save_bookmarks_and_unbookmark(
            account,
            bookmarks,
            output_file=OUTPUT_FILE,
            delay_between_requests=DELAY_BETWEEN_REQUESTS
        )
        print(f"\nSuccessfully processed {len(bookmarks)} bookmarks")
    else:
        print("\nNo bookmarks found.")
|
||||||
|
|
||||||
1
requirements.txt
Normal file
1
requirements.txt
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
twitter-api-client == 0.10.22
|
||||||
Loading…
Add table
Add a link
Reference in a new issue