1
Fork 0
mirror of https://github.com/thegeneralist01/extract-x-bookmarks synced 2026-03-07 11:59:55 +01:00

first commit

This commit is contained in:
TheGeneralist 2025-12-24 17:19:20 +01:00
commit 8a699113a9
Signed by: thegeneralist01
SSH key fingerprint: SHA256:pp9qddbCNmVNoSjevdvQvM5z0DHN7LTa8qBMbcMq/R4
6 changed files with 437 additions and 0 deletions

7
.gitignore vendored Normal file
View file

@ -0,0 +1,7 @@
*
!.gitignore
!LICENSE
!README.md
!isolate_cookies.py
!main.py
!requirements.txt

21
LICENSE Normal file
View file

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2025-present thegeneralist01
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

46
README.md Normal file
View file

@ -0,0 +1,46 @@
# extract-x-bookmarks
Written by ChatGPT, not me.
[Works fine](https://x.com/thegeneralist01/status/2003819489989926932).
## Requirements
- Python
- Dependencies in `requirements.txt`
## Usage
- Create a virtual environment if you want (`uv venv .venv` or whatever), then activate it. (This is absolutely optional).
- Install dependencies:
```bash
pip install -r requirements.txt
```
- Now, to get the **Cloudflare cookies,** this is the quickest way I use:
- Download the [Cookie-Editor extension](https://cookie-editor.com/).
- Open your browser and go to [Twitter](https://x.com).
- Open the Cookie-Editor extension, press the export button (bottom right) and export as Header String. (It will copy it to the clipboard).
- Open the terminal, and run (the python file):
```bash
python isolate_cookies.py
```
- Paste the clipboard content.
- It will then put the two needed cookies into `creds.txt`, which the script will use.
- **If you want to do that without installing the extension,** the `creds.txt` file will have the following format:
```
auth_token=blablabla;ct0=blablabla
```
- A few things to know before running the script:
- It will create a `bookmarks.txt` file with the URLs of your bookmarks.
- The script fetches about 90 bookmarks per run. That means you might want to continually run it until you have no bookmarks left.
- A run writes (appends, really) URLs in a descending order (newest first).
- It might ask you whether to prepend or append the URLs - whether a new run should add URLs to the start or end of the file. **Generally, for a linear timeline, you want to append,** so: `a`.
- It will take some time in the end to **unbookmark** the fetched bookmarks. Each time 10 new bookmarks are unbookmarked, it will print a message.
- To run the script:
```bash
python main.py
```
## License
Licensed under the [MIT License](LICENSE).

13
isolate_cookies.py Normal file
View file

@ -0,0 +1,13 @@
def extract_login_string(cookie_str):
    """Build the creds.txt login string from a Header String cookie export.

    Args:
        cookie_str: Cookies in "name=value; name=value" header format,
            e.g. as exported by the Cookie-Editor extension.

    Returns:
        The string "auth_token=...;ct0=..." containing only the two
        cookies the scraper needs.

    Raises:
        KeyError: If either 'auth_token' or 'ct0' is missing.
    """
    # Header String exports separate cookies with "; " — strip whitespace
    # around each cookie name so the lookups below don't fail on ' ct0'.
    cookie_dict = {}
    for item in cookie_str.split(";"):
        item = item.strip()
        if not item:
            continue  # tolerate a trailing ';'
        name, _, value = item.partition("=")
        cookie_dict[name] = value
    auth_token = cookie_dict['auth_token']
    ct0 = cookie_dict['ct0']
    return f"auth_token={auth_token};ct0={ct0}"


if __name__ == "__main__":
    cookie_str = input("Input your cookies in the Header String format: ")
    login_string = extract_login_string(cookie_str)
    with open("creds.txt", "w") as file:
        file.write(login_string)

349
main.py Normal file
View file

@ -0,0 +1,349 @@
import json
import time
import os
from twitter.account import Account
def is_rate_limit_error(error):
    """Heuristically decide whether *error* looks like an HTTP 429 rate limit.

    Args:
        error: Exception object or error message.

    Returns:
        True if the stringified error contains a known rate-limit
        indicator, False otherwise.
    """
    message = str(error).lower()
    # Substrings commonly present in Twitter/X rate-limit error responses.
    markers = (
        '429',
        'too many requests',
        'rate limit',
        'rate_limit',
        'exceeded',
        'quota',
        'limit exceeded',
    )
    for marker in markers:
        if marker in message:
            return True
    return False
def handle_rate_limit_error(error, retry_count, base_wait_time=60):
    """Report a rate-limit hit and compute an exponential-backoff delay.

    Args:
        error: The exception that occurred (only used for context).
        retry_count: Number of times we've already retried.
        base_wait_time: Base wait time in seconds (default 60s = 1 minute).

    Returns:
        Seconds to sleep before retrying: base * 2**retry_count,
        capped at 900 (15 minutes).
    """
    # Exponential backoff (1min, 2min, 4min, ...) with a 15-minute ceiling.
    wait_time = min(base_wait_time * (2 ** retry_count), 900)
    print(f"\n ⚠ Rate limit detected (attempt {retry_count + 1})")
    print(f" ⏳ Waiting {wait_time}s ({wait_time/60:.1f} minutes) before retry...")
    return wait_time
def extract_bookmark_entries_from_response(response_data):
    """Pull (tweet_id, username) pairs out of a bookmarks API response.

    Handles three response shapes: a plain list of tweet IDs, a list of
    legacy tweet dicts (with 'id'/'id_str'), or the nested GraphQL
    timeline structure.

    Args:
        response_data: The response data from account.bookmarks()

    Returns:
        List of tuples: [(tweet_id, username), ...]; username may be None.
    """
    collected = []
    try:
        envelope = {}
        if isinstance(response_data, list):
            head = response_data[0] if response_data else None
            if isinstance(head, (str, int)):
                # Plain list of tweet IDs — no usernames available.
                return [(str(tid), None) for tid in response_data]
            if isinstance(head, dict) and ('id' in head or 'id_str' in head):
                # Legacy tweet objects carrying id/id_str directly.
                pairs = []
                for tweet in response_data:
                    tid = tweet.get('id_str') or str(tweet.get('id', ''))
                    name = tweet.get('user', {}).get('screen_name') if 'user' in tweet else None
                    if tid:
                        pairs.append((tid, name))
                return pairs
            # Otherwise treat the first element as a GraphQL envelope.
            if response_data:
                envelope = response_data[0]
        elif isinstance(response_data, dict):
            envelope = response_data
        else:
            return collected

        # Primary GraphQL path, with a fallback to the user-timeline shape.
        timeline = envelope.get('data', {}).get('bookmark_timeline_v2', {}).get('timeline', {})
        if not timeline:
            timeline = (envelope.get('data', {}).get('user', {}).get('result', {})
                        .get('timeline_v2', {}).get('timeline', {}))

        for instruction in timeline.get('instructions', []):
            if instruction.get('type') != 'TimelineAddEntries':
                continue
            for entry in instruction.get('entries', []):
                content = entry.get('content', {})
                if content.get('entryType') != 'TimelineTimelineItem':
                    continue
                item_content = content.get('itemContent', {})
                if item_content.get('itemType') != 'TimelineTweet':
                    continue
                tweet_result = item_content.get('tweet_results', {}).get('result', {})
                tid = tweet_result.get('rest_id')
                name = None
                user_result = (tweet_result.get('core', {})
                               .get('user_results', {}).get('result', {}))
                if user_result:
                    legacy = user_result.get('legacy', {})
                    if legacy:
                        name = legacy.get('screen_name')
                if tid:
                    collected.append((str(tid), name))
        return collected
    except Exception as e:
        print(f" ⚠ Warning: Error extracting bookmark entries: {e}")
    return collected
def _fetch_bookmarks_with_retry(account, delay_between_requests):
    """Call account.bookmarks(), retrying once on failure.

    A rate-limit error sleeps with exponential backoff before the retry;
    any other error waits a short fixed delay. The retry's exception is
    re-raised if the second attempt fails too.
    """
    try:
        return account.bookmarks()
    except Exception as e:
        print(f"\n ❌ Error fetching bookmarks: {str(e)}")
        if is_rate_limit_error(e):
            time.sleep(handle_rate_limit_error(e, 0))
        else:
            wait_time = delay_between_requests * 3
            print(f" ⏳ Waiting {wait_time}s before retry...")
            time.sleep(wait_time)
        try:
            return account.bookmarks()
        except Exception as retry_error:
            print(f" ❌ Failed after retry: {retry_error}")
            raise


def extract_all_bookmarks(account, delay_between_requests=2.0):
    """
    Extract all bookmarks from the account with proper rate limit handling.
    Account.bookmarks() returns all bookmarks in a single call.

    Args:
        account: Account instance from twitter.account
        delay_between_requests: Delay in seconds used to derive the retry wait

    Returns:
        List of tuples: [(tweet_id, username), ...] (newest first)

    Raises:
        Exception: Whatever account.bookmarks() raised, if the single
            retry also fails (KeyboardInterrupt is swallowed instead).
    """
    # NOTE: the original version duplicated the retry logic in two branches
    # and had an unreachable "max retries" branch (retry_count was always 0
    # at the check); the retry policy — one retry per failure — is unchanged.
    all_bookmarks = []
    print("Starting to extract bookmarks...")
    print("-" * 50)
    try:
        print("Fetching bookmarks...", end=" ")
        # Single call, no pagination needed.
        response_data = _fetch_bookmarks_with_retry(account, delay_between_requests)
        all_bookmarks = extract_bookmark_entries_from_response(response_data)
        if all_bookmarks:
            print(f"✓ Retrieved {len(all_bookmarks)} bookmarks")
        else:
            print("⚠ No bookmarks found")
    except KeyboardInterrupt:
        # User abort is not an error — fall through and report what we have.
        print("\n\n⚠ Extraction interrupted by user")
    except Exception as e:
        print(f"\n\n❌ Error occurred: {str(e)}")
        raise
    print(f"\n{'='*80}")
    print(f"Bookmark extraction complete!")
    print(f" Total bookmarks found: {len(all_bookmarks)}")
    print(f"{'='*80}\n")
    return all_bookmarks
def save_bookmarks_and_unbookmark(account, bookmarks, output_file="bookmarks.txt", delay_between_requests=2.0):
    """
    Save bookmark URLs to file (newest first) and unbookmark each one.

    The URLs are persisted to disk BEFORE any unbookmark call is made, so
    an interrupted or crashed run can never lose bookmarks that were
    already removed from the account. (The original flow unbookmarked
    first and only wrote the file at the very end.)

    Args:
        account: Account instance from twitter.account
        bookmarks: List of tuples [(tweet_id, username), ...]
        output_file: Output file path
        delay_between_requests: Delay in seconds between unbookmark requests
    """
    print(f"\nSaving bookmarks to {output_file} and unbookmarking...")
    print("-" * 50)
    # Read existing content if file exists
    existing_content = ""
    if os.path.exists(output_file):
        with open(output_file, "r") as f:
            existing_content = f.read()
    # Ask user whether to prepend or append
    while True:
        choice = input("Prepend (p) or append (a) new bookmarks? [p/a]: ").strip().lower()
        if choice in ['p', 'a']:
            break
        print(" ⚠ Invalid choice. Please enter 'p' for prepend or 'a' for append.")
    prepend = (choice == 'p')

    # Build URLs (bookmarks are already ordered newest first).
    new_bookmark_urls = []
    for tweet_id, username in bookmarks:
        if username:
            new_bookmark_urls.append(f"https://twitter.com/{username}/status/{tweet_id}")
        else:
            # Fallback redirect URL when the username is not available.
            new_bookmark_urls.append(f"https://twitter.com/i/web/status/{tweet_id}")

    # Persist URLs FIRST so a failure during unbookmarking loses nothing.
    new_text = "".join(f"{url}\n" for url in new_bookmark_urls)
    with open(output_file, "w") as f:
        if prepend:
            f.write(new_text)
            f.write(existing_content)
        else:
            f.write(existing_content)
            f.write(new_text)

    # Now unbookmark each saved tweet, with one retry per failure.
    unbookmark_count = 0
    for tweet_id, _username in bookmarks:
        try:
            account.unbookmark(tweet_id)
            unbookmark_count += 1
            if unbookmark_count % 10 == 0:
                print(f" ✓ Processed {unbookmark_count}/{len(bookmarks)} bookmarks...")
        except Exception as e:
            print(f"\n ⚠ Error unbookmarking tweet {tweet_id}: {str(e)}")
            # Rate-limit errors back off exponentially; others wait briefly.
            if is_rate_limit_error(e):
                time.sleep(handle_rate_limit_error(e, 0))
            else:
                wait_time = delay_between_requests * 3
                print(f" ⏳ Waiting {wait_time}s before retry...")
                time.sleep(wait_time)
            try:
                account.unbookmark(tweet_id)
                unbookmark_count += 1
            except Exception as retry_error:
                # Best-effort: log and move on to the next tweet.
                print(f" ❌ Failed to unbookmark {tweet_id} after retry: {retry_error}")
        # Rate limiting: wait before the next unbookmark request.
        if delay_between_requests > 0:
            time.sleep(delay_between_requests)

    print(f"\n{'='*80}")
    print(f"Processing complete!")
    print(f" Total bookmarks saved: {len(bookmarks)}")
    print(f" Total unbookmarked: {unbookmark_count}")
    print(f" Output file: {output_file}")
    print(f"{'='*80}\n")
if __name__ == "__main__":
    # Load cookies written by isolate_cookies.py; creds.txt holds a single
    # line of the form "auth_token=...;ct0=...".
    with open("creds.txt", "r") as file:
        cookie_str = file.read().strip()
    # Parse "name=value;name=value" into a dict for the API client.
    cookie_dict = dict(item.split("=", 1) for item in cookie_str.split(";"))
    # Initialize account (twitter-api-client Account, authenticated via cookies)
    account = Account(cookies=cookie_dict)
    # Configuration
    DELAY_BETWEEN_REQUESTS = 2.0  # Seconds to wait between requests
    OUTPUT_FILE = "bookmarks.txt"
    # Extract all bookmarks (single fetch; roughly ~90 per run per the README)
    bookmarks = extract_all_bookmarks(account, delay_between_requests=DELAY_BETWEEN_REQUESTS)
    if bookmarks:
        # Save bookmark URLs to file and unbookmark them on the account
        save_bookmarks_and_unbookmark(
            account,
            bookmarks,
            output_file=OUTPUT_FILE,
            delay_between_requests=DELAY_BETWEEN_REQUESTS
        )
        print(f"\nSuccessfully processed {len(bookmarks)} bookmarks")
    else:
        print("\nNo bookmarks found.")

1
requirements.txt Normal file
View file

@ -0,0 +1 @@
twitter-api-client == 0.10.22