From cc380ec5bae4a78c589246d3565bc1f1e15e1d4a Mon Sep 17 00:00:00 2001 From: TheGeneralist <180094941+thegeneralist01@users.noreply.github.com> Date: Mon, 6 Apr 2026 11:05:32 +0200 Subject: [PATCH] fix: extract full tweet text from note_tweet field when available --- vendor/twitter/scrape_user_tweet_contents.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/vendor/twitter/scrape_user_tweet_contents.py b/vendor/twitter/scrape_user_tweet_contents.py index 0334b70..b992b94 100644 --- a/vendor/twitter/scrape_user_tweet_contents.py +++ b/vendor/twitter/scrape_user_tweet_contents.py @@ -481,8 +481,14 @@ def extract_tweet_data( # Extract legacy data (main tweet content) legacy = tweet_result.get("legacy", {}) - # Extract full text (bare) - tweet_data["full_text"] = legacy.get("full_text", "") + # Extract full text (bare) - prefer note_tweet text when present, as legacy.full_text is truncated for long tweets + note_tweet_text = ( + tweet_result.get("note_tweet", {}) + .get("note_tweet_results", {}) + .get("result", {}) + .get("text") + ) + tweet_data["full_text"] = note_tweet_text if note_tweet_text else legacy.get("full_text", "") # Extract is_quote_status (bare) tweet_data["is_quote_status"] = legacy.get("is_quote_status", False)