1
Fork 0
mirror of https://github.com/thegeneralist01/archivr synced 2026-05-30 08:36:47 +02:00

fix: extract full tweet text from note_tweet field when available

This commit is contained in:
TheGeneralist 2026-04-06 11:05:32 +02:00
parent 5552591f4f
commit cc380ec5ba
Signed by: thegeneralist01
SSH key fingerprint: SHA256:pp9qddbCNmVNoSjevdvQvM5z0DHN7LTa8qBMbcMq/R4

View file

@@ -481,8 +481,14 @@ def extract_tweet_data(
 # Extract legacy data (main tweet content)
 legacy = tweet_result.get("legacy", {})
-# Extract full text (bare)
-tweet_data["full_text"] = legacy.get("full_text", "")
+# Extract full text (bare) - prefer note_tweet text when present, as legacy.full_text is truncated for long tweets
+note_tweet_text = (
+tweet_result.get("note_tweet", {})
+.get("note_tweet_results", {})
+.get("result", {})
+.get("text")
+)
+tweet_data["full_text"] = note_tweet_text if note_tweet_text else legacy.get("full_text", "")
 # Extract is_quote_status (bare)
 tweet_data["is_quote_status"] = legacy.get("is_quote_status", False)