mirror of
https://github.com/thegeneralist01/archivr
synced 2026-05-30 08:36:47 +02:00
fix: extract full tweet text from note_tweet field when available
This commit is contained in:
parent
5552591f4f
commit
cc380ec5ba
1 changed file with 8 additions and 2 deletions
10
vendor/twitter/scrape_user_tweet_contents.py
vendored
10
vendor/twitter/scrape_user_tweet_contents.py
vendored
|
|
@@ -481,8 +481,14 @@ def extract_tweet_data(
|
||||||
# Extract legacy data (main tweet content)
|
# Extract legacy data (main tweet content)
|
||||||
legacy = tweet_result.get("legacy", {})
|
legacy = tweet_result.get("legacy", {})
|
||||||
|
|
||||||
# Extract full text (bare)
|
# Extract full text (bare) - prefer note_tweet text when present, as legacy.full_text is truncated for long tweets
|
||||||
tweet_data["full_text"] = legacy.get("full_text", "")
|
note_tweet_text = (
|
||||||
|
tweet_result.get("note_tweet", {})
|
||||||
|
.get("note_tweet_results", {})
|
||||||
|
.get("result", {})
|
||||||
|
.get("text")
|
||||||
|
)
|
||||||
|
tweet_data["full_text"] = note_tweet_text if note_tweet_text else legacy.get("full_text", "")
|
||||||
|
|
||||||
# Extract is_quote_status (bare)
|
# Extract is_quote_status (bare)
|
||||||
tweet_data["is_quote_status"] = legacy.get("is_quote_status", False)
|
tweet_data["is_quote_status"] = legacy.get("is_quote_status", False)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue