[twitter] download media from quoted tweet

3irdparty · Jul 18, 2020 · 74fa92b
1 parent f878b4f
commit 74fa92b
Showing 1 changed file with 24 additions and 1 deletion.
diff --git a/src/you_get/extractors/twitter.py b/src/you_get/extractors/twitter.py
@@ -51,7 +51,30 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs)
     api_content = get_content(api_url, headers={'authorization': authorization, 'x-guest-token': guest_token})
 
     info = json.loads(api_content)
-    media = info['globalObjects']['tweets'][item_id]['extended_entities']['media']
+    if 'extended_entities' in info['globalObjects']['tweets'][item_id]:
+        # if the tweet contains media, download them
+        media = info['globalObjects']['tweets'][item_id]['extended_entities']['media']
+
+    elif info['globalObjects']['tweets'][item_id].get('is_quote_status') == True:
+        # if the tweet does not contain media, but it quotes a tweet
+        # and the quoted tweet contains media, download them
+        item_id = info['globalObjects']['tweets'][item_id]['quoted_status_id_str']
+
+        api_url = 'https://api.twitter.com/2/timeline/conversation/%s.json?tweet_mode=extended' % item_id
+        api_content = get_content(api_url, headers={'authorization': authorization, 'x-guest-token': guest_token})
+
+        info = json.loads(api_content)
+
+        if 'extended_entities' in info['globalObjects']['tweets'][item_id]:
+            media = info['globalObjects']['tweets'][item_id]['extended_entities']['media']
+        else:
+            # quoted tweet has no media
+            return
+
+    else:
+        # no media, no quoted tweet
+        return
+
     for medium in media:
         if 'video_info' in medium:
             # FIXME: we're assuming one tweet only contains one video here