Skip to content

Commit

Permalink
[twitter] download media from quoted tweet
Browse files: browse the repository at this point in the history
  • Loading branch information
soimort committed Jul 18, 2020
1 parent f878b4f commit 74fa92b
Showing 1 changed file with 24 additions and 1 deletion.
25 changes: 24 additions & 1 deletion src/you_get/extractors/twitter.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -51,7 +51,30 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs)
api_content = get_content(api_url, headers={'authorization': authorization, 'x-guest-token': guest_token})

info = json.loads(api_content)
media = info['globalObjects']['tweets'][item_id]['extended_entities']['media']
if 'extended_entities' in info['globalObjects']['tweets'][item_id]:
# if the tweet contains media, download them
media = info['globalObjects']['tweets'][item_id]['extended_entities']['media']

elif info['globalObjects']['tweets'][item_id].get('is_quote_status') == True:
# if the tweet does not contain media, but it quotes a tweet
# and the quoted tweet contains media, download them
item_id = info['globalObjects']['tweets'][item_id]['quoted_status_id_str']

api_url = 'https://api.twitter.com/2/timeline/conversation/%s.json?tweet_mode=extended' % item_id
api_content = get_content(api_url, headers={'authorization': authorization, 'x-guest-token': guest_token})

info = json.loads(api_content)

if 'extended_entities' in info['globalObjects']['tweets'][item_id]:
media = info['globalObjects']['tweets'][item_id]['extended_entities']['media']
else:
# quoted tweet has no media
return

else:
# no media, no quoted tweet
return

for medium in media:
if 'video_info' in medium:
# FIXME: we're assuming one tweet only contains one video here
Expand Down

0 comments on commit 74fa92b

Please sign in to comment.