mirror of
https://github.com/soimort/you-get.git
synced 2025-01-24 05:55:02 +03:00
[twitter] match correct screen_name and item_id in a conversation
This commit is contained in:
parent
e36404cf22
commit
2a1bb6978c
@@ -30,9 +30,9 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs)
|
|||||||
return
|
return
|
||||||
|
|
||||||
html = get_html(url, faker=True)
|
html = get_html(url, faker=True)
|
||||||
screen_name = r1(r'data-screen-name="([^"]*)"', html) or \
|
screen_name = r1(r'twitter\.com/([^/]+)', url) or r1(r'data-screen-name="([^"]*)"', html) or \
|
||||||
r1(r'<meta name="twitter:title" content="([^"]*)"', html)
|
r1(r'<meta name="twitter:title" content="([^"]*)"', html)
|
||||||
item_id = r1(r'data-item-id="([^"]*)"', html) or \
|
item_id = r1(r'twitter\.com/[^/]+/status/(\d+)', url) or r1(r'data-item-id="([^"]*)"', html) or \
|
||||||
r1(r'<meta name="twitter:site:id" content="([^"]*)"', html)
|
r1(r'<meta name="twitter:site:id" content="([^"]*)"', html)
|
||||||
page_title = "{} [{}]".format(screen_name, item_id)
|
page_title = "{} [{}]".format(screen_name, item_id)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user