diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 75231110..39793c03 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -1,5 +1,4 @@ # This workflow will install Python dependencies, run tests and lint with a variety of Python versions -# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions name: develop @@ -16,12 +15,12 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.5, 3.6, 3.7, 3.8, 3.9, "3.10", pypy3] + python-version: [3.7, 3.8, 3.9, '3.10', '3.11', pypy-3.8, pypy-3.9] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - name: Install dependencies diff --git a/Makefile b/Makefile index c0f9cf0e..fe4a238c 100644 --- a/Makefile +++ b/Makefile @@ -43,5 +43,7 @@ install: $(SETUP) install --user --prefix= release: - zenity --question - $(SETUP) sdist bdist_wheel upload --sign + #zenity --question + $(SETUP) sdist bdist_wheel + echo 'Upload new version to PyPI using:' + echo ' twine upload --sign dist/you-get-VERSION.tar.gz dist/you_get-VERSION-py3-none-any.whl' diff --git a/README.md b/README.md index d2fdaa99..44c102cd 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,9 @@ [![PyPI version](https://img.shields.io/pypi/v/you-get.svg)](https://pypi.python.org/pypi/you-get/) [![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/soimort/you-get?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) -**NOTICE: Read [this](https://github.com/soimort/you-get/blob/develop/CONTRIBUTING.md) if you are looking for the conventional "Issues" tab.** +**NOTICE (30 May 2022): Support for Python 3.5, 3.6 and 3.7 will eventually be dropped. 
([see details here](https://github.com/soimort/you-get/wiki/TLS-1.3-post-handshake-authentication-(PHA)))** + +**NOTICE (8 Mar 2019): Read [this](https://github.com/soimort/you-get/blob/develop/CONTRIBUTING.md) if you are looking for the conventional "Issues" tab.** --- @@ -53,9 +55,9 @@ Are you a Python programmer? Then check out [the source](https://github.com/soim ### Prerequisites -The following dependencies are necessary: +The following dependencies are recommended: -* **[Python](https://www.python.org/downloads/)** 3.2 or above +* **[Python](https://www.python.org/downloads/)** 3.7.4 or above * **[FFmpeg](https://www.ffmpeg.org/)** 1.0 or above * (Optional) [RTMPDump](https://rtmpdump.mplayerhq.hu/) @@ -89,6 +91,14 @@ $ python3 setup.py install --user to install `you-get` to a permanent path. +You can also use [pipenv](https://pipenv.pypa.io/en/latest) to install `you-get` in a Python virtual environment. + +``` +$ pipenv install -e . +$ pipenv run you-get --version +you-get: version 0.4.1555, a tiny downloader that scrapes the web. +``` + ### Option 4: Git clone This is the recommended way for all developers, even if you don't often code in Python. diff --git a/README.rst b/README.rst index 3c23ab5e..376abcf7 100644 --- a/README.rst +++ b/README.rst @@ -52,7 +52,7 @@ source `__ and fork it! .. |PyPI version| image:: https://badge.fury.io/py/you-get.png :target: http://badge.fury.io/py/you-get -.. |Build Status| image:: https://api.travis-ci.org/soimort/you-get.png - :target: https://travis-ci.org/soimort/you-get +.. |Build Status| image:: https://github.com/soimort/you-get/workflows/develop/badge.svg + :target: https://github.com/soimort/you-get/actions .. 
|Gitter| image:: https://badges.gitter.im/Join%20Chat.svg :target: https://gitter.im/soimort/you-get?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge diff --git a/src/you_get/common.py b/src/you_get/common.py index 473c3155..4095dc52 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -138,13 +138,14 @@ auto_rename = False insecure = False m3u8 = False postfix = False +prefix = None fake_headers = { - 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', # noqa + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Charset': 'UTF-8,*;q=0.5', 'Accept-Encoding': 'gzip,deflate,sdch', 'Accept-Language': 'en-US,en;q=0.8', - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.74 Safari/537.36 Edg/79.0.309.43', # noqa + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36 Edg/115.0.1901.183' # Latest Edge } if sys.stdout.isatty(): @@ -342,10 +343,37 @@ def undeflate(data): return decompressobj.decompress(data)+decompressobj.flush() +# an http.client implementation of get_content() +# because urllib does not support "Connection: keep-alive" +def getHttps(host, url, headers, debuglevel=0): + import http.client + + conn = http.client.HTTPSConnection(host) + conn.set_debuglevel(debuglevel) + conn.request("GET", url, headers=headers) + resp = conn.getresponse() + set_cookie = resp.getheader('set-cookie') + + data = resp.read() + try: + data = ungzip(data) # gzip + data = undeflate(data) # deflate + except: + pass + + conn.close() + return str(data, encoding='utf-8'), set_cookie + + # DEPRECATED in favor of get_content() def get_response(url, faker=False): logging.debug('get_response: %s' % url) - + ctx = None + if insecure: + # ignore ssl errors + ctx = ssl.create_default_context() + ctx.check_hostname = False + ctx.verify_mode = ssl.CERT_NONE # 
install cookies if cookies: opener = request.build_opener(request.HTTPCookieProcessor(cookies)) @@ -353,10 +381,10 @@ def get_response(url, faker=False): if faker: response = request.urlopen( - request.Request(url, headers=fake_headers), None + request.Request(url, headers=fake_headers), None, context=ctx, ) else: - response = request.urlopen(url) + response = request.urlopen(url, context=ctx) data = response.read() if response.info().get('Content-Encoding') == 'gzip': @@ -987,6 +1015,8 @@ def download_urls( title = tr(get_filename(title)) if postfix and 'vid' in kwargs: title = "%s [%s]" % (title, kwargs['vid']) + if prefix is not None: + title = "[%s] %s" % (prefix, title) output_filename = get_output_filename(urls, title, ext, output_dir, merge) output_filepath = os.path.join(output_dir, output_filename) @@ -1536,9 +1566,13 @@ def script_main(download, download_playlist, **kwargs): help='Do not download captions (subtitles, lyrics, danmaku, ...)' ) download_grp.add_argument( - '--postfix', action='store_true', default=False, + '--post', '--postfix', dest='postfix', action='store_true', default=False, help='Postfix downloaded files with unique identifiers' ) + download_grp.add_argument( + '--pre', '--prefix', dest='prefix', metavar='PREFIX', default=None, + help='Prefix downloaded files with string' + ) download_grp.add_argument( '-f', '--force', action='store_true', default=False, help='Force overwriting existing files' @@ -1632,7 +1666,7 @@ def script_main(download, download_playlist, **kwargs): download_grp.add_argument('--itag', help=argparse.SUPPRESS) download_grp.add_argument('-m', '--m3u8', action='store_true', default=False, - help = 'download vide using an m3u8 url') + help = 'download video using an m3u8 url') parser.add_argument('URL', nargs='*', help=argparse.SUPPRESS) @@ -1662,6 +1696,7 @@ def script_main(download, download_playlist, **kwargs): global insecure global m3u8 global postfix + global prefix output_filename = args.output_filename 
extractor_proxy = args.extractor_proxy @@ -1699,6 +1734,7 @@ def script_main(download, download_playlist, **kwargs): insecure = True postfix = args.postfix + prefix = args.prefix if args.no_proxy: set_http_proxy('') @@ -1785,20 +1821,10 @@ def google_search(url): url = 'https://www.google.com/search?tbm=vid&q=%s' % parse.quote(keywords) page = get_content(url, headers=fake_headers) videos = re.findall( - r'

([^<]+)<', page + r'(https://www\.youtube\.com/watch\?v=[\w-]+)', page ) - vdurs = re.findall(r'([^<]+)<', page) - durs = [r1(r'(\d+:\d+)', unescape_html(dur)) for dur in vdurs] - print('Google Videos search:') - for v in zip(videos, durs): - print('- video: {} [{}]'.format( - unescape_html(v[0][1]), - v[1] if v[1] else '?' - )) - print('# you-get %s' % log.sprint(v[0][0], log.UNDERLINE)) - print() print('Best matched result:') - return(videos[0][0]) + return(videos[0]) def url_to_module(url): diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 49334d5b..6335e6dd 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -12,8 +12,12 @@ class Bilibili(VideoExtractor): # Bilibili media encoding options, in descending quality order. stream_types = [ - {'id': 'hdflv2', 'quality': 125, 'audio_quality': 30280, - 'container': 'FLV', 'video_resolution': '3840p', 'desc': '真彩 HDR'}, + {'id': 'hdflv2_8k', 'quality': 127, 'audio_quality': 30280, + 'container': 'FLV', 'video_resolution': '4320p', 'desc': '超高清 8K'}, + {'id': 'hdflv2_dolby', 'quality': 126, 'audio_quality': 30280, + 'container': 'FLV', 'video_resolution': '3840p', 'desc': '杜比视界'}, + {'id': 'hdflv2_hdr', 'quality': 125, 'audio_quality': 30280, + 'container': 'FLV', 'video_resolution': '2160p', 'desc': '真彩 HDR'}, {'id': 'hdflv2_4k', 'quality': 120, 'audio_quality': 30280, 'container': 'FLV', 'video_resolution': '2160p', 'desc': '超清 4K'}, {'id': 'flv_p60', 'quality': 116, 'audio_quality': 30280, @@ -38,6 +42,8 @@ class Bilibili(VideoExtractor): {'id': 'jpg', 'quality': 0}, ] + codecids = {7: 'AVC', 12: 'HEVC', 13: 'AV1'} + @staticmethod def height_to_quality(height, qn): if height <= 360 and qn <= 16: @@ -66,7 +72,7 @@ class Bilibili(VideoExtractor): @staticmethod def bilibili_api(avid, cid, qn=0): - return 'https://api.bilibili.com/x/player/playurl?avid=%s&cid=%s&qn=%s&type=&otype=json&fnver=0&fnval=16&fourk=1' % (avid, cid, qn) + return 
'https://api.bilibili.com/x/player/playurl?avid=%s&cid=%s&qn=%s&type=&otype=json&fnver=0&fnval=4048&fourk=1' % (avid, cid, qn) @staticmethod def bilibili_audio_api(sid): @@ -112,6 +118,10 @@ class Bilibili(VideoExtractor): def bilibili_space_channel_api(mid, cid, pn=1, ps=100): return 'https://api.bilibili.com/x/space/channel/video?mid=%s&cid=%s&pn=%s&ps=%s&order=0&jsonp=jsonp' % (mid, cid, pn, ps) + @staticmethod + def bilibili_space_collection_api(mid, cid, pn=1, ps=30): + return 'https://api.bilibili.com/x/polymer/space/seasons_archives_list?mid=%s&season_id=%s&sort_reverse=false&page_num=%s&page_size=%s' % (mid, cid, pn, ps) + @staticmethod def bilibili_series_archives_api(mid, sid, pn=1, ps=100): return 'https://api.bilibili.com/x/series/archives?mid=%s&series_id=%s&pn=%s&ps=%s&only_normal=true&sort=asc&jsonp=jsonp' % (mid, sid, pn, ps) @@ -141,6 +151,8 @@ class Bilibili(VideoExtractor): def prepare(self, **kwargs): self.stream_qualities = {s['quality']: s for s in self.stream_types} + self.streams.clear() + self.dash_streams.clear() try: html_content = get_content(self.url, headers=self.bilibili_headers(referer=self.url)) @@ -171,6 +183,11 @@ class Bilibili(VideoExtractor): self.url = 'https://www.bilibili.com/%s' % match1(self.url, r'/s/(.+)') html_content = get_content(self.url, headers=self.bilibili_headers()) + # redirect: festival + elif re.match(r'https?://(www\.)?bilibili\.com/festival/(.+)', self.url): + self.url = 'https://www.bilibili.com/video/%s' % match1(self.url, r'bvid=([^&]+)') + html_content = get_content(self.url, headers=self.bilibili_headers()) + # sort it out if re.match(r'https?://(www\.)?bilibili\.com/audio/au(\d+)', self.url): sort = 'audio' @@ -182,7 +199,7 @@ class Bilibili(VideoExtractor): sort = 'live' elif re.match(r'https?://vc\.bilibili\.com/video/(\d+)', self.url): sort = 'vc' - elif re.match(r'https?://(www\.)?bilibili\.com/video/(av(\d+)|(BV(\S+)))', self.url): + elif 
re.match(r'https?://(www\.)?bilibili\.com/video/(av(\d+)|(bv(\S+))|(BV(\S+)))', self.url): sort = 'video' elif re.match(r'https?://h\.?bilibili\.com/(\d+)', self.url): sort = 'h' @@ -197,30 +214,47 @@ class Bilibili(VideoExtractor): playinfo_text = match1(html_content, r'__playinfo__=(.*?)', cont) - try: - info = json.loads(data.group(1)) - post = info['entry_data']['PostPage'][0] - assert post['items'] - except: - # with logged-in cookies - data = re.search(r'window\.__additionalDataLoaded\(\'[^\']+\',(.*)\);', cont) - if data is not None: - log.e('[Warning] Cookies needed.') - post = json.loads(data.group(1)) + api_url = 'https://i.instagram.com/api/v1/media/%s/info/' % media_id + try: + api_cont = get_content(api_url, headers={**fake_headers, **{'x-ig-app-id': appId}}) + post = json.loads(api_cont) + except: + log.wtf('[Error] Please specify a cookie file.') - for item in post['items']: - code = item['code'] - carousel_media = item.get('carousel_media') or [item] - for i, media in enumerate(carousel_media): - title = '%s [%s]' % (code, i) - image_url = media['image_versions2']['candidates'][0]['url'] - ext = image_url.split('?')[0].split('.')[-1] - size = int(get_head(image_url)['Content-Length']) + for item in post['items']: + code = item['code'] + carousel_media = item.get('carousel_media') or [item] + for i, media in enumerate(carousel_media): + title = '%s [%s]' % (code, i) + image_url = media['image_versions2']['candidates'][0]['url'] + ext = image_url.split('?')[0].split('.')[-1] + size = int(get_head(image_url)['Content-Length']) + + print_info(site_info, title, ext, size) + if not info_only: + download_urls(urls=[image_url], + title=title, + ext=ext, + total_size=size, + output_dir=output_dir) + + # download videos (if any) + if 'video_versions' in media: + video_url = media['video_versions'][0]['url'] + ext = video_url.split('?')[0].split('.')[-1] + size = int(get_head(video_url)['Content-Length']) print_info(site_info, title, ext, size) if not 
info_only: - download_urls(urls=[image_url], + download_urls(urls=[video_url], title=title, ext=ext, total_size=size, output_dir=output_dir) - # download videos (if any) - if 'video_versions' in media: - video_url = media['video_versions'][0]['url'] - ext = video_url.split('?')[0].split('.')[-1] - size = int(get_head(video_url)['Content-Length']) - - print_info(site_info, title, ext, size) - if not info_only: - download_urls(urls=[video_url], - title=title, - ext=ext, - total_size=size, - output_dir=output_dir) - site_info = "Instagram.com" download = instagram_download download_playlist = playlist_not_supported('instagram') diff --git a/src/you_get/extractors/iqiyi.py b/src/you_get/extractors/iqiyi.py index d138a49f..16bf45d3 100644 --- a/src/you_get/extractors/iqiyi.py +++ b/src/you_get/extractors/iqiyi.py @@ -131,10 +131,10 @@ class Iqiyi(VideoExtractor): html = get_html(self.url) tvid = r1(r'#curid=(.+)_', self.url) or \ r1(r'tvid=([^&]+)', self.url) or \ - r1(r'data-player-tvid="([^"]+)"', html) or r1(r'tv(?:i|I)d=(.+?)\&', html) or r1(r'param\[\'tvid\'\]\s*=\s*"(.+?)"', html) + r1(r'data-player-tvid="([^"]+)"', html) or r1(r'tv(?:i|I)d=(\w+?)\&', html) or r1(r'param\[\'tvid\'\]\s*=\s*"(.+?)"', html) videoid = r1(r'#curid=.+_(.*)$', self.url) or \ r1(r'vid=([^&]+)', self.url) or \ - r1(r'data-player-videoid="([^"]+)"', html) or r1(r'vid=(.+?)\&', html) or r1(r'param\[\'vid\'\]\s*=\s*"(.+?)"', html) + r1(r'data-player-videoid="([^"]+)"', html) or r1(r'vid=(\w+?)\&', html) or r1(r'param\[\'vid\'\]\s*=\s*"(.+?)"', html) self.vid = (tvid, videoid) info_u = 'http://pcw-api.iqiyi.com/video/video/playervideoinfo?tvid=' + tvid json_res = get_content(info_u) @@ -203,8 +203,13 @@ class Iqiyi(VideoExtractor): # For legacy main() #Here's the change!! - download_url_ffmpeg(urls[0], self.title, 'mp4', output_dir=kwargs['output_dir'], merge=kwargs['merge'], stream=False) - + # ffmpeg fails to parse. 
+ # download_url_ffmpeg(urls[0], self.title, 'mp4', output_dir=kwargs['output_dir'], merge=kwargs['merge'], stream=False) + #Here's the way works out + urls = general_m3u8_extractor(urls[0]) + # ffmpeg fail to convert the output video with mkv extension, due to sort of timestamp problem + download_urls(urls, self.title, 'mp4', 0, **kwargs) + if not kwargs['caption']: print('Skipping captions.') return diff --git a/src/you_get/extractors/ixigua.py b/src/you_get/extractors/ixigua.py index 2f11e7f9..f2fd953e 100644 --- a/src/you_get/extractors/ixigua.py +++ b/src/you_get/extractors/ixigua.py @@ -18,121 +18,97 @@ headers = { } -def int_overflow(val): - maxint = 2147483647 - if not -maxint - 1 <= val <= maxint: - val = (val + (maxint + 1)) % (2 * (maxint + 1)) - maxint - 1 - return val - - -def unsigned_right_shitf(n, i): - if n < 0: - n = ctypes.c_uint32(n).value - if i < 0: - return -int_overflow(n << abs(i)) - return int_overflow(n >> i) - - -def get_video_url_from_video_id(video_id): - """Splicing URLs according to video ID to get video details""" - # from js - data = [""] * 256 - for index, _ in enumerate(data): - t = index - for i in range(8): - t = -306674912 ^ unsigned_right_shitf(t, 1) if 1 & t else unsigned_right_shitf(t, 1) - data[index] = t - - def tmp(): - rand_num = random.random() - path = "/video/urls/v/1/toutiao/mp4/{video_id}?r={random_num}".format(video_id=video_id, - random_num=str(rand_num)[2:]) - e = o = r = -1 - i, a = 0, len(path) - while i < a: - e = ord(path[i]) - i += 1 - if e < 128: - r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ e)] - else: - if e < 2048: - r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (192 | e >> 6 & 31))] - r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | 63 & e))] - else: - if 55296 <= e < 57344: - e = (1023 & e) + 64 - i += 1 - o = 1023 & t.url(i) - r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (240 | e >> 8 & 7))] - r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | e >> 2 & 63))] - r = 
unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | o >> 6 & 15 | (3 & e) << 4))] - r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | 63 & o))] - else: - r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (224 | e >> 12 & 15))] - r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | e >> 6 & 63))] - r = unsigned_right_shitf(r, 8) ^ data[255 & (r ^ (128 | 63 & e))] - - return "https://ib.365yg.com{path}&s={param}".format(path=path, param=unsigned_right_shitf(r ^ -1, 0)) - - while 1: - url = tmp() - if url.split("=")[-1][0] != "-": # 参数s不能为负数 - return url - - -def ixigua_download(url, output_dir='.', merge=True, info_only=False, **kwargs): +def ixigua_download(url, output_dir='.', merge=True, info_only=False, stream_id='', **kwargs): # example url: https://www.ixigua.com/i6631065141750268420/#mid=63024814422 - resp = urlopen_with_retry(request.Request(url)) + headers['cookie'] = "MONITOR_WEB_ID=7892c49b-296e-4499-8704-e47c1b15123; " \ + "ixigua-a-s=1; ttcid=af99669b6304453480454f1507011d5c234; BD_REF=1; " \ + "__ac_nonce=060d88ff000a75e8d17eb; __ac_signature=_02B4Z6wo100f01kX9ZpgAAIDAKIBBQUIPYT5F2WIAAPG2ad; " \ + "ttwid=1%7CcIsVF_3vqSIk4XErhPB0H2VaTxT0tdsTMRbMjrJOPN8%7C1624806049%7C08ce7dd6f7d20506a41ba0a331ef96a6505d96731e6ad9f6c8c709f53f227ab1; " + + resp = urlopen_with_retry(request.Request(url, headers=headers)) html = resp.read().decode('utf-8') _cookies = [] for c in resp.getheader('Set-Cookie').split("httponly,"): _cookies.append(c.strip().split(' ')[0]) - headers['cookie'] = ' '.join(_cookies) + headers['cookie'] += ' '.join(_cookies) - conf = loads(match1(html, r"window\.config = (.+);")) - if not conf: - log.e("Get window.config from url failed, url: {}".format(url)) + match_txt = match1(html, r"', html) + m = re.match('(https?://)?([^/]+)(/.*)', url) + host = m.group(2) + if host != 'www.tiktok.com': # non-canonical URL + vid = r1(r'/video/(\d+)', url) + url = 'https://www.tiktok.com/@/video/%s/' % vid + host = 'www.tiktok.com' + else: + url = 
m.group(3).split('?')[0] + vid = url.split('/')[3] # should be a string of numbers + + html, set_cookie = getHttps(host, url, headers=headers) + tt_chain_token = r1('tt_chain_token=([^;]+);', set_cookie) + headers['Cookie'] = 'tt_chain_token=%s' % tt_chain_token + + data = r1(r'window\[\'SIGI_STATE\'\]=(.*?);window\[\'SIGI_RETRY\'\]', html) or \ + r1(r'', html) info = json.loads(data) - wid = info['props']['initialProps']['$wid'] - cookie = 'tt_webid=%s; tt_webid_v2=%s' % (wid, wid) + downloadAddr = info['ItemModule'][vid]['video']['downloadAddr'] + author = info['ItemModule'][vid]['author'] # same as uniqueId + nickname = info['UserModule']['users'][author]['nickname'] + title = '%s [%s]' % (nickname or author, vid) - # here's the cookie - headers['Cookie'] = cookie - - # try again - html = get_content(url, headers=headers) - data = r1(r'', html) - info = json.loads(data) - wid = info['props']['initialProps']['$wid'] - cookie = 'tt_webid=%s; tt_webid_v2=%s' % (wid, wid) - - videoData = info['props']['pageProps']['itemInfo']['itemStruct'] - videoId = videoData['id'] - videoUrl = videoData['video']['downloadAddr'] - uniqueId = videoData['author'].get('uniqueId') - nickName = videoData['author'].get('nickname') - - title = '%s [%s]' % (nickName or uniqueId, videoId) - - # we also need the referer - headers['Referer'] = referUrl - - mime, ext, size = url_info(videoUrl, headers=headers) + mime, ext, size = url_info(downloadAddr, headers=headers) print_info(site_info, title, mime, size) if not info_only: - download_urls([videoUrl], title, ext, size, output_dir=output_dir, merge=merge, headers=headers) + download_urls([downloadAddr], title, ext, size, output_dir=output_dir, merge=merge, headers=headers) site_info = "TikTok.com" download = tiktok_download diff --git a/src/you_get/extractors/twitter.py b/src/you_get/extractors/twitter.py index 23468211..4a439fe8 100644 --- a/src/you_get/extractors/twitter.py +++ b/src/you_get/extractors/twitter.py @@ -23,7 +23,7 @@ def 
twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs) if re.match(r'https?://mobile', url): # normalize mobile URL url = 'https://' + match1(url, r'//mobile\.(.+)') - if re.match(r'https?://twitter\.com/i/moments/', url): # moments + if re.match(r'https?://twitter\.com/i/moments/', url): # FIXME: moments html = get_html(url, faker=True) paths = re.findall(r'data-permalink-path="([^"]+)"', html) for path in paths: @@ -34,70 +34,48 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs) **kwargs) return - html = get_html(url, faker=True) # now it seems faker must be enabled - screen_name = r1(r'twitter\.com/([^/]+)', url) or r1(r'data-screen-name="([^"]*)"', html) or \ - r1(r'', video_page).group(1)) + except: + ytInitialPlayerResponse = json.loads(re.search('ytInitialPlayerResponse\s*=\s*([^\n]+?});', video_page).group(1)) stream_list = ytInitialPlayerResponse['streamingData']['formats'] #stream_list = ytInitialPlayerResponse['streamingData']['adaptiveFormats'] @@ -262,7 +262,10 @@ class YouTube(VideoExtractor): # Parse video page instead video_page = get_content('https://www.youtube.com/watch?v=%s' % self.vid) - ytInitialPlayerResponse = json.loads(re.search('ytInitialPlayerResponse\s*=\s*([^\n]+?});', video_page).group(1)) + try: # FIXME: we should extract ytInitialPlayerResponse more reliably + ytInitialPlayerResponse = json.loads(re.search('ytInitialPlayerResponse\s*=\s*([^\n]+?});', video_page).group(1)) + except: + ytInitialPlayerResponse = json.loads(re.search('ytInitialPlayerResponse\s*=\s*([^\n]+?});', video_page).group(1)) self.title = ytInitialPlayerResponse["videoDetails"]["title"] if re.search('([^"]*/base\.js)"', video_page): diff --git a/src/you_get/processor/ffmpeg.py b/src/you_get/processor/ffmpeg.py index e8639e89..4bbbd177 100755 --- a/src/you_get/processor/ffmpeg.py +++ b/src/you_get/processor/ffmpeg.py @@ -128,7 +128,7 @@ def ffmpeg_concat_mp4_to_mpg(files, output='output.mpg'): def 
ffmpeg_concat_ts_to_mkv(files, output='output.mkv'): print('Merging video parts... ', end="", flush=True) - params = [FFMPEG] + LOGLEVEL + ['-isync', '-y', '-i'] + params = [FFMPEG] + LOGLEVEL + ['-y', '-i'] params.append('concat:') for file in files: if os.path.isfile(file): diff --git a/src/you_get/version.py b/src/you_get/version.py index e8b65aae..440488a9 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.1555' +__version__ = '0.4.1650' diff --git a/tests/test.py b/tests/test.py index f3d45b97..c0f3836a 100644 --- a/tests/test.py +++ b/tests/test.py @@ -10,7 +10,9 @@ from you_get.extractors import ( acfun, bilibili, soundcloud, - tiktok + tiktok, + twitter, + miaopai ) @@ -28,7 +30,7 @@ class YouGetTests(unittest.TestCase): youtube.download( 'http://www.youtube.com/watch?v=pzKerr0JIPA', info_only=True ) - youtube.download('http://youtu.be/pzKerr0JIPA', info_only=True) + #youtube.download('http://youtu.be/pzKerr0JIPA', info_only=True) #youtube.download( # 'http://www.youtube.com/attribution_link?u=/watch?v%3DldAKIzq7bvs%26feature%3Dshare', # noqa # info_only=True @@ -40,8 +42,8 @@ class YouGetTests(unittest.TestCase): def test_acfun(self): acfun.download('https://www.acfun.cn/v/ac11701912', info_only=True) - def test_bilibili(self): - bilibili.download('https://space.bilibili.com/72270557/channel/seriesdetail?sid=218844', info_only=True) + #def test_bilibili(self): + # bilibili.download('https://www.bilibili.com/video/BV1sL4y177sC', info_only=True) #def test_soundcloud(self): ## single song @@ -53,11 +55,16 @@ class YouGetTests(unittest.TestCase): # 'https://soundcloud.com/anthony-flieger/sets/cytus', info_only=True #) - #def tests_tiktok(self): - # tiktok.download('https://www.tiktok.com/@nmb48_official/video/6850796940293164290', info_only=True) - # tiktok.download('https://t.tiktok.com/i18n/share/video/6850796940293164290/', info_only=True) - # 
tiktok.download('https://vt.tiktok.com/UGJR4R/', info_only=True) + def test_tiktok(self): + tiktok.download('https://www.tiktok.com/@nmb48_official/video/6850796940293164290', info_only=True) + tiktok.download('https://www.tiktok.com/@/video/6850796940293164290', info_only=True) + tiktok.download('https://t.tiktok.com/i18n/share/video/6850796940293164290/', info_only=True) + def test_twitter(self): + twitter.download('https://twitter.com/elonmusk/status/1530516552084234244', info_only=True) + + def test_weibo(self): + miaopai.download('https://video.weibo.com/show?fid=1034:4825403706245135', info_only=True) if __name__ == '__main__': unittest.main() diff --git a/you-get.json b/you-get.json index a9ef1dd5..bb94ba00 100644 --- a/you-get.json +++ b/you-get.json @@ -18,11 +18,6 @@ "Programming Language :: Python", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.2", - "Programming Language :: Python :: 3.3", - "Programming Language :: Python :: 3.4", - "Programming Language :: Python :: 3.5", - "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9",