diff --git a/README.md b/README.md index a99e57fd..40a26803 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ Interested? [Install it](#installation) now and [get started by examples](#getti Are you a Python programmer? Then check out [the source](https://github.com/soimort/you-get) and fork it! -![](http://i.imgur.com/GfthFAz.png) +![](https://i.imgur.com/GfthFAz.png) ## Installation @@ -128,7 +128,7 @@ $ you-get https://github.com/soimort/you-get/archive/master.zip or use [chocolatey package manager](https://chocolatey.org): ``` -> choco upgrade you-get +> choco upgrade you-get ``` In order to get the latest ```develop``` branch without messing up the PIP, you can try: @@ -339,6 +339,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the | **Tumblr** | |✓|✓|✓| | TED | |✓| | | | SoundCloud | | | |✓| +| SHOWROOM | |✓| | | | Pinterest | | |✓| | | MusicPlayOn | |✓| | | | MTV81 | |✓| | | @@ -372,7 +373,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the | 爆米花网 | |✓| | | | **bilibili
<br/>哔哩哔哩** | |✓| | | | Dilidili | |✓| | | -| 豆瓣 | | | |✓| +| 豆瓣 | |✓| |✓| | 斗鱼 | |✓| | | | Panda
<br/>熊猫 | |✓| | | | 凤凰视频 | |✓| | | @@ -406,6 +407,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the | 花瓣 | | |✓| | | Naver
네이버 | |✓| | | | 芒果TV | |✓| | | +| 火猫TV | |✓| | | For all other sites not on the list, the universal extractor will take care of finding and downloading interesting resources from the page. diff --git a/src/you_get/common.py b/src/you_get/common.py index c2b585a6..9faaa939 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -27,7 +27,9 @@ SITES = { 'google' : 'google', 'heavy-music' : 'heavymusic', 'huaban' : 'huaban', + 'huomao' : 'huomaotv', 'iask' : 'sina', + 'icourses' : 'icourses', 'ifeng' : 'ifeng', 'imgur' : 'imgur', 'in' : 'alive', @@ -340,6 +342,45 @@ def get_content(url, headers={}, decoded=True): return data +def post_content(url, headers={}, post_data={}, decoded=True): + """Post the content of a URL via sending a HTTP POST request. + + Args: + url: A URL. + headers: Request headers used by the client. + decoded: Whether decode the response body using UTF-8 or the charset specified in Content-Type. + + Returns: + The content as a string. + """ + + logging.debug('post_content: %s \n post_data: %s' % (url, post_data)) + + req = request.Request(url, headers=headers) + if cookies: + cookies.add_cookie_header(req) + req.headers.update(req.unredirected_hdrs) + post_data_enc = bytes(parse.urlencode(post_data), 'utf-8') + response = request.urlopen(req, data = post_data_enc) + data = response.read() + + # Handle HTTP compression for gzip and deflate (zlib) + content_encoding = response.getheader('Content-Encoding') + if content_encoding == 'gzip': + data = ungzip(data) + elif content_encoding == 'deflate': + data = undeflate(data) + + # Decode the response body + if decoded: + charset = match1(response.getheader('Content-Type'), r'charset=([\w-]+)') + if charset is not None: + data = data.decode(charset) + else: + data = data.decode('utf-8') + + return data + def url_size(url, faker = False, headers = {}): if faker: response = request.urlopen(request.Request(url, headers = fake_headers), None) @@ -507,7 +548,11 @@ def url_save(url, filepath, bar, refer = None, is_part = False, faker = False, h os.remove(filepath) # on Windows rename could fail if destination filepath exists os.rename(temp_filepath, filepath) -def url_save_chunked(url, filepath, bar, refer = None, is_part = False, faker = False, headers = {}): +def url_save_chunked(url, filepath, bar, dyn_callback=None, chunk_size=0, ignore_range=False, refer=None, is_part=False, faker=False, headers={}): + def dyn_update_url(received): + if callable(dyn_callback): + logging.debug('Calling callback %s for new URL from %s' % (dyn_callback.__name__, received)) + return dyn_callback(received) if os.path.exists(filepath): if not force: if not is_part: @@ -545,19 +590,26 @@ def url_save_chunked(url, filepath, bar, refer = None, is_part = False, faker = else: headers = {} if received: - headers['Range'] = 'bytes=' + str(received) + '-' + url = dyn_update_url(received) + if not ignore_range: + headers['Range'] = 'bytes=' + str(received) + '-' if refer: headers['Referer'] = refer - response = request.urlopen(request.Request(url, headers = headers), None) + response = request.urlopen(request.Request(url, headers=headers), None) with open(temp_filepath, open_mode) as output: + this_chunk = received while True: buffer = response.read(1024 * 256) if not buffer: break output.write(buffer) received += len(buffer) + if chunk_size and (received - this_chunk) >= chunk_size: + url = dyn_callback(received) + this_chunk = received + response = request.urlopen(request.Request(url, headers=headers), None) if bar: bar.update_received(len(buffer)) 
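The reworked `url_save_chunked` above lets a caller pass a `dyn_callback` that returns a fresh download URL once `chunk_size` bytes have been received, which is what the new icourses extractor relies on for its per-chunk signed URLs. A minimal standalone sketch of that pattern (function and parameter names here are illustrative, not part of you-get's API):

```
from urllib import request

def save_chunked(get_url, filepath, chunk_size=15000000):
    """get_url(received) returns a URL valid from byte offset `received`."""
    received = 0
    this_chunk = 0
    response = request.urlopen(get_url(received))
    with open(filepath, 'wb') as output:
        while True:
            buffer = response.read(1024 * 256)
            if not buffer:
                break
            output.write(buffer)
            received += len(buffer)
            # once a full chunk has been read, ask the callback for a new URL
            if chunk_size and received - this_chunk >= chunk_size:
                this_chunk = received
                response = request.urlopen(get_url(received))
```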
@@ -806,7 +858,7 @@ def download_urls(urls, title, ext, total_size, output_dir='.', refer=None, merg print() -def download_urls_chunked(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False, headers = {}): +def download_urls_chunked(urls, title, ext, total_size, output_dir='.', refer=None, merge=True, faker=False, headers = {}, **kwargs): assert urls if dry_run: print('Real URLs:\n%s\n' % urls) @@ -820,7 +872,7 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir='.', refer=No filename = '%s.%s' % (title, ext) filepath = os.path.join(output_dir, filename) - if total_size and ext in ('ts'): + if total_size: if not force and os.path.exists(filepath[:-3] + '.mkv'): print('Skipping %s: file already exists' % filepath[:-3] + '.mkv') print() @@ -835,7 +887,7 @@ def download_urls_chunked(urls, title, ext, total_size, output_dir='.', refer=No print('Downloading %s ...' % tr(filename)) filepath = os.path.join(output_dir, filename) parts.append(filepath) - url_save_chunked(url, filepath, bar, refer = refer, faker = faker, headers = headers) + url_save_chunked(url, filepath, bar, refer = refer, faker = faker, headers = headers, **kwargs) bar.done() if not merge: diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py index e69bc2fd..61b6a0d1 100755 --- a/src/you_get/extractors/__init__.py +++ b/src/you_get/extractors/__init__.py @@ -24,6 +24,7 @@ from .funshion import * from .google import * from .heavymusic import * from .huaban import * +from .icourses import * from .ifeng import * from .imgur import * from .infoq import * diff --git a/src/you_get/extractors/acfun.py b/src/you_get/extractors/acfun.py index 4638cb8f..87e005fb 100644 --- a/src/you_get/extractors/acfun.py +++ b/src/you_get/extractors/acfun.py @@ -73,14 +73,14 @@ def acfun_download(url, output_dir='.', merge=True, info_only=False, **kwargs): assert re.match(r'http://[^\.]+.acfun.[^\.]+/\D/\D\D(\d+)', url) html = get_html(url) - title = r1(r'

([^<>]+)<', html) + title = r1(r'data-title="([^"]+)"', html) title = unescape_html(title) title = escape_file_path(title) assert title - video = re.search('data-vid="(\d+)"\s*data-scode=""[^<]*title="([^"]+)"', html) - vid = video.group(1) - title = title + ' - ' + video.group(2) + vid = r1('data-vid="(\d+)"', html) + up = r1('data-name="([^"]+)"', html) + title = title + ' - ' + up acfun_download_by_vid(vid, title, output_dir=output_dir, merge=merge, diff --git a/src/you_get/extractors/baidu.py b/src/you_get/extractors/baidu.py index aa9caa0c..d5efaf0b 100644 --- a/src/you_get/extractors/baidu.py +++ b/src/you_get/extractors/baidu.py @@ -7,8 +7,10 @@ from ..common import * from .embed import * from .universal import * + def baidu_get_song_data(sid): - data = json.loads(get_html('http://music.baidu.com/data/music/fmlink?songIds=%s' % sid, faker = True))['data'] + data = json.loads(get_html( + 'http://music.baidu.com/data/music/fmlink?songIds=%s' % sid, faker=True))['data'] if data['xcode'] != '': # inside china mainland @@ -17,22 +19,28 @@ def baidu_get_song_data(sid): # outside china mainland return None + def baidu_get_song_url(data): return data['songLink'] + def baidu_get_song_artist(data): return data['artistName'] + def baidu_get_song_album(data): return data['albumName'] + def baidu_get_song_title(data): return data['songName'] + def baidu_get_song_lyric(data): lrc = data['lrcLink'] return None if lrc is '' else "http://music.baidu.com%s" % lrc + def baidu_download_song(sid, output_dir='.', merge=True, info_only=False): data = baidu_get_song_data(sid) if data is not None: @@ -51,7 +59,8 @@ def baidu_download_song(sid, output_dir='.', merge=True, info_only=False): type, ext, size = url_info(url, faker=True) print_info(site_info, title, type, size) if not info_only: - download_urls([url], file_name, ext, size, output_dir, merge=merge, faker=True) + download_urls([url], file_name, ext, size, + output_dir, merge=merge, faker=True) try: type, ext, size = url_info(lrc, faker=True) @@ -61,12 +70,14 @@ def baidu_download_song(sid, output_dir='.', merge=True, info_only=False): except: pass -def baidu_download_album(aid, output_dir = '.', merge = True, info_only = False): - html = get_html('http://music.baidu.com/album/%s' % aid, faker = True) + +def baidu_download_album(aid, output_dir='.', merge=True, info_only=False): + html = get_html('http://music.baidu.com/album/%s' % aid, faker=True) album_name = r1(r'
<h2 class="album-name">
(.+?)<\/h2>', html) artist = r1(r'', html) output_dir = '%s/%s - %s' % (output_dir, artist, album_name) - ids = json.loads(r1(r'', html).replace('"', '').replace(';', '"'))['ids'] + ids = json.loads(r1(r'', + html).replace('"', '').replace(';', '"'))['ids'] track_nr = 1 for id in ids: song_data = baidu_get_song_data(id) @@ -75,38 +86,29 @@ def baidu_download_album(aid, output_dir = '.', merge = True, info_only = False) song_lrc = baidu_get_song_lyric(song_data) file_name = '%02d.%s' % (track_nr, song_title) - type, ext, size = url_info(song_url, faker = True) + type, ext, size = url_info(song_url, faker=True) print_info(site_info, song_title, type, size) if not info_only: - download_urls([song_url], file_name, ext, size, output_dir, merge = merge, faker = True) + download_urls([song_url], file_name, ext, size, + output_dir, merge=merge, faker=True) if song_lrc: - type, ext, size = url_info(song_lrc, faker = True) + type, ext, size = url_info(song_lrc, faker=True) print_info(site_info, song_title, type, size) if not info_only: - download_urls([song_lrc], file_name, ext, size, output_dir, faker = True) + download_urls([song_lrc], file_name, ext, + size, output_dir, faker=True) track_nr += 1 -def baidu_download(url, output_dir = '.', stream_type = None, merge = True, info_only = False, **kwargs): - if re.match(r'http://imgsrc.baidu.com', url): - universal_download(url, output_dir, merge=merge, info_only=info_only) - return - elif re.match(r'http://pan.baidu.com', url): - html = get_html(url) +def baidu_download(url, output_dir='.', stream_type=None, merge=True, info_only=False, **kwargs): - title = r1(r'server_filename="([^"]+)"', html) - if len(title.split('.')) > 1: - title = ".".join(title.split('.')[:-1]) - - real_url = r1(r'\\"dlink\\":\\"([^"]*)\\"', html).replace('\\\\/', '/') - type, ext, size = url_info(real_url, faker = True) - - print_info(site_info, title, ext, size) + if re.match(r'http://pan.baidu.com', url): + real_url, title, ext, size = baidu_pan_download(url) if not info_only: - download_urls([real_url], title, ext, size, output_dir, merge = merge) - + download_urls([real_url], title, ext, size, + output_dir, url, merge=merge, faker=True) elif re.match(r'http://music.baidu.com/album/\d+', url): id = r1(r'http://music.baidu.com/album/(\d+)', url) baidu_download_album(id, output_dir, merge, info_only) @@ -124,17 +126,20 @@ def baidu_download(url, output_dir = '.', stream_type = None, merge = True, info html = get_html(url) title = r1(r'title:"([^"]+)"', html) - items = re.findall(r'//imgsrc.baidu.com/forum/w[^"]+/([^/"]+)', html) + items = re.findall( + r'//imgsrc.baidu.com/forum/w[^"]+/([^/"]+)', html) urls = ['http://imgsrc.baidu.com/forum/pic/item/' + i for i in set(items)] # handle albums kw = r1(r'kw=([^&]+)', html) or r1(r"kw:'([^']+)'", html) tid = r1(r'tid=(\d+)', html) or r1(r"tid:'([^']+)'", html) - album_url = 'http://tieba.baidu.com/photo/g/bw/picture/list?kw=%s&tid=%s' % (kw, tid) + album_url = 'http://tieba.baidu.com/photo/g/bw/picture/list?kw=%s&tid=%s' % ( + kw, tid) album_info = json.loads(get_content(album_url)) for i in album_info['data']['pic_list']: - urls.append('http://imgsrc.baidu.com/forum/pic/item/' + i['pic_id'] + '.jpg') + urls.append( + 'http://imgsrc.baidu.com/forum/pic/item/' + i['pic_id'] + '.jpg') ext = 'jpg' size = float('Inf') @@ -144,6 +149,170 @@ def baidu_download(url, output_dir = '.', stream_type = None, merge = True, info download_urls(urls, title, ext, size, output_dir=output_dir, merge=False) + +def baidu_pan_download(url): + 
errno_patt = r'errno":([^"]+),' + refer_url = "" + fake_headers = { + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', + 'Accept-Charset': 'UTF-8,*;q=0.5', + 'Accept-Encoding': 'gzip,deflate,sdch', + 'Accept-Language': 'en-US,en;q=0.8', + 'Host': 'pan.baidu.com', + 'Origin': 'http://pan.baidu.com', + 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:13.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2500.0 Safari/537.36', + 'Referer': refer_url + } + if cookies: + print('Use user specified cookies') + else: + print('Generating cookies...') + fake_headers['Cookie'] = baidu_pan_gen_cookies(url) + refer_url = "http://pan.baidu.com" + html = get_content(url, fake_headers, decoded=True) + isprotected = False + sign, timestamp, bdstoken, appid, primary_id, fs_id, uk = baidu_pan_parse( + html) + if sign == None: + if re.findall(r'\baccess-code\b', html): + isprotected = True + sign, timestamp, bdstoken, appid, primary_id, fs_id, uk, fake_headers, psk = baidu_pan_protected_share( + url) + # raise NotImplementedError("Password required!") + if isprotected != True: + raise AssertionError("Share not found or canceled: %s" % url) + if bdstoken == None: + bdstoken = "" + if isprotected != True: + sign, timestamp, bdstoken, appid, primary_id, fs_id, uk = baidu_pan_parse( + html) + request_url = "http://pan.baidu.com/api/sharedownload?sign=%s×tamp=%s&bdstoken=%s&channel=chunlei&clienttype=0&web=1&app_id=%s" % ( + sign, timestamp, bdstoken, appid) + refer_url = url + post_data = { + 'encrypt': 0, + 'product': 'share', + 'uk': uk, + 'primaryid': primary_id, + 'fid_list': '[' + fs_id + ']' + } + if isprotected == True: + post_data['sekey'] = psk + response_content = post_content(request_url, fake_headers, post_data, True) + errno = match1(response_content, errno_patt) + if errno != "0": + raise AssertionError( + "Server refused to provide download link! 
(Errno:%s)" % errno) + real_url = r1(r'dlink":"([^"]+)"', response_content).replace('\\/', '/') + title = r1(r'server_filename":"([^"]+)"', response_content) + assert real_url + type, ext, size = url_info(real_url, faker=True) + title_wrapped = json.loads('{"wrapper":"%s"}' % title) + title = title_wrapped['wrapper'] + logging.debug(real_url) + print_info(site_info, title, ext, size) + print('Hold on...') + time.sleep(5) + return real_url, title, ext, size + + +def baidu_pan_parse(html): + sign_patt = r'sign":"([^"]+)"' + timestamp_patt = r'timestamp":([^"]+),' + appid_patt = r'app_id":"([^"]+)"' + bdstoken_patt = r'bdstoken":"([^"]+)"' + fs_id_patt = r'fs_id":([^"]+),' + uk_patt = r'uk":([^"]+),' + errno_patt = r'errno":([^"]+),' + primary_id_patt = r'shareid":([^"]+),' + sign = match1(html, sign_patt) + timestamp = match1(html, timestamp_patt) + appid = match1(html, appid_patt) + bdstoken = match1(html, bdstoken_patt) + fs_id = match1(html, fs_id_patt) + uk = match1(html, uk_patt) + primary_id = match1(html, primary_id_patt) + return sign, timestamp, bdstoken, appid, primary_id, fs_id, uk + + +def baidu_pan_gen_cookies(url, post_data=None): + from http import cookiejar + cookiejar = cookiejar.CookieJar() + opener = request.build_opener(request.HTTPCookieProcessor(cookiejar)) + resp = opener.open('http://pan.baidu.com') + if post_data != None: + resp = opener.open(url, bytes(parse.urlencode(post_data), 'utf-8')) + return cookjar2hdr(cookiejar) + + +def baidu_pan_protected_share(url): + print('This share is protected by password!') + inpwd = input('Please provide unlock password: ') + inpwd = inpwd.replace(' ', '').replace('\t', '') + print('Please wait...') + post_pwd = { + 'pwd': inpwd, + 'vcode': None, + 'vstr': None + } + from http import cookiejar + import time + cookiejar = cookiejar.CookieJar() + opener = request.build_opener(request.HTTPCookieProcessor(cookiejar)) + resp = opener.open('http://pan.baidu.com') + resp = opener.open(url) + init_url = resp.geturl() + verify_url = 'http://pan.baidu.com/share/verify?%s&t=%s&channel=chunlei&clienttype=0&web=1' % ( + init_url.split('?', 1)[1], int(time.time())) + refer_url = init_url + fake_headers = { + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', + 'Accept-Charset': 'UTF-8,*;q=0.5', + 'Accept-Encoding': 'gzip,deflate,sdch', + 'Accept-Language': 'en-US,en;q=0.8', + 'Host': 'pan.baidu.com', + 'Origin': 'http://pan.baidu.com', + 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:13.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2500.0 Safari/537.36', + 'Referer': refer_url + } + opener.addheaders = dict2triplet(fake_headers) + pwd_resp = opener.open(verify_url, bytes( + parse.urlencode(post_pwd), 'utf-8')) + pwd_resp_str = ungzip(pwd_resp.read()).decode('utf-8') + pwd_res = json.loads(pwd_resp_str) + if pwd_res['errno'] != 0: + raise AssertionError( + 'Server returned an error: %s (Incorrect password?)' % pwd_res['errno']) + pg_resp = opener.open('http://pan.baidu.com/share/link?%s' % + init_url.split('?', 1)[1]) + content = ungzip(pg_resp.read()).decode('utf-8') + sign, timestamp, bdstoken, appid, primary_id, fs_id, uk = baidu_pan_parse( + content) + psk = query_cookiejar(cookiejar, 'BDCLND') + psk = parse.unquote(psk) + fake_headers['Cookie'] = cookjar2hdr(cookiejar) + return sign, timestamp, bdstoken, appid, primary_id, fs_id, uk, fake_headers, psk + + +def cookjar2hdr(cookiejar): + cookie_str = '' + for i in cookiejar: + cookie_str = cookie_str + i.name + '=' + i.value + ';' + return cookie_str[:-1] 
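`cookjar2hdr` above serializes a `CookieJar` into a `Cookie` request-header value (semicolon-joined `name=value` pairs with no trailing separator), so the freshly generated Baidu Pan session cookies can be fed back through `fake_headers`. A rough standalone equivalent, assuming pan.baidu.com is reachable:

```
from http import cookiejar
from urllib import request

cj = cookiejar.CookieJar()
opener = request.build_opener(request.HTTPCookieProcessor(cj))
opener.open('http://pan.baidu.com')  # populates cj with session cookies

# the same "name1=value1;name2=value2" shape that cookjar2hdr returns
cookie_header = ';'.join('%s=%s' % (c.name, c.value) for c in cj)
headers = {'Cookie': cookie_header}
```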
+ + +def query_cookiejar(cookiejar, name): + for i in cookiejar: + if i.name == name: + return i.value + + +def dict2triplet(dictin): + out_triplet = [] + for i in dictin: + out_triplet.append((i, dictin[i])) + return out_triplet + site_info = "Baidu.com" download = baidu_download download_playlist = playlist_not_supported("baidu") diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index c18290b8..122dea0b 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -119,66 +119,70 @@ def bilibili_live_download_by_cid(cid, title, output_dir='.', merge=True, info_o def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs): html = get_content(url) - if re.match(r'https?://bangumi\.bilibili\.com/', url): - # quick hack for bangumi URLs - url = r1(r'"([^"]+)" class="v-av-link"', html) - html = get_content(url) - title = r1_of([r'', r']*>\s*([^<>]+)\s*
</h1>
'], html) if title: title = unescape_html(title) title = escape_file_path(title) - flashvars = r1_of([r'(cid=\d+)', r'(cid: \d+)', r'flashvars="([^"]+)"', - r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html) - assert flashvars - flashvars = flashvars.replace(': ', '=') - t, cid = flashvars.split('=', 1) - cid = cid.split('&')[0] - if t == 'cid': - if re.match(r'https?://live\.bilibili\.com/', url): - title = r1(r'\s*([^<>]+)\s*', html) - bilibili_live_download_by_cid(cid, title, output_dir=output_dir, merge=merge, info_only=info_only) + if re.match(r'https?://bangumi\.bilibili\.com/', url): + # quick hack for bangumi URLs + episode_id = r1(r'data-current-episode-id="(\d+)"', html) + cont = post_content('http://bangumi.bilibili.com/web_api/get_source', + post_data={'episode_id': episode_id}) + cid = json.loads(cont)['result']['cid'] + bilibili_download_by_cid(str(cid), title, output_dir=output_dir, merge=merge, info_only=info_only) - else: - # multi-P - cids = [] - pages = re.findall('', html) - for i, page in enumerate(pages): - html = get_html("http://www.bilibili.com%s" % page) - flashvars = r1_of([r'(cid=\d+)', - r'flashvars="([^"]+)"', - r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html) - if flashvars: - t, cid = flashvars.split('=', 1) - cids.append(cid.split('&')[0]) - if url.endswith(page): - cids = [cid.split('&')[0]] - titles = [titles[i]] - break - - # no multi-P - if not pages: - cids = [cid] - titles = [r1(r'', html) or title] - - for i in range(len(cids)): - bilibili_download_by_cid(cids[i], - titles[i], - output_dir=output_dir, - merge=merge, - info_only=info_only) - - elif t == 'vid': - sina_download_by_vid(cid, title=title, output_dir=output_dir, merge=merge, info_only=info_only) - elif t == 'ykid': - youku_download_by_vid(cid, title=title, output_dir=output_dir, merge=merge, info_only=info_only) - elif t == 'uid': - tudou_download_by_id(cid, title, output_dir=output_dir, merge=merge, info_only=info_only) else: - raise NotImplementedError(flashvars) + flashvars = r1_of([r'(cid=\d+)', r'(cid: \d+)', r'flashvars="([^"]+)"', + r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html) + assert flashvars + flashvars = flashvars.replace(': ', '=') + t, cid = flashvars.split('=', 1) + cid = cid.split('&')[0] + if t == 'cid': + if re.match(r'https?://live\.bilibili\.com/', url): + title = r1(r'\s*([^<>]+)\s*', html) + bilibili_live_download_by_cid(cid, title, output_dir=output_dir, merge=merge, info_only=info_only) + + else: + # multi-P + cids = [] + pages = re.findall('', html) + for i, page in enumerate(pages): + html = get_html("http://www.bilibili.com%s" % page) + flashvars = r1_of([r'(cid=\d+)', + r'flashvars="([^"]+)"', + r'"https://[a-z]+\.bilibili\.com/secure,(cid=\d+)(?:&aid=\d+)?"'], html) + if flashvars: + t, cid = flashvars.split('=', 1) + cids.append(cid.split('&')[0]) + if url.endswith(page): + cids = [cid.split('&')[0]] + titles = [titles[i]] + break + + # no multi-P + if not pages: + cids = [cid] + titles = [r1(r'', html) or title] + + for i in range(len(cids)): + bilibili_download_by_cid(cids[i], + titles[i], + output_dir=output_dir, + merge=merge, + info_only=info_only) + + elif t == 'vid': + sina_download_by_vid(cid, title=title, output_dir=output_dir, merge=merge, info_only=info_only) + elif t == 'ykid': + youku_download_by_vid(cid, title=title, output_dir=output_dir, merge=merge, info_only=info_only) + elif t == 'uid': + tudou_download_by_id(cid, title, output_dir=output_dir, merge=merge, 
info_only=info_only) + else: + raise NotImplementedError(flashvars) if not info_only and not dry_run: if not kwargs['caption']: diff --git a/src/you_get/extractors/dailymotion.py b/src/you_get/extractors/dailymotion.py index 8b701cd1..2e96c160 100644 --- a/src/you_get/extractors/dailymotion.py +++ b/src/you_get/extractors/dailymotion.py @@ -4,6 +4,11 @@ __all__ = ['dailymotion_download'] from ..common import * +def extract_m3u(url): + content = get_content(url) + m3u_url = re.findall(r'http://.*', content)[0] + return match1(m3u_url, r'([^#]+)') + def dailymotion_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): """Downloads Dailymotion videos by URL. """ @@ -13,7 +18,7 @@ def dailymotion_download(url, output_dir = '.', merge = True, info_only = False, title = match1(html, r'"video_title"\s*:\s*"([^"]+)"') or \ match1(html, r'"title"\s*:\s*"([^"]+)"') - for quality in ['720','480','380','240','auto']: + for quality in ['1080','720','480','380','240','auto']: try: real_url = info[quality][0]["url"] if real_url: @@ -21,11 +26,12 @@ def dailymotion_download(url, output_dir = '.', merge = True, info_only = False, except KeyError: pass - type, ext, size = url_info(real_url) + m3u_url = extract_m3u(real_url) + mime, ext, size = 'video/mp4', 'mp4', 0 - print_info(site_info, title, type, size) + print_info(site_info, title, mime, size) if not info_only: - download_urls([real_url], title, ext, size, output_dir, merge = merge) + download_url_ffmpeg(m3u_url, title, ext, output_dir=output_dir, merge=merge) site_info = "Dailymotion.com" download = dailymotion_download diff --git a/src/you_get/extractors/douban.py b/src/you_get/extractors/douban.py index 187e99c0..1a4a67d1 100644 --- a/src/you_get/extractors/douban.py +++ b/src/you_get/extractors/douban.py @@ -7,12 +7,23 @@ from ..common import * def douban_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): html = get_html(url) - if 'subject' in url: + + if re.match(r'https?://movie', url): + title = match1(html, 'name="description" content="([^"]+)') + tid = match1(url, 'trailer/(\d+)') + real_url = 'https://movie.douban.com/trailer/video_url?tid=%s' % tid + type, ext, size = url_info(real_url) + + print_info(site_info, title, type, size) + if not info_only: + download_urls([real_url], title, ext, size, output_dir, merge = merge) + + elif 'subject' in url: titles = re.findall(r'data-title="([^"]*)">', html) song_id = re.findall(r'
  • ([^<]{1,9999})') + + print_info(site_info, title, 'm3u8', float('inf')) + + if not info_only: + download_url_ffmpeg(m3u8_url, title, 'm3u8', None, output_dir=output_dir, merge=merge) + + +site_info = 'huomao.com' +download = huomaotv_download +download_playlist = playlist_not_supported('huomao') diff --git a/src/you_get/extractors/icourses.py b/src/you_get/extractors/icourses.py new file mode 100644 index 00000000..cb2ff74a --- /dev/null +++ b/src/you_get/extractors/icourses.py @@ -0,0 +1,148 @@ +#!/usr/bin/env python +from ..common import * +from urllib import parse +import random +from time import sleep +import xml.etree.ElementTree as ET +import datetime +import hashlib +import base64 +import logging +from urllib import error +import re + +__all__ = ['icourses_download'] + + +def icourses_download(url, merge=False, output_dir='.', **kwargs): + icourses_parser = ICousesExactor(url=url) + real_url = icourses_parser.icourses_cn_url_parser(**kwargs) + title = icourses_parser.title + if real_url is not None: + for tries in range(0, 5): + try: + _, type_, size = url_info(real_url, faker=True) + break + except error.HTTPError: + logging.warning('Failed to fetch the video file! Retrying...') + sleep(random.Random().randint(0, 5)) # Prevent from blockage + real_url = icourses_parser.icourses_cn_url_parser() + title = icourses_parser.title + print_info(site_info, title, type_, size) + if not kwargs['info_only']: + download_urls_chunked([real_url], title, 'flv', + total_size=size, output_dir=output_dir, refer=url, merge=merge, faker=True, ignore_range=True, chunk_size=15000000, dyn_callback=icourses_parser.icourses_cn_url_parser) + + +# Why not using VideoExtractor: This site needs specical download method +class ICousesExactor(object): + + def __init__(self, url): + self.url = url + self.title = '' + return + + def icourses_playlist_download(self, **kwargs): + html = get_content(self.url) + page_type_patt = r'showSectionNode\(this,(\d+),(\d+)\)' + video_js_number = r'changeforvideo\((.*?)\)' + fs_flag = r'' + page_navi_vars = re.search(pattern=page_type_patt, string=html) + dummy_page = 'http://www.icourses.cn/jpk/viewCharacterDetail.action?sectionId={}&courseId={}'.format( + page_navi_vars.group(2), page_navi_vars.group(1)) + html = get_content(dummy_page) + fs_status = match1(html, fs_flag) + video_list = re.findall(pattern=video_js_number, string=html) + for video in video_list: + video_args = video.replace('\'', '').split(',') + video_url = 'http://www.icourses.cn/jpk/changeforVideo.action?resId={}&courseId={}&firstShowFlag={}'.format( + video_args[0], video_args[1], fs_status or '1') + sleep(random.Random().randint(0, 5)) # Prevent from blockage + icourses_download(video_url, **kwargs) + + def icourses_cn_url_parser(self, received=0, **kwargs): + PLAYER_BASE_VER = '150606-1' + ENCRYPT_MOD_VER = '151020' + ENCRYPT_SALT = '3DAPmXsZ4o' # It took really long time to find this... + html = get_content(self.url) + if re.search(pattern=r'showSectionNode\(.*\)', string=html): + logging.warning('Switching to playlist mode!') + return self.icourses_playlist_download(**kwargs) + flashvars_patt = r'var\ flashvars\=((.|\n)*)};' + server_time_patt = r'MPlayer.swf\?v\=(\d+)' + uuid_patt = r'uuid:(\d+)' + other_args_patt = r'other:"(.*)"' + res_url_patt = r'IService:\'([^\']+)' + title_a_patt = r'
    (.*?)' + title_b_patt = r'
    ((.|\n)*?)
    ' + title_a = match1(html, title_a_patt).strip() + title_b = match1(html, title_b_patt).strip() + title = title_a + title_b # WIP, FIXME + title = re.sub('( +|\n|\t|\r|\ \;)', '', + unescape_html(title).replace(' ', '')) + server_time = match1(html, server_time_patt) + flashvars = match1(html, flashvars_patt) + uuid = match1(flashvars, uuid_patt) + other_args = match1(flashvars, other_args_patt) + res_url = match1(flashvars, res_url_patt) + url_parts = {'v': server_time, 'other': other_args, + 'uuid': uuid, 'IService': res_url} + req_url = '%s?%s' % (res_url, parse.urlencode(url_parts)) + logging.debug('Requesting video resource location...') + xml_resp = get_html(req_url) + xml_obj = ET.fromstring(xml_resp) + logging.debug('The result was {}'.format(xml_obj.get('status'))) + if xml_obj.get('status') != 'success': + raise ValueError('Server returned error!') + if received: + play_type = 'seek' + else: + play_type = 'play' + received -= 1 + common_args = {'lv': PLAYER_BASE_VER, 'ls': play_type, + 'lt': datetime.datetime.now().strftime('%m-%d/%H:%M:%S'), + 'start': received + 1} + media_host = xml_obj.find(".//*[@name='host']").text + media_url = media_host + xml_obj.find(".//*[@name='url']").text + # This is what they called `SSLModule`... But obviously, just a kind of + # encryption, takes absolutely no effect in protecting data intergrity + if xml_obj.find(".//*[@name='ssl']").text != 'true': + logging.debug('The encryption mode is disabled') + # when the so-called `SSLMode` is not activated, the parameters, `h` + # and `p` can be found in response + arg_h = xml_obj.find(".//*[@name='h']").text + assert arg_h + arg_r = xml_obj.find(".//*[@name='p']").text or ENCRYPT_MOD_VER + url_args = common_args.copy() + url_args.update({'h': arg_h, 'r': arg_r}) + final_url = '{}?{}'.format( + media_url, parse.urlencode(url_args)) + self.title = title + return final_url + # when the `SSLMode` is activated, we need to receive the timestamp and the + # time offset (?) value from the server + logging.debug('The encryption mode is in effect') + ssl_callback = get_html( + '{}/ssl/ssl.shtml'.format(media_host)).split(',') + ssl_timestamp = int(datetime.datetime.strptime( + ssl_callback[1], "%b %d %H:%M:%S %Y").timestamp() + int(ssl_callback[0])) + sign_this = ENCRYPT_SALT + \ + parse.urlparse(media_url).path + str(ssl_timestamp) + arg_h = base64.b64encode(hashlib.md5( + bytes(sign_this, 'utf-8')).digest()) + # Post-processing, may subject to change, so leaving this alone... 
+ arg_h = arg_h.decode('utf-8').strip('=').replace('+', + '-').replace('/', '_') + arg_r = ssl_timestamp + url_args = common_args.copy() + url_args.update({'h': arg_h, 'r': arg_r, 'p': ENCRYPT_MOD_VER}) + final_url = '{}?{}'.format( + media_url, parse.urlencode(url_args)) + logging.debug('Crafted URL: {}'.format(final_url)) + self.title = title + return final_url + + +site_info = 'icourses.cn' +download = icourses_download +# download_playlist = icourses_playlist_download diff --git a/src/you_get/extractors/mgtv.py b/src/you_get/extractors/mgtv.py index aeb42490..3ce62efe 100644 --- a/src/you_get/extractors/mgtv.py +++ b/src/you_get/extractors/mgtv.py @@ -12,11 +12,11 @@ import re class MGTV(VideoExtractor): name = "芒果 (MGTV)" - # Last updated: 2015-11-24 + # Last updated: 2016-11-13 stream_types = [ - {'id': 'hd', 'container': 'flv', 'video_profile': '超清'}, - {'id': 'sd', 'container': 'flv', 'video_profile': '高清'}, - {'id': 'ld', 'container': 'flv', 'video_profile': '标清'}, + {'id': 'hd', 'container': 'ts', 'video_profile': '超清'}, + {'id': 'sd', 'container': 'ts', 'video_profile': '高清'}, + {'id': 'ld', 'container': 'ts', 'video_profile': '标清'}, ] id_dic = {i['video_profile']:(i['id']) for i in stream_types} @@ -27,7 +27,7 @@ class MGTV(VideoExtractor): def get_vid_from_url(url): """Extracts video ID from URL. """ - return match1(url, 'http://www.mgtv.com/v/\d/\d+/\w+/(\d+).html') + return match1(url, 'http://www.mgtv.com/b/\d+/(\d+).html') #---------------------------------------------------------------------- @staticmethod @@ -44,10 +44,15 @@ class MGTV(VideoExtractor): content = get_content(content['info']) #get the REAL M3U url, maybe to be changed later? segment_list = [] + segments_size = 0 for i in content.split(): if not i.startswith('#'): #not the best way, better we use the m3u8 package segment_list.append(base_url + i) - return segment_list + # use ext-info for fast size calculate + elif i.startswith('#EXT-MGTV-File-SIZE:'): + segments_size += int(i[i.rfind(':')+1:]) + + return m3u_url, segments_size, segment_list def download_playlist_by_url(self, url, **kwargs): pass @@ -69,28 +74,25 @@ class MGTV(VideoExtractor): quality_id = self.id_dic[s['video_profile']] url = stream_available[s['video_profile']] url = re.sub( r'(\&arange\=\d+)', '', url) #Un-Hum - segment_list_this = self.get_mgtv_real_url(url) - - container_this_stream = '' - size_this_stream = 0 + m3u8_url, m3u8_size, segment_list_this = self.get_mgtv_real_url(url) + stream_fileid_list = [] for i in segment_list_this: - _, container_this_stream, size_this_seg = url_info(i) - size_this_stream += size_this_seg stream_fileid_list.append(os.path.basename(i).split('.')[0]) - + #make pieces pieces = [] for i in zip(stream_fileid_list, segment_list_this): pieces.append({'fileid': i[0], 'segs': i[1],}) self.streams[quality_id] = { - 'container': 'flv', + 'container': s['container'], 'video_profile': s['video_profile'], - 'size': size_this_stream, - 'pieces': pieces + 'size': m3u8_size, + 'pieces': pieces, + 'm3u8_url': m3u8_url } - + if not kwargs['info_only']: self.streams[quality_id]['src'] = segment_list_this @@ -107,6 +109,44 @@ class MGTV(VideoExtractor): # Extract stream with the best quality stream_id = self.streams_sorted[0]['id'] + def download(self, **kwargs): + + if 'stream_id' in kwargs and kwargs['stream_id']: + stream_id = kwargs['stream_id'] + else: + stream_id = 'null' + + # print video info only + if 'info_only' in kwargs and kwargs['info_only']: + if stream_id != 'null': + if 'index' not in kwargs: + 
self.p(stream_id) + else: + self.p_i(stream_id) + else: + # Display all available streams + if 'index' not in kwargs: + self.p([]) + else: + stream_id = self.streams_sorted[0]['id'] if 'id' in self.streams_sorted[0] else self.streams_sorted[0]['itag'] + self.p_i(stream_id) + + # default to use the best quality + if stream_id == 'null': + stream_id = self.streams_sorted[0]['id'] + + stream_info = self.streams[stream_id] + + if not kwargs['info_only']: + if player: + # with m3u8 format because some video player can process urls automatically (e.g. mpv) + launch_player(player, [stream_info['m3u8_url']]) + else: + download_urls(stream_info['src'], self.title, stream_info['container'], stream_info['size'], + output_dir=kwargs['output_dir'], + merge=kwargs['merge'], + av=stream_id in self.dash_streams) + site = MGTV() download = site.download_by_url download_playlist = site.download_playlist_by_url \ No newline at end of file diff --git a/src/you_get/extractors/netease.py b/src/you_get/extractors/netease.py index 63ee59b8..d5f3b1fa 100644 --- a/src/you_get/extractors/netease.py +++ b/src/you_get/extractors/netease.py @@ -54,13 +54,15 @@ def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=Fals os.mkdir(new_dir) cover_url = j['result']['coverImgUrl'] download_urls([cover_url], "cover", "jpg", 0, new_dir) - - for i in j['result']['tracks']: - netease_song_download(i, output_dir=new_dir, info_only=info_only) + + prefix_width = len(str(len(j['result']['tracks']))) + for n, i in enumerate(j['result']['tracks']): + playlist_prefix = '%%.%dd_' % prefix_width % n + netease_song_download(i, output_dir=new_dir, info_only=info_only, playlist_prefix=playlist_prefix) try: # download lyrics assert kwargs['caption'] l = loads(get_content("http://music.163.com/api/song/lyric/?id=%s&lv=-1&csrf_token=" % i['id'], headers={"Referer": "http://music.163.com/"})) - netease_lyric_download(i, l["lrc"]["lyric"], output_dir=new_dir, info_only=info_only) + netease_lyric_download(i, l["lrc"]["lyric"], output_dir=new_dir, info_only=info_only, playlist_prefix=playlist_prefix) except: pass elif "song" in url: @@ -85,10 +87,10 @@ def netease_cloud_music_download(url, output_dir='.', merge=True, info_only=Fals j = loads(get_content("http://music.163.com/api/mv/detail/?id=%s&ids=[%s]&csrf_token=" % (rid, rid), headers={"Referer": "http://music.163.com/"})) netease_video_download(j['data'], output_dir=output_dir, info_only=info_only) -def netease_lyric_download(song, lyric, output_dir='.', info_only=False): +def netease_lyric_download(song, lyric, output_dir='.', info_only=False, playlist_prefix=""): if info_only: return - title = "%s. %s" % (song['position'], song['name']) + title = "%s%s. %s" % (playlist_prefix, song['position'], song['name']) filename = '%s.lrc' % get_filename(title) print('Saving %s ...' % filename, end="", flush=True) with open(os.path.join(output_dir, filename), @@ -103,8 +105,8 @@ def netease_video_download(vinfo, output_dir='.', info_only=False): netease_download_common(title, url_best, output_dir=output_dir, info_only=info_only) -def netease_song_download(song, output_dir='.', info_only=False): - title = "%s. %s" % (song['position'], song['name']) +def netease_song_download(song, output_dir='.', info_only=False, playlist_prefix=""): + title = "%s%s. 
%s" % (playlist_prefix, song['position'], song['name']) songNet = 'p' + song['mp3Url'].split('/')[2][1:] if 'hMusic' in song and song['hMusic'] != None: diff --git a/src/you_get/extractors/qq.py b/src/you_get/extractors/qq.py index 9ca8af82..f1707527 100644 --- a/src/you_get/extractors/qq.py +++ b/src/you_get/extractors/qq.py @@ -7,22 +7,67 @@ from .qie import download as qieDownload from urllib.parse import urlparse,parse_qs def qq_download_by_vid(vid, title, output_dir='.', merge=True, info_only=False): - api = "http://h5vv.video.qq.com/getinfo?otype=json&platform=10901&vid=%s" % vid - content = get_html(api) - output_json = json.loads(match1(content, r'QZOutputJson=(.*)')[:-1]) - url = output_json['vl']['vi'][0]['ul']['ui'][0]['url'] - fvkey = output_json['vl']['vi'][0]['fvkey'] - mp4 = output_json['vl']['vi'][0]['cl'].get('ci', None) - if mp4: - mp4 = mp4[0]['keyid'].replace('.10', '.p') + '.mp4' - else: - mp4 = output_json['vl']['vi'][0]['fn'] - url = '%s/%s?vkey=%s' % ( url, mp4, fvkey ) - _, ext, size = url_info(url, faker=True) + info_api = 'http://vv.video.qq.com/getinfo?otype=json&appver=3%2E2%2E19%2E333&platform=11&defnpayver=1&vid=' + vid + info = get_html(info_api) + video_json = json.loads(match1(info, r'QZOutputJson=(.*)')[:-1]) + parts_vid = video_json['vl']['vi'][0]['vid'] + parts_ti = video_json['vl']['vi'][0]['ti'] + parts_prefix = video_json['vl']['vi'][0]['ul']['ui'][0]['url'] + parts_formats = video_json['fl']['fi'] + # find best quality + # only looking for fhd(1080p) and shd(720p) here. + # 480p usually come with a single file, will be downloaded as fallback. + best_quality = '' + for part_format in parts_formats: + if part_format['name'] == 'fhd': + best_quality = 'fhd' + break - print_info(site_info, title, ext, size) - if not info_only: - download_urls([url], title, ext, size, output_dir=output_dir, merge=merge) + if part_format['name'] == 'shd': + best_quality = 'shd' + + for part_format in parts_formats: + if (not best_quality == '') and (not part_format['name'] == best_quality): + continue + part_format_id = part_format['id'] + part_format_sl = part_format['sl'] + if part_format_sl == 0: + part_urls= [] + total_size = 0 + try: + # For fhd(1080p), every part is about 100M and 6 minutes + # try 100 parts here limited download longest single video of 10 hours. + for part in range(1,100): + filename = vid + '.p' + str(part_format_id % 1000) + '.' 
+ str(part) + '.mp4' + key_api = "http://vv.video.qq.com/getkey?otype=json&platform=11&format=%s&vid=%s&filename=%s" % (part_format_id, parts_vid, filename) + #print(filename) + #print(key_api) + part_info = get_html(key_api) + key_json = json.loads(match1(part_info, r'QZOutputJson=(.*)')[:-1]) + #print(key_json) + vkey = key_json['key'] + url = '%s/%s?vkey=%s' % (parts_prefix, filename, vkey) + part_urls.append(url) + _, ext, size = url_info(url, faker=True) + total_size += size + except: + pass + print_info(site_info, parts_ti, ext, total_size) + if not info_only: + download_urls(part_urls, parts_ti, ext, total_size, output_dir=output_dir, merge=merge) + else: + fvkey = output_json['vl']['vi'][0]['fvkey'] + mp4 = output_json['vl']['vi'][0]['cl'].get('ci', None) + if mp4: + mp4 = mp4[0]['keyid'].replace('.10', '.p') + '.mp4' + else: + mp4 = output_json['vl']['vi'][0]['fn'] + url = '%s/%s?vkey=%s' % ( parts_prefix, mp4, fvkey ) + _, ext, size = url_info(url, faker=True) + + print_info(site_info, title, ext, size) + if not info_only: + download_urls([url], title, ext, size, output_dir=output_dir, merge=merge) def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs): diff --git a/src/you_get/extractors/yixia.py b/src/you_get/extractors/yixia.py index ca5c4bd6..7d5ba290 100644 --- a/src/you_get/extractors/yixia.py +++ b/src/you_get/extractors/yixia.py @@ -51,11 +51,11 @@ def yixia_download(url, output_dir = '.', merge = True, info_only = False, **kwa yixia_download_by_scid = yixia_miaopai_download_by_scid site_info = "Yixia Miaopai" - if re.match(r'http://www.miaopai.com/show/channel/\w+', url): #PC + if re.match(r'http://www.miaopai.com/show/channel/.+', url): #PC scid = match1(url, r'http://www.miaopai.com/show/channel/(.+)\.htm') - elif re.match(r'http://www.miaopai.com/show/\w+', url): #PC + elif re.match(r'http://www.miaopai.com/show/.+', url): #PC scid = match1(url, r'http://www.miaopai.com/show/(.+)\.htm') - elif re.match(r'http://m.miaopai.com/show/channel/\w+', url): #Mobile + elif re.match(r'http://m.miaopai.com/show/channel/.+', url): #Mobile scid = match1(url, r'http://m.miaopai.com/show/channel/(.+)\.htm') elif 'xiaokaxiu.com' in hostname: #Xiaokaxiu diff --git a/src/you_get/extractors/youku.py b/src/you_get/extractors/youku.py index 1fb09e8c..853a75ba 100644 --- a/src/you_get/extractors/youku.py +++ b/src/you_get/extractors/youku.py @@ -314,9 +314,6 @@ class Youku(VideoExtractor): q = q ) ksegs += [i['server'] for i in json.loads(get_content(u))] - - if (parse_host(ksegs[len(ksegs)-1])[0] == "vali.cp31.ott.cibntv.net"): - ksegs.pop(len(ksegs)-1) except error.HTTPError as e: # Use fallback stream data in case of HTTP 404 log.e('[Error] ' + str(e)) diff --git a/src/you_get/extractors/youtube.py b/src/you_get/extractors/youtube.py index 0ef390ed..64af5c14 100644 --- a/src/you_get/extractors/youtube.py +++ b/src/you_get/extractors/youtube.py @@ -155,6 +155,8 @@ class YouTube(VideoExtractor): try: ytplayer_config = json.loads(re.search('ytplayer.config\s*=\s*([^\n]+?});', video_page).group(1)) self.html5player = 'https:' + ytplayer_config['assets']['js'] + # Workaround: get_video_info returns bad s. Why? 
+ stream_list = ytplayer_config['args']['url_encoded_fmt_stream_map'].split(',') except: self.html5player = None @@ -236,7 +238,7 @@ class YouTube(VideoExtractor): start = '{:0>2}:{:0>2}:{:06.3f}'.format(int(h), int(m), s).replace('.', ',') m, s = divmod(finish, 60); h, m = divmod(m, 60) finish = '{:0>2}:{:0>2}:{:06.3f}'.format(int(h), int(m), s).replace('.', ',') - content = text.firstChild.nodeValue + content = unescape_html(text.firstChild.nodeValue) srt += '%s\n' % str(seq) srt += '%s --> %s\n' % (start, finish) diff --git a/src/you_get/processor/ffmpeg.py b/src/you_get/processor/ffmpeg.py index 1c0ba1a3..a8599e52 100644 --- a/src/you_get/processor/ffmpeg.py +++ b/src/you_get/processor/ffmpeg.py @@ -125,7 +125,7 @@ def ffmpeg_concat_flv_to_mp4(files, output='output.mp4'): params = [FFMPEG] + LOGLEVEL + ['-f', 'concat', '-safe', '-1', '-y', '-i'] params.append(output + '.txt') - params += ['-c', 'copy', output] + params += ['-c', 'copy', '-bsf:a', 'aac_adtstoasc', output] subprocess.check_call(params) os.remove(output + '.txt') @@ -212,15 +212,6 @@ def ffmpeg_download_stream(files, title, ext, params={}, output_dir='.'): if not (output_dir == '.'): output = output_dir + '/' + output - ffmpeg_params = [] - #should these exist... - if params is not None: - if len(params) > 0: - for k, v in params: - ffmpeg_params.append(k) - ffmpeg_params.append(v) - - print('Downloading streaming content with FFmpeg, press q to stop recording...') ffmpeg_params = [FFMPEG] + ['-y', '-re', '-i'] ffmpeg_params.append(files) #not the same here!!!! @@ -230,6 +221,12 @@ def ffmpeg_download_stream(files, title, ext, params={}, output_dir='.'): else: ffmpeg_params += ['-c', 'copy', '-bsf:a', 'aac_adtstoasc'] + if params is not None: + if len(params) > 0: + for k, v in params: + ffmpeg_params.append(k) + ffmpeg_params.append(v) + ffmpeg_params.append(output) print(' '.join(ffmpeg_params)) diff --git a/src/you_get/util/fs.py b/src/you_get/util/fs.py index 36e0b29d..4f415bf0 100644 --- a/src/you_get/util/fs.py +++ b/src/you_get/util/fs.py @@ -10,6 +10,7 @@ def legitimize(text, os=platform.system()): text = text.translate({ 0: None, ord('/'): '-', + ord('|'): '-', }) if os == 'Windows': @@ -20,7 +21,6 @@ def legitimize(text, os=platform.system()): ord('*'): '-', ord('?'): '-', ord('\\'): '-', - ord('|'): '-', ord('\"'): '\'', # Reserved in Windows VFAT ord('+'): '-', diff --git a/src/you_get/version.py b/src/you_get/version.py index 6d91656c..28919906 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.4.555' +__version__ = '0.4.595' diff --git a/tests/test.py b/tests/test.py index 638206af..0fa2979a 100644 --- a/tests/test.py +++ b/tests/test.py @@ -21,9 +21,6 @@ class YouGetTests(unittest.TestCase): def test_mixcloud(self): mixcloud.download("http://www.mixcloud.com/DJVadim/north-america-are-you-ready/", info_only=True) - def test_vimeo(self): - vimeo.download("http://vimeo.com/56810854", info_only=True) - def test_youtube(self): youtube.download("http://www.youtube.com/watch?v=pzKerr0JIPA", info_only=True) youtube.download("http://youtu.be/pzKerr0JIPA", info_only=True)
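On the ffmpeg side, the `-bsf:a aac_adtstoasc` bitstream filter added to the concat call is what lets ADTS-framed AAC audio (as carried in FLV/TS segments) be stream-copied into an MP4 container without re-encoding. A sketch of the resulting invocation, with placeholder file names and the log-level flags omitted:

```
import subprocess

concat_list = 'output.mp4.txt'   # one "file 'part-N.flv'" entry per segment
subprocess.check_call([
    'ffmpeg', '-f', 'concat', '-safe', '-1', '-y', '-i', concat_list,
    '-c', 'copy', '-bsf:a', 'aac_adtstoasc', 'output.mp4',
])
```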