From 81d153e4b8931857d0e1d610d8b031c4b90a4aa0 Mon Sep 17 00:00:00 2001 From: cnbeining Date: Wed, 21 Oct 2015 00:13:17 -0400 Subject: [PATCH 1/7] [miomio] quick fix #716 --- src/you_get/extractors/miomio.py | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/src/you_get/extractors/miomio.py b/src/you_get/extractors/miomio.py index cc943b64..cafaf549 100644 --- a/src/you_get/extractors/miomio.py +++ b/src/you_get/extractors/miomio.py @@ -4,9 +4,9 @@ __all__ = ['miomio_download'] from ..common import * -from .sina import sina_download_by_xml from .tudou import tudou_download_by_id from .youku import youku_download_by_vid +from xml.dom.minidom import parseString def miomio_download(url, output_dir = '.', merge = True, info_only = False, **kwargs): html = get_html(url) @@ -20,13 +20,35 @@ def miomio_download(url, output_dir = '.', merge = True, info_only = False, **kw youku_download_by_vid(id, title=title, output_dir=output_dir, merge=merge, info_only=info_only) elif t == 'tudou': tudou_download_by_id(id, title, output_dir=output_dir, merge=merge, info_only=info_only) - elif t == 'sina' or t=='video': + elif t == 'sina' or t == 'video': url = "http://www.miomio.tv/mioplayer/mioplayerconfigfiles/sina.php?vid=" + id - xml = get_content (url, headers=fake_headers, decoded=True) - sina_download_by_xml(xml, title, output_dir=output_dir, merge=merge, info_only=info_only) + xml_data = get_content(url, headers=fake_headers, decoded=True) + url_list = sina_xml_to_url_list(xml_data) + + size_full = 0 + for url in url_list: + type_, ext, size = url_info(url) + size_full += size + + print_info(site_info, title, type_, size_full) + if not info_only: + download_urls([url], title, ext, total_size=None, output_dir=output_dir, merge=merge) else: raise NotImplementedError(flashvars) +#---------------------------------------------------------------------- +def sina_xml_to_url_list(xml_data): + """str->list + Convert XML to URL List. + From Biligrab. + """ + rawurl = [] + dom = parseString(xml_data) + for node in dom.getElementsByTagName('durl'): + url = node.getElementsByTagName('url')[0] + rawurl.append(url.childNodes[0].data) + return rawurl + site_info = "MioMio.tv" download = miomio_download download_playlist = playlist_not_supported('miomio') From ee917b3920aca1bcfed3fe38bf148336001eb3c3 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 21 Oct 2015 13:35:25 +0200 Subject: [PATCH 2/7] [embed] add more patterns for Tudou - Example link: http://tieba.baidu.com/p/4114753102 --- src/you_get/extractors/embed.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/embed.py b/src/you_get/extractors/embed.py index c4f47411..37c8b106 100644 --- a/src/you_get/extractors/embed.py +++ b/src/you_get/extractors/embed.py @@ -21,7 +21,8 @@ youku_embed_patterns = [ 'youku\.com/v_show/id_([a-zA-Z0-9=]+)', """ http://www.tudou.com/programs/view/html5embed.action?type=0&code=3LS_URGvl54&lcode=&resourceId=0_06_05_99 """ -tudou_embed_patterns = [ 'tudou\.com[a-zA-Z0-9\/\?=\&\.\;]+code=([[a-zA-Z0-9_]+)\&' +tudou_embed_patterns = [ 'tudou\.com[a-zA-Z0-9\/\?=\&\.\;]+code=([[a-zA-Z0-9_]+)\&', + 'www\.tudou\.com/v/([[a-zA-Z0-9_]+)/v\.swf' ] """ From f8b00642861cd9ba076b51d326d52579ddd8bbd5 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 21 Oct 2015 15:00:46 +0200 Subject: [PATCH 3/7] [common] add google_search() --- src/you_get/common.py | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/src/you_get/common.py b/src/you_get/common.py index a5277b40..7df48c43 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -1125,10 +1125,29 @@ def script_main(script_name, download, download_playlist = None): else: sys.exit(1) +def google_search(url): + keywords = r1(r'https?://(.*)', url) + url = 'https://www.google.com/search?tbm=vid&q=%s' % parse.quote(keywords) + page = get_content(url, headers=fake_headers) + videos = re.findall(r'([^<]+)<', page) + durs = re.findall(r'[^<]+(\d+:\d+)', page) + print("Google Videos search:") + for v in zip(videos, durs): + print("- video: %s [%s]" % (unescape_html(v[0][1]), v[1])) + print("# you-get %s" % log.sprint(v[0][0], log.UNDERLINE)) + print() + print("Best matched result:") + return(videos[0][0]) + def url_to_module(url): - video_host = r1(r'https?://([^/]+)/', url) - video_url = r1(r'https?://[^/]+(.*)', url) - assert video_host and video_url, 'invalid url: ' + url + try: + video_host = r1(r'https?://([^/]+)/', url) + video_url = r1(r'https?://[^/]+(.*)', url) + assert video_host and video_url + except: + url = google_search(url) + video_host = r1(r'https?://([^/]+)/', url) + video_url = r1(r'https?://[^/]+(.*)', url) if video_host.endswith('.com.cn'): video_host = video_host[:-3] From 117268999278ddfbdc3350feca13ef17a32d37dd Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 21 Oct 2015 16:45:06 +0200 Subject: [PATCH 4/7] [util.log] update --- src/you_get/util/log.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/you_get/util/log.py b/src/you_get/util/log.py index 3a391093..5c8504f5 100644 --- a/src/you_get/util/log.py +++ b/src/you_get/util/log.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # This file is Python 2 compliant. -from .. import __name__ as library_name +from ..version import script_name import os, sys @@ -10,7 +10,8 @@ IS_ANSI_TERMINAL = os.getenv('TERM') in ( 'linux', 'screen', 'vt100', - 'xterm') + 'xterm', +) # ANSI escape code # See @@ -70,7 +71,7 @@ def print_err(text, *colors): def print_log(text, *colors): """Print a log message to standard error.""" - sys.stderr.write(sprint("{}: {}".format(library_name, text), *colors) + "\n") + sys.stderr.write(sprint("{}: {}".format(script_name, text), *colors) + "\n") def i(message): """Print a normal log message.""" From b53a2e66760abc26828e6b5b567604187bd10de7 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 21 Oct 2015 16:45:51 +0200 Subject: [PATCH 5/7] [you-get] update --- you-get | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/you-get b/you-get index f04cbc0d..e7645b34 100755 --- a/you-get +++ b/you-get @@ -1,6 +1,4 @@ #!/usr/bin/env python -# This file is Python 2 compliant. - import os, sys _srcdir = 'src/' @@ -17,8 +15,7 @@ if sys.version_info[0] == 3: import you_get if __name__ == '__main__': you_get.main(repo_path=_filepath) -else: +else: # Python 2 from you_get.util import log - log.wtf(""" - [Fatal] Python 3 is required. - If Python 3 is already installed on your machine, try to run this script using 'python3 you-get'.""") + log.e("[fatal] Python 3 is required!") + log.wtf("try to run this script using 'python3 you-get'.") From d60eafeacb079c85168f029dc874f9d53710e17f Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 21 Oct 2015 17:01:31 +0200 Subject: [PATCH 6/7] redefine version (0.4.x) --- src/you_get/__main__.py | 3 ++- src/you_get/common.py | 16 ++++++++++------ src/you_get/util/git.py | 20 ++++++++++++++++++++ src/you_get/version.py | 2 +- 4 files changed, 33 insertions(+), 8 deletions(-) diff --git a/src/you_get/__main__.py b/src/you_get/__main__.py index 2847d8f7..b7ec6f04 100644 --- a/src/you_get/__main__.py +++ b/src/you_get/__main__.py @@ -20,6 +20,7 @@ _help = """Usage: {} [OPTION]... [URL]... TODO """.format(script_name) +# TBD def main_dev(**kwargs): """Main entry point. you-get-dev @@ -88,7 +89,7 @@ def main(**kwargs): you-get (legacy) """ from .common import main - main() + main(**kwargs) if __name__ == '__main__': main() diff --git a/src/you_get/common.py b/src/you_get/common.py index 7df48c43..0f5080eb 100755 --- a/src/you_get/common.py +++ b/src/you_get/common.py @@ -91,6 +91,7 @@ from importlib import import_module from .version import __version__ from .util import log, term +from .util.git import get_version from .util.strings import get_filename, unescape_html from . import json_output as json_output_ @@ -981,8 +982,11 @@ def download_main(download, download_playlist, urls, playlist, **kwargs): else: download(url, **kwargs) -def script_main(script_name, download, download_playlist = None): - version = 'You-Get %s, a video downloader.' % __version__ +def script_main(script_name, download, download_playlist, **kwargs): + def version(): + log.i('version %s' % get_version(kwargs['repo_path'] + if 'repo_path' in kwargs else __version__)) + help = 'Usage: %s [OPTION]... [URL]...\n' % script_name help += '''\nStartup options: -V | --version Display the version and exit. @@ -1035,10 +1039,10 @@ def script_main(script_name, download, download_playlist = None): traceback = False for o, a in opts: if o in ('-V', '--version'): - print(version) + version() sys.exit() elif o in ('-h', '--help'): - print(version) + version() print(help) sys.exit() elif o in ('-f', '--force'): @@ -1176,5 +1180,5 @@ def any_download_playlist(url, **kwargs): m, url = url_to_module(url) m.download_playlist(url, **kwargs) -def main(): - script_main('you-get', any_download, any_download_playlist) +def main(**kwargs): + script_main('you-get', any_download, any_download_playlist, **kwargs) diff --git a/src/you_get/util/git.py b/src/you_get/util/git.py index 6891709e..9e4a1001 100644 --- a/src/you_get/util/git.py +++ b/src/you_get/util/git.py @@ -1,6 +1,8 @@ #!/usr/bin/env python import os +import subprocess +from ..version import __version__ def get_head(repo_path): """Get (branch, commit) from HEAD of a git repo.""" @@ -11,3 +13,21 @@ def get_head(repo_path): return branch, commit except: return None + +def get_version(repo_path): + try: + version = __version__.split('.') + major, minor = version[0], version[1] + + p = subprocess.Popen(['git', 'rev-list', 'HEAD', '--count'], + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + raw, err = p.communicate() + c_head = int(raw.decode('ascii')) + q = subprocess.Popen(['git', 'rev-list', 'master', '--count'], + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + raw, err = q.communicate() + c_master = int(raw.decode('ascii')) + cc = c_head - c_master + return '%s.%s.%s' % (major, minor, cc) + except: + return __version__ diff --git a/src/you_get/version.py b/src/you_get/version.py index fe141a99..355ac932 100644 --- a/src/you_get/version.py +++ b/src/you_get/version.py @@ -1,4 +1,4 @@ #!/usr/bin/env python script_name = 'you-get' -__version__ = '0.3.36' +__version__ = '0.4.0' From fad3fa81808a93d18f72686da3d9f43ed72ba2c1 Mon Sep 17 00:00:00 2001 From: Mort Yao Date: Wed, 21 Oct 2015 17:40:56 +0200 Subject: [PATCH 7/7] [embed] add more patterns for Tudou - Example link: http://tieba.baidu.com/shipin/bw/video/play?kw=akb48&v_id=a35619448853a42b942231e1 --- src/you_get/extractors/embed.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/you_get/extractors/embed.py b/src/you_get/extractors/embed.py index 37c8b106..491917f4 100644 --- a/src/you_get/extractors/embed.py +++ b/src/you_get/extractors/embed.py @@ -22,7 +22,7 @@ youku_embed_patterns = [ 'youku\.com/v_show/id_([a-zA-Z0-9=]+)', http://www.tudou.com/programs/view/html5embed.action?type=0&code=3LS_URGvl54&lcode=&resourceId=0_06_05_99 """ tudou_embed_patterns = [ 'tudou\.com[a-zA-Z0-9\/\?=\&\.\;]+code=([[a-zA-Z0-9_]+)\&', - 'www\.tudou\.com/v/([[a-zA-Z0-9_]+)/v\.swf' + 'www\.tudou\.com/v/([[a-zA-Z0-9_]+)/[^"]*v\.swf' ] """