diff --git a/README.md b/README.md
index a4f4fcd9..60cb125a 100644
--- a/README.md
+++ b/README.md
@@ -360,6 +360,7 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
| PPTV聚力 | |✓| | |
| 齐鲁网 | |✓| | |
| QQ<br/>腾讯视频 | |✓| | |
+| 企鹅直播 | |✓| | |
| 阡陌视频 | |✓| | |
| THVideo | |✓| | |
| Sina<br/>新浪视频<br/>微博秒拍视频 | |✓| | |
@@ -373,6 +374,8 @@ Use `--url`/`-u` to get a list of downloadable resource URLs extracted from the
| 战旗TV | |✓| | |
| 央视网 | |✓| | |
| 花瓣 | | |✓| |
+| Naver<br/>네이버 | |✓| | |
+| 芒果TV | |✓| | |
For all other sites not on the list, the universal extractor will take care of finding and downloading interesting resources from the page.
diff --git a/src/you_get/common.py b/src/you_get/common.py
index 6c65bd49..100f3869 100755
--- a/src/you_get/common.py
+++ b/src/you_get/common.py
@@ -52,6 +52,7 @@ SITES = {
'mixcloud' : 'mixcloud',
'mtv81' : 'mtv81',
'musicplayon' : 'musicplayon',
+ 'naver' : 'naver',
'7gogo' : 'nanagogo',
'nicovideo' : 'nicovideo',
'panda' : 'panda',
@@ -97,6 +98,7 @@ import logging
import os
import platform
import re
+import socket
import sys
import time
from urllib import request, parse, error
@@ -307,7 +309,14 @@ def get_content(url, headers={}, decoded=True):
if cookies:
cookies.add_cookie_header(req)
req.headers.update(req.unredirected_hdrs)
- response = request.urlopen(req)
+
+ for i in range(10):
+ try:
+ response = request.urlopen(req)
+ break
+ except socket.timeout:
+ logging.debug('request attempt %s timeout' % str(i + 1))
+
data = response.read()
# Handle HTTP compression for gzip and deflate (zlib)
@@ -1062,11 +1071,12 @@ def script_main(script_name, download, download_playlist, **kwargs):
-x | --http-proxy Use an HTTP proxy for downloading.
-y | --extractor-proxy Use an HTTP proxy for extracting only.
--no-proxy Never use a proxy.
+ -t | --timeout Set socket timeout.
-d | --debug Show traceback and other debug info.
'''
- short_opts = 'Vhfiuc:ndF:O:o:p:x:y:'
- opts = ['version', 'help', 'force', 'info', 'url', 'cookies', 'no-caption', 'no-merge', 'no-proxy', 'debug', 'json', 'format=', 'stream=', 'itag=', 'output-filename=', 'output-dir=', 'player=', 'http-proxy=', 'extractor-proxy=', 'lang=']
+ short_opts = 'Vhfiuc:ndF:O:o:p:x:y:t:'
+ opts = ['version', 'help', 'force', 'info', 'url', 'cookies', 'no-caption', 'no-merge', 'no-proxy', 'debug', 'json', 'format=', 'stream=', 'itag=', 'output-filename=', 'output-dir=', 'player=', 'http-proxy=', 'extractor-proxy=', 'lang=', 'timeout=']
if download_playlist:
short_opts = 'l' + short_opts
opts = ['playlist'] + opts
@@ -1096,6 +1106,7 @@ def script_main(script_name, download, download_playlist, **kwargs):
proxy = None
extractor_proxy = None
traceback = False
+ timeout = 600
for o, a in opts:
if o in ('-V', '--version'):
version()
@@ -1169,6 +1180,8 @@ def script_main(script_name, download, download_playlist, **kwargs):
extractor_proxy = a
elif o in ('--lang',):
lang = a
+ elif o in ('-t', '--timeout'):
+ timeout = int(a)
else:
log.e("try 'you-get --help' for more options")
sys.exit(2)
@@ -1178,6 +1191,8 @@ def script_main(script_name, download, download_playlist, **kwargs):
set_http_proxy(proxy)
+ socket.setdefaulttimeout(timeout)
+
try:
if stream_id:
if not extractor_proxy:
diff --git a/src/you_get/extractors/__init__.py b/src/you_get/extractors/__init__.py
index 1bb7a7ab..20a7f7cf 100755
--- a/src/you_get/extractors/__init__.py
+++ b/src/you_get/extractors/__init__.py
@@ -45,6 +45,7 @@ from .mixcloud import *
from .mtv81 import *
from .musicplayon import *
from .nanagogo import *
+from .naver import *
from .netease import *
from .nicovideo import *
from .panda import *
@@ -52,6 +53,7 @@ from .pinterest import *
from .pixnet import *
from .pptv import *
from .qianmo import *
+from .qie import *
from .qq import *
from .sina import *
from .sohu import *
diff --git a/src/you_get/extractors/embed.py b/src/you_get/extractors/embed.py
index fd463c92..a177e663 100644
--- a/src/you_get/extractors/embed.py
+++ b/src/you_get/extractors/embed.py
@@ -8,6 +8,7 @@ from .netease import netease_download
from .qq import qq_download_by_vid
from .sina import sina_download_by_vid
from .tudou import tudou_download_by_id
+from .vimeo import vimeo_download_by_id
from .yinyuetai import yinyuetai_download_by_id
from .youku import youku_download_by_vid
@@ -39,6 +40,9 @@ iqiyi_embed_patterns = [ 'player\.video\.qiyi\.com/([^/]+)/[^/]+/[^/]+/[^/]+\.sw
netease_embed_patterns = [ '(http://\w+\.163\.com/movie/[^\'"]+)' ]
+vimeo_embed_patters = [ 'player\.vimeo\.com/video/(\d+)' ]
+
+
def embed_download(url, output_dir = '.', merge = True, info_only = False ,**kwargs):
content = get_content(url, headers=fake_headers)
found = False
@@ -69,6 +73,11 @@ def embed_download(url, output_dir = '.', merge = True, info_only = False ,**kwa
found = True
netease_download(url, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
+ urls = matchall(content, vimeo_embed_patters)
+ for url in urls:
+ found = True
+ vimeo_download_by_id(url, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
+
if not found:
raise NotImplementedError(url)
diff --git a/src/you_get/extractors/ku6.py b/src/you_get/extractors/ku6.py
index d9a1ef12..7f28c75b 100644
--- a/src/you_get/extractors/ku6.py
+++ b/src/you_get/extractors/ku6.py
@@ -27,13 +27,30 @@ def ku6_download_by_id(id, title = None, output_dir = '.', merge = True, info_on
download_urls(urls, title, ext, size, output_dir, merge = merge)
def ku6_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
- patterns = [r'http://v.ku6.com/special/show_\d+/(.*)\.\.\.html',
- r'http://v.ku6.com/show/(.*)\.\.\.html',
- r'http://my.ku6.com/watch\?.*v=(.*)\.\..*']
- id = r1_of(patterns, url)
+ id = None
+
+ if match1(url, r'http://baidu.ku6.com/watch/(.*)\.html') is not None:
+ id = baidu_ku6(url)
+ else:
+ patterns = [r'http://v.ku6.com/special/show_\d+/(.*)\.\.\.html',
+ r'http://v.ku6.com/show/(.*)\.\.\.html',
+ r'http://my.ku6.com/watch\?.*v=(.*)\.\..*']
+ id = r1_of(patterns, url)
ku6_download_by_id(id, output_dir = output_dir, merge = merge, info_only = info_only)
+def baidu_ku6(url):
+ id = None
+
+ h1 = get_html(url)
+    isrc = match1(h1, r'<iframe id="innerFrame" src="([^"]*)"')
+
+    if isrc is not None:
+        h2 = get_html(isrc)
+        id = match1(h2, r'http://v.ku6.com/show/(.*)\.html')
+
+    return id
diff --git a/src/you_get/extractors/qq.py b/src/you_get/extractors/qq.py
--- a/src/you_get/extractors/qq.py
+++ b/src/you_get/extractors/qq.py
@@ -3,6 +3,7 @@
 __all__ = ['qq_download']
 
 from ..common import *
+from .qie import download as qieDownload
@@ -19,6 +20,9 @@ def qq_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
         title = match1(content, r'title">([^"]+)</p>')
         title = title.strip() if title else vid
+ elif 'live.qq.com' in url:
+ qieDownload(url,output_dir=output_dir, merge=merge, info_only=info_only)
+ exit()
elif 'iframe/player.html' in url:
vid = match1(url, r'\bvid=(\w+)')
# for embedded URLs; don't know what the title is
diff --git a/src/you_get/extractors/twitter.py b/src/you_get/extractors/twitter.py
index 251cb8d5..09956141 100644
--- a/src/you_get/extractors/twitter.py
+++ b/src/you_get/extractors/twitter.py
@@ -5,6 +5,13 @@ __all__ = ['twitter_download']
from ..common import *
from .vine import vine_download
+def extract_m3u(source):
+ r1 = get_content(source)
+ s1 = re.findall(r'(/ext_tw_video/.*)', r1)
+ r2 = get_content('https://video.twimg.com%s' % s1[-1])
+ s2 = re.findall(r'(/ext_tw_video/.*)', r2)
+ return ['https://video.twimg.com%s' % i for i in s2]
+
def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
html = get_html(url)
screen_name = r1(r'data-screen-name="([^"]*)"', html) or \
@@ -62,12 +69,20 @@ def twitter_download(url, output_dir='.', merge=True, info_only=False, **kwargs)
vmap = get_content(vmap_url)
source = r1(r'<MediaFile>\s*<!\[CDATA\[(.*)\]\]>', vmap)
if not item_id: page_title = i['tweet_id']
+ elif 'scribe_playlist_url' in i:
+ scribe_playlist_url = i['scribe_playlist_url']
+ return vine_download(scribe_playlist_url, output_dir, merge=merge, info_only=info_only)
- mime, ext, size = url_info(source)
+ if source.endswith('.mp4'):
+ urls = [source]
+ else:
+ urls = extract_m3u(source)
+ size = urls_size(urls)
+ mime, ext = 'video/mp4', 'mp4'
print_info(site_info, page_title, mime, size)
if not info_only:
- download_urls([source], page_title, ext, size, output_dir, merge=merge)
+ download_urls(urls, page_title, ext, size, output_dir, merge=merge)
site_info = "Twitter.com"
download = twitter_download
diff --git a/src/you_get/extractors/videomega.py b/src/you_get/extractors/videomega.py
index 75e88cd9..34fb5205 100644
--- a/src/you_get/extractors/videomega.py
+++ b/src/you_get/extractors/videomega.py
@@ -1,47 +1,44 @@
#!/usr/bin/env python
-from ..common import *
-from ..extractor import VideoExtractor
+__all__ = ['videomega_download']
+from ..common import *
import ssl
-class Videomega(VideoExtractor):
- name = "Videomega"
+def videomega_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
+ # Hot-plug cookie handler
+ ssl_context = request.HTTPSHandler(
+ context=ssl.SSLContext(ssl.PROTOCOL_TLSv1))
+ cookie_handler = request.HTTPCookieProcessor()
+ opener = request.build_opener(ssl_context, cookie_handler)
+ opener.addheaders = [('Referer', url),
+ ('Cookie', 'noadvtday=0')]
+ request.install_opener(opener)
- stream_types = [
- {'id': 'original'}
- ]
+ if re.search(r'view\.php', url):
+ php_url = url
+ else:
+ content = get_content(url)
+ m = re.search(r'ref="([^"]*)";\s*width="([^"]*)";\s*height="([^"]*)"', content)
+ ref = m.group(1)
+ width, height = m.group(2), m.group(3)
+ php_url = 'http://videomega.tv/view.php?ref=%s&width=%s&height=%s' % (ref, width, height)
+ content = get_content(php_url)
- def prepare(self, **kwargs):
- # Hot-plug cookie handler
- ssl_context = request.HTTPSHandler(
- context=ssl.SSLContext(ssl.PROTOCOL_TLSv1))
- cookie_handler = request.HTTPCookieProcessor()
- opener = request.build_opener(ssl_context, cookie_handler)
- opener.addheaders = [('Referer', self.url),
- ('Cookie', 'noadvtday=0')]
- request.install_opener(opener)
+    title = match1(content, r'<title>(.*)</title>')
+ js = match1(content, r'(eval.*)')
+ t = match1(js, r'\$\("\w+"\)\.\w+\("\w+","([^"]+)"\)')
+ t = re.sub(r'(\w)', r'{\1}', t)
+ t = t.translate({87 + i: str(i) for i in range(10, 36)})
+ s = match1(js, r"'([^']+)'\.split").split('|')
+ src = t.format(*s)
- ref = match1(self.url, r'ref=(\w+)')
- php_url = 'http://videomega.tv/view.php?ref=' + ref
- content = get_content(php_url)
+ type, ext, size = url_info(src, faker=True)
-        self.title = match1(content, r'<title>(.*)</title>')
- js = match1(content, r'(eval.*)')
- t = match1(js, r'\$\("\d+"\)\.\d+\("\d+","([^"]+)"\)')
- t = re.sub(r'(\w)', r'{\1}', t)
- t = t.translate({87 + i: str(i) for i in range(10, 36)})
- s = match1(js, r"'([^']+)'\.split").split('|')
- self.streams['original'] = {
- 'url': t.format(*s)
- }
+ print_info(site_info, title, type, size)
+ if not info_only:
+ download_urls([src], title, ext, size, output_dir, merge=merge, faker=True)
- def extract(self, **kwargs):
- for i in self.streams:
- s = self.streams[i]
- _, s['container'], s['size'] = url_info(s['url'])
- s['src'] = [s['url']]
-
-site = Videomega()
-download = site.download_by_url
-download_playlist = site.download_by_url
+site_info = "Videomega.tv"
+download = videomega_download
+download_playlist = playlist_not_supported('videomega')
diff --git a/src/you_get/processor/ffmpeg.py b/src/you_get/processor/ffmpeg.py
index 320eb642..e7ee35d6 100644
--- a/src/you_get/processor/ffmpeg.py
+++ b/src/you_get/processor/ffmpeg.py
@@ -169,7 +169,7 @@ def ffmpeg_concat_mp4_to_mp4(files, output='output.mp4'):
params = [FFMPEG] + LOGLEVEL + ['-f', 'concat', '-safe', '-1', '-y', '-i']
params.append(output + '.txt')
- params += ['-c', 'copy', output]
+ params += ['-c', 'copy', '-bsf:a', 'aac_adtstoasc', output]
subprocess.check_call(params)
os.remove(output + '.txt')
diff --git a/src/you_get/version.py b/src/you_get/version.py
index 0e7b6632..5a9d5581 100644
--- a/src/you_get/version.py
+++ b/src/you_get/version.py
@@ -1,4 +1,4 @@
#!/usr/bin/env python
script_name = 'you-get'
-__version__ = '0.4.390'
+__version__ = '0.4.424'
diff --git a/you-get b/you-get
index 85f3f754..8529388f 100755
--- a/you-get
+++ b/you-get
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
import os, sys
-_srcdir = 'src/'
+_srcdir = '%s/src/' % os.path.dirname(os.path.realpath(__file__))
_filepath = os.path.dirname(sys.argv[0])
sys.path.insert(1, os.path.join(_filepath, _srcdir))