From 606e0a786e2ab631288d2f4567ed1d37334ae52e Mon Sep 17 00:00:00 2001
From: Zhiming Wang <zmwangx@gmail.com>
Date: Sun, 4 Dec 2016 19:36:17 -0500
Subject: [PATCH 1/8] [lizhi] overhaul

The Lizhi extractor has stopped working. In particular, the site has made
two major changes:

- URL format change: no more #/ in URL paths;
- The /api/audio/{radio_id}/{audio_id} API now returns 404.

This is a rewrite based on the /api/radio_audios API.
---
 src/you_get/extractors/lizhi.py | 74 ++++++++++++++++++++-------------
 1 file changed, 46 insertions(+), 28 deletions(-)

diff --git a/src/you_get/extractors/lizhi.py b/src/you_get/extractors/lizhi.py
index 56dbf756..65988a9f 100644
--- a/src/you_get/extractors/lizhi.py
+++ b/src/you_get/extractors/lizhi.py
@@ -4,37 +4,55 @@ __all__ = ['lizhi_download']
 import json
 from ..common import *
 
-def lizhi_download_playlist(url, output_dir = '.', merge = True, info_only = False, **kwargs):
-    # like this http://www.lizhi.fm/#/31365/
-    #api desc: s->start l->length band->some radio
-    #http://www.lizhi.fm/api/radio_audios?s=0&l=100&band=31365
-    band_id = match1(url,r'#/(\d+)')
-    #try to get a considerable large l to reduce html parsing task.
-    api_url = 'http://www.lizhi.fm/api/radio_audios?s=0&l=65535&band='+band_id
-    content_json = json.loads(get_content(api_url))
-    for sound in content_json:
-        title = sound["name"]
-        res_url = sound["url"]
-        songtype, ext, size = url_info(res_url,faker=True)
-        print_info(site_info, title, songtype, size)
-        if not info_only:
-            #no referer no speed!
-            download_urls([res_url], title, ext, size, output_dir, merge=merge ,refer = 'http://www.lizhi.fm',faker=True)    
-    pass
+# radio_id: e.g. 549759 from http://www.lizhi.fm/549759/
+#
+# Returns a list of tuples (audio_id, title, url) for each episode
+# (audio) in the radio playlist. url is the direct link to the audio
+# file.
+def lizhi_extract_playlist_info(radio_id):
+    # /api/radio_audios API parameters:
+    #
+    # - s: starting episode
+    # - l: count (per page)
+    # - band: radio_id
+    #
+    # We use l=65535 for poor man's pagination (that is, no pagination
+    # at all -- hope all fits on a single page).
+    #
+    # TODO: Use /api/radio?band={radio_id} to get number of episodes
+    # (au_cnt), then handle pagination properly.
+    api_url = 'http://www.lizhi.fm/api/radio_audios?s=0&l=65535&band=%s' % radio_id
+    api_response = json.loads(get_content(api_url))
+    return [(ep['id'], ep['name'], ep['url']) for ep in api_response]
 
-def lizhi_download(url, output_dir = '.', merge = True, info_only = False, **kwargs):
-    # url like http://www.lizhi.fm/#/549759/18864883431656710
-    api_id = match1(url,r'#/(\d+/\d+)')
-    api_url = 'http://www.lizhi.fm/api/audio/'+api_id
-    content_json = json.loads(get_content(api_url))
-    title = content_json["audio"]["name"]
-    res_url = content_json["audio"]["url"]
-    songtype, ext, size = url_info(res_url,faker=True)
-    print_info(site_info, title, songtype, size)
+def lizhi_download_audio(audio_id, title, url, output_dir='.', info_only=False):
+    filetype, ext, size = url_info(url)
+    print_info(site_info, title, filetype, size)
     if not info_only:
-        #no referer no speed!
-        download_urls([res_url], title, ext, size, output_dir, merge=merge ,refer = 'http://www.lizhi.fm',faker=True)    
+        download_urls([url], title, ext, size, output_dir=output_dir)
 
+def lizhi_download_playlist(url, output_dir='.', info_only=False, **kwargs):
+    # Sample URL: http://www.lizhi.fm/549759/
+    radio_id = match1(url,r'/(\d+)')
+    if not radio_id:
+        raise NotImplementedError('%s not supported' % url)
+    for audio_id, title, url in lizhi_extract_playlist_info(radio_id):
+        lizhi_download_audio(audio_id, title, url, output_dir=output_dir, info_only=info_only)
+
+def lizhi_download(url, output_dir='.', info_only=False, **kwargs):
+    # Sample URL: http://www.lizhi.fm/549759/18864883431656710/
+    m = re.search(r'/(?P<radio_id>\d+)/(?P<audio_id>\d+)', url)
+    if not m:
+        raise NotImplementedError('%s not supported' % url)
+    radio_id = m.group('radio_id')
+    audio_id = m.group('audio_id')
+    # Look for the audio_id among the full list of episodes
+    for aid, title, url in lizhi_extract_playlist_info(radio_id):
+        if aid == audio_id:
+            lizhi_download_audio(audio_id, title, url, output_dir=output_dir, info_only=info_only)
+            break
+    else:
+        raise NotImplementedError('Audio #%s not found in playlist #%s' % (audio_id, radio_id))
 
 site_info = "lizhi.fm"
 download = lizhi_download

From a6d3c13684cff5811e3c1c6bac93698355cc3a43 Mon Sep 17 00:00:00 2001
From: Zhiming Wang <zmwangx@gmail.com>
Date: Mon, 5 Dec 2016 23:45:28 -0500
Subject: [PATCH 2/8] [embed] add support for bilibili's embedded player

Sample embed for http://www.bilibili.com/video/av5079467/:

  <embed
    height="415" width="544" quality="high"
    allowfullscreen="true" type="application/x-shockwave-flash"
    src="http://static.hdslb.com/miniloader.swf"
    flashvars="aid=5079467&page=1"
    pluginspage="http://www.adobe.com/shockwave/download/download.cgi?P1_Prod_Version=ShockwaveFlash"
  ></embed>
---
 src/you_get/extractors/embed.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/src/you_get/extractors/embed.py b/src/you_get/extractors/embed.py
index fc4015c4..3bdb924c 100644
--- a/src/you_get/extractors/embed.py
+++ b/src/you_get/extractors/embed.py
@@ -2,6 +2,7 @@ __all__ = ['embed_download']
 
 from ..common import *
 
+from .bilibili import bilibili_download
 from .iqiyi import iqiyi_download_by_vid
 from .le import letvcloud_download_by_vu
 from .netease import netease_download
@@ -42,6 +43,11 @@ netease_embed_patterns = [ '(http://\w+\.163\.com/movie/[^\'"]+)' ]
 
 vimeo_embed_patters = [ 'player\.vimeo\.com/video/(\d+)' ]
 
+"""
+check the share button on http://www.bilibili.com/video/av5079467/
+"""
+bilibili_embed_patterns = [ 'static\.hdslb\.com/miniloader\.swf.*aid=(\d+)' ]
+
 
 def embed_download(url, output_dir = '.', merge = True, info_only = False ,**kwargs):
     content = get_content(url, headers=fake_headers)
@@ -78,6 +84,12 @@ def embed_download(url, output_dir = '.', merge = True, info_only = False ,**kwa
         found = True
         vimeo_download_by_id(url, title=title, output_dir=output_dir, merge=merge, info_only=info_only)
 
+    aids = matchall(content, bilibili_embed_patterns)
+    for aid in aids:
+        found = True
+        url = 'http://www.bilibili.com/video/av%s/' % aid
+        bilibili_download(url, output_dir=output_dir, merge=merge, info_only=info_only)
+
     if not found:
         raise NotImplementedError(url)
 

From 9905620b5297483e5e10195aad90a14be1d360fd Mon Sep 17 00:00:00 2001
From: Valdemar Erk <valdemarerk@gmail.com>
Date: Fri, 16 Dec 2016 09:36:29 +0100
Subject: [PATCH 3/8] Fix for magisto

---
 src/you_get/extractors/magisto.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/src/you_get/extractors/magisto.py b/src/you_get/extractors/magisto.py
index 2a53be02..b2e8e502 100644
--- a/src/you_get/extractors/magisto.py
+++ b/src/you_get/extractors/magisto.py
@@ -3,15 +3,19 @@
 __all__ = ['magisto_download']
 
 from ..common import *
+import json
 
 def magisto_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
     html = get_html(url)
-
-    title1 = r1(r'<meta name="twitter:title" content="([^"]*)"', html)
-    title2 = r1(r'<meta name="twitter:description" content="([^"]*)"', html)
-    video_hash = r1(r'http://www.magisto.com/video/([^/]+)', url)
-    title = "%s %s - %s" % (title1, title2, video_hash)
-    url = r1(r'<source type="[^"]+" src="([^"]*)"', html)
+    
+    video_hash = r1(r'video\/([a-zA-Z0-9]+)', url)
+    api_url = 'https://www.magisto.com/api/video/{}'.format(video_hash)
+    content = get_html(api_url)
+    data = json.loads(content)
+    title1 = data['title']
+    title2 = data['creator']
+    title = "%s - %s" % (title1, title2)
+    url = data['video_direct_url']
     type, ext, size = url_info(url)
 
     print_info(site_info, title, type, size)

From af4db738a2f2e9e23ef192145a0ece286f1a4c67 Mon Sep 17 00:00:00 2001
From: Mort Yao <soi@mort.ninja>
Date: Sat, 24 Dec 2016 15:49:47 +0100
Subject: [PATCH 4/8] [test] remove mixcloud

---
 tests/test.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/tests/test.py b/tests/test.py
index 0fa2979a..020455b0 100644
--- a/tests/test.py
+++ b/tests/test.py
@@ -18,9 +18,6 @@ class YouGetTests(unittest.TestCase):
     def test_magisto(self):
         magisto.download("http://www.magisto.com/album/video/f3x9AAQORAkfDnIFDA", info_only=True)
 
-    def test_mixcloud(self):
-        mixcloud.download("http://www.mixcloud.com/DJVadim/north-america-are-you-ready/", info_only=True)
-
     def test_youtube(self):
         youtube.download("http://www.youtube.com/watch?v=pzKerr0JIPA", info_only=True)
         youtube.download("http://youtu.be/pzKerr0JIPA", info_only=True)

From b493af9a69878544ddc6a1fdb71ca61b48bd57ab Mon Sep 17 00:00:00 2001
From: Zhiming Wang <zmwangx@gmail.com>
Date: Thu, 15 Dec 2016 23:37:35 -0500
Subject: [PATCH 5/8] [ffmpeg] fix concat list when output dir is not pwd

Relative paths in the concat list are resolved relative to the directory
containing the concat list itself (the "script", in FFmpeg's terms), not
the directory FFmpeg is invoked from. This isn't entirely obvious from
the documentation, but it is easy to infer from the concat demuxer's
concept of "safety", and easy to test (confirmed on FFmpeg 3.2.2). See
https://ffmpeg.org/ffmpeg-all.html#concat-1 for details.

This commit fixes the wrong relative paths written when --output-dir is
specified and is not the current working directory.

This commit also
- Factors out common concat list writer code;
- Slightly simplifies the code to collect FFmpeg params (on Py35+ we can
  further simplify by unpacking LOGLEVEL with the star operator right in
  the list literal).
---
 src/you_get/processor/ffmpeg.py | 56 ++++++++++++++-------------------
 1 file changed, 23 insertions(+), 33 deletions(-)

diff --git a/src/you_get/processor/ffmpeg.py b/src/you_get/processor/ffmpeg.py
index a8599e52..433aff3f 100644
--- a/src/you_get/processor/ffmpeg.py
+++ b/src/you_get/processor/ffmpeg.py
@@ -26,6 +26,18 @@ LOGLEVEL = ['-loglevel', 'quiet']
 def has_ffmpeg_installed():
     return FFMPEG is not None
 
+# Given a list of segments and the output path, generates the concat
+# list and returns the path to the concat list.
+def generate_concat_list(files, output):
+    concat_list_path = output + '.txt'
+    concat_list_dir = os.path.dirname(concat_list_path)
+    with open(concat_list_path, 'w', encoding='utf-8') as concat_list:
+        for file in files:
+            if os.path.isfile(file):
+                relpath = os.path.relpath(file, start=concat_list_dir)
+                concat_list.write('file %s\n' % parameterize(relpath))
+    return concat_list_path
+
 def ffmpeg_concat_av(files, output, ext):
     print('Merging video parts... ', end="", flush=True)
     params = [FFMPEG] + LOGLEVEL
@@ -52,17 +64,9 @@ def ffmpeg_convert_ts_to_mkv(files, output='output.mkv'):
 def ffmpeg_concat_mp4_to_mpg(files, output='output.mpg'):
     # Use concat demuxer on FFmpeg >= 1.1
     if FFMPEG == 'ffmpeg' and (FFMPEG_VERSION[0] >= 2 or (FFMPEG_VERSION[0] == 1 and FFMPEG_VERSION[1] >= 1)):
-        concat_list = open(output + '.txt', 'w', encoding="utf-8")
-        for file in files:
-            if os.path.isfile(file):
-                concat_list.write("file %s\n" % parameterize(file))
-        concat_list.close()
-
-        params = [FFMPEG] + LOGLEVEL
-        params.extend(['-f', 'concat', '-safe', '-1', '-y', '-i'])
-        params.append(output + '.txt')
-        params += ['-c', 'copy', output]
-
+        concat_list = generate_concat_list(files, output)
+        params = [FFMPEG] + LOGLEVEL + ['-y', '-f', 'concat', '-safe', '-1',
+                                        '-i', concat_list, '-c', 'copy', output]
         if subprocess.call(params) == 0:
             os.remove(output + '.txt')
             return True
@@ -115,18 +119,10 @@ def ffmpeg_concat_flv_to_mp4(files, output='output.mp4'):
     print('Merging video parts... ', end="", flush=True)
     # Use concat demuxer on FFmpeg >= 1.1
     if FFMPEG == 'ffmpeg' and (FFMPEG_VERSION[0] >= 2 or (FFMPEG_VERSION[0] == 1 and FFMPEG_VERSION[1] >= 1)):
-        concat_list = open(output + '.txt', 'w', encoding="utf-8")
-        for file in files:
-            if os.path.isfile(file):
-                # for escaping rules, see:
-                # https://www.ffmpeg.org/ffmpeg-utils.html#Quoting-and-escaping
-                concat_list.write("file %s\n" % parameterize(file))
-        concat_list.close()
-
-        params = [FFMPEG] + LOGLEVEL + ['-f', 'concat', '-safe', '-1', '-y', '-i']
-        params.append(output + '.txt')
-        params += ['-c', 'copy', '-bsf:a', 'aac_adtstoasc', output]
-
+        concat_list = generate_concat_list(files, output)
+        params = [FFMPEG] + LOGLEVEL + ['-y', '-f', 'concat', '-safe', '-1',
+                                        '-i', concat_list, '-c', 'copy',
+                                        '-bsf:a', 'aac_adtstoasc', output]
         subprocess.check_call(params)
         os.remove(output + '.txt')
         return True
@@ -162,16 +158,10 @@ def ffmpeg_concat_mp4_to_mp4(files, output='output.mp4'):
     print('Merging video parts... ', end="", flush=True)
     # Use concat demuxer on FFmpeg >= 1.1
     if FFMPEG == 'ffmpeg' and (FFMPEG_VERSION[0] >= 2 or (FFMPEG_VERSION[0] == 1 and FFMPEG_VERSION[1] >= 1)):
-        concat_list = open(output + '.txt', 'w', encoding="utf-8")
-        for file in files:
-            if os.path.isfile(file):
-                concat_list.write("file %s\n" % parameterize(file))
-        concat_list.close()
-
-        params = [FFMPEG] + LOGLEVEL + ['-f', 'concat', '-safe', '-1', '-y', '-i']
-        params.append(output + '.txt')
-        params += ['-c', 'copy', '-bsf:a', 'aac_adtstoasc', output]
-
+        concat_list = generate_concat_list(files, output)
+        params = [FFMPEG] + LOGLEVEL + ['-y', '-f', 'concat', '-safe', '-1',
+                                        '-i', concat_list, '-c', 'copy',
+                                        '-bsf:a', 'aac_adtstoasc', output]
         subprocess.check_call(params)
         os.remove(output + '.txt')
         return True

From f7b6f6b40f97813206252f9c41dbe05bda592918 Mon Sep 17 00:00:00 2001
From: Zhiming Wang <zmwangx@gmail.com>
Date: Sun, 25 Dec 2016 13:48:00 -0500
Subject: [PATCH 6/8] ffmpeg: set loglevel to info in debug mode

Occasionally, the FFmpeg invocation fails (which could be due to bugs in
you-get; see #1558 for instance), but -loglevel quiet means nothing is
printed other than the exit status (pretty much always 1) in Python's
traceback, which is not helpful at all.

This commit restores FFmpeg's regular output (-loglevel info) when
--debug is specified. We're not using verbose, debug or trace because
those levels are mostly only useful for debugging FFmpeg itself, which
is not our goal.

Due to the lack of a meaningful API for accessing the global logging
level, this is a hack based on two assumptions:

1. When --debug is enabled, the root logger level is set to DEBUG;
2. processor.ffmpeg is lazily imported, after command line options are
   parsed.
---
 src/you_get/processor/ffmpeg.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)
 mode change 100644 => 100755 src/you_get/processor/ffmpeg.py

diff --git a/src/you_get/processor/ffmpeg.py b/src/you_get/processor/ffmpeg.py
old mode 100644
new mode 100755
index a8599e52..f5b3cd38
--- a/src/you_get/processor/ffmpeg.py
+++ b/src/you_get/processor/ffmpeg.py
@@ -1,5 +1,6 @@
 #!/usr/bin/env python
 
+import logging
 import os.path
 import subprocess
 from ..util.strings import parameterize
@@ -21,7 +22,10 @@ def get_usable_ffmpeg(cmd):
         return None
 
 FFMPEG, FFMPEG_VERSION = get_usable_ffmpeg('ffmpeg') or get_usable_ffmpeg('avconv') or (None, None)
-LOGLEVEL = ['-loglevel', 'quiet']
+if logging.getLogger().isEnabledFor(logging.DEBUG):
+    LOGLEVEL = ['-loglevel', 'info']
+else:
+    LOGLEVEL = ['-loglevel', 'quiet']
 
 def has_ffmpeg_installed():
     return FFMPEG is not None

From 927a1cb91f854cb5260f67b15d9811f763955407 Mon Sep 17 00:00:00 2001
From: liujianshan <liujianshan@aipai.com>
Date: Thu, 29 Dec 2016 19:47:53 +0800
Subject: [PATCH 7/8] Fix soku.com vid download error problem

---
 src/you_get/extractor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/you_get/extractor.py b/src/you_get/extractor.py
index 594b908e..332440dd 100644
--- a/src/you_get/extractor.py
+++ b/src/you_get/extractor.py
@@ -206,7 +206,7 @@ class VideoExtractor():
                           output_dir=kwargs['output_dir'],
                           merge=kwargs['merge'],
                           av=stream_id in self.dash_streams)
-            if not kwargs['caption']:
+            if 'caption' not in kwargs or not kwargs['caption']:
                 print('Skipping captions.')
                 return
             for lang in self.caption_tracks:

From 76399e8561c421ead7a590ef857a98eccb16af61 Mon Sep 17 00:00:00 2001
From: ChenYuan <github@zju.email>
Date: Sun, 1 Jan 2017 00:44:56 +0800
Subject: [PATCH 8/8] fix bilibili bangumi

Modify the regex used to get the episode ID.
---
 src/you_get/extractors/bilibili.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py
index 122dea0b..aecb072c 100644
--- a/src/you_get/extractors/bilibili.py
+++ b/src/you_get/extractors/bilibili.py
@@ -127,7 +127,7 @@ def bilibili_download(url, output_dir='.', merge=True, info_only=False, **kwargs
 
     if re.match(r'https?://bangumi\.bilibili\.com/', url):
         # quick hack for bangumi URLs
-        episode_id = r1(r'data-current-episode-id="(\d+)"', html)
+        episode_id = r1(r'first_ep_id = "(\d+)"', html)
         cont = post_content('http://bangumi.bilibili.com/web_api/get_source',
                             post_data={'episode_id': episode_id})
         cid = json.loads(cont)['result']['cid']