mirror of
https://github.com/soimort/you-get.git
synced 2025-02-03 08:43:58 +03:00
fix playlist download
This commit is contained in:
parent
5295d1160c
commit
b95b1a10ee
@ -76,7 +76,7 @@ class Youku(VideoExtractor):
|
|||||||
for x in xs:
|
for x in xs:
|
||||||
if x not in mem:
|
if x not in mem:
|
||||||
mem.add(x)
|
mem.add(x)
|
||||||
yield(x)
|
return mem
|
||||||
|
|
||||||
def get_vid_from_url(url):
|
def get_vid_from_url(url):
|
||||||
"""Extracts video ID from URL.
|
"""Extracts video ID from URL.
|
||||||
@ -89,7 +89,7 @@ class Youku(VideoExtractor):
|
|||||||
def get_playlist_id_from_url(url):
|
def get_playlist_id_from_url(url):
|
||||||
"""Extracts playlist ID from URL.
|
"""Extracts playlist ID from URL.
|
||||||
"""
|
"""
|
||||||
return match1(url, r'youku\.com/playlist_show/id_([a-zA-Z0-9=]+)')
|
return match1(url, r'youku\.com/albumlist/show\?id=([a-zA-Z0-9=]+)')
|
||||||
|
|
||||||
def download_playlist_by_url(self, url, **kwargs):
|
def download_playlist_by_url(self, url, **kwargs):
|
||||||
self.url = url
|
self.url = url
|
||||||
@ -97,16 +97,19 @@ class Youku(VideoExtractor):
|
|||||||
try:
|
try:
|
||||||
playlist_id = self.__class__.get_playlist_id_from_url(self.url)
|
playlist_id = self.__class__.get_playlist_id_from_url(self.url)
|
||||||
assert playlist_id
|
assert playlist_id
|
||||||
|
video_page = get_content('http://list.youku.com/albumlist/show?id=%s' % playlist_id)
|
||||||
video_page = get_content('http://www.youku.com/playlist_show/id_%s' % playlist_id)
|
|
||||||
videos = Youku.oset(re.findall(r'href="(http://v\.youku\.com/[^?"]+)', video_page))
|
videos = Youku.oset(re.findall(r'href="(http://v\.youku\.com/[^?"]+)', video_page))
|
||||||
|
|
||||||
# Parse multi-page playlists
|
# Parse multi-page playlists
|
||||||
for extra_page_url in Youku.oset(re.findall('href="(http://www\.youku\.com/playlist_show/id_%s_[^?"]+)' % playlist_id, video_page)):
|
last_page_url = re.findall(r'href="(/albumlist/show\?id=%s[^"]+)" title="末页"' % playlist_id, video_page)[0]
|
||||||
extra_page = get_content(extra_page_url)
|
num_pages = int(re.findall(r'page=([0-9]+)\.htm', last_page_url)[0])
|
||||||
|
if (num_pages > 0):
|
||||||
|
# download one by one
|
||||||
|
for pn in range(1, num_pages + 1):
|
||||||
|
extra_page_url = re.sub(r'page=([0-9]+)\.htm', r'page=%s.htm' % pn, last_page_url)
|
||||||
|
extra_page = get_content('http://list.youku.com' + extra_page_url)
|
||||||
videos |= Youku.oset(re.findall(r'href="(http://v\.youku\.com/[^?"]+)', extra_page))
|
videos |= Youku.oset(re.findall(r'href="(http://v\.youku\.com/[^?"]+)', extra_page))
|
||||||
|
except Exception as e:
|
||||||
except:
|
print(e)
|
||||||
# Show full list of episodes
|
# Show full list of episodes
|
||||||
if match1(url, r'youku\.com/show_page/id_([a-zA-Z0-9=]+)'):
|
if match1(url, r'youku\.com/show_page/id_([a-zA-Z0-9=]+)'):
|
||||||
ep_id = match1(url, r'youku\.com/show_page/id_([a-zA-Z0-9=]+)')
|
ep_id = match1(url, r'youku\.com/show_page/id_([a-zA-Z0-9=]+)')
|
||||||
|
Loading…
Reference in New Issue
Block a user