mirror of
https://github.com/soimort/you-get.git
synced 2025-03-13 03:17:44 +03:00
[youku] implement oset(xs), which removes duplicates from a list and keeps the original order
This commit is contained in:
parent
da4dcc9b85
commit
facee031a8
@ -66,6 +66,14 @@ class Youku(VideoExtractor):
|
|||||||
def parse_m3u8(m3u8):
|
def parse_m3u8(m3u8):
|
||||||
return re.findall(r'(http://[^?]+)\?ts_start=0', m3u8)
|
return re.findall(r'(http://[^?]+)\?ts_start=0', m3u8)
|
||||||
|
|
||||||
|
def oset(xs):
    """Yield the items of xs in first-seen order, skipping duplicates.

    The input is consumed lazily: each item is yielded the first time it
    is encountered and silently dropped on every later occurrence.

    Note that the result is a generator, not a set. It can be iterated
    only once and does not support set operators such as ``|`` or ``|=``;
    wrap it in ``list(...)`` if the values are needed more than once.

    Items must be hashable, because a set is used to remember what has
    already been yielded.
    """
    seen = set()
    for x in xs:
        if x not in seen:
            # Record before yielding so a later duplicate is skipped.
            seen.add(x)
            yield x
|
||||||
|
|
||||||
def get_vid_from_url(url):
|
def get_vid_from_url(url):
|
||||||
"""Extracts video ID from URL.
|
"""Extracts video ID from URL.
|
||||||
"""
|
"""
|
||||||
@ -87,12 +95,12 @@ class Youku(VideoExtractor):
|
|||||||
assert playlist_id
|
assert playlist_id
|
||||||
|
|
||||||
video_page = get_content('http://www.youku.com/playlist_show/id_%s' % playlist_id)
|
video_page = get_content('http://www.youku.com/playlist_show/id_%s' % playlist_id)
|
||||||
videos = set(re.findall(r'href="(http://v\.youku\.com/[^?"]+)', video_page))
|
videos = Youku.oset(re.findall(r'href="(http://v\.youku\.com/[^?"]+)', video_page))
|
||||||
|
|
||||||
# Parse multi-page playlists
|
# Parse multi-page playlists
|
||||||
for extra_page_url in set(re.findall('href="(http://www\.youku\.com/playlist_show/id_%s_[^?"]+)' % playlist_id, video_page)):
|
for extra_page_url in Youku.oset(re.findall('href="(http://www\.youku\.com/playlist_show/id_%s_[^?"]+)' % playlist_id, video_page)):
|
||||||
extra_page = get_content(extra_page_url)
|
extra_page = get_content(extra_page_url)
|
||||||
videos |= set(re.findall(r'href="(http://v\.youku\.com/[^?"]+)', extra_page))
|
videos |= Youku.oset(re.findall(r'href="(http://v\.youku\.com/[^?"]+)', extra_page))
|
||||||
|
|
||||||
except:
|
except:
|
||||||
# Show full list of episodes
|
# Show full list of episodes
|
||||||
@ -101,7 +109,7 @@ class Youku(VideoExtractor):
|
|||||||
url = 'http://www.youku.com/show_episode/id_%s' % ep_id
|
url = 'http://www.youku.com/show_episode/id_%s' % ep_id
|
||||||
|
|
||||||
video_page = get_content(url)
|
video_page = get_content(url)
|
||||||
videos = set(re.findall(r'href="(http://v\.youku\.com/[^?"]+)', video_page))
|
videos = Youku.oset(re.findall(r'href="(http://v\.youku\.com/[^?"]+)', video_page))
|
||||||
|
|
||||||
self.title = r1(r'<meta name="title" content="([^"]+)"', video_page) or \
|
self.title = r1(r'<meta name="title" content="([^"]+)"', video_page) or \
|
||||||
r1(r'<title>([^<]+)', video_page)
|
r1(r'<title>([^<]+)', video_page)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user