Avoid unnecessary download of page 1

This commit is contained in:
Chuntao Hong 2016-07-20 12:09:20 +08:00
parent b95b1a10ee
commit cb2878b8cf

View File

@@ -104,12 +104,11 @@ class Youku(VideoExtractor):
num_pages = int(re.findall(r'page=([0-9]+)\.htm', last_page_url)[0]) num_pages = int(re.findall(r'page=([0-9]+)\.htm', last_page_url)[0])
if (num_pages > 0): if (num_pages > 0):
# download one by one # download one by one
for pn in range(1, num_pages + 1): for pn in range(2, num_pages + 1):
extra_page_url = re.sub(r'page=([0-9]+)\.htm', r'page=%s.htm' % pn, last_page_url) extra_page_url = re.sub(r'page=([0-9]+)\.htm', r'page=%s.htm' % pn, last_page_url)
extra_page = get_content('http://list.youku.com' + extra_page_url) extra_page = get_content('http://list.youku.com' + extra_page_url)
videos |= Youku.oset(re.findall(r'href="(http://v\.youku\.com/[^?"]+)', extra_page)) videos |= Youku.oset(re.findall(r'href="(http://v\.youku\.com/[^?"]+)', extra_page))
except Exception as e: except:
print(e)
# Show full list of episodes # Show full list of episodes
if match1(url, r'youku\.com/show_page/id_([a-zA-Z0-9=]+)'): if match1(url, r'youku\.com/show_page/id_([a-zA-Z0-9=]+)'):
ep_id = match1(url, r'youku\.com/show_page/id_([a-zA-Z0-9=]+)') ep_id = match1(url, r'youku\.com/show_page/id_([a-zA-Z0-9=]+)')