Add Letv analysis.

Requires bs4 and requests; install them with: pip install bs4 requests
2025-02-11 12:42:29 +03:00 · 2014-12-19 12:02:44 +08:00 · 2014-12-19 12:02:44 +08:00 · b6e9bcb4c2
commit b6e9bcb4c2
parent ca7c008c50
1 changed files with 215 additions and 0 deletions
--- a/src/you_get/extractors/letv.py
+++ b/src/you_get/extractors/letv.py
@ -4,11 +4,33 @@ __all__ = ['letv_download', 'letvcloud_download', 'letvcloud_download_by_vu']

 import json
 import random
+import xml
 import xml.etree.ElementTree as ET
 import base64, hashlib, urllib
+import os
+import sys
+import inspect
+import io
+import bs4, requests
+import time
+import urllib.request
+import urllib.parse
+
+
+# Locate the directory containing this file, then add the directory two levels
+# above it (parentDir) and the one above that (se_parentDir) to sys.path.
+# NOTE(review): presumably this is meant to let the module be run directly as a
+# script (see the __main__ guard at the bottom of the file) -- confirm it is
+# still needed.
+currentDir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
+parentDir = os.path.dirname(os.path.dirname(currentDir))
+se_parentDir = os.path.dirname(parentDir)
+sys.path.append(parentDir)
+sys.path.append(se_parentDir)
+
+# NOTE(review): these prints execute on every import of this module; they look
+# like leftover debugging output.
+print(currentDir)
+print(parentDir)

 from ..common import *

+# Browser-like User-Agent string. In the code visible in this patch it is only
+# referenced from commented-out request code, not from anything executed.
+USER_AGENT = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:31.0) Gecko/20100101 Firefox/31.0'
+# Value substituted for the `ostype` query parameter of the dispatch URL built
+# in letv_download_by_vid_sub.
+OSTYPE = 'MacOS10.10.1'
+
 def get_timestamp():
    tn = random.random()
    url = 'http://api.letv.com/time?tn={}'.format(tn)
@ -94,6 +116,7 @@ def letv_download(url, output_dir='.', merge=True, info_only=False):
    if re.match(r'http://yuntv.letv.com/', url):
        letvcloud_download(url, output_dir=output_dir, merge=merge, info_only=info_only)
    else:
+        """
        html = get_content(url)
        #to get title
        if re.match(r'http://www.letv.com/ptv/vplay/(\d+).html', url):
@ -101,8 +124,200 @@ def letv_download(url, output_dir='.', merge=True, info_only=False):
        else:
            vid = match1(html, r'vid="(\d+)"')
        title = match1(html,r'name="irTitle" content="(.*?)"')
+
        letv_download_by_vid(vid, title=title, output_dir=output_dir, merge=merge, info_only=info_only)

+        """
+
+        title, vid, nextvid = letv_get_vid2title(url)
+        letv_download_by_vid_sub(vid, nextvid, title=title, output_dir=output_dir,
+                                 merge=merge, info_only=info_only)
+
+
+def to_dict(dict_str):
+    """Evaluate ``dict_str`` with ``eval`` and return the resulting object.
+
+    The expression is evaluated against a ``dict`` subclass whose
+    ``__getitem__`` returns the requested key instead of a stored value. The
+    intent appears to be that bare identifiers in a JavaScript-style object
+    literal (e.g. ``{vid: 1}``) evaluate to their own names as strings
+    (``{'vid': 1}``).
+
+    SECURITY NOTE(review): this runs ``eval`` on its argument. The caller in
+    this file, ``letv_get_vid2title``, passes text scraped from a remote web
+    page, so the page content is executed as a Python expression. Consider
+    replacing this with a non-executing parser.
+    """
+    class _dict(dict):
+        # Any lookup through this mapping "succeeds" and yields the key itself.
+        def __getitem__(self, key):
+            return key
+    return eval(dict_str, _dict())
+
+
+def ror(a, b):
+    """Rotate the low 32 bits of ``a`` to the right, one bit per step, ``b`` times.
+
+    When ``b`` is not positive no step is performed and ``a`` is returned
+    unchanged (and unmasked).
+    """
+    value = a
+    remaining = b
+    while remaining > 0:
+        # Bit 0 wraps around to bit 31; bits 1..31 move down by one.
+        wrapped = (value << 31) & 0x80000000
+        shifted = (value >> 1) & 0x7fffffff
+        value = shifted + wrapped
+        remaining -= 1
+    return value
+
+
+def get_tkey(tm=None):
+    """Derive the ``tkey`` request parameter from a timestamp.
+
+    The timestamp is rotated right with ``ror``, XOR-ed with a fixed constant,
+    rotated right again, and the 32-bit result is returned as a signed integer.
+
+    :param tm: integer timestamp to sign; when ``None`` the current time
+        (``int(time.time())``) is used. An explicit ``0`` is honoured as a
+        timestamp rather than being replaced by the current time.
+    :return: the key as an int in the signed 32-bit range.
+    """
+    l2 = 773625421
+    if tm is None:
+        tm = int(time.time())
+    l3 = ror(tm, l2 % 13)
+    l3 ^= l2
+    l3 = ror(l3, l2 % 17)
+    # Reinterpret the unsigned 32-bit value as a signed 32-bit integer.
+    if l3 & 0x80000000:
+        return l3 - 0x100000000
+    return l3
+
+
+def letv_get_vid2title(page_url):
+    """Fetch a LeTV play page and extract the video's title and ids.
+
+    The page is downloaded with ``requests`` and parsed with BeautifulSoup.
+    Each ``head script`` element is scanned line by line for a line starting
+    with ``var __INFO__ =``; the text after the ``=`` and the following lines,
+    up to the next line starting with ``var``, are collected with trailing
+    ``//`` comments removed, joined, and evaluated with ``to_dict``.
+
+    :param page_url: URL of the play page.
+    :return: tuple ``(title, vid, nextvid)`` taken from the ``video`` entry of
+        the page's ``__INFO__`` object.
+    :raises ValueError: if no ``__INFO__`` assignment is found in the page.
+    """
+    response = requests.get(page_url)
+    # NOTE(review): no parser is named here, so bs4 chooses one from whatever
+    # is installed; naming one explicitly would make parsing reproducible.
+    tree = bs4.BeautifulSoup(response.text)
+
+    match_info = []
+    for script in tree.select('head script'):
+        if not script.text:
+            continue
+        match_info = []
+        start = False
+        for line in script.text.split('\n'):
+            if not start:
+                match = re.match(r'var\s+__INFO__\s?=(.+)', line)
+                if match:
+                    start = True
+                    match_info.append(match.group(1))
+                continue
+            if line.startswith('var'):
+                # The next declaration marks the end of the __INFO__ literal.
+                break
+            hp = line.find('://')
+            p = line.rfind('//')
+            # Cut a trailing '//' comment, but keep the line intact when the
+            # last '//' is merely the one inside a URL's '://'.
+            if p != -1 and (hp == -1 or p != hp + 1):
+                match_info.append(line[:p])
+            else:
+                match_info.append(line)
+        if match_info:
+            break
+
+    if not match_info:
+        raise ValueError('no __INFO__ definition found in {}'.format(page_url))
+
+    match_info = to_dict('\n'.join(match_info))
+    video = match_info['video']
+    vid = video['vid']
+    nextvid = video['nextvid']
+    title = video['title']
+
+    return (title, vid, nextvid)
+
+
+def letv_download_by_vid_sub(vid, nextvid, title, output_dir='.', merge=True, info_only=False):
+    """Resolve the m3u8 URL of a LeTV video and optionally download it.
+
+    Steps performed:
+
+    1. Query the ``playJson`` API with ``vid``, ``nextvid`` and a key from
+       ``get_tkey()``.
+    2. Pick the first stream present in the reply, trying ``1080p``, ``720p``,
+       ``1300``, ``1000`` and ``350`` in that order.
+    3. Request the dispatch URL for that stream and read ``location`` from the
+       JSON reply.
+    4. Print the stream info and, unless ``info_only`` is set, hand the URL to
+       ``download_urls`` with extension ``m3u8`` and size ``0``.
+
+    :param vid: video id sent as the ``id`` parameter.
+    :param nextvid: id sent as the ``nextvid`` parameter.
+    :param title: title used for the info output and the download.
+    :param output_dir: directory passed through to ``download_urls``.
+    :param merge: merge flag passed through to ``download_urls``.
+    :param info_only: when true, only print information; do not download.
+    :raises ValueError: if the API reports an unexpected status, offers none
+        of the known streams, or the dispatch reply has no ``location``.
+    """
+    param_dict = {
+        'id': vid,
+        'platid': 1,
+        'splatid': 101,
+        'format': 1,
+        'nextvid': nextvid,
+        'tkey': get_tkey(),
+        'domain': 'www.letv.com'
+    }
+
+    url = 'http://api.letv.com/mms/out/video/playJson?%s' % urllib.parse.urlencode(param_dict)
+    resp_dict = json.loads(requests.get(url).text)
+
+    # Validate with explicit raises: assert statements vanish under `python -O`.
+    if resp_dict['statuscode'] != '1001':
+        raise ValueError('unexpected playJson statuscode: {!r}'.format(resp_dict['statuscode']))
+    if resp_dict['playstatus']['status'] != '1':
+        raise ValueError('unexpected playstatus: {!r}'.format(resp_dict['playstatus']['status']))
+
+    playurls = resp_dict['playurl']['dispatch']
+    domains = resp_dict['playurl']['domain']
+
+    # Stream keys in order of preference; the first one offered wins.
+    for key in ('1080p', '720p', '1300', '1000', '350'):
+        playurl = playurls.get(key)
+        if playurl:
+            break
+    else:
+        raise ValueError('no known stream among: {}'.format(' '.join(playurls.keys())))
+
+    tn = random.random()
+    url = domains[0] + playurl[0] + '&ctv=pc&m3v=1&termid=1&format=1&hwtype=un&ostype=%s&tag=letv&sign=letv&expect=3&tn=%s&pay=0&rateid=%s' % (OSTYPE, tn, key)
+
+    gslb_data = json.loads(requests.get(url).text)
+    play_url = gslb_data.get('location')
+    if not play_url:
+        raise ValueError('dispatch reply contains no location')
+
+    url = play_url
+    size = 0
+    ext = 'm3u8'
+    print_info(site_info, title, ext, size)
+    print("###LETV:m3u8{}".format(url))
+
+    if not info_only:
+        download_urls([url], title, ext, size, output_dir=output_dir, merge=merge)
+
+
 site_info = "LeTV.com"
 download = letv_download
 download_playlist = playlist_not_supported('letv')
+
+
+
+if __name__ == '__main__':
+    # Manual check: print the stream info for one sample page, no download.
+    # Other sample pages used during development:
+    #   http://www.letv.com/ptv/vplay/21371716.html
+    #   http://www.letv.com/ptv/vplay/21470739.html
+    #   http://www.letv.com/ptv/vplay/21470465.html
+    letv_download(
+        "http://www.letv.com/ptv/vplay/21470448.html",
+        output_dir='./',
+        merge=True,
+        info_only=True,
+    )