mirror of
https://github.com/soimort/you-get.git
synced 2025-01-23 21:45:02 +03:00
unescape HTML entities in media titles
This commit is contained in:
parent
ecb7e84e6b
commit
36f7cf798f
@ -11,7 +11,7 @@ import platform
|
||||
import threading
|
||||
|
||||
from .version import __version__
|
||||
from .util import log, legitimize, sogou_proxy_server
|
||||
from .util import log, legitimize, sogou_proxy_server, unescape
|
||||
|
||||
dry_run = False
|
||||
force = False
|
||||
@ -523,6 +523,7 @@ def download_urls(urls, title, ext, total_size, output_dir = '.', refer = None,
|
||||
traceback.print_exc(file = sys.stdout)
|
||||
pass
|
||||
|
||||
title = unescape(title)
|
||||
title = legitimize(title)
|
||||
|
||||
filename = '%s.%s' % (title, ext)
|
||||
@ -727,7 +728,7 @@ def print_info(site_info, title, type, size):
|
||||
type_info = "Unknown type (%s)" % type
|
||||
|
||||
print("Video Site:", site_info)
|
||||
print("Title: ", tr(title))
|
||||
print("Title: ", unescape(tr(title)))
|
||||
print("Type: ", type_info)
|
||||
print("Size: ", round(size / 1048576, 2), "MiB (" + str(size) + " Bytes)")
|
||||
print()
|
||||
|
@ -3,3 +3,4 @@
|
||||
from .fs import *
|
||||
from .log import *
|
||||
from .sogou_proxy import *
|
||||
from .strings import *
|
||||
|
20
src/you_get/util/strings.py
Normal file
20
src/you_get/util/strings.py
Normal file
@ -0,0 +1,20 @@
|
||||
try:
    # Python 3.4+ ships a complete implementation in the standard library.
    from html import unescape
except ImportError:
    # Fallback for older Python 3 releases that lack html.unescape.
    import re
    from html.entities import entitydefs

    # Matches one well-formed reference: decimal (&#65;), hexadecimal
    # (&#x41; / &#X41;) or named (&amp;).  The character classes are
    # deliberately strict so that a bare ampersand followed later by an
    # unrelated semicolon ("a & b; c") is not treated as an entity.
    _ENTITY_RE = re.compile(
        r'&(#[0-9]+|#[xX][0-9a-fA-F]+|[A-Za-z][A-Za-z0-9]*);')

    def unescape(string):
        '''Decode HTML character references in ``string``.

        Numeric (decimal and hexadecimal) and named references are replaced
        by the characters they denote.  Unknown or malformed references are
        left untouched rather than raising.  Decoding happens in a single
        pass, so text produced by one replacement is never decoded again
        (``&#38;amp;`` becomes ``&amp;``, not ``&``).
        '''
        return _ENTITY_RE.sub(_entity2uni, string)

    def _entity2uni(m):
        '''Return the replacement text for one matched reference.'''
        ref = m.group(1)
        if ref.startswith('#'):
            return _sharp2uni(m)
        # Unknown names fall back to the original text instead of KeyError.
        return entitydefs.get(ref, m.group(0))

    def _sharp2uni(m):
        '''&#...; ==> unicode

        ``m`` is a match whose full text is a numeric reference.  If the
        number cannot be parsed or is not a valid code point, the original
        text is returned unchanged.
        '''
        s = m.group(0)[2:].rstrip(';')
        try:
            if s[:1] in ('x', 'X'):
                return chr(int(s[1:], 16))
            return chr(int(s))
        except (ValueError, OverflowError):
            # Malformed digits or a code point outside the Unicode range.
            return m.group(0)
|
Loading…
Reference in New Issue
Block a user