try:
    # Python 3.4+ provides html.unescape; prefer it whenever it is available.
    from html import unescape as unescape_html
except ImportError:
    # Fallback used only when html.unescape cannot be imported.
    import re
    from html.entities import entitydefs

    # One pattern covers both reference kinds so the text is decoded in a
    # single pass (decoded output is never re-scanned, which avoids turning
    # '&#38;lt;' into '<').  Group 1 holds the digits of a numeric reference
    # including an optional x/X prefix; group 2 holds a named entity's name.
    _ENTITY_RE = re.compile(
        r'&(?:#([0-9]+|[xX][0-9a-fA-F]+)|([A-Za-z][A-Za-z0-9]*));')

    def _sharp2uni(m):
        '''&#NNN; / &#xHH; ==> unicode character

        ``m`` is a regex match whose full text is a numeric character
        reference.  Returns the referenced character, or the matched text
        unchanged when the digits are malformed or the code point cannot be
        represented by ``chr``.
        '''
        reference = m.group(0)
        digits = reference[2:].rstrip(';')
        try:
            # Accept both 'x' and 'X' as the hexadecimal marker.
            if digits[:1] in ('x', 'X'):
                return chr(int(digits[1:], 16))
            return chr(int(digits))
        except (ValueError, OverflowError):
            return reference

    def _entity2uni(m):
        '''Replace one match of _ENTITY_RE with the character it denotes.'''
        if m.group(1) is not None:
            return _sharp2uni(m)
        # Unknown names are left as written instead of raising KeyError.
        return entitydefs.get(m.group(2), m.group(0))

    def unescape_html(string):
        '''HTML entity decode

        Replaces numeric references (``&#65;``, ``&#x41;``) and the named
        entities listed in ``html.entities.entitydefs`` with their
        characters.  Anything that is not a recognised reference is returned
        untouched.
        '''
        return _ENTITY_RE.sub(_entity2uni, string)
from .fs import legitimize
def get_filename(htmlstring):
    '''Turn an HTML-escaped string into a filename.

    The input is decoded with unescape_html and the decoded text is then
    passed to legitimize; whatever legitimize returns is the result.
    '''
    decoded = unescape_html(htmlstring)
    return legitimize(decoded)