1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
|
import re
# import htmlmin
# Any run of ASCII spaces, tabs and line breaks.
_WHITESPACE_RUN = re.compile(rb"[ \t\r\n]+")
# An opening tag: "<", an ASCII letter, then everything up to the next ">".
# Closing tags, comments and doctypes carry no attributes and are skipped.
_OPENING_TAG = re.compile(rb"<[A-Za-z][^>]*>")
# One quoted attribute (name="value" or name='value') preceded by whitespace.
# Matching both quote styles lets a single-quoted value be consumed whole, so
# double quotes nested inside it are never mistaken for another attribute.
_QUOTED_ATTR = re.compile(rb"""(?<=\s)([^\s"'=<>/]+)=("[^"]*"|'[^']*')""")
# Bytes that are not allowed in an unquoted HTML attribute value.
_NEEDS_QUOTES = re.compile(rb"""[\s"'=<>`]""")


def _shrink_attr(match: "re.Match[bytes]") -> bytes:
    """Return one quoted attribute rewritten in its shortest safe spelling."""
    name, quoted = match.group(1), match.group(2)
    quote, value = quoted[:1], quoted[1:-1]
    if name.lower() == b"style":
        # Inline CSS does not need the space after ':' and ';'.
        value = value.replace(b": ", b":").replace(b"; ", b";")
    # Quotes may only go when the value is followed by whitespace or '>':
    # in 'src="x"/>' the '/' would otherwise become part of the value.
    following = match.string[match.end():match.end() + 1]
    ends_cleanly = following == b">" or following.isspace()
    if value and ends_cleanly and not _NEEDS_QUOTES.search(value):
        return name + b"=" + value
    return name + b"=" + quote + value + quote


def minimize_html(html: bytes) -> bytes:
    """
    Minimize an HTML document given as raw bytes.

    Steps, in order:

    1. Every run of spaces, tabs and line breaks becomes a single space, and
       leading/trailing whitespace is dropped. Line breaks are turned into a
       space rather than deleted so that words and attributes on adjacent
       lines are not fused together.
    2. A single space between two tags ('> <') is removed.
    3. Inside opening tags, ': ' and '; ' are tightened in ``style``
       attributes, and quotes are removed from attribute values that are
       non-empty and contain no whitespace, quotes, '=', '<', '>' or '`'.

    The input is processed as bytes throughout, so it does not have to be
    valid UTF-8 and non-ASCII attribute values are handled correctly.

    Limitations: this is a regex pass, not an HTML parser. Whitespace is
    collapsed everywhere, including inside <pre>, <textarea>, <script> and
    <style>, and removing the space between tags can change the spacing of
    adjacent inline elements.

    :param html: the document to minimize (``bytes`` or ``bytearray``).
    :return: the minimized document as ``bytes``; never longer than the input.
    """
    squeezed = _WHITESPACE_RUN.sub(b" ", bytes(html)).strip()
    squeezed = squeezed.replace(b"> <", b"><")
    return _OPENING_TAG.sub(
        lambda tag: _QUOTED_ATTR.sub(_shrink_attr, tag.group()),
        squeezed,
    )
def test(path: str = "../www/about.html") -> None:
    """
    Minimize one HTML file and print its size before and after.

    Reads the file in binary mode, runs it through ``minimize_html`` and
    prints the original size, the processed size and the percentage saved,
    followed by a blank line.

    :param path: file to measure; the default is relative to the current
        working directory.
    :raises OSError: if the file cannot be opened or read.
    """
    with open(path, "rb") as file:
        original = file.read()

    processed = minimize_html(original)

    # An empty file would otherwise divide by zero; report no saving instead.
    rate = (1 - len(processed) / len(original)) * 100 if original else 0.0

    print(f"Original : {len(original)}\n"
          f"Processed: {len(processed)}\n"
          f"Rate : {rate:.2f}%", end="\n\n")

    # Comparison against htmlmin (needs the commented-out import at the top
    # of the file to be enabled):
    # processed = htmlmin.minify(original.decode("utf8"), True, True, True, True, True)
    #
    # print(f"Original : {len(original)}\n"
    # f"Processed: {len(processed)}\n"
    # f"Rate : {(1 - len(processed) / len(original)) * 100:.2f}%")
# Print the size report only when this file is executed as a script.
if __name__ == "__main__":
    test()
|