diff options
| author | UltraQbik <no1skill@yandex.ru> | 2024-08-22 03:00:37 +0300 |
|---|---|---|
| committer | UltraQbik <no1skill@yandex.ru> | 2024-08-22 03:00:37 +0300 |
| commit | eedb57f893986091887d1a889d6752ea4b06ba2a (patch) | |
| tree | 92aee69db8d6dba0e3b42341c8b40553c7ced851 /src | |
| parent | 76ac2995b1c460f852816a3271fc7777d12b632d (diff) | |
| download | httpy-eedb57f893986091887d1a889d6752ea4b06ba2a.tar.gz httpy-eedb57f893986091887d1a889d6752ea4b06ba2a.zip | |
Add simple HTML minimizer for gooder compression
Diffstat (limited to 'src')
| -rw-r--r-- | src/minimizer.py | 65 |
1 files changed, 65 insertions, 0 deletions
"""Small dependency-free HTML minimizer used to shrink pages before serving."""

import re
from pathlib import Path

# Elements whose content is whitespace- or newline-sensitive.  They are copied
# to the output byte-for-byte (opening tag included), because collapsing
# whitespace would change how <pre>/<textarea> render and can break scripts
# that rely on line breaks (e.g. `//` comments).
_RAW_ELEMENT_RE = re.compile(
    rb"<(pre|textarea|script|style)(?=[\s/>])[^>]*>.*?</\1\s*>",
    re.DOTALL | re.IGNORECASE,
)

# A run of ASCII whitespace as defined by the HTML syntax.
_WHITESPACE_RE = re.compile(rb"[ \t\r\n\f]+")

# A start tag.  Quoted attribute values are consumed as a unit so that a `>`
# inside a value does not end the tag early; tags with unbalanced quotes do
# not match at all and are therefore left untouched.
_START_TAG_RE = re.compile(
    rb"""<[A-Za-z][^\s<>/"'=]*(?:"[^"]*"|'[^']*'|[^"'<>])*>"""
)

# One `name="value"` / `name='value'` attribute inside a start tag.
_QUOTED_ATTR_RE = re.compile(
    rb"""([^\s"'<>/=]+)\s*=\s*(?:"([^"]*)"|'([^']*)')"""
)

# Characters that are not allowed in an unquoted attribute value.
_NEEDS_QUOTES_RE = re.compile(rb"""[\s"'=<>`]""")


def _compact_attribute(match: "re.Match[bytes]") -> bytes:
    """Return the shortest safe spelling of one quoted attribute."""
    name, double_quoted, single_quoted = match.groups()
    if double_quoted is not None:
        quote, value = b'"', double_quoted
    else:
        quote, value = b"'", single_quoted

    # Inline CSS tolerates losing the space after ':' and ';'.  Other
    # attributes (title, alt, ...) carry user-visible text and are left alone.
    if name.lower() == b"style":
        value = value.replace(b": ", b":").replace(b"; ", b";")

    # An unquoted value must be non-empty, must not contain whitespace or any
    # of  " ' = < > `  and must be followed by whitespace or the closing '>'.
    # Otherwise e.g. `src="a.png"/>` would turn into the value "a.png/".
    following = match.string[match.end():match.end() + 1]
    can_unquote = (
        bool(value)
        and _NEEDS_QUOTES_RE.search(value) is None
        and (following == b">" or following.isspace())
    )
    if can_unquote:
        return name + b"=" + value
    return name + b"=" + quote + value + quote


def _minimize_tag(match: "re.Match[bytes]") -> bytes:
    """Compact every quoted attribute of a single start tag."""
    return _QUOTED_ATTR_RE.sub(_compact_attribute, match.group(0))


def _minimize_fragment(fragment: bytes) -> bytes:
    """Minimize markup that contains no whitespace-sensitive element."""
    # Collapse each whitespace run to ONE space instead of deleting it, so
    # words separated only by a line break or indentation are not glued
    # together.
    fragment = _WHITESPACE_RE.sub(b" ", fragment)

    # simplify '> <' to '><'
    fragment = fragment.replace(b"> <", b"><")

    # remove unnecessary quotes
    return _START_TAG_RE.sub(_minimize_tag, fragment)


def minimize_html(html: bytes) -> bytes:
    """
    Minimize an HTML document.

    The following transformations are applied:

    * every run of whitespace (spaces, tabs, line breaks) becomes one space;
    * a single space between two tags (``> <``) is removed;
    * quotes around attribute values are dropped when the value is valid
      without them;
    * ``": "`` and ``"; "`` are tightened inside ``style`` attributes;
    * leading and trailing whitespace of the document is stripped.

    ``<pre>``, ``<textarea>``, ``<script>`` and ``<style>`` elements are
    copied verbatim.

    Known trade-offs: whitespace inside attribute values is collapsed like
    any other whitespace, and removing the space between adjacent tags can
    change the spacing of inline elements.

    The input is processed as raw bytes, so it is never decoded and byte
    offsets stay correct for non-ASCII content.

    :param html: document to minimize (any bytes-like object)
    :return: minimized document as ``bytes``
    """

    source = bytes(html)

    pieces = []
    cursor = 0
    for raw in _RAW_ELEMENT_RE.finditer(source):
        pieces.append(_minimize_fragment(source[cursor:raw.start()]))
        pieces.append(raw.group(0))
        cursor = raw.end()
    pieces.append(_minimize_fragment(source[cursor:]))

    return b"".join(pieces).strip()


def test():
    """
    Print the size reduction achieved on the bundled ``www/about.html``.

    The sample is located relative to this module, so the result does not
    depend on the current working directory.  A missing or unreadable sample
    is reported instead of raising.
    """

    sample = Path(__file__).resolve().parent.parent / "www" / "about.html"
    try:
        original = sample.read_bytes()
    except OSError as error:
        print(f"Cannot read sample file {sample}: {error}")
        return

    processed = minimize_html(original)

    # Guard against an empty sample file (division by zero).
    rate = (1 - len(processed) / len(original)) * 100 if original else 0.0

    print(f"Original : {len(original)}\n"
          f"Processed: {len(processed)}\n"
          f"Rate     : {rate:.2f}%", end="\n\n")


if __name__ == '__main__':
    test()