diff options
| author | UltraQbik <no1skill@yandex.ru> | 2024-08-22 03:00:37 +0300 |
|---|---|---|
| committer | UltraQbik <no1skill@yandex.ru> | 2024-08-22 03:00:37 +0300 |
| commit | eedb57f893986091887d1a889d6752ea4b06ba2a (patch) | |
| tree | 92aee69db8d6dba0e3b42341c8b40553c7ced851 | |
| parent | 76ac2995b1c460f852816a3271fc7777d12b632d (diff) | |
| download | httpy-eedb57f893986091887d1a889d6752ea4b06ba2a.tar.gz httpy-eedb57f893986091887d1a889d6752ea4b06ba2a.zip | |
Add simple HTML minimizer for better compression
| -rw-r--r-- | main.py | 5 | ||||
| -rw-r--r-- | src/minimizer.py | 65 | ||||
| -rw-r--r-- | www/index.html | 2 |
3 files changed, 71 insertions, 1 deletion
diff --git a/main.py b/main.py index e85e536..f22b957 100644 --- a/main.py +++ b/main.py @@ -11,6 +11,7 @@ import signal import asyncio import aiofiles from src.request import Request +from src.minimizer import minimize_html # path mapping @@ -168,6 +169,10 @@ class HTTPServer: async with aiofiles.open(PATH_MAP[request.path]["path"], "rb") as f: data = await f.read() + # pre-compress data for HTML files + if PATH_MAP[request.path]["path"][-4:] == "html": + data = minimize_html(data) + # add gzip compression header (if supported) headers = {} if "gzip" in compressions: diff --git a/src/minimizer.py b/src/minimizer.py new file mode 100644 index 0000000..af1c015 --- /dev/null +++ b/src/minimizer.py @@ -0,0 +1,65 @@ +import re +# import htmlmin + + +def minimize_html(html: bytes) -> bytes: + """ + Minimizes HTML files. + Slightly better than htmlmin for my files, + but maybe I break something in process and I don't notice + """ + + html = bytearray(html) + + # remove newlines + html = (html + .replace(b'\r', b'') + .replace(b'\n', b'')) + + # remove double spaces + size = len(html) + while True: + html = html.replace(b' ', b'') + + # if nothing changes -> break + if size == len(html): + break + size = len(html) + + # simplify '> <' to '><' + html = html.replace(b'> <', b'><') + + # remove unnecessary quotes + index = 0 + for tag in re.findall(r"<.*?>", html.decode("utf8")): + index = html.find(tag.encode("utf8"), index) + processed = (tag + .replace("\"", "") + .replace(": ", ":") + .replace("; ", ";")) + if len(processed) < len(tag): + html[index:index+len(tag)] = (html[index:index+len(tag)] + .replace(tag.encode("utf8"), processed.encode("utf8"), 1)) + + return html + + +def test(): + with open("../www/about.html", "rb") as file: + original = file.read() + + processed = minimize_html(original) + + print(f"Original : {len(original)}\n" + f"Processed: {len(processed)}\n" + f"Rate : {(1 - len(processed) / len(original)) * 100:.2f}%", end="\n\n") + + # processed = 
htmlmin.minify(original.decode("utf8"), True, True, True, True, True) + # + # print(f"Original : {len(original)}\n" + # f"Processed: {len(processed)}\n" + # f"Rate : {(1 - len(processed) / len(original)) * 100:.2f}%") + + +if __name__ == '__main__': + test() diff --git a/www/index.html b/www/index.html index e787340..c2ef300 100644 --- a/www/index.html +++ b/www/index.html @@ -23,7 +23,7 @@ <h1> What is it running? </h1> <p> > This server is run by the shitty python code I wrote </p> <p> > Server does not use flask or any other similar python web frameworks </p> - <p> > It primarily uses standard python libraries, with 2 libraries being an exception </p> + <p> > It primarily uses standard python libraries, with 1 library being an exception </p> <p> > <i> aiofiles </i> - for asynchronous file I/O </p> </section> <section> |