about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author UltraQbik <no1skill@yandex.ru> 2024-08-22 03:00:37 +0300
committer UltraQbik <no1skill@yandex.ru> 2024-08-22 03:00:37 +0300
commit eedb57f893986091887d1a889d6752ea4b06ba2a (patch)
tree 92aee69db8d6dba0e3b42341c8b40553c7ced851 /src
parent 76ac2995b1c460f852816a3271fc7777d12b632d (diff)
download httpy-eedb57f893986091887d1a889d6752ea4b06ba2a.tar.gz
httpy-eedb57f893986091887d1a889d6752ea4b06ba2a.zip
Add simple HTML minimizer for gooder compression
Diffstat (limited to 'src')
-rw-r--r-- src/minimizer.py 65
1 files changed, 65 insertions, 0 deletions
diff --git a/src/minimizer.py b/src/minimizer.py
new file mode 100644
index 0000000..af1c015
--- /dev/null
+++ b/src/minimizer.py
@@ -0,0 +1,65 @@
+import re
+# import htmlmin
+
+
+def minimize_html(html: bytes) -> bytes:
+    """
+    Minimize an HTML document by stripping redundant whitespace and quotes.
+
+    Runs of spaces, tabs and newlines collapse to ONE space (deleting them
+    outright would glue neighbouring words together), whitespace between
+    adjacent tags is dropped, and inside opening tags inline ``style`` values
+    are compacted and double quotes are removed where HTML permits it.
+
+    NOTE: no real HTML parsing is done, so <pre>/<textarea>/<script> content
+    gets the same whitespace treatment as everything else, and a '>' inside a
+    quoted attribute value ends the tag match early, leaving the rest of that
+    tag unminified.
+
+    :param html: raw document bytes (UTF-8 or a single-byte ASCII superset)
+    :return: the minimized document as ``bytes``
+    """
+
+    # collapse whitespace runs into a single space, never into nothing
+    html = re.sub(rb'[ \t\r\n]+', b' ', html)
+
+    # simplify '> <' to '><'
+    html = html.replace(b'> <', b'><')
+
+    def compact_style(match):
+        # ': ' and '; ' are only redundant inside inline CSS
+        return match.group().replace(b': ', b':').replace(b'; ', b';')
+
+    def unquote(match):
+        # single-quoted values are matched only so they get skipped as a whole
+        return match.group() if match.group(1) is None else b'=' + match.group(1)
+
+    def shrink_tag(match):
+        tag = re.sub(rb' style="[^"]*"', compact_style, match.group())
+        # an unquoted value must be non-empty, hold no whitespace, quotes, '=',
+        # '<', '>' or '`', and must not run into the '/' of a self-closing tag
+        return re.sub(rb'''='[^']*'|="([^\s"'=<>`]+)"(?=[ >])''', unquote, tag)
+
+    # only opening tags carry attributes; closing tags and comments are skipped
+    return re.sub(rb'<[A-Za-z][^<>]*>', shrink_tag, html)
+
+
+def test():
+    """
+    Print how much `minimize_html` shrinks ``../www/about.html``.
+
+    The path is resolved against the current working directory; an empty
+    file reports a rate of 0% instead of raising ZeroDivisionError.
+    """
+    with open("../www/about.html", "rb") as file:
+        original = file.read()
+    processed = minimize_html(original)
+    rate = (1 - len(processed) / len(original)) * 100 if original else 0.0
+
+    print(f"Original : {len(original)}\n"
+          f"Processed: {len(processed)}\n"
+          f"Rate     : {rate:.2f}%", end="\n\n")
+
+
+if __name__ == '__main__':  # run the size report only when executed as a script
+    test()