import re
# import htmlmin

# test() is a manual smoke check, not part of the importable API.
__all__ = ["minimize_html"]

# A run of HTML "ASCII whitespace": space, tab, CR, LF, form feed.
_WHITESPACE_RUN = re.compile(rb"[ \t\r\n\f]+")

# A start tag: '<' immediately followed by a letter, up to the next '>'.
# Requiring a letter skips doctypes, comments, end tags and stray '<' signs.
_START_TAG = re.compile(rb"<[A-Za-z][^>]*>")

# name="value" or name='value' inside a tag. Single-quoted values are matched
# as well so that double quotes *inside* them are never mistaken for
# attribute delimiters.
_ATTRIBUTE = re.compile(rb"""([^\s"'<>/=]+)\s*=\s*(?:"([^"]*)"|'([^']*)')""")

# Characters that may not appear in an unquoted attribute value.
_UNSAFE_UNQUOTED = re.compile(rb"""[\s"'=<>`]""")


def _shrink_attribute(match: "re.Match[bytes]") -> bytes:
    """Return one attribute in its shortest equivalent spelling."""
    name = match.group(1)
    if match.group(2) is not None:
        quote, value = b'"', match.group(2)
    else:
        quote, value = b"'", match.group(3)

    # Inline CSS does not need the space after ':' and ';'. Other attributes
    # (title, alt, ...) carry user-visible text and must be left untouched.
    if name.lower() == b"style":
        value = value.replace(b": ", b":").replace(b"; ", b";")

    # Quotes may only go when the value is non-empty, contains no character
    # that would end or corrupt an unquoted value, and is followed by
    # whitespace or '>' (a following '/' would be absorbed into the value).
    following = match.string[match.end():match.end() + 1]
    can_unquote = (
        bool(value)
        and _UNSAFE_UNQUOTED.search(value) is None
        and (following == b">" or following.isspace())
    )
    if can_unquote:
        return name + b"=" + value
    return name + b"=" + quote + value + quote


def _shrink_tag(match: "re.Match[bytes]") -> bytes:
    """Rewrite every quoted attribute of a single start tag."""
    return _ATTRIBUTE.sub(_shrink_attribute, match.group(0))


def minimize_html(html: bytes) -> bytes:
    """
    Minimize an HTML document.

    Steps, in order:

    1. Every run of whitespace (spaces, tabs, line breaks) becomes a single
       space, and leading/trailing whitespace of the document is dropped.
       Line breaks are turned into a space rather than deleted so that words
       on adjacent lines are not glued together.
    2. A single space between two tags ('> <') is removed.
    3. Inside start tags, 'style' values lose the space after ':' and ';',
       and quotes are removed from attribute values that are valid unquoted.

    Works purely on bytes, so input does not have to be valid UTF-8.

    Caveats (this is a heuristic, not an HTML parser):

    * Step 2 also removes a meaningful space between adjacent inline
      elements, e.g. '<b>a</b> <i>b</i>'.
    * Whitespace inside <pre>, <textarea> and inline <script>/<style> is
      collapsed like everywhere else; a '//' comment in an inline script
      will swallow the code that used to be on the following lines.
    * A literal '>' inside an attribute value ends the tag as far as step 3
      is concerned; such a tag is simply left with its quotes.

    :param html: raw HTML document
    :return: minimized HTML document
    """
    html = _WHITESPACE_RUN.sub(b" ", bytes(html)).strip()
    html = html.replace(b"> <", b"><")
    return _START_TAG.sub(_shrink_tag, html)


def test(path: str = "../www/about.html") -> None:
    """
    Minimize the file at *path* and print the size before/after.

    :param path: HTML file to measure
    """
    with open(path, "rb") as file:
        original = file.read()

    processed = minimize_html(original)

    # An empty file has nothing to save; avoid dividing by zero.
    rate = (1 - len(processed) / len(original)) * 100 if original else 0.0

    print(f"Original : {len(original)}\n"
          f"Processed: {len(processed)}\n"
          f"Rate : {rate:.2f}%", end="\n\n")

    # Comparison against htmlmin (needs the commented-out import above):
    # processed = htmlmin.minify(original.decode("utf8"), True, True, True, True, True)
    #
    # print(f"Original : {len(original)}\n"
    #       f"Processed: {len(processed)}\n"
    #       f"Rate : {(1 - len(processed) / len(original)) * 100:.2f}%")


if __name__ == '__main__':
    test()