#!/usr/bin/env python3
"""Compile a gettext ``.po`` catalog into a GNU ``.mo``/``.gmo`` binary file.

This is a small stand-in for ``msgfmt``: it parses the textual PO format,
drops fuzzy and untranslated entries, and writes a little-endian MO file
without a hash table.
"""

from __future__ import annotations

import argparse
import re
import struct
from pathlib import Path

# Escape sequences recognised inside a PO string literal: the single-letter
# escapes, an escaped backslash or quote, 1-3 octal digits, or ``x`` + 2 hex
# digits.
_ESCAPE_RE = re.compile(r"\\(n|t|r|\\|\"|[0-7]{1,3}|x[0-9a-fA-F]{2})")

# Replacement text for the non-numeric escapes matched by ``_ESCAPE_RE``.
_SIMPLE_ESCAPES = {"n": "\n", "t": "\t", "r": "\r", "\\": "\\", '"': '"'}

# Magic number that opens every MO file (written little-endian here).
_MO_MAGIC = 0x950412DE


def _unescape(value: str) -> str:
    """Return *value* with PO backslash escapes replaced by their characters.

    Numeric escapes (octal and ``\\xHH``) are mapped to the character whose
    code point equals the escaped number. Unrecognised escapes are left
    untouched.
    """

    def repl(match: re.Match[str]) -> str:
        escape = match.group(1)
        simple = _SIMPLE_ESCAPES.get(escape)
        if simple is not None:
            return simple
        if escape.startswith("x"):
            return chr(int(escape[1:], 16))
        return chr(int(escape, 8))

    return _ESCAPE_RE.sub(repl, value)


def _parse_quoted(rest: str) -> str:
    """Parse one double-quoted PO string literal and return its unescaped text.

    Surrounding whitespace is ignored.

    Raises:
        ValueError: if *rest* is not wrapped in a pair of double quotes.
    """
    rest = rest.strip()
    # The length check rejects a lone '"', which both starts and ends with a
    # quote but is not a complete literal.
    if len(rest) < 2 or not (rest.startswith('"') and rest.endswith('"')):
        raise ValueError(f"Invalid PO string: {rest!r}")
    return _unescape(rest[1:-1])


def parse_po(path: Path) -> dict[str, str]:
    """Read the PO file at *path* and return the catalog to store in an MO file.

    Keys and values use the MO conventions: a context is prefixed to the key
    with ``"\\x04"``, and plural ids / plural translations are joined with
    ``"\\x00"``. Fuzzy entries and entries whose first ``msgstr`` is empty are
    left out so that lookups fall back to the original text; the header entry
    (empty ``msgid`` without context) is always kept, and an empty header is
    added when the file has none.

    The file is decoded as UTF-8 with undecodable bytes replaced.

    Raises:
        ValueError: on a malformed string literal or ``msgstr[N]`` line.
    """
    entries: list[tuple[str | None, str, str | None, dict[int, str], set[str]]] = []

    msgctxt: str | None = None
    msgid: str | None = None
    msgid_plural: str | None = None
    msgstr: dict[int, str] = {}
    flags: set[str] = set()
    # Which field a following continuation line ("...") appends to.
    active: tuple[str, int | None] | None = None

    def flush() -> None:
        """Store the entry collected so far (if any) and reset the state."""
        nonlocal msgctxt, msgid, msgid_plural, msgstr, flags, active
        if msgid is not None:
            entries.append((msgctxt, msgid, msgid_plural, msgstr, flags))
        msgctxt = None
        msgid = None
        msgid_plural = None
        msgstr = {}
        flags = set()
        active = None

    with path.open("r", encoding="utf-8", errors="replace", newline="") as file:
        for raw_line in file:
            line = raw_line.rstrip("\r\n")

            if not line.strip():
                flush()
                continue

            # Blank lines between entries are optional in PO files: once a
            # msgstr has been seen, a comment, msgctxt or msgid line can only
            # belong to the next entry.
            starts_entry = line.startswith("#") or (
                line.startswith(("msgctxt", "msgid"))
                and not line.startswith("msgid_plural")
            )
            if starts_entry and msgstr:
                flush()

            if line.startswith("#,"):
                flags.update(
                    flag for flag in (part.strip() for part in line[2:].split(",")) if flag
                )
                continue
            if line.startswith("#"):
                continue

            if line.startswith("msgctxt"):
                msgctxt = _parse_quoted(line[len("msgctxt"):])
                active = ("msgctxt", None)
                continue
            # msgid_plural must be tested before its prefix msgid.
            if line.startswith("msgid_plural"):
                msgid_plural = _parse_quoted(line[len("msgid_plural"):])
                active = ("msgid_plural", None)
                continue
            if line.startswith("msgid"):
                msgid = _parse_quoted(line[len("msgid"):])
                active = ("msgid", None)
                continue
            # msgstr[N] must be tested before its prefix msgstr.
            if line.startswith("msgstr["):
                close = line.find("]")
                if close == -1:
                    raise ValueError(f"Invalid plural msgstr line: {line!r}")
                index = int(line[len("msgstr["):close])
                msgstr[index] = _parse_quoted(line[close + 1:])
                active = ("msgstr", index)
                continue
            if line.startswith("msgstr"):
                msgstr[0] = _parse_quoted(line[len("msgstr"):])
                active = ("msgstr", 0)
                continue

            if line.lstrip().startswith('"'):
                value = _parse_quoted(line)
                if active is None:
                    continue
                kind, index = active
                if kind == "msgctxt":
                    msgctxt = (msgctxt or "") + value
                elif kind == "msgid":
                    msgid = (msgid or "") + value
                elif kind == "msgid_plural":
                    msgid_plural = (msgid_plural or "") + value
                elif kind == "msgstr" and index is not None:
                    msgstr[index] = msgstr.get(index, "") + value
                continue
            # Any other line is ignored.

    flush()

    catalog: dict[str, str] = {}
    for ctxt, ident, plural, forms, entry_flags in entries:
        is_header = ident == "" and ctxt is None
        if not is_header:
            if "fuzzy" in entry_flags:
                continue
            # An empty translation stored in the MO file would be returned
            # as-is by lookups, hiding the original text.
            if not forms.get(0, ""):
                continue

        if plural is not None:
            key = ident + "\x00" + plural
            last = max(forms, default=0)
            value = "\x00".join(forms.get(i, "") for i in range(last + 1))
        else:
            key = ident
            value = forms.get(0, "")

        # An empty context is still a context, distinct from no msgctxt line.
        if ctxt is not None:
            key = ctxt + "\x04" + key
        catalog[key] = value

    catalog.setdefault("", "")
    return catalog


def write_mo(catalog: dict[str, str], out_file: Path) -> None:
    """Write *catalog* to *out_file* as a little-endian GNU MO file.

    Layout: a 28-byte header, the table of original strings, the table of
    translations (each row is ``length, offset``), then a pool holding every
    NUL-terminated original followed by every NUL-terminated translation.
    No hash table is written. Parent directories are created as needed.
    """
    # Readers binary-search the originals, so they must be sorted as bytes.
    entries = sorted(
        (key.encode("utf-8"), value.encode("utf-8")) for key, value in catalog.items()
    )
    ids = [key for key, _ in entries]
    strs = [value for _, value in entries]

    count = len(entries)
    header_size = 7 * 4
    table_size = count * 8
    originals_offset = header_size
    translations_offset = originals_offset + table_size
    string_offset = translations_offset + table_size

    offsets_ids: list[tuple[int, int]] = []
    offsets_strs: list[tuple[int, int]] = []
    pool = bytearray()
    for table, values in ((offsets_ids, ids), (offsets_strs, strs)):
        for value in values:
            table.append((len(value), string_offset + len(pool)))
            pool.extend(value)
            pool.append(0)  # NUL terminator, not counted in the length

    out_file.parent.mkdir(parents=True, exist_ok=True)
    with out_file.open("wb") as file:
        file.write(
            struct.pack(
                "<7I",
                _MO_MAGIC,
                0,  # format revision
                count,
                originals_offset,
                translations_offset,
                0,  # hash table size: none
                0,  # hash table offset: unused when the size is 0
            )
        )
        file.write(
            b"".join(
                struct.pack("<II", length, offset)
                for length, offset in offsets_ids + offsets_strs
            )
        )
        file.write(pool)


def main() -> int:
    """Parse the command line, compile the PO file and return the exit status."""
    parser = argparse.ArgumentParser(description="Compile a .po file into a GNU .mo/.gmo file.")
    parser.add_argument("input", type=Path, help="Input .po file")
    parser.add_argument("-o", "--output", type=Path, required=True, help="Output .mo/.gmo file")
    args = parser.parse_args()

    catalog = parse_po(args.input)
    write_mo(catalog, args.output)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())