You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
215 lines
6.3 KiB
215 lines
6.3 KiB
#!/usr/bin/env python3 |
|
|
|
from __future__ import annotations |
|
|
|
import argparse |
|
import re |
|
import struct |
|
from pathlib import Path |
|
|
|
|
|
# Escape sequences recognised inside a quoted PO string: the single-character
# C escapes, one-to-three digit octal codes, and two-digit hexadecimal codes.
_ESCAPE_RE = re.compile(r"\\([abfnrtv\\\"]|[0-7]{1,3}|x[0-9a-fA-F]{2})")

# Single-character escapes mapped to the character each one stands for.
_SIMPLE_ESCAPES = {
    "a": "\a",
    "b": "\b",
    "f": "\f",
    "n": "\n",
    "r": "\r",
    "t": "\t",
    "v": "\v",
    "\\": "\\",
    '"': '"',
}


def _unescape(value: str) -> str:
    """Return *value* with backslash escape sequences replaced.

    Handles the single-character escapes ``\\a \\b \\f \\n \\r \\t \\v \\\\ \\"``
    as well as octal (``\\101``) and hexadecimal (``\\x41``) codes.  Any other
    backslash sequence is left in the text unchanged.
    """

    def repl(match: re.Match[str]) -> str:
        escape = match.group(1)
        simple = _SIMPLE_ESCAPES.get(escape)
        if simple is not None:
            return simple
        # Numeric escapes are turned into the character with that code point.
        # NOTE(review): values above 0x7F therefore become a code point rather
        # than a raw byte -- confirm that is what callers expect.
        if escape.startswith("x"):
            return chr(int(escape[1:], 16))
        return chr(int(escape, 8))

    return _ESCAPE_RE.sub(repl, value)
|
|
|
|
|
def _parse_quoted(rest: str) -> str:
    """Return the unescaped content of a double-quoted PO string.

    Args:
        rest: Text holding one quoted string; surrounding whitespace is
            ignored.

    Raises:
        ValueError: If the stripped text is not enclosed in a pair of
            double quotes.
    """
    text = rest.strip()
    # A lone '"' both starts and ends with a quote, so the length check is
    # needed to insist on two separate delimiters.
    if len(text) < 2 or not (text.startswith('"') and text.endswith('"')):
        raise ValueError(f"Invalid PO string: {text!r}")
    return _unescape(text[1:-1])
|
|
|
|
|
def parse_po(path: Path) -> dict[str, str]:
    """Parse the PO file at *path* into a mapping of lookup keys to translations.

    Keys are built from the entry's ``msgid``; plural entries use
    ``msgid + "\\x00" + msgid_plural`` and entries with a ``msgctxt`` are
    prefixed with ``msgctxt + "\\x04"``.  Values are the ``msgstr`` text, with
    plural forms joined by ``"\\x00"`` in index order (missing indices become
    empty strings).

    Entries flagged ``fuzzy`` and entries without any translated text are left
    out, so lookups for them fall back to the original message.  The returned
    mapping always contains the header key ``""``.

    Raises:
        ValueError: If a string or a ``msgstr[N]`` index is malformed.
    """
    messages: list[tuple[str | None, str, str | None, dict[int, str], set[str]]] = []

    msgctxt: str | None = None
    msgid: str | None = None
    msgid_plural: str | None = None
    msgstr: dict[int, str] = {}
    flags: set[str] = set()
    # Field that continuation lines currently extend, as (kind, msgstr index);
    # the index is only meaningful when kind == "msgstr".
    active: tuple[str, int] | None = None

    def flush() -> None:
        """Store the entry collected so far (if it has a msgid) and reset state."""
        nonlocal msgctxt, msgid, msgid_plural, msgstr, flags, active
        if msgid is not None:
            messages.append((msgctxt, msgid, msgid_plural, msgstr, flags))
        msgctxt = None
        msgid = None
        msgid_plural = None
        msgstr = {}
        flags = set()
        active = None

    with path.open("r", encoding="utf-8", errors="replace", newline="") as file:
        for raw_line in file:
            line = raw_line.rstrip("\r\n")

            if not line.strip():
                flush()
                continue

            if line.startswith("#"):
                # Comments precede the entry they belong to, so a comment seen
                # after a msgstr opens the next entry even without a blank
                # separator line.
                if msgstr:
                    flush()
                if line.startswith("#,"):
                    for flag in line[2:].split(","):
                        flag = flag.strip()
                        if flag:
                            flags.add(flag)
                continue

            if line.startswith("msgctxt"):
                if msgstr:
                    flush()
                msgctxt = _parse_quoted(line[len("msgctxt"):])
                active = ("msgctxt", 0)
                continue

            # Must be tested before "msgid", which is a prefix of it.
            if line.startswith("msgid_plural"):
                msgid_plural = _parse_quoted(line[len("msgid_plural"):])
                active = ("msgid_plural", 0)
                continue

            if line.startswith("msgid"):
                if msgstr:
                    flush()
                msgid = _parse_quoted(line[len("msgid"):])
                active = ("msgid", 0)
                continue

            # Must be tested before the plain "msgstr" keyword.
            if line.startswith("msgstr["):
                close = line.find("]")
                if close == -1:
                    raise ValueError(f"Invalid msgstr index: {line!r}")
                index = int(line[len("msgstr["):close])
                msgstr[index] = _parse_quoted(line[close + 1:])
                active = ("msgstr", index)
                continue

            if line.startswith("msgstr"):
                msgstr[0] = _parse_quoted(line[len("msgstr"):])
                active = ("msgstr", 0)
                continue

            if line.lstrip().startswith('"'):
                # Continuation line: append to whichever field came last.
                value = _parse_quoted(line)
                if active is None:
                    continue
                kind, index = active
                if kind == "msgctxt":
                    msgctxt = (msgctxt or "") + value
                elif kind == "msgid":
                    msgid = (msgid or "") + value
                elif kind == "msgid_plural":
                    msgid_plural = (msgid_plural or "") + value
                elif kind == "msgstr":
                    msgstr[index] = msgstr.get(index, "") + value
                continue

    # The last entry is not necessarily followed by a blank line.
    flush()

    catalog: dict[str, str] = {}
    for context, singular, plural, forms, entry_flags in messages:
        if "fuzzy" in entry_flags:
            continue

        if plural is not None:
            key = singular + "\x00" + plural
            last_index = max(forms, default=0)
            value = "\x00".join(forms.get(i, "") for i in range(last_index + 1))
            translated = any(forms.values())
        else:
            key = singular
            value = forms.get(0, "")
            translated = bool(value)

        # An empty translation stored in the catalog would be returned by
        # lookups in place of the original message, so leave it out.
        if not translated:
            continue

        # An explicitly empty context is still a context, distinct from none.
        if context is not None:
            key = context + "\x04" + key

        catalog[key] = value

    catalog.setdefault("", "")
    return catalog
|
|
|
|
|
def write_mo(catalog: dict[str, str], out_file: Path) -> None:
    """Write *catalog* to *out_file* as a little-endian GNU .mo file.

    Entries are emitted in ascending key order.  The file consists of a
    seven-word header (magic, version 0, entry count, offsets of the two
    tables, and an empty hash table), the table of original strings, the
    table of translations, and finally the NUL-terminated UTF-8 string data.
    Missing parent directories of *out_file* are created.
    """
    keys = sorted(catalog)
    originals = [key.encode("utf-8") for key in keys]
    translations = [catalog[key].encode("utf-8") for key in keys]

    total = len(keys)
    originals_offset = 7 * 4  # seven 32-bit header words
    translations_offset = originals_offset + total * 8  # 8 bytes per table row
    data_start = translations_offset + total * 8

    blob = bytearray()
    table: list[tuple[int, int]] = []
    # All originals are laid out first, then all translations, so the first
    # `total` rows form the originals table and the remainder the translations.
    for chunk in originals + translations:
        table.append((len(chunk), data_start + len(blob)))
        blob += chunk
        blob += b"\x00"

    out_file.parent.mkdir(parents=True, exist_ok=True)
    with out_file.open("wb") as stream:
        header = struct.pack(
            "<Iiiiiii",
            0x950412DE,  # magic
            0,  # version
            total,
            originals_offset,
            translations_offset,
            0,  # hash table size
            0,  # hash table offset
        )
        stream.write(header)
        for length, offset in table:
            stream.write(struct.pack("<II", length, offset))
        stream.write(blob)
|
|
|
|
|
def main() -> int:
    """Command-line entry point: compile one .po file into a .mo/.gmo file.

    Reads the input path and the required ``-o/--output`` path from the
    command line, parses the input and writes the compiled catalog.

    Returns:
        0 once the output file has been written.
    """
    cli = argparse.ArgumentParser(description="Compile a .po file into a GNU .mo/.gmo file.")
    cli.add_argument("input", type=Path, help="Input .po file")
    cli.add_argument("-o", "--output", type=Path, required=True, help="Output .mo/.gmo file")
    options = cli.parse_args()

    write_mo(parse_po(options.input), options.output)
    return 0
|
|
|
|
|
if __name__ == "__main__":
    # Run the CLI and hand its return value to the interpreter as exit status.
    exit_status = main()
    raise SystemExit(exit_status)
|
|
|
|