You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
216 lines
6.3 KiB
216 lines
6.3 KiB
|
2 months ago
|
#!/usr/bin/env python3
|
||
|
|
|
||
|
|
from __future__ import annotations
|
||
|
|
|
||
|
|
import argparse
|
||
|
|
import re
|
||
|
|
import struct
|
||
|
|
from pathlib import Path
|
||
|
|
|
||
|
|
|
||
|
|
_ESCAPE_RE = re.compile(r"\\(n|t|r|\\|\"|[0-7]{1,3}|x[0-9a-fA-F]{2})")
|
||
|
|
|
||
|
|
|
||
|
|
def _unescape(value: str) -> str:
|
||
|
|
def repl(match: re.Match[str]) -> str:
|
||
|
|
escape = match.group(1)
|
||
|
|
if escape == "n":
|
||
|
|
return "\n"
|
||
|
|
if escape == "t":
|
||
|
|
return "\t"
|
||
|
|
if escape == "r":
|
||
|
|
return "\r"
|
||
|
|
if escape == "\\":
|
||
|
|
return "\\"
|
||
|
|
if escape == '"':
|
||
|
|
return '"'
|
||
|
|
if escape.startswith("x"):
|
||
|
|
return chr(int(escape[1:], 16))
|
||
|
|
return chr(int(escape, 8))
|
||
|
|
|
||
|
|
return _ESCAPE_RE.sub(repl, value)
|
||
|
|
|
||
|
|
|
||
|
|
def _parse_quoted(rest: str) -> str:
|
||
|
|
rest = rest.strip()
|
||
|
|
if not (rest.startswith('"') and rest.endswith('"')):
|
||
|
|
raise ValueError(f"Invalid PO string: {rest!r}")
|
||
|
|
return _unescape(rest[1:-1])
|
||
|
|
|
||
|
|
|
||
|
|
def parse_po(path: Path) -> dict[str, str]:
    """Parse a PO file into a catalog of MO keys and MO values.

    Args:
        path: Location of the ``.po`` file. It is decoded as UTF-8; bytes that
            cannot be decoded are replaced rather than raising.

    Returns:
        A dict whose keys are ``msgid``, or ``msgid + "\\x00" + msgid_plural``
        for plural entries, prefixed with ``msgctxt + "\\x04"`` when a
        non-empty context is present. Values are the ``msgstr`` text, or the
        plural forms joined with ``"\\x00"``. The header key ``""`` is always
        present. Fuzzy and untranslated entries are left out, except for the
        header entry, which is kept regardless.

    Raises:
        ValueError: If a string literal is malformed or a ``msgstr[N]`` line
            has no closing bracket, a non-integer index or a negative index.
    """
    messages: list[tuple[str | None, str, str | None, dict[int, str], set[str]]] = []

    msgctxt: str | None = None
    msgid: str | None = None
    msgid_plural: str | None = None
    msgstr: dict[int, str] = {}
    flags: set[str] = set()
    # Field that bare continuation strings are appended to, as (kind, index);
    # the index is only meaningful when kind is "msgstr".
    active: tuple[str, int] | None = None

    def flush() -> None:
        """Record the entry collected so far (if it has a msgid) and reset."""
        nonlocal msgctxt, msgid, msgid_plural, msgstr, flags, active
        if msgid is not None:
            # msgstr and flags are rebound to fresh objects below, so the
            # stored references are never mutated afterwards.
            messages.append((msgctxt, msgid, msgid_plural, msgstr, flags))
        msgctxt = None
        msgid = None
        msgid_plural = None
        msgstr = {}
        flags = set()
        active = None

    with path.open("r", encoding="utf-8", errors="replace", newline="") as file:
        for raw_line in file:
            # newline="" keeps the original terminators, so strip both kinds.
            line = raw_line.rstrip("\r\n")

            if not line.strip():
                flush()
                continue

            if line.startswith("#"):
                # A comment after a msgstr belongs to the next entry, even
                # when no blank line separates the two entries.
                if msgstr:
                    flush()
                if line.startswith("#,"):
                    for flag in line[2:].split(","):
                        flag = flag.strip()
                        if flag:
                            flags.add(flag)
                continue

            if line.startswith("msgctxt"):
                if msgstr:
                    flush()
                msgctxt = _parse_quoted(line[len("msgctxt") :])
                active = ("msgctxt", 0)
                continue

            # Must be tested before the plain "msgid" prefix.
            if line.startswith("msgid_plural"):
                msgid_plural = _parse_quoted(line[len("msgid_plural") :])
                active = ("msgid_plural", 0)
                continue

            if line.startswith("msgid"):
                if msgstr:
                    flush()
                msgid = _parse_quoted(line[len("msgid") :])
                active = ("msgid", 0)
                continue

            # Must be tested before the plain "msgstr" prefix.
            if line.startswith("msgstr["):
                close = line.find("]")
                if close == -1:
                    raise ValueError(f"Invalid PO plural msgstr line: {line!r}")
                index = int(line[len("msgstr[") : close])
                if index < 0:
                    raise ValueError(f"Invalid PO plural msgstr line: {line!r}")
                msgstr[index] = _parse_quoted(line[close + 1 :])
                active = ("msgstr", index)
                continue

            if line.startswith("msgstr"):
                msgstr[0] = _parse_quoted(line[len("msgstr") :])
                active = ("msgstr", 0)
                continue

            if line.lstrip().startswith('"'):
                # Continuation line: append to whichever field came last.
                value = _parse_quoted(line)
                if active is None:
                    continue
                kind, index = active
                if kind == "msgctxt":
                    msgctxt = (msgctxt or "") + value
                elif kind == "msgid":
                    msgid = (msgid or "") + value
                elif kind == "msgid_plural":
                    msgid_plural = (msgid_plural or "") + value
                elif kind == "msgstr":
                    msgstr[index] = msgstr.get(index, "") + value
                continue

            # Any other line is ignored.

    flush()

    catalog: dict[str, str] = {}
    for ctxt, source, plural, translations, entry_flags in messages:
        if plural is not None:
            key = source + "\x00" + plural
            max_index = max(translations, default=0)
            # Missing plural indices are filled with empty strings.
            value = "\x00".join(translations.get(i, "") for i in range(max_index + 1))
        else:
            key = source
            value = translations.get(0, "")

        if ctxt:
            key = ctxt + "\x04" + key

        # The header (empty key) is always kept: it carries the catalog
        # metadata even when flagged fuzzy. Other entries are dropped when
        # fuzzy or untranslated so lookups fall back to the source text
        # instead of yielding an empty string.
        if key and ("fuzzy" in entry_flags or not translations.get(0, "")):
            continue

        catalog[key] = value

    catalog.setdefault("", "")
    return catalog
|
||
|
|
|
||
|
|
|
||
|
|
def write_mo(catalog: dict[str, str], out_file: Path) -> None:
    """Serialise ``catalog`` to ``out_file`` as a little-endian MO file.

    The file consists of a 28-byte header, a table of (length, offset) pairs
    for the keys, an equally sized table for the values, and finally all
    NUL-terminated UTF-8 strings (keys first, then values). Entries are
    ordered by key. The hash table size and offset are both written as zero.

    Args:
        catalog: Mapping of original strings to translated strings.
        out_file: Destination path; missing parent directories are created.
    """
    ordered_keys = sorted(catalog)
    originals = [key.encode("utf-8") for key in ordered_keys]
    translations = [catalog[key].encode("utf-8") for key in ordered_keys]

    total = len(ordered_keys)
    descriptor_bytes = total * 8  # two 32-bit fields per string
    originals_table_at = 7 * 4  # the header is seven 32-bit fields
    translations_table_at = originals_table_at + descriptor_bytes
    strings_at = translations_table_at + descriptor_bytes

    # Both descriptor tables are adjacent in the file and the string data
    # holds keys followed by values, so one pass over keys-then-values yields
    # the descriptors in file order.
    descriptors: list[tuple[int, int]] = []
    blob = bytearray()
    for encoded in originals + translations:
        descriptors.append((len(encoded), strings_at + len(blob)))
        blob += encoded
        blob.append(0)

    header_fields = (
        0x950412DE,  # magic
        0,  # version
        total,
        originals_table_at,
        translations_table_at,
        0,  # hash table size
        0,  # hash table offset
    )

    out_file.parent.mkdir(parents=True, exist_ok=True)
    with out_file.open("wb") as file:
        file.write(struct.pack("<Iiiiiii", *header_fields))
        for length, offset in descriptors:
            file.write(struct.pack("<II", length, offset))
        file.write(blob)
|
||
|
|
|
||
|
|
|
||
|
|
def main() -> int:
    """Command-line entry point: compile one .po file into a .mo/.gmo file.

    Reads the input path and the required ``-o/--output`` path from the
    command line, parses the input and writes the compiled catalog.

    Returns:
        ``0`` once the output file has been written.
    """
    cli = argparse.ArgumentParser(description="Compile a .po file into a GNU .mo/.gmo file.")
    cli.add_argument("input", type=Path, help="Input .po file")
    cli.add_argument("-o", "--output", type=Path, required=True, help="Output .mo/.gmo file")
    options = cli.parse_args()

    write_mo(parse_po(options.input), options.output)
    return 0
|
||
|
|
|
||
|
|
|
||
|
|
# Run the compiler only when executed as a script; main()'s return value
# becomes the process exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)
|
||
|
|
|