#!/usr/bin/env python3 import argparse import functools import json import hashlib import logging import os import pickle import re import subprocess import sys from datetime import datetime from pathlib import Path from typing import Dict, List, Optional, TextIO, Tuple, Union from dataclasses import dataclass from bdflib import reader as bdfreader from bdflib.model import Font, Glyph import font_tables import brieflz import objcopy logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) HERE = Path(__file__).resolve().parent @functools.lru_cache(maxsize=None) def cjk_font() -> Font: with open(os.path.join(HERE, "wqy-bitmapsong/wenquanyi_9pt.bdf"), "rb") as f: return bdfreader.read_bdf(f) # Loading a single JSON file def load_json(filename: str) -> dict: with open(filename) as f: return json.loads(f.read()) def get_language_unqiue_id(language_ascii_name: str): """ Given a language code, it will return a unique (enough) uint16_t id code When we have a collision here we can tweak this, but language list should be fairly stable from now on """ return ( int(hashlib.sha1(language_ascii_name.encode("utf-8")).hexdigest(), 16) % 0xFFFF ) def read_translation(json_root: Union[str, Path], lang_code: str) -> dict: filename = f"translation_{lang_code}.json" file_with_path = os.path.join(json_root, filename) try: lang = load_json(file_with_path) except json.decoder.JSONDecodeError as e: logging.error(f"Failed to decode {filename}") logging.exception(str(e)) sys.exit(2) validate_langcode_matches_content(filename, lang) return lang def filter_translation(lang: dict, defs: dict, macros: frozenset): def check_excluded(record): if "include" in record and not any(m in macros for m in record["include"]): return True if "exclude" in record and any(m in macros for m in record["exclude"]): return True return False for category in ("menuOptions", "menuGroups", "menuValues"): for _, record in enumerate(defs[category]): if check_excluded(record): lang[category][record["id"]]["displayText"] = "" lang[category][record["id"]]["description"] = "" for _, record in enumerate(defs["messagesWarn"]): if check_excluded(record): lang["messagesWarn"][record["id"]]["message"] = "" return lang def validate_langcode_matches_content(filename: str, content: dict) -> None: # Extract lang code from file name lang_code = filename[12:-5].upper() # ...and the one specified in the JSON file... try: lang_code_from_json = content["languageCode"] except KeyError: lang_code_from_json = "(missing)" # ...cause they should be the same! if lang_code != lang_code_from_json: raise ValueError( f"Invalid languageCode {lang_code_from_json} in file {filename}" ) def write_start(f: TextIO): f.write( "// WARNING: THIS FILE WAS AUTO GENERATED BY make_translation.py. PLEASE DO NOT EDIT.\n" ) f.write("\n") f.write('#include "Translation.h"\n') def get_constants() -> List[Tuple[str, str]]: # Extra constants that are used in the firmware that are shared across all languages return [ ("LargeSymbolPlus", "+"), ("SmallSymbolPlus", "+"), ("LargeSymbolMinus", "-"), ("SmallSymbolMinus", "-"), ("LargeSymbolSpace", " "), ("SmallSymbolSpace", " "), ("LargeSymbolDot", "."), ("SmallSymbolDot", "."), ("SmallSymbolSlash", "/"), ("SmallSymbolColon", ":"), ("LargeSymbolDegC", "C"), ("SmallSymbolDegC", "C"), ("LargeSymbolDegF", "F"), ("SmallSymbolDegF", "F"), ("LargeSymbolMinutes", "m"), ("SmallSymbolMinutes", "m"), ("LargeSymbolSeconds", "s"), ("SmallSymbolSeconds", "s"), ("LargeSymbolWatts", "W"), ("SmallSymbolWatts", "W"), ("LargeSymbolVolts", "V"), ("SmallSymbolVolts", "V"), ("SmallSymbolAmps", "A"), ("LargeSymbolDC", "DC"), ("LargeSymbolCellCount", "S"), ("SmallSymbolVersionNumber", read_version()), ("SmallSymbolPDDebug", "PD Debug"), ("SmallSymbolState", "State"), ("SmallSymbolNoVBus", "No VBus"), ("SmallSymbolVBus", "VBus"), ("LargeSymbolSleep", "Zzz "), ] def get_debug_menu() -> List[str]: return [ datetime.today().strftime("%Y-%m-%d"), "ID ", "ACC ", "PWR ", "Vin ", "Tip C ", "Han C ", "Max C ", "UpTime ", "Move ", "Tip Res", "Tip R ", "Tip O ", "HW G ", "HW M ", "HW P ", "Hall ", ] def get_accel_names_list() -> List[str]: return [ "Scanning", "None", "MMA8652FC", "LIS2DH12", "BMA223", "MSA301", "SC7A20", "GPIO", "LIS2 CLONE", ] def get_power_source_list() -> List[str]: return [ "DC", "QC", "PV:PDwVBus", "PD:No VBus", ] def test_is_small_font(msg: str) -> bool: return "\n" in msg and msg[0] != "\n" def get_letter_counts(defs: dict, lang: dict, build_version: str) -> Dict: """From the source definitions, language file and build version; calculates the ranked symbol list Args: defs (dict): Definitions lang (dict): Language lookup build_version (str): The build version id to ensure its letters are included Returns: Dict: _description_ """ big_font_messages = [] small_font_messages = [] # iterate over all strings obj = lang["messagesWarn"] for mod in defs["messagesWarn"]: eid = mod["id"] msg = obj[eid]["message"] if test_is_small_font(msg): small_font_messages.append(msg) else: big_font_messages.append(msg) obj = lang["characters"] for mod in defs["characters"]: eid = mod["id"] msg = obj[eid] if test_is_small_font(msg): small_font_messages.append(msg) else: big_font_messages.append(msg) obj = lang["menuOptions"] for mod in defs["menuOptions"]: eid = mod["id"] msg = obj[eid]["displayText"] if test_is_small_font(msg): small_font_messages.append(msg) else: big_font_messages.append(msg) obj = lang["menuOptions"] for mod in defs["menuOptions"]: eid = mod["id"] msg = obj[eid]["description"] big_font_messages.append(msg) obj = lang["menuValues"] for mod in defs["menuValues"]: eid = mod["id"] msg = obj[eid]["displayText"] if test_is_small_font(msg): small_font_messages.append(msg) else: big_font_messages.append(msg) obj = lang["menuGroups"] for mod in defs["menuGroups"]: eid = mod["id"] msg = obj[eid]["displayText"] if test_is_small_font(msg): small_font_messages.append(msg) else: big_font_messages.append(msg) obj = lang["menuGroups"] for mod in defs["menuGroups"]: eid = mod["id"] msg = obj[eid]["description"] big_font_messages.append(msg) constants = get_constants() for x in constants: if x[0].startswith("Small"): small_font_messages.append(x[1]) else: big_font_messages.append(x[1]) small_font_messages.extend(get_debug_menu()) small_font_messages.extend(get_accel_names_list()) small_font_messages.extend(get_power_source_list()) # collapse all strings down into the composite letters and store totals for these # Doing this seperately for small and big font def sort_and_count(list_in: List[str]): symbol_counts: dict[str, int] = {} for line in list_in: line = line.replace("\n", "").replace("\r", "") line = line.replace("\\n", "").replace("\\r", "") if line: for letter in line: symbol_counts[letter] = symbol_counts.get(letter, 0) + 1 # swap to Big -> little sort order return symbol_counts small_symbol_counts = sort_and_count(small_font_messages) big_symbol_counts = sort_and_count(big_font_messages) return { "smallFontCounts": small_symbol_counts, "bigFontCounts": big_symbol_counts, } def convert_letter_counts_to_ranked_symbols_with_forced( symbol_dict: Dict[str, int] ) -> List[str]: # Add in forced symbols first ranked_symbols = [] ranked_symbols.extend(get_forced_first_symbols()) # Now add in all the others based on letter count symbols_by_occurrence = [ x[0] for x in sorted( symbol_dict.items(), key=lambda kv: (kv[1], kv[0]), reverse=True ) ] ranked_symbols.extend([x for x in symbols_by_occurrence if x not in ranked_symbols]) return ranked_symbols def merge_letter_count_info(a: Dict, b: Dict) -> Dict: """Merge the results from get_letter_counts Combining the ranked symbols lists Args: a (Dict): get_letter_counts b (Dict): get_letter_counts Returns: Dict: get_letter_counts """ smallFontCounts = {} bigFontCounts = {} for x in a.get("smallFontCounts", []): old = smallFontCounts.get(x, 0) old += a["smallFontCounts"][x] smallFontCounts[x] = old for x in a.get("bigFontCounts", []): old = bigFontCounts.get(x, 0) old += a["bigFontCounts"][x] bigFontCounts[x] = old for x in b.get("smallFontCounts", []): old = smallFontCounts.get(x, 0) old += b["smallFontCounts"][x] smallFontCounts[x] = old for x in b.get("bigFontCounts", []): old = bigFontCounts.get(x, 0) old += b["bigFontCounts"][x] bigFontCounts[x] = old return { "smallFontCounts": smallFontCounts, "bigFontCounts": bigFontCounts, } def get_cjk_glyph(sym: str) -> Optional[bytes]: try: glyph: Glyph = cjk_font()[ord(sym)] except KeyError: return None data = glyph.data src_left, src_bottom, src_w, src_h = glyph.get_bounding_box() dst_w = 12 dst_h = 16 # The source data is a per-row list of ints. The first item is the bottom- # most row. For each row, the LSB is the right-most pixel. # Here, (x, y) is the coordinates with origin at the top-left. def get_cell(x: int, y: int) -> bool: # Adjust x coordinates by actual bounding box. adj_x = x - src_left if adj_x < 0 or adj_x >= src_w: return False # Adjust y coordinates by actual bounding box, then place the glyph # baseline 3px above the bottom edge to make it centre-ish. # This metric is optimized for WenQuanYi Bitmap Song 9pt and assumes # each glyph is to be placed in a 12x12px box. adj_y = y - (dst_h - src_h - src_bottom - 3) if adj_y < 0 or adj_y >= src_h: return False if data[src_h - adj_y - 1] & (1 << (src_w - adj_x - 1)): return True else: return False # A glyph in the font table is divided into upper and lower parts, each by # 8px high. Each byte represents half if a column, with the LSB being the # top-most pixel. The data goes from the left-most to the right-most column # of the top half, then from the left-most to the right-most column of the # bottom half. bs = bytearray() for block in range(2): for c in range(dst_w): b = 0 for r in range(8): if get_cell(c, r + 8 * block): b |= 0x01 << r bs.append(b) return bytes(bs) def get_bytes_from_font_index(index: int) -> bytes: """ Converts the font table index into its corresponding bytes """ # We want to be able to use more than 254 symbols (excluding \x00 null # terminator and \x01 new-line) in the font table but without making all # the chars take 2 bytes. To do this, we use \xF1 to \xFF as lead bytes # to designate double-byte chars, and leave the remaining as single-byte # chars. # # For the sake of sanity, \x00 always means the end of string, so we skip # \xF1\x00 and others in the mapping. # # Mapping example: # # 0x02 => 2 # 0x03 => 3 # ... # 0xEF => 239 # 0xF0 => 240 # 0xF1 0x01 => 1 * 0xFF - 15 + 1 = 241 # 0xF1 0x02 => 1 * 0xFF - 15 + 2 = 242 # ... # 0xF1 0xFF => 1 * 0xFF - 15 + 255 = 495 # 0xF2 0x01 => 2 * 0xFF - 15 + 1 = 496 # ... # 0xF2 0xFF => 2 * 0xFF - 15 + 255 = 750 # 0xF3 0x01 => 3 * 0xFF - 15 + 1 = 751 # ... # 0xFF 0xFF => 15 * 0xFF - 15 + 255 = 4065 if index < 0: raise ValueError("index must be positive") page = (index + 0x0E) // 0xFF if page > 0x0F: raise ValueError("page value out of range") if page == 0: return bytes([index]) else: # Into extended range # Leader is 0xFz where z is the page number # Following char is the remainder leader = page + 0xF0 value = ((index + 0x0E) % 0xFF) + 0x01 if leader > 0xFF or value > 0xFF: raise ValueError("value is out of range") return bytes([leader, value]) def bytes_to_escaped(b: bytes) -> str: return "".join((f"\\x{i:02X}" for i in b)) def bytes_to_c_hex(b: bytes) -> str: return ", ".join((f"0x{i:02X}" for i in b)) + "," @dataclass class FontMapsPerFont: font12_symbols_ordered: List[str] font12_maps: Dict[str, Dict[str, bytes]] font06_symbols_ordered: List[str] font06_maps: Dict[str, Dict[str, bytes]] def get_font_map_per_font( text_list_small_font: List[str], text_list_large_font: List[str] ) -> FontMapsPerFont: pending_small_symbols = set(text_list_small_font) pending_large_symbols = set(text_list_large_font) if len(pending_small_symbols) != len(text_list_small_font): raise ValueError("`text_list_small_font` contains duplicated symbols") if len(pending_large_symbols) != len(text_list_large_font): raise ValueError("`text_list_large_font` contains duplicated symbols") total_symbol_count_small = len(pending_small_symbols) # \x00 is for NULL termination and \x01 is for newline, so the maximum # number of symbols allowed is as follow (see also the comments in # `get_bytes_from_font_index`): if total_symbol_count_small > (0x10 * 0xFF - 15) - 2: # 4063 raise ValueError( f"Error, too many used symbols for this version (total {total_symbol_count_small})" ) logging.info(f"Generating fonts for {total_symbol_count_small} symbols") total_symbol_count_large = len(pending_large_symbols) # \x00 is for NULL termination and \x01 is for newline, so the maximum # number of symbols allowed is as follow (see also the comments in # `get_bytes_from_font_index`): if total_symbol_count_large > (0x10 * 0xFF - 15) - 2: # 4063 raise ValueError( f"Error, too many used symbols for this version (total {total_symbol_count_large})" ) logging.info(f"Generating fonts for {total_symbol_count_large} symbols") # Build the full font maps font12_map: Dict[str, bytes] = {} font06_map: Dict[str, bytes] = {} # First we go through and do all of the CJK characters that are in the large font to have them removed for sym in text_list_large_font: font12_line = get_cjk_glyph(sym) if font12_line is None: continue font12_map[sym] = font12_line pending_large_symbols.remove(sym) # Now that all CJK characters are done, we next have to fill out all of the small and large fonts from the remainders # This creates our superset of characters to reference off that are pre-rendered ones (non CJK) # Collect font bitmaps by the defined font order: for font in font_tables.ALL_PRE_RENDERED_FONTS: font12, font06 = font_tables.get_font_maps_for_name(font) font12_map.update(font12) font06_map.update(font06) # LARGE FONT for sym in text_list_large_font: if sym in pending_large_symbols: font_data = font12_map.get(sym, None) if font_data is None: raise KeyError(f"Symbol |{sym}| is missing in large font set") font12_map[sym] = font_data pending_large_symbols.remove(sym) if len(pending_large_symbols) > 0: raise KeyError( f"Missing large font symbols for {len(pending_large_symbols)} characters: {pending_large_symbols}" ) # SMALL FONT for sym in text_list_small_font: if sym in pending_small_symbols: font_data = font06_map.get(sym, None) if font_data is None: raise KeyError(f"Symbol |{sym}| is missing in small font set") font06_map[sym] = font_data pending_small_symbols.remove(sym) if len(pending_small_symbols) > 0: raise KeyError( f"Missing small font symbols for {len(pending_small_symbols)} characters: {pending_small_symbols}" ) return FontMapsPerFont( text_list_large_font, font12_map, text_list_small_font, font06_map ) def get_forced_first_symbols() -> List[str]: """Get the list of symbols that must always occur at start of small and large fonts Used by firmware for displaying numbers and hex strings Returns: List[str]: List of single character strings that must be the first N entries in a font table """ forced_first_symbols = [ "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f", " ", # We lock these to ease printing functions; and they are always included due to constants "-", "+", ] return forced_first_symbols def build_symbol_conversion_map(sym_list: List[str]) -> Dict[str, bytes]: forced_first_symbols = get_forced_first_symbols() if sym_list[: len(forced_first_symbols)] != forced_first_symbols: raise ValueError("Symbol list does not start with forced_first_symbols.") # the text list is sorted # allocate out these in their order as number codes symbol_map: Dict[str, bytes] = {"\n": bytes([1])} index = 2 # start at 2, as 0= null terminator,1 = new line # Assign symbol bytes by font index for index, sym in enumerate(sym_list, index): assert sym not in symbol_map symbol_map[sym] = get_bytes_from_font_index(index) return symbol_map def make_font_table_cpp( small_font_sym_list: List[str], large_font_sym_list: List[str], font_map: FontMapsPerFont, small_symbol_map: Dict[str, bytes], large_symbol_map: Dict[str, bytes], ) -> str: output_table = make_font_table_named_cpp( "USER_FONT_12", large_font_sym_list, font_map.font12_maps ) output_table += make_font_table_06_cpp(small_font_sym_list, font_map) return output_table def make_font_table_named_cpp( name: Optional[str], sym_list: List[str], font_map: Dict[str, bytes], ) -> str: output_table = "" if name: output_table = f"const uint8_t {name}[] = {{\n" for i, sym in enumerate(sym_list): output_table += f"{bytes_to_c_hex(font_map[sym])}//0x{i+2:X} -> {sym}\n" if name: output_table += f"}}; // {name}\n" return output_table def make_font_table_06_cpp(sym_list: List[str], font_map: FontMapsPerFont) -> str: output_table = "const uint8_t USER_FONT_6x8[] = {\n" for i, sym in enumerate(sym_list): font_bytes = font_map.font06_maps[sym] if font_bytes: font_line = bytes_to_c_hex(font_bytes) else: font_line = "// " # placeholder output_table += f"{font_line}//0x{i+2:X} -> {sym}\n" output_table += "};\n" return output_table def convert_string_bytes(symbol_conversion_table: Dict[str, bytes], text: str) -> bytes: # convert all of the symbols from the string into bytes for their content output_string = b"" for c in text.replace("\\r", "").replace("\\n", "\n"): if c not in symbol_conversion_table: print(symbol_conversion_table) logging.error(f"Missing font definition for {c}") raise KeyError(f"Missing font definition for {c}") else: output_string += symbol_conversion_table[c] return output_string def convert_string(symbol_conversion_table: Dict[str, bytes], text: str) -> str: # convert all of the symbols from the string into escapes for their content return bytes_to_escaped(convert_string_bytes(symbol_conversion_table, text)) def escape(string: str) -> str: return json.dumps(string, ensure_ascii=False) def write_bytes_as_c_array( f: TextIO, name: str, data: bytes, indent: int = 2, bytes_per_line: int = 16 ) -> None: f.write(f"const uint8_t {name}[] = {{\n") for i in range(0, len(data), bytes_per_line): f.write(" " * indent) f.write(", ".join((f"0x{b:02X}" for b in data[i : i + bytes_per_line]))) f.write(",\n") f.write(f"}}; // {name}\n\n") @dataclass class LanguageData: langs: List[dict] defs: dict build_version: str small_text_symbols: List[str] large_text_symbols: List[str] font_map: FontMapsPerFont def prepare_language(lang: dict, defs: dict, build_version: str) -> LanguageData: language_code: str = lang["languageCode"] logging.info(f"Preparing language data for {language_code}") # Iterate over all of the text to build up the symbols & counts letter_count_data = get_letter_counts(defs, lang, build_version) small_font_symbols = convert_letter_counts_to_ranked_symbols_with_forced( letter_count_data["smallFontCounts"] ) large_font_symbols = convert_letter_counts_to_ranked_symbols_with_forced( letter_count_data["bigFontCounts"] ) # From the letter counts, need to make a symbol index and matching font index font_data = get_font_map_per_font(small_font_symbols, large_font_symbols) return LanguageData( [lang], defs, build_version, small_font_symbols, large_font_symbols, font_data, ) def prepare_languages( langs: List[dict], defs: dict, build_version: str ) -> LanguageData: language_codes: List[str] = [lang["languageCode"] for lang in langs] logging.info(f"Preparing language data for {language_codes}") # Build the full font maps total_symbol_counts: Dict[str, Dict[str, int]] = {} for lang in langs: letter_count_data = get_letter_counts(defs, lang, build_version) total_symbol_counts = merge_letter_count_info( total_symbol_counts, letter_count_data ) small_font_symbols = convert_letter_counts_to_ranked_symbols_with_forced( total_symbol_counts["smallFontCounts"] ) large_font_symbols = convert_letter_counts_to_ranked_symbols_with_forced( total_symbol_counts["bigFontCounts"] ) font_data = get_font_map_per_font(small_font_symbols, large_font_symbols) return LanguageData( langs, defs, build_version, small_font_symbols, large_font_symbols, font_data, ) def render_font_block(data: LanguageData, f: TextIO, compress_font: bool = False): font_map = data.font_map small_font_symbol_conversion_table = build_symbol_conversion_map( data.small_text_symbols ) large_font_symbol_conversion_table = build_symbol_conversion_map( data.large_text_symbols ) if not compress_font: font_table_text = make_font_table_cpp( data.small_text_symbols, data.large_text_symbols, font_map, small_font_symbol_conversion_table, large_font_symbol_conversion_table, ) f.write(font_table_text) f.write( "const FontSection FontSectionInfo = {\n" " .font12_start_ptr = USER_FONT_12,\n" " .font06_start_ptr = USER_FONT_6x8,\n" " .font12_decompressed_size = 0,\n" " .font06_decompressed_size = 0,\n" " .font12_compressed_source = 0,\n" " .font06_compressed_source = 0,\n" "};\n" ) else: font12_uncompressed = bytearray() for sym in data.large_text_symbols: font12_uncompressed.extend(font_map.font12_maps[sym]) font12_compressed = brieflz.compress(bytes(font12_uncompressed)) logging.info( f"Font table 12x16 compressed from {len(font12_uncompressed)} to {len(font12_compressed)} bytes (ratio {len(font12_compressed) / len(font12_uncompressed):.3})" ) write_bytes_as_c_array(f, "font_12x16_brieflz", font12_compressed) font06_uncompressed = bytearray() for sym in data.small_text_symbols: font06_uncompressed.extend(font_map.font06_maps[sym]) font06_compressed = brieflz.compress(bytes(font06_uncompressed)) logging.info( f"Font table 06x08 compressed from {len(font06_uncompressed)} to {len(font06_compressed)} bytes (ratio {len(font06_compressed) / len(font06_uncompressed):.3})" ) write_bytes_as_c_array(f, "font_06x08_brieflz", font06_compressed) f.write( f"static uint8_t font12_out_buffer[{len(font12_uncompressed)}];\n" f"static uint8_t font06_out_buffer[{len(font06_uncompressed)}];\n" "const FontSection FontSectionInfo = {\n" " .font12_start_ptr = font12_out_buffer,\n" " .font06_start_ptr = font06_out_buffer,\n" f" .font12_decompressed_size = {len(font12_uncompressed)},\n" f" .font06_decompressed_size = {len(font06_uncompressed)},\n" " .font12_compressed_source = font_12x16_brieflz,\n" " .font06_compressed_source = font_06x08_brieflz,\n" "};\n" ) def write_language( data: LanguageData, f: TextIO, strings_bin: Optional[bytes] = None, compress_font: bool = False, ) -> None: if len(data.langs) > 1: raise ValueError("More than 1 languages are provided") lang = data.langs[0] defs = data.defs small_font_symbol_conversion_table = build_symbol_conversion_map( data.small_text_symbols ) large_font_symbol_conversion_table = build_symbol_conversion_map( data.large_text_symbols ) language_code: str = lang["languageCode"] logging.info(f"Generating block for {language_code}") try: lang_name = lang["languageLocalName"] except KeyError: lang_name = language_code if strings_bin or compress_font: f.write('#include "brieflz.h"\n') f.write(f"\n// ---- {lang_name} ----\n\n") render_font_block(data, f, compress_font) f.write(f"\n// ---- {lang_name} ----\n\n") translation_common_text = get_translation_common_text( small_font_symbol_conversion_table, large_font_symbol_conversion_table ) f.write(translation_common_text) f.write( f"const bool HasFahrenheit = {('true' if lang.get('tempUnitFahrenheit', True) else 'false')};\n\n" ) if not strings_bin: translation_strings_and_indices_text = get_translation_strings_and_indices_text( lang, defs, small_font_symbol_conversion_table, large_font_symbol_conversion_table, ) f.write(translation_strings_and_indices_text) f.write( "const TranslationIndexTable *Tr = &translation.indices;\n" "const char *TranslationStrings = translation.strings;\n\n" ) else: compressed = brieflz.compress(strings_bin) logging.info( f"Strings compressed from {len(strings_bin)} to {len(compressed)} bytes (ratio {len(compressed) / len(strings_bin):.3})" ) write_bytes_as_c_array(f, "translation_data_brieflz", compressed) f.write( f"static uint8_t translation_data_out_buffer[{len(strings_bin)}] __attribute__((__aligned__(2)));\n\n" "const TranslationIndexTable *Tr = reinterpret_cast(translation_data_out_buffer);\n" "const char *TranslationStrings = reinterpret_cast(translation_data_out_buffer) + sizeof(TranslationIndexTable);\n\n" ) if not strings_bin and not compress_font: f.write("void prepareTranslations() {}\n\n") else: f.write("void prepareTranslations() {\n") if compress_font: f.write( " blz_depack_srcsize(font_12x16_brieflz, font_out_buffer, sizeof(font_12x16_brieflz));\n" ) if strings_bin: f.write( " blz_depack_srcsize(translation_data_brieflz, translation_data_out_buffer, sizeof(translation_data_brieflz));\n" ) f.write("}\n\n") sanity_checks_text = get_translation_sanity_checks_text(defs) f.write(sanity_checks_text) def write_languages( data: LanguageData, f: TextIO, strings_obj_path: Optional[str] = None, compress_font: bool = False, ) -> None: defs = data.defs small_font_symbol_conversion_table = build_symbol_conversion_map( data.small_text_symbols ) large_font_symbol_conversion_table = build_symbol_conversion_map( data.large_text_symbols ) language_codes: List[str] = [lang["languageCode"] for lang in data.langs] logging.info(f"Generating block for {language_codes}") lang_names = [ lang.get("languageLocalName", lang["languageCode"]) for lang in data.langs ] f.write('#include "Translation_multi.h"') f.write(f"\n// ---- {lang_names} ----\n\n") render_font_block(data, f, compress_font) f.write(f"\n// ---- {lang_names} ----\n\n") translation_common_text = get_translation_common_text( small_font_symbol_conversion_table, large_font_symbol_conversion_table ) f.write(translation_common_text) f.write( f"const bool HasFahrenheit = {('true' if any([lang.get('tempUnitFahrenheit', True) for lang in data.langs]) else 'false')};\n\n" ) max_decompressed_translation_size = 0 if not strings_obj_path: for lang in data.langs: lang_code = lang["languageCode"] translation_strings_and_indices_text = ( get_translation_strings_and_indices_text( lang, defs, small_font_symbol_conversion_table, large_font_symbol_conversion_table, suffix=f"_{lang_code}", ) ) f.write(translation_strings_and_indices_text) f.write("const LanguageMeta LanguageMetas[] = {\n") for lang in data.langs: lang_code = lang["languageCode"] lang_id = get_language_unqiue_id(lang_code) f.write( " {\n" f" .uniqueID = {lang_id},\n" f" .translation_data = reinterpret_cast(&translation_{lang_code}),\n" f" .translation_size = sizeof(translation_{lang_code}),\n" f" .translation_is_compressed = false,\n" " },\n" ) f.write("};\n") else: for lang in data.langs: lang_code = lang["languageCode"] sym_name = objcopy.cpp_var_to_section_name(f"translation_{lang_code}") strings_bin = objcopy.get_binary_from_obj(strings_obj_path, sym_name) if len(strings_bin) == 0: raise ValueError(f"Output for {sym_name} is empty") max_decompressed_translation_size = max( max_decompressed_translation_size, len(strings_bin) ) compressed = brieflz.compress(strings_bin) logging.info( f"Strings for {lang_code} compressed from {len(strings_bin)} to {len(compressed)} bytes (ratio {len(compressed) / len(strings_bin):.3})" ) write_bytes_as_c_array( f, f"translation_data_brieflz_{lang_code}", compressed ) f.write("const LanguageMeta LanguageMetas[] = {\n") for lang in data.langs: lang_code = lang["languageCode"] lang_id = get_language_unqiue_id(lang_code) f.write( " {\n" f" .uniqueID = {lang_id},\n" f" .translation_data = translation_data_brieflz_{lang_code},\n" f" .translation_size = sizeof(translation_data_brieflz_{lang_code}),\n" f" .translation_is_compressed = true,\n" " },\n" ) f.write("};\n") f.write( "const uint8_t LanguageCount = sizeof(LanguageMetas) / sizeof(LanguageMetas[0]);\n\n" f"alignas(TranslationData) uint8_t translation_data_out_buffer[{max_decompressed_translation_size }];\n" "const uint16_t translation_data_out_buffer_size = sizeof(translation_data_out_buffer);\n\n" ) sanity_checks_text = get_translation_sanity_checks_text(defs) f.write(sanity_checks_text) def get_translation_common_text( small_symbol_conversion_table: Dict[str, bytes], large_symbol_conversion_table: Dict[str, bytes], ) -> str: translation_common_text = "" # Write out firmware constant options constants = get_constants() for x in constants: if x[0].startswith("Small"): translation_common_text += f'const char* {x[0]} = "{convert_string(small_symbol_conversion_table, x[1])}";//{x[1]} \n' elif x[0].startswith("Large"): str = x[1] translation_common_text += f'const char* {x[0]} = "{convert_string(large_symbol_conversion_table, str)}";//{x[1]} \n' else: raise ValueError(f"Constant {x} is not size encoded") translation_common_text += "\n" # Debug Menu translation_common_text += "const char* DebugMenu[] = {\n" for c in get_debug_menu(): translation_common_text += ( f'\t "{convert_string(small_symbol_conversion_table, c)}",//"{c}" \n' ) translation_common_text += "};\n\n" # accel names translation_common_text += "const char* AccelTypeNames[] = {\n" for c in get_accel_names_list(): translation_common_text += ( f'\t "{convert_string(small_symbol_conversion_table, c)}",//{c} \n' ) translation_common_text += "};\n\n" # power source types translation_common_text += "const char* PowerSourceNames[] = {\n" for c in get_power_source_list(): translation_common_text += ( f'\t "{convert_string(small_symbol_conversion_table, c)}",//{c} \n' ) translation_common_text += "};\n\n" return translation_common_text @dataclass class TranslationItem: info: str str_index: int def get_translation_strings_and_indices_text( lang: dict, defs: dict, small_font_symbol_conversion_table: Dict[str, bytes], large_font_symbol_conversion_table: Dict[str, bytes], suffix: str = "", ) -> str: # For all strings; we want to convert them to their byte encoded form (using font index lookups) # Then we want to sort by their reversed format to see if we can remove any duplicates by combining the tails (last n bytes;n>0) # Finally we look for any that are contained inside one another, and if they are we update them to point to this # _OR_ we can be lazy and abuse cpu power and just make python search for our substring each time we append byte_encoded_strings: List[bytes] = [] # List of byte arrays of encoded strings byte_encoded_strings_unencoded_reference: List[str] = [] @dataclass class TranslatedStringLocation: byte_encoded_translation_index: int = 0 str_start_offset: int = 0 translated_string_lookups: Dict[str, TranslatedStringLocation] = {} # We do the collapse on the encoded strings; since we are doing different fonts, this avoids needing to track fonts # Also means if things line up nicely for us; we can do it across fonts (rare) def add_encoded_string( unencoded_string: str, encoded_string: bytes, translation_id: str ): for i, byte_data in enumerate(byte_encoded_strings): if byte_data.endswith(encoded_string): logging.info(f"Collapsing {translation_id}") record = TranslatedStringLocation( i, len(byte_data) - len(encoded_string) ) translated_string_lookups[translation_id] = record return byte_encoded_strings.append(encoded_string) byte_encoded_strings_unencoded_reference.append(unencoded_string) record = TranslatedStringLocation(len(byte_encoded_strings) - 1, 0) translated_string_lookups[translation_id] = record def encode_string_and_add( message: str, translation_id: str, force_large_text: bool = False ): encoded_data: bytes if force_large_text is False and test_is_small_font(message): encoded_data = convert_string_bytes( small_font_symbol_conversion_table, message ) else: if force_large_text is False: message = "\n" + message encoded_data = convert_string_bytes( large_font_symbol_conversion_table, message ) add_encoded_string(message, encoded_data, translation_id) for index, record in enumerate(defs["menuOptions"]): lang_data = lang["menuOptions"][record["id"]] # Add to translations the menu text and the description encode_string_and_add( lang_data["description"], "menuOptions" + record["id"] + "description", True ) encode_string_and_add( lang_data["displayText"], "menuOptions" + record["id"] + "displayText" ) for index, record in enumerate(defs["menuValues"]): lang_data = lang["menuValues"][record["id"]] # Add to translations the menu text and the description encode_string_and_add( lang_data["displayText"], "menuValues" + record["id"] + "displayText" ) for index, record in enumerate(defs["menuGroups"]): lang_data = lang["menuGroups"][record["id"]] # Add to translations the menu text and the description encode_string_and_add( lang_data["description"], "menuGroups" + record["id"] + "description", True ) encode_string_and_add( lang_data["displayText"], "menuGroups" + record["id"] + "displayText" ) for index, record in enumerate(defs["messagesWarn"]): lang_data = lang["messagesWarn"][record["id"]] # Add to translations the menu text and the description encode_string_and_add( lang_data["message"], "messagesWarn" + record["id"] + "Message" ) for index, record in enumerate(defs["characters"]): lang_data = lang["characters"][record["id"]] # Add to translations the menu text and the description encode_string_and_add(lang_data, "characters" + record["id"] + "Message", True) # ----- Write the string table: offset = 0 # NOTE: Cannot specify C99 designator here due to GCC (g++) bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=55227 translation_strings_text = " /* .strings = */ {\n" for i, encoded_bytes in enumerate(byte_encoded_strings): if i > 0: translation_strings_text += ' "\\0"\n' # Write a comment of what it is translation_strings_text += f" // {offset: >4}: {escape(byte_encoded_strings_unencoded_reference[i])}\n" # Write the actual data translation_strings_text += f' "{bytes_to_escaped(encoded_bytes)}"' offset += len(encoded_bytes) + 1 translation_strings_text += "\n }, // .strings\n\n" str_total_bytes = offset ################# Part 2: Emit all the string offsets string_index_commulative_lengths = [] position = 0 for string in byte_encoded_strings: string_index_commulative_lengths.append(position) position += len(string) + 1 translation_indices_text = " .indices = {\n" # Write out the constant strings (ones we reference directly) for _, record in enumerate(defs["messagesWarn"]): # Add to translations the menu text and the description lang_data = lang["messagesWarn"][record["id"]] key = "messagesWarn" + record["id"] + "Message" translated_index = translated_string_lookups[key] string_index = translated_index.byte_encoded_translation_index start_index = ( string_index_commulative_lengths[string_index] + translated_index.str_start_offset ) translation_indices_text += ( f" .{record['id']} = {start_index}, // {escape(lang_data['message'])}\n" ) translation_indices_text += "\n" # Constant short values we use in settings menu for _, record in enumerate(defs["characters"]): # Add to translations the menu text and the description lang_data = lang["characters"][record["id"]] key = "characters" + record["id"] + "Message" translated_index = translated_string_lookups[key] string_index = translated_index.byte_encoded_translation_index start_index = ( string_index_commulative_lengths[string_index] + translated_index.str_start_offset ) translation_indices_text += ( f" .{record['id']} = {start_index}, // {escape(lang_data)}\n" ) for _, record in enumerate(defs["menuValues"]): # Add to translations the menu text and the description lang_data = lang["menuValues"][record["id"]] key = "menuValues" + record["id"] + "displayText" translated_index = translated_string_lookups[key] string_index = translated_index.byte_encoded_translation_index start_index = ( string_index_commulative_lengths[string_index] + translated_index.str_start_offset ) translation_indices_text += ( f" .{record['id']} = {start_index}, // {escape(lang_data)}\n" ) translation_indices_text += "\n" # Now for the fun ones, where they are nested and ordered def write_grouped_indexes(output_text: str, name: str, mainKey: str, subKey: str): max_len = 30 output_text += f" .{name} = {{\n" for index, record in enumerate(defs[mainKey]): lang_data = lang[mainKey][record["id"]] key = mainKey + record["id"] + subKey raw_string = lang_data[subKey] translated_index = translated_string_lookups[key] string_index = translated_index.byte_encoded_translation_index start_index = ( string_index_commulative_lengths[string_index] + translated_index.str_start_offset ) output_text += f" /* {record['id'].ljust(max_len)[:max_len]} */ {start_index}, // {escape(raw_string)}\n" output_text += f" }}, // {name}\n\n" return output_text translation_indices_text = write_grouped_indexes( translation_indices_text, "SettingsDescriptions", "menuOptions", "description" ) translation_indices_text = write_grouped_indexes( translation_indices_text, "SettingsShortNames", "menuOptions", "displayText" ) translation_indices_text = write_grouped_indexes( translation_indices_text, "SettingsMenuEntriesDescriptions", "menuGroups", "description", ) translation_indices_text = write_grouped_indexes( translation_indices_text, "SettingsMenuEntries", "menuGroups", "displayText" ) translation_indices_text += " }, // .indices\n\n" return ( "struct {\n" " TranslationIndexTable indices;\n" f" char strings[{str_total_bytes}];\n" f"}} const translation{suffix} = {{\n" + translation_indices_text + translation_strings_text + f"}}; // translation{suffix}\n\n" ) def get_translation_sanity_checks_text(defs: dict) -> str: sanity_checks_text = "\n// Verify SettingsItemIndex values:\n" for i, mod in enumerate(defs["menuOptions"]): eid = mod["id"] sanity_checks_text += ( f"static_assert(static_cast(SettingsItemIndex::{eid}) == {i});\n" ) sanity_checks_text += f"static_assert(static_cast(SettingsItemIndex::NUM_ITEMS) == {len(defs['menuOptions'])});\n" return sanity_checks_text def get_version_suffix(ver) -> str: # Check env var from push.yml first: # - if it's pull request then use vX.YY + C.ID for version line as in *C*I with proper tag instead of merge tag for detached tree if os.environ.get("GITHUB_CI_PR_SHA", "") != "": return "C" + "." + os.environ["GITHUB_CI_PR_SHA"][:8].upper() # - no github PR SHA ID, hence keep checking suffix = str("") try: # Use commands _hoping_ they won't be too new for one environments nor deprecated for another ones: ## - get commit id; --short=8 - the shorted hash with 8 digits (increase/decrease if needed!) sha_id = f"{subprocess.check_output(['git', 'rev-parse', '--short=8', 'HEAD']).strip().decode('ascii').upper()}" ## - if the exact commit relates to tag, then this command should return one-line tag name: tag = f"{subprocess.check_output(['git', 'tag', '--points-at', '%s' % sha_id]).strip().decode('ascii')}" if ( f"{subprocess.check_output(['git', 'rev-parse', '--symbolic-full-name', '--short', 'HEAD']).strip().decode('ascii')}" == "HEAD" ): return "E" + "." + sha_id else: ## - get short "traditional" branch name (as in `git branch` for that one with asterisk): branch = f"{subprocess.check_output(['git', 'symbolic-ref', '--short', 'HEAD']).strip().decode('ascii')}" if tag and "" != tag: # _Speculate_ on tag that it's Release... if ver == tag: # ... but only if double-check for tag is matched suffix = "R" else: # ... otherwise it's tagged but not a release version! suffix = "T" elif branch and "" != branch: # _Hardcoded_ current main development branch... if "dev" == branch: suffix = "D" # ... or some other branch else: suffix = "B" else: # Something else but from Git suffix = "G" # Attach SHA commit to ID a build since it's from git anyway suffix += "." + sha_id except subprocess.CalledProcessError: # No git tree so _probably_ Homebrew build from source suffix = "H" except OSError: # Something _special_? suffix = "S" if "" == suffix: # Something _very_ special! suffix = "V" return suffix def read_version() -> str: with open(HERE.parent / "source" / "version.h") as version_file: for line in version_file: if re.findall(r"^.*(?<=(#define)).*(?<=(BUILD_VERSION))", line): matches = re.findall(r"\"(.+?)\"", line) if matches: version = matches[0] version += get_version_suffix(version) return version def parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser() parser.add_argument( "--output-pickled", help="Write pickled language data for later reuse", type=argparse.FileType("wb"), required=False, dest="output_pickled", ) parser.add_argument( "--input-pickled", help="Use previously generated pickled language data", type=argparse.FileType("rb"), required=False, dest="input_pickled", ) parser.add_argument( "--strings-obj", help="Use generated TranslationData by extracting from object file", type=argparse.FileType("rb"), required=False, dest="strings_obj", ) parser.add_argument( "--compress-font", help="Compress the font table", action="store_true", required=False, dest="compress_font", ) parser.add_argument( "--macros", help="Extracted macros to filter translation strings by", type=argparse.FileType("r"), required=True, dest="macros", ) parser.add_argument( "--output", "-o", help="Target file", type=argparse.FileType("w"), required=True ) parser.add_argument( "languageCodes", metavar="languageCode", nargs="+", help="Language(s) to generate", ) return parser.parse_args() def main() -> None: json_dir = HERE args = parse_args() if args.input_pickled and args.output_pickled: logging.error("error: Both --output-pickled and --input-pickled are specified") sys.exit(1) macros = ( frozenset(re.findall(r"#define ([^ ]+)", args.macros.read())) if args.macros else frozenset() ) language_data: LanguageData if args.input_pickled: logging.info(f"Reading pickled language data from {args.input_pickled.name}...") language_data = pickle.load(args.input_pickled) language_codes = [lang["languageCode"] for lang in language_data.langs] if language_codes != args.languageCodes: logging.error( f"error: languageCode {args.languageCode} does not match language data {language_codes}" ) sys.exit(1) logging.info(f"Read language data for {language_codes}") logging.info(f"Build version: {language_data.build_version}") else: try: build_version = read_version() except FileNotFoundError: logging.error("error: Could not find version info ") sys.exit(1) logging.info(f"Build version: {build_version}") logging.info(f"Making {args.languageCodes} from {json_dir}") defs_ = load_json(os.path.join(json_dir, "translations_definitions.json")) if len(args.languageCodes) == 1: lang_ = filter_translation( read_translation(json_dir, args.languageCodes[0]), defs_, macros ) language_data = prepare_language(lang_, defs_, build_version) else: langs_ = [ filter_translation(read_translation(json_dir, lang_code), defs_, macros) for lang_code in args.languageCodes ] language_data = prepare_languages(langs_, defs_, build_version) out_ = args.output write_start(out_) if len(language_data.langs) == 1: if args.strings_obj: sym_name = objcopy.cpp_var_to_section_name("translation") strings_bin = objcopy.get_binary_from_obj(args.strings_obj.name, sym_name) if len(strings_bin) == 0: raise ValueError(f"Output for {sym_name} is empty") write_language( language_data, out_, strings_bin=strings_bin, compress_font=args.compress_font, ) else: write_language(language_data, out_, compress_font=args.compress_font) else: if args.strings_obj: write_languages( language_data, out_, strings_obj_path=args.strings_obj.name, compress_font=args.compress_font, ) else: write_languages(language_data, out_, compress_font=args.compress_font) if args.output_pickled: logging.info(f"Writing pickled data to {args.output_pickled.name}") pickle.dump(language_data, args.output_pickled) logging.info("Done") if __name__ == "__main__": main()