author     alvinhochun <[email protected]>   2021-04-30 16:51:13 +0800
committer  GitHub <[email protected]>        2021-04-30 18:51:13 +1000
commit     1a0b542ae6fd1094306ac26f10119ed96641d5f0 (patch)
tree       0620e22836481d07d0971d1d4b8f4708a8beacc0
parent     a51e3e9f0325b17862b3438834a116a8c24a9088 (diff)
download   IronOS-2.15-rc1.tar.gz / IronOS-2.15-rc1.zip
[RFC] Multi-language firmware (second try) (#941) (tag: v2.15-rc1)
* Impl. sectioned font table in firmware
* make_translation.py: Extract build_symbol_conversion_table function
* Put translation indices and strings in a struct
* Move translation objcopy step to Python
* Impl. multi-language firmware demo
* Impl. strings-compressed multi-lang firmware demo
* Add font compression to multi-lang demo
* Refactor Makefile a bit
* Fix rules for make < 4.3
* Add more multi-lang groups
* Add Pinecil multi-lang CI build
* Add lzfx compression license text
* Remove multi-language demo group
* Fix build after merge
* Import code from BriefLZ
* Change brieflz for our use case
* Change compression to use brieflz
* Remove lzfx code
* Update license file for brieflz
* Exclude brieflz files from format check
* Add BriefLZ test
57 files changed, 3449 insertions, 769 deletions
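The new `Translations/brieflz.py` added in this commit wraps the BriefLZ shared library via ctypes and is exercised by `Translations/brieflz_test.py`. A minimal round-trip looks roughly like the sketch below; it assumes `libbrieflz.so` has already been built (`cd source && make Objects/host/brieflz/libbrieflz.so`, as the CI job in the diff does).

```python
# Minimal sketch using the new Python-side wrapper (Translations/brieflz.py).
# Assumes the helper library was built first:
#   cd source && make Objects/host/brieflz/libbrieflz.so
import brieflz

payload = b"Sample translation strings, repeated to give the compressor something to work with. " * 8
packed = brieflz.compress(payload)                       # level-10 blz_pack_level under the hood
restored = brieflz.depack_srcsize(packed, len(payload))  # decompress, given the expected size
assert restored == payload
print(f"{len(payload)} bytes -> {len(packed)} bytes")
```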
diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index ba2af0f4..ca2e4abc 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -48,6 +48,51 @@ jobs: source/Hexfile/LICENSE_RELEASE.md if-no-files-found: error + build_multi-lang: + runs-on: ubuntu-20.04 + strategy: + matrix: + model: ["Pinecil"] + fail-fast: true + + steps: + - uses: actions/checkout@v2 + + - name: chmod + run: chmod +x setup.sh && chmod +x source/build.sh && sudo mkdir -p /build/cache && sudo chmod -R 777 /build + + - name: Cached compiler source files + uses: actions/[email protected] + env: + cache-name: cache-compilers + with: + path: /build/cache + key: ${{ runner.os }}-build-${{ env.cache-name }} + restore-keys: | + ${{ runner.os }}- + + - name: setup + run: ./setup.sh + + - name: build ${{ matrix.model }} + run: cd source && make -j$(nproc) model="${{ matrix.model }}" firmware-multi_compressed_European firmware-multi_compressed_Bulgarian+Russian+Serbian+Ukrainian firmware-multi_Chinese+Japanese + + - name: copy license text + run: | + cp LICENSE source/Hexfile/LICENSE + cp LICENSE_RELEASE.md source/Hexfile/LICENSE_RELEASE.md + + - name: Archive ${{ matrix.model }} artifacts + uses: actions/upload-artifact@v2 + with: + name: ${{ matrix.model }}_multi-lang + path: | + source/Hexfile/${{ matrix.model }}_*.hex + source/Hexfile/${{ matrix.model }}_*.bin + source/Hexfile/LICENSE + source/Hexfile/LICENSE_RELEASE.md + if-no-files-found: error + tests: runs-on: ubuntu-20.04 steps: @@ -59,6 +104,14 @@ jobs: - name: Run python tests run: cd Translations && chmod +x make_translation_test.py && ./make_translation_test.py + - name: Run BriefLZ tests + run: | + cd source + make Objects/host/brieflz/libbrieflz.so + cd ../Translations + chmod +x brieflz_test.py + ./brieflz_test.py + check_formatting: runs-on: ubuntu-20.04 diff --git a/LICENSE_RELEASE.md b/LICENSE_RELEASE.md index 94e9eebb..b41fcceb 100644 --- a/LICENSE_RELEASE.md +++ b/LICENSE_RELEASE.md @@ -7,6 +7,7 @@ This document outlines the license of IronOS and its dependencies. - NMSIS: Apache-2.0 - GD32VF103 board files: BSD-3-Clause - WenQuanYi Bitmap Song font: GPL-2.0-or-later +- BriefLZ compression library: Zlib The source code of IronOS can be obtained on the [IronOS GitHub repo][gh]. @@ -178,3 +179,34 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ``` + + +BriefLZ compression library +--- + +* Only applies to multi-language builds. + +``` +The zlib License (Zlib) + +Copyright (c) 2002-2020 Joergen Ibsen + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any damages +arising from the use of this software. + +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment in the product + documentation would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any source + distribution. 
+``` diff --git a/Translations/brieflz.py b/Translations/brieflz.py new file mode 100644 index 00000000..5b3c08cf --- /dev/null +++ b/Translations/brieflz.py @@ -0,0 +1,189 @@ +import ctypes +import functools +import os +from pathlib import Path + +HERE = Path(__file__).resolve().parent + + [email protected]_cache(maxsize=None) +def _libbrieflz(): + so_path = os.path.join(HERE, "../source/Objects/host/brieflz/libbrieflz.so") + libbrieflz = ctypes.cdll.LoadLibrary(so_path) + return libbrieflz + + [email protected]_cache(maxsize=None) +def _fn_blz_max_packed_size(): + """Returns the blz_max_packed_size C function. + :: + + /** + * Get bound on compressed data size. + * + * @see blz_pack + * + * @param src_size number of bytes to compress + * @return maximum size of compressed data + */ + BLZ_API size_t + blz_max_packed_size(size_t src_size); + """ + + fn = _libbrieflz().blz_max_packed_size + fn.argtype = [ + ctypes.c_size_t, + ] + fn.restype = ctypes.c_size_t + return fn + + +def blz_max_packed_size(src_size: int) -> int: + """Get bound on compressed data size.""" + fn_blz_max_packed_size = _fn_blz_max_packed_size() + return int(fn_blz_max_packed_size(src_size)) + + [email protected]_cache(maxsize=None) +def _fn_blz_workmem_size_level(): + """Returns the blz_workmem_size_level C function. + :: + + /** + * Get required size of `workmem` buffer. + * + * @see blz_pack_level + * + * @param src_size number of bytes to compress + * @param level compression level + * @return required size in bytes of `workmem` buffer + */ + BLZ_API size_t + blz_workmem_size_level(size_t src_size, int level); + """ + + fn = _libbrieflz().blz_workmem_size_level + fn.argtype = [ + ctypes.c_size_t, + ctypes.c_int, + ] + fn.restype = ctypes.c_size_t + return fn + + +def blz_workmem_size_level(src_size: int, level: int) -> int: + """Get required size of `workmem` buffer.""" + fn_blz_workmem_size_level = _fn_blz_workmem_size_level() + return int(fn_blz_workmem_size_level(src_size, level)) + + [email protected]_cache(maxsize=None) +def _fn_blz_pack_level(): + """Returns the blz_pack_level C function. + :: + + /** + * Compress `src_size` bytes of data from `src` to `dst`. + * + * Compression levels between 1 and 9 offer a trade-off between + * time/space and ratio. Level 10 is optimal but very slow. 
+ * + * @param src pointer to data + * @param dst pointer to where to place compressed data + * @param src_size number of bytes to compress + * @param workmem pointer to memory for temporary use + * @param level compression level + * @return size of compressed data + */ + BLZ_API unsigned long + blz_pack_level(const void *src, void *dst, unsigned long src_size, + void *workmem, int level); + """ + + fn = _libbrieflz().blz_pack_level + fn.argtype = [ + ctypes.c_char_p, + ctypes.c_char_p, + ctypes.c_ulong, + ctypes.c_char_p, + ctypes.c_int, + ] + fn.restype = ctypes.c_ulong + return fn + + +def compress(data: bytes) -> bytes: + """Returns a bytes object of the brieflz-compressed data.""" + + fn_blz_pack_level = _fn_blz_pack_level() + + output_buffer_len = blz_max_packed_size(len(data)) + + src = data + dst = ctypes.create_string_buffer(output_buffer_len) + src_size = len(src) + workmem = ctypes.create_string_buffer(blz_workmem_size_level(len(data), 10)) + level = 10 + + res = fn_blz_pack_level(src, dst, src_size, workmem, level) + + if res == 0: + raise BriefLZError() + else: + return bytes(dst[:res]) # type: ignore + + [email protected]_cache(maxsize=None) +def _fn_blz_depack_srcsize(): + """Returns the blz_depack_srcsize C function. + :: + + /** + * Decompress `src_size` bytes of data from `src` to `dst`. + * + * This function is unsafe. If the provided data is malformed, it may + * read more than `src_size` from the `src` buffer. + * + * @param src pointer to compressed data + * @param dst pointer to where to place decompressed data + * @param src_size size of the compressed data + * @return size of decompressed data + */ + BLZ_API unsigned long + blz_depack_srcsize(const void *src, void *dst, unsigned long src_size); + """ + + fn = _libbrieflz().blz_depack_srcsize + fn.argtype = [ + ctypes.c_char_p, + ctypes.c_char_p, + ctypes.c_ulong, + ] + fn.restype = ctypes.c_ulong + return fn + + +def depack_srcsize(data: bytes, expected_depack_size: int) -> bytes: + """Returns a bytes object of the uncompressed data.""" + + fn_blz_depack_srcsize = _fn_blz_depack_srcsize() + + output_buffer_len = expected_depack_size * 2 + + src = data + dst = ctypes.create_string_buffer(output_buffer_len) + src_size = len(src) + + res = fn_blz_depack_srcsize(src, dst, src_size) + + if res == 0: + raise BriefLZError() + else: + return bytes(dst[:res]) # type: ignore + + +class BriefLZError(Exception): + """Exception raised for brieflz compression or decompression error.""" + + def __init__(self): + pass diff --git a/Translations/brieflz_test.py b/Translations/brieflz_test.py new file mode 100644 index 00000000..ee711091 --- /dev/null +++ b/Translations/brieflz_test.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python3 +import brieflz +import unittest + + +TEST_DATA = ( + b"Lorem ipsum dolor sit amet, consectetur adipiscing elit. " + b"Ut consequat mattis orci ac laoreet. Duis ac turpis tempus, varius lacus non, dignissim lectus. " + b"Curabitur quis metus luctus, sollicitudin ipsum at, dictum metus. " + b"Cras sed est nec ex tempor tincidunt in at ante. Vivamus laoreet urna eget lectus euismod feugiat. " + b"Duis a massa ac metus pellentesque interdum. Nunc congue, est faucibus convallis commodo, justo nibh sagittis augue, sed tristique urna neque vitae urna. " + b"Donec quis orci et purus imperdiet sollicitudin." 
+) + + +class TestBriefLZ(unittest.TestCase): + def test_roundtrip(self): + packed = brieflz.compress(TEST_DATA) + depacked = brieflz.depack_srcsize(packed, len(TEST_DATA)) + self.assertEqual(depacked, TEST_DATA) + + +if __name__ == "__main__": + unittest.main() diff --git a/Translations/lzfx.py b/Translations/lzfx.py deleted file mode 100644 index 9f389673..00000000 --- a/Translations/lzfx.py +++ /dev/null @@ -1,91 +0,0 @@ -import ctypes -import functools -import os -from pathlib import Path - -HERE = Path(__file__).resolve().parent - - [email protected]_cache(maxsize=None) -def _liblzfx(): - so_path = os.path.join(HERE, "../source/Objects/host/lzfx/liblzfx.so") - liblzfx = ctypes.cdll.LoadLibrary(so_path) - return liblzfx - - [email protected]_cache(maxsize=None) -def _fn_lzfx_compress(): - """Returns the lzfx_compress C function. - :: - - /* Buffer-to buffer compression. - - Supply pre-allocated input and output buffers via ibuf and obuf, and - their size in bytes via ilen and olen. Buffers may not overlap. - - On success, the function returns a non-negative value and the argument - olen contains the compressed size in bytes. On failure, a negative - value is returned and olen is not modified. - */ - int lzfx_compress(const void* ibuf, unsigned int ilen, - void* obuf, unsigned int *olen); - """ - - fn = _liblzfx().lzfx_compress - fn.argtype = [ - ctypes.c_char_p, - ctypes.c_uint, - ctypes.c_char_p, - ctypes.POINTER(ctypes.c_uint), - ] - fn.restype = ctypes.c_int - return fn - - -def compress(data: bytes) -> bytes: - """Returns a bytes object of the lzfx-compressed data.""" - - fn_compress = _fn_lzfx_compress() - - output_buffer_len = len(data) + 8 - - ibuf = data - ilen = len(ibuf) - obuf = ctypes.create_string_buffer(output_buffer_len) - olen = ctypes.c_uint(output_buffer_len) - - res = fn_compress(ibuf, ilen, obuf, ctypes.byref(olen)) - - if res < 0: - raise LzfxError(res) - else: - return bytes(obuf[: olen.value]) # type: ignore - - -class LzfxError(Exception): - """Exception raised for lzfx compression or decompression error. - - Attributes: - error_code -- The source error code, which is a negative integer - error_name -- The constant name of the error - message -- explanation of the error - """ - - # define LZFX_ESIZE -1 /* Output buffer too small */ - # define LZFX_ECORRUPT -2 /* Invalid data for decompression */ - # define LZFX_EARGS -3 /* Arguments invalid (NULL) */ - - def __init__(self, error_code): - self.error_code = error_code - if error_code == -1: - self.error_name = "LZFX_ESIZE" - self.message = "Output buffer too small" - elif error_code == -2: - self.error_name = "LZFX_ECORRUPT" - self.message = "Invalid data for decompression" - elif error_code == -3: - self.error_name = "LZFX_EARGS" - self.message = "Arguments invalid (NULL)" - else: - self.error_name = "UNKNOWN" - self.message = "Unknown error" diff --git a/Translations/make_translation.py b/Translations/make_translation.py index 4eb7a3b7..8c6274c2 100755 --- a/Translations/make_translation.py +++ b/Translations/make_translation.py @@ -19,7 +19,8 @@ from bdflib import reader as bdfreader from bdflib.model import Font, Glyph
import font_tables
-import lzfx
+import brieflz
+import objcopy
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
@@ -118,7 +119,9 @@ def get_debug_menu() -> List[str]:
     ]
-def get_letter_counts(defs: dict, lang: dict, build_version: str) -> List[str]:
+def get_letter_counts(
+ defs: dict, lang: dict, build_version: str
+) -> Tuple[List[str], Dict[str, int]]:
text_list = []
# iterate over all strings
obj = lang["menuOptions"]
@@ -187,10 +190,12 @@ def get_letter_counts(defs: dict, lang: dict, build_version: str) -> List[str]:
             symbol_counts[letter] = symbol_counts.get(letter, 0) + 1
# swap to Big -> little sort order
symbols_by_occurrence = [
- x[0] for x in sorted(symbol_counts.items(), key=lambda kv: (kv[1], kv[0]))
+ x[0]
+ for x in sorted(
+ symbol_counts.items(), key=lambda kv: (kv[1], kv[0]), reverse=True
+ )
]
- symbols_by_occurrence.reverse()
- return symbols_by_occurrence
+ return symbols_by_occurrence, symbol_counts
def get_cjk_glyph(sym: str) -> bytes:
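The reworked sort in the hunk above now returns the symbols most-frequent-first, along with the raw counts. A toy illustration of the ordering, with made-up counts:

```python
# Toy illustration of the new ordering: most frequent symbols first,
# ties broken by the symbol itself (counts below are made up).
symbol_counts = {"e": 5, "t": 5, "a": 2}
symbols_by_occurrence = [
    x[0]
    for x in sorted(symbol_counts.items(), key=lambda kv: (kv[1], kv[0]), reverse=True)
]
assert symbols_by_occurrence == ["t", "e", "a"]
```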
@@ -383,20 +388,14 @@ def get_font_map_per_font(text_list: List[str], fonts: List[str]) -> FontMapsPer
     return FontMapsPerFont(font12_maps, font06_maps, sym_lists)
-def get_font_map_and_table(
- text_list: List[str], fonts: List[str]
-) -> Tuple[List[str], FontMap, Dict[str, bytes]]:
- # the text list is sorted
- # allocate out these in their order as number codes
- symbol_map: Dict[str, bytes] = {"\n": bytes([1])}
- index = 2 # start at 2, as 0= null terminator,1 = new line
+def get_forced_first_symbols() -> List[str]:
forced_first_symbols = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
+ return forced_first_symbols
- # We enforce that numbers come first.
- text_list = forced_first_symbols + [
- x for x in text_list if x not in forced_first_symbols
- ]
+def get_sym_list_and_font_map(
+ text_list: List[str], fonts: List[str]
+) -> Tuple[List[str], Dict[str, List[str]], FontMap]:
font_maps = get_font_map_per_font(text_list, fonts)
font12_maps = font_maps.font12_maps
font06_maps = font_maps.font06_maps
@@ -420,29 +419,50 @@ def get_font_map_and_table(
             sym_list_both_fonts.append(sym)
sym_list = sym_list_both_fonts + sym_list_large_only
+ return sym_list, font_maps.sym_lists, FontMap(font12_map, font06_map)
+
+
+def build_symbol_conversion_map(sym_list: List[str]) -> Dict[str, bytes]:
+ forced_first_symbols = get_forced_first_symbols()
+ if sym_list[: len(forced_first_symbols)] != forced_first_symbols:
+ raise ValueError("Symbol list does not start with forced_first_symbols.")
+
+ # the text list is sorted
+ # allocate out these in their order as number codes
+ symbol_map: Dict[str, bytes] = {"\n": bytes([1])}
+ index = 2 # start at 2, as 0= null terminator,1 = new line
+
# Assign symbol bytes by font index
for index, sym in enumerate(sym_list, index):
assert sym not in symbol_map
symbol_map[sym] = get_bytes_from_font_index(index)
- return sym_list, FontMap(font12_map, font06_map), symbol_map
+ return symbol_map
def make_font_table_cpp(
sym_list: List[str], font_map: FontMap, symbol_map: Dict[str, bytes]
) -> str:
- output_table = make_font_table_12_cpp(sym_list, font_map, symbol_map)
+ output_table = make_font_table_named_cpp(
+ "USER_FONT_12", sym_list, font_map.font12, symbol_map
+ )
output_table += make_font_table_06_cpp(sym_list, font_map, symbol_map)
return output_table
-def make_font_table_12_cpp(
- sym_list: List[str], font_map: FontMap, symbol_map: Dict[str, bytes]
+def make_font_table_named_cpp(
+ name: Optional[str],
+ sym_list: List[str],
+ font_map: Dict[str, bytes],
+ symbol_map: Dict[str, bytes],
) -> str:
- output_table = "const uint8_t USER_FONT_12[] = {\n"
+ output_table = ""
+ if name:
+ output_table = f"const uint8_t {name}[] = {{\n"
for sym in sym_list:
- output_table += f"{bytes_to_c_hex(font_map.font12[sym])}//{bytes_to_escaped(symbol_map[sym])} -> {sym}\n"
- output_table += "};\n"
+ output_table += f"{bytes_to_c_hex(font_map[sym])}//{bytes_to_escaped(symbol_map[sym])} -> {sym}\n"
+ if name:
+ output_table += f"}}; // {name}\n"
return output_table
@@ -495,26 +515,102 @@ def write_bytes_as_c_array(
 @dataclass
class LanguageData:
- lang: dict
+ langs: List[dict]
defs: dict
build_version: str
sym_list: List[str]
+ sym_lists_by_font: Dict[str, List[str]]
font_map: FontMap
- symbol_conversion_table: Dict[str, bytes]
def prepare_language(lang: dict, defs: dict, build_version: str) -> LanguageData:
language_code: str = lang["languageCode"]
logging.info(f"Preparing language data for {language_code}")
# Iterate over all of the text to build up the symbols & counts
- text_list = get_letter_counts(defs, lang, build_version)
+ text_list, _ = get_letter_counts(defs, lang, build_version)
# From the letter counts, need to make a symbol translator & write out the font
fonts = lang["fonts"]
- sym_list, font_map, symbol_conversion_table = get_font_map_and_table(
- text_list, fonts
+
+ forced_first_symbols = get_forced_first_symbols()
+
+ # We enforce that numbers come first.
+ text_list = forced_first_symbols + [
+ x for x in text_list if x not in forced_first_symbols
+ ]
+
+ sym_list, sym_lists_by_font, font_map = get_sym_list_and_font_map(text_list, fonts)
+ return LanguageData(
+ [lang], defs, build_version, sym_list, sym_lists_by_font, font_map
)
+
+
+def prepare_languages(
+ langs: List[dict], defs: dict, build_version: str
+) -> LanguageData:
+ language_codes: List[str] = [lang["languageCode"] for lang in langs]
+ logging.info(f"Preparing language data for {language_codes}")
+
+ forced_first_symbols = get_forced_first_symbols()
+
+ all_fonts = [
+ font_tables.NAME_ASCII_BASIC,
+ font_tables.NAME_LATIN_EXTENDED,
+ font_tables.NAME_CYRILLIC,
+ font_tables.NAME_CJK,
+ ]
+
+ # Build the full font maps
+ font12_map = {}
+ font06_map = {}
+ # Calculate total symbol counts per font:
+ total_sym_counts: Dict[str, Dict[str, int]] = {}
+ for lang in langs:
+ text_list, sym_counts = get_letter_counts(defs, lang, build_version)
+ fonts = lang["fonts"]
+ text_list = forced_first_symbols + [
+ x for x in text_list if x not in forced_first_symbols
+ ]
+ font_maps = get_font_map_per_font(text_list, fonts)
+ for font in fonts:
+ font12_map.update(font_maps.font12_maps[font])
+ font06_map.update(font_maps.font06_maps[font])
+ for font, font_sym_list in font_maps.sym_lists.items():
+ font_total_sym_counts = total_sym_counts.get(font, {})
+ for sym in font_sym_list:
+ font_total_sym_counts[sym] = font_total_sym_counts.get(
+ sym, 0
+ ) + sym_counts.get(sym, 0)
+ total_sym_counts[font] = font_total_sym_counts
+
+ sym_lists_by_font: Dict[str, List[str]] = {}
+ combined_sym_list = []
+ for font in all_fonts:
+ if font not in total_sym_counts:
+ continue
+ # swap to Big -> little sort order
+ current_sym_list = [
+ x[0]
+ for x in sorted(
+ total_sym_counts[font].items(),
+ key=lambda kv: (kv[1], kv[0]),
+ reverse=True,
+ )
+ ]
+ if font == font_tables.NAME_ASCII_BASIC:
+ # We enforce that numbers come first.
+ current_sym_list = forced_first_symbols + [
+ x for x in current_sym_list if x not in forced_first_symbols
+ ]
+ sym_lists_by_font[font] = current_sym_list
+ combined_sym_list.extend(current_sym_list)
+
return LanguageData(
- lang, defs, build_version, sym_list, font_map, symbol_conversion_table
+ langs,
+ defs,
+ build_version,
+ combined_sym_list,
+ sym_lists_by_font,
+ FontMap(font12_map, font06_map),
)
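In other words, `prepare_languages` builds one frequency-sorted symbol list per font and concatenates them in a fixed font order, with the digits pinned to the front of the ASCII list. A toy sketch of that ordering; the counts and font names are made up (the real code uses `font_tables.NAME_ASCII_BASIC` etc. and forces all ten digits):

```python
# Toy sketch of the combined symbol ordering (counts and font names are made up).
forced_first_symbols = ["0", "1"]
total_sym_counts = {
    "ascii_basic": {"0": 1, "e": 9, "t": 4},
    "cyrillic": {"и": 7, "о": 3},
}
combined_sym_list = []
for font in ["ascii_basic", "cyrillic"]:
    syms = [
        s
        for s, _ in sorted(
            total_sym_counts[font].items(), key=lambda kv: (kv[1], kv[0]), reverse=True
        )
    ]
    if font == "ascii_basic":
        # Digits are pinned to the front of the ASCII list only.
        syms = forced_first_symbols + [s for s in syms if s not in forced_first_symbols]
    combined_sym_list.extend(syms)
assert combined_sym_list == ["0", "1", "e", "t", "и", "о"]
```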
@@ -524,12 +620,15 @@ def write_language(
     strings_bin: Optional[bytes] = None,
compress_font: bool = False,
) -> None:
- lang = data.lang
+ if len(data.langs) > 1:
+ raise ValueError("More than one language was provided")
+ lang = data.langs[0]
defs = data.defs
build_version = data.build_version
sym_list = data.sym_list
font_map = data.font_map
- symbol_conversion_table = data.symbol_conversion_table
+
+ symbol_conversion_table = build_symbol_conversion_map(sym_list)
language_code: str = lang["languageCode"]
logging.info(f"Generating block for {language_code}")
@@ -540,7 +639,7 @@ def write_language(
         lang_name = language_code
if strings_bin or compress_font:
- f.write('#include "lzfx.h"\n')
+ f.write('#include "brieflz.h"\n')
f.write(f"\n// ---- {lang_name} ----\n\n")
@@ -549,19 +648,44 @@ def write_language(
             sym_list, font_map, symbol_conversion_table
)
f.write(font_table_text)
+ f.write(
+ "const FontSection FontSectionsData[] = {\n"
+ " {\n"
+ " .symbol_start = 2,\n"
+ f" .symbol_end = {len(sym_list) + 2},\n"
+ " .font12_start_ptr = USER_FONT_12,\n"
+ " .font06_start_ptr = USER_FONT_6x8,\n"
+ " },\n"
+ "};\n"
+ "const FontSection *const FontSections = FontSectionsData;\n"
+ "const uint8_t FontSectionsCount = sizeof(FontSectionsData) / sizeof(FontSectionsData[0]);\n"
+ )
else:
font12_uncompressed = bytearray()
for sym in sym_list:
font12_uncompressed.extend(font_map.font12[sym])
- font12_compressed = lzfx.compress(bytes(font12_uncompressed))
+ font12_compressed = brieflz.compress(bytes(font12_uncompressed))
logging.info(
f"Font table 12x16 compressed from {len(font12_uncompressed)} to {len(font12_compressed)} bytes (ratio {len(font12_compressed) / len(font12_uncompressed):.3})"
)
- write_bytes_as_c_array(f, "font_12x16_lzfx", font12_compressed)
+ write_bytes_as_c_array(f, "font_12x16_brieflz", font12_compressed)
font_table_text = make_font_table_06_cpp(
sym_list, font_map, symbol_conversion_table
)
f.write(font_table_text)
+ f.write(
+ f"static uint8_t font_out_buffer[{len(font12_uncompressed)}];\n"
+ "const FontSection FontSectionsData[] = {\n"
+ " {\n"
+ " .symbol_start = 2,\n"
+ f" .symbol_end = {len(sym_list) + 2},\n"
+ " .font12_start_ptr = font_out_buffer,\n"
+ " .font06_start_ptr = USER_FONT_6x8,\n"
+ " },\n"
+ "};\n"
+ "const FontSection *const FontSections = FontSectionsData;\n"
+ "const uint8_t FontSectionsCount = sizeof(FontSectionsData) / sizeof(FontSectionsData[0]);\n"
+ )
f.write(f"\n// ---- {lang_name} ----\n\n")
@@ -573,49 +697,38 @@ def write_language(
         f"const bool HasFahrenheit = {('true' if lang.get('tempUnitFahrenheit', True) else 'false')};\n\n"
)
- if not compress_font:
- f.write("extern const uint8_t *const Font_12x16 = USER_FONT_12;\n")
- else:
- f.write(
- f"static uint8_t font_out_buffer[{len(font12_uncompressed)}];\n\n"
- "extern const uint8_t *const Font_12x16 = font_out_buffer;\n"
- )
- f.write("extern const uint8_t *const Font_6x8 = USER_FONT_6x8;\n\n")
-
if not strings_bin:
translation_strings_and_indices_text = get_translation_strings_and_indices_text(
lang, defs, symbol_conversion_table
)
f.write(translation_strings_and_indices_text)
f.write(
- "const TranslationIndexTable *const Tr = &TranslationIndices;\n"
- "const char *const TranslationStrings = TranslationStringsData;\n\n"
+ "const TranslationIndexTable *Tr = &translation.indices;\n"
+ "const char *TranslationStrings = translation.strings;\n\n"
)
else:
- compressed = lzfx.compress(strings_bin)
+ compressed = brieflz.compress(strings_bin)
logging.info(
f"Strings compressed from {len(strings_bin)} to {len(compressed)} bytes (ratio {len(compressed) / len(strings_bin):.3})"
)
- write_bytes_as_c_array(f, "translation_data_lzfx", compressed)
+ write_bytes_as_c_array(f, "translation_data_brieflz", compressed)
f.write(
f"static uint8_t translation_data_out_buffer[{len(strings_bin)}] __attribute__((__aligned__(2)));\n\n"
- "const TranslationIndexTable *const Tr = reinterpret_cast<const TranslationIndexTable *>(translation_data_out_buffer);\n"
- "const char *const TranslationStrings = reinterpret_cast<const char *>(translation_data_out_buffer) + sizeof(TranslationIndexTable);\n\n"
+ "const TranslationIndexTable *Tr = reinterpret_cast<const TranslationIndexTable *>(translation_data_out_buffer);\n"
+ "const char *TranslationStrings = reinterpret_cast<const char *>(translation_data_out_buffer) + sizeof(TranslationIndexTable);\n\n"
)
if not strings_bin and not compress_font:
f.write("void prepareTranslations() {}\n\n")
else:
- f.write("void prepareTranslations() {\n" " unsigned int outsize;\n")
+ f.write("void prepareTranslations() {\n")
if compress_font:
f.write(
- " outsize = sizeof(font_out_buffer);\n"
- " lzfx_decompress(font_12x16_lzfx, sizeof(font_12x16_lzfx), font_out_buffer, &outsize);\n"
+ " blz_depack_srcsize(font_12x16_brieflz, font_out_buffer, sizeof(font_12x16_brieflz));\n"
)
if strings_bin:
f.write(
- " outsize = sizeof(translation_data_out_buffer);\n"
- " lzfx_decompress(translation_data_lzfx, sizeof(translation_data_lzfx), translation_data_out_buffer, &outsize);\n"
+ " blz_depack_srcsize(translation_data_brieflz, translation_data_out_buffer, sizeof(translation_data_brieflz));\n"
)
f.write("}\n\n")
@@ -623,6 +736,188 @@ def write_language(
     f.write(sanity_checks_text)
+def write_languages(
+ data: LanguageData,
+ f: TextIO,
+ strings_obj_path: Optional[str] = None,
+ compress_font: bool = False,
+) -> None:
+ defs = data.defs
+ build_version = data.build_version
+ combined_sym_list = data.sym_list
+ sym_lists_by_font = data.sym_lists_by_font
+ font_map = data.font_map
+
+ symbol_conversion_table = build_symbol_conversion_map(combined_sym_list)
+
+ language_codes: List[str] = [lang["languageCode"] for lang in data.langs]
+ logging.info(f"Generating block for {language_codes}")
+
+ lang_names = [
+ lang.get("languageLocalName", lang["languageCode"]) for lang in data.langs
+ ]
+
+ f.write('#include "Translation_multi.h"')
+
+ f.write(f"\n// ---- {lang_names} ----\n\n")
+
+ max_decompressed_font_size = 0
+ if not compress_font:
+ font_table_text = ""
+ font_section_info_text = (
+ "const FontSectionDataInfo FontSectionDataInfos[] = {\n"
+ )
+ for font, current_sym_list in sym_lists_by_font.items():
+ font_table_text += f"const uint8_t font_table_data_{font}[] = {{\n"
+ font_table_text += "// 12x16:\n"
+ font_table_text += make_font_table_named_cpp(
+ None,
+ current_sym_list,
+ font_map.font12,
+ symbol_conversion_table,
+ )
+ if font != font_tables.NAME_CJK:
+ font_table_text += "// 6x8:\n"
+ font_table_text += make_font_table_named_cpp(
+ None,
+ current_sym_list,
+ font_map.font06, # type: ignore[arg-type]
+ symbol_conversion_table,
+ )
+ font_table_text += f"}}; // font_table_data_{font}\n"
+ current_sym_start = combined_sym_list.index(current_sym_list[0]) + 2
+ font_section_info_text += (
+ " {\n"
+ f" .symbol_start = {current_sym_start},\n"
+ f" .symbol_count = {len(current_sym_list)},\n"
+ f" .data_size = sizeof(font_table_data_{font}),\n"
+ " .data_is_compressed = false,\n"
+ f" .data_ptr = font_table_data_{font},\n"
+ " },\n"
+ )
+
+ f.write(font_table_text)
+ font_section_info_text += (
+ "};\n"
+ "const uint8_t FontSectionDataCount = sizeof(FontSectionDataInfos) / sizeof(FontSectionDataInfos[0]);\n\n"
+ )
+ f.write(font_section_info_text)
+ f.write(
+ "FontSection DynamicFontSections[4] = {};\n"
+ "const FontSection *const FontSections = DynamicFontSections;\n"
+ "const uint8_t FontSectionsCount = sizeof(DynamicFontSections) / sizeof(DynamicFontSections[0]);\n"
+ )
+ else:
+ font_section_info_text = (
+ "const FontSectionDataInfo FontSectionDataInfos[] = {\n"
+ )
+ for font, current_sym_list in sym_lists_by_font.items():
+ current_sym_start = combined_sym_list.index(current_sym_list[0]) + 2
+ font_uncompressed = bytearray()
+ for sym in current_sym_list:
+ font_uncompressed.extend(font_map.font12[sym])
+ if font != font_tables.NAME_CJK:
+ for sym in current_sym_list:
+ font_uncompressed.extend(font_map.font06[sym]) # type: ignore[arg-type]
+ font_compressed = brieflz.compress(bytes(font_uncompressed))
+ logging.info(
+ f"Font table for {font} compressed from {len(font_uncompressed)} to {len(font_compressed)} bytes (ratio {len(font_compressed) / len(font_uncompressed):.3})"
+ )
+ max_decompressed_font_size += len(font_uncompressed)
+ write_bytes_as_c_array(f, f"font_data_brieflz_{font}", font_compressed)
+ font_section_info_text += (
+ " {\n"
+ f" .symbol_start = {current_sym_start},\n"
+ f" .symbol_count = {len(current_sym_list)},\n"
+ f" .data_size = sizeof(font_data_brieflz_{font}),\n"
+ " .data_is_compressed = true,\n"
+ f" .data_ptr = font_data_brieflz_{font},\n"
+ " },\n"
+ )
+ font_section_info_text += (
+ "};\n"
+ "const uint8_t FontSectionDataCount = sizeof(FontSectionDataInfos) / sizeof(FontSectionDataInfos[0]);\n\n"
+ )
+ f.write(font_section_info_text)
+ f.write(
+ "FontSection DynamicFontSections[4] = {};\n"
+ "const FontSection *const FontSections = DynamicFontSections;\n"
+ "const uint8_t FontSectionsCount = sizeof(DynamicFontSections) / sizeof(DynamicFontSections[0]);\n"
+ )
+
+ f.write(f"\n// ---- {lang_names} ----\n\n")
+
+ translation_common_text = get_translation_common_text(
+ defs, symbol_conversion_table, build_version
+ )
+ f.write(translation_common_text)
+ f.write(
+ f"const bool HasFahrenheit = {('true' if any([lang.get('tempUnitFahrenheit', True) for lang in data.langs]) else 'false')};\n\n"
+ )
+
+ max_decompressed_translation_size = 0
+ if not strings_obj_path:
+ for lang in data.langs:
+ lang_code = lang["languageCode"]
+ translation_strings_and_indices_text = (
+ get_translation_strings_and_indices_text(
+ lang, defs, symbol_conversion_table, suffix=f"_{lang_code}"
+ )
+ )
+ f.write(translation_strings_and_indices_text)
+ f.write("const LanguageMeta LanguageMetas[] = {\n")
+ for lang in data.langs:
+ lang_code = lang["languageCode"]
+ f.write(
+ " {\n"
+ # NOTE: Cannot specify C99 designator here due to GCC (g++) bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=55227
+ f' /* .code = */ "{lang_code}",\n'
+ f" .translation_data = reinterpret_cast<const uint8_t *>(&translation_{lang_code}),\n"
+ f" .translation_size = sizeof(translation_{lang_code}),\n"
+ f" .translation_is_compressed = false,\n"
+ " },\n"
+ )
+ f.write("};\n")
+ else:
+ for lang in data.langs:
+ lang_code = lang["languageCode"]
+ sym_name = objcopy.cpp_var_to_section_name(f"translation_{lang_code}")
+ strings_bin = objcopy.get_binary_from_obj(strings_obj_path, sym_name)
+ if len(strings_bin) == 0:
+ raise ValueError(f"Output for {sym_name} is empty")
+ max_decompressed_translation_size = max(
+ max_decompressed_translation_size, len(strings_bin)
+ )
+ compressed = brieflz.compress(strings_bin)
+ logging.info(
+ f"Strings for {lang_code} compressed from {len(strings_bin)} to {len(compressed)} bytes (ratio {len(compressed) / len(strings_bin):.3})"
+ )
+ write_bytes_as_c_array(
+ f, f"translation_data_brieflz_{lang_code}", compressed
+ )
+ f.write("const LanguageMeta LanguageMetas[] = {\n")
+ for lang in data.langs:
+ lang_code = lang["languageCode"]
+ f.write(
+ " {\n"
+ # NOTE: Cannot specify C99 designator here due to GCC (g++) bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=55227
+ f' /* .code = */ "{lang_code}",\n'
+ f" .translation_data = translation_data_brieflz_{lang_code},\n"
+ f" .translation_size = sizeof(translation_data_brieflz_{lang_code}),\n"
+ f" .translation_is_compressed = true,\n"
+ " },\n"
+ )
+ f.write("};\n")
+ f.write(
+ "const uint8_t LanguageCount = sizeof(LanguageMetas) / sizeof(LanguageMetas[0]);\n\n"
+ f"alignas(TranslationData) uint8_t translation_data_out_buffer[{max_decompressed_translation_size + max_decompressed_font_size}];\n"
+ "const uint16_t translation_data_out_buffer_size = sizeof(translation_data_out_buffer);\n\n"
+ )
+
+ sanity_checks_text = get_translation_sanity_checks_text(defs)
+ f.write(sanity_checks_text)
+
+
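The shared `translation_data_out_buffer` emitted above is sized so that the largest language's decompressed TranslationData and all decompressed font sections can live in it at the same time. A toy calculation with made-up sizes:

```python
# Toy sizing example for the multi-language buffer (all numbers are made up):
# largest decompressed TranslationData plus every decompressed font section.
decompressed_font_sizes = {"ascii_basic": 2400, "cyrillic": 1800}
translation_sizes = {"RU": 5200, "UK": 5100, "BG": 4900}
buffer_size = max(translation_sizes.values()) + sum(decompressed_font_sizes.values())
assert buffer_size == 9400
```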
def get_translation_common_text(
defs: dict, symbol_conversion_table: Dict[str, bytes], build_version
) -> str:
@@ -652,7 +947,7 @@ class TranslationItem:
 def get_translation_strings_and_indices_text(
- lang: dict, defs: dict, symbol_conversion_table: Dict[str, bytes]
+ lang: dict, defs: dict, symbol_conversion_table: Dict[str, bytes], suffix: str = ""
) -> str:
str_table: List[str] = []
str_group_messages: List[TranslationItem] = []
@@ -780,6 +1075,8 @@ def get_translation_strings_and_indices_text(
             j = i
while backward_sorted_table[j + 1][2].startswith(converted):
j += 1
+ if j + 1 == len(backward_sorted_table):
+ break
if j != i:
str_remapping[str_index] = RemappedTranslationItem(
str_index=backward_sorted_table[j][0],
@@ -790,7 +1087,8 @@ def get_translation_strings_and_indices_text(
     str_offsets = [-1] * len(str_table)
offset = 0
write_null = False
- translation_strings_text = "const char TranslationStringsData[] = {\n"
+ # NOTE: Cannot specify C99 designator here due to GCC (g++) bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=55227
+ translation_strings_text = " /* .strings = */ {\n"
for i, source_str in enumerate(str_table):
if str_remapping[i] is not None:
continue
@@ -814,33 +1112,37 @@ def get_translation_strings_and_indices_text(
             for item in group:
if item.str_index == j:
translation_strings_text += (
- f" // - {pre_info} {item.info}\n"
+ f" // - {pre_info} {item.info}\n"
)
if j == i:
- translation_strings_text += f" // {offset: >4}: {escape(source_str)}\n"
+ translation_strings_text += (
+ f" // {offset: >4}: {escape(source_str)}\n"
+ )
str_offsets[j] = offset
else:
remapped = str_remapping[j]
assert remapped is not None
- translation_strings_text += f" // {offset + remapped.str_start_offset: >4}: {escape(str_table[j])}\n"
+ translation_strings_text += f" // {offset + remapped.str_start_offset: >4}: {escape(str_table[j])}\n"
str_offsets[j] = offset + remapped.str_start_offset
converted_bytes = convert_string_bytes(symbol_conversion_table, source_str)
- translation_strings_text += f' "{bytes_to_escaped(converted_bytes)}"'
+ translation_strings_text += f' "{bytes_to_escaped(converted_bytes)}"'
str_offsets[i] = offset
# Add the length and the null terminator
offset += len(converted_bytes) + 1
- translation_strings_text += "\n}; // TranslationStringsData\n\n"
+ translation_strings_text += "\n }, // .strings\n\n"
+
+ str_total_bytes = offset
def get_offset(idx: int) -> int:
assert str_offsets[idx] >= 0
return str_offsets[idx]
- translation_indices_text = "const TranslationIndexTable TranslationIndices = {\n"
+ translation_indices_text = " .indices = {\n"
# ----- Write the messages string indices:
for group in [str_group_messages, str_group_messageswarn, str_group_characters]:
for item in group:
- translation_indices_text += f" .{item.info} = {get_offset(item.str_index)}, // {escape(str_table[item.str_index])}\n"
+ translation_indices_text += f" .{item.info} = {get_offset(item.str_index)}, // {escape(str_table[item.str_index])}\n"
translation_indices_text += "\n"
# ----- Write the settings index tables:
@@ -851,14 +1153,22 @@ def get_translation_strings_and_indices_text(
         (str_group_settingmenuentriesdesc, "SettingsMenuEntriesDescriptions"),
]:
max_len = 30
- translation_indices_text += f" .{name} = {{\n"
+ translation_indices_text += f" .{name} = {{\n"
for item in group:
- translation_indices_text += f" /* {item.info.ljust(max_len)[:max_len]} */ {get_offset(item.str_index)}, // {escape(str_table[item.str_index])}\n"
- translation_indices_text += f" }}, // {name}\n\n"
-
- translation_indices_text += "}; // TranslationIndices\n\n"
-
- return translation_strings_text + translation_indices_text
+ translation_indices_text += f" /* {item.info.ljust(max_len)[:max_len]} */ {get_offset(item.str_index)}, // {escape(str_table[item.str_index])}\n"
+ translation_indices_text += f" }}, // {name}\n\n"
+
+ translation_indices_text += " }, // .indices\n\n"
+
+ return (
+ "struct {\n"
+ " TranslationIndexTable indices;\n"
+ f" char strings[{str_total_bytes}];\n"
+ f"}} const translation{suffix} = {{\n"
+ + translation_indices_text
+ + translation_strings_text
+ + f"}}; // translation{suffix}\n\n"
+ )
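The remapping pass earlier in this function (including the new bounds check on `backward_sorted_table`) implements a simple string-pooling trick: a string that is a suffix of another string is not stored again, its index simply points into the longer string. A toy illustration with illustrative offsets:

```python
# Toy illustration of the suffix-sharing remap (offsets are illustrative only).
pool = "Power Pulse\0"                    # stored once, null-terminated
offsets = {"Power Pulse": 0, "Pulse": 6}  # "Pulse" reuses the tail of "Power Pulse"
assert all(pool[off:].startswith(s) for s, off in offsets.items())
```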
def get_translation_sanity_checks_text(defs: dict) -> str:
@@ -904,11 +1214,11 @@ def parse_args() -> argparse.Namespace:
         dest="input_pickled",
)
parser.add_argument(
- "--strings-bin",
- help="Use generated TranslationIndices + TranslationStrings data and compress them",
+ "--strings-obj",
+ help="Use generated TranslationData by extracting from object file",
type=argparse.FileType("rb"),
required=False,
- dest="strings_bin",
+ dest="strings_obj",
)
parser.add_argument(
"--compress-font",
@@ -920,7 +1230,12 @@ def parse_args() -> argparse.Namespace:
     parser.add_argument(
"--output", "-o", help="Target file", type=argparse.FileType("w"), required=True
)
- parser.add_argument("languageCode", help="Language to generate")
+ parser.add_argument(
+ "languageCodes",
+ metavar="languageCode",
+ nargs="+",
+ help="Language(s) to generate",
+ )
return parser.parse_args()
@@ -936,12 +1251,13 @@ def main() -> None:
     if args.input_pickled:
logging.info(f"Reading pickled language data from {args.input_pickled.name}...")
language_data = pickle.load(args.input_pickled)
- if language_data.lang["languageCode"] != args.languageCode:
+ language_codes = [lang["languageCode"] for lang in language_data.langs]
+ if language_codes != args.languageCodes:
logging.error(
- f"error: languageCode {args.languageCode} does not match language data {language_data.lang['languageCode']}"
+ f"error: languageCode {args.languageCode} does not match language data {language_codes}"
)
sys.exit(1)
- logging.info(f"Read language data for {language_data.lang['languageCode']}")
+ logging.info(f"Read language data for {language_codes}")
logging.info(f"Build version: {language_data.build_version}")
else:
try:
@@ -951,23 +1267,45 @@ def main() -> None:
             sys.exit(1)
logging.info(f"Build version: {build_version}")
- logging.info(f"Making {args.languageCode} from {json_dir}")
+ logging.info(f"Making {args.languageCodes} from {json_dir}")
- lang_ = read_translation(json_dir, args.languageCode)
defs_ = load_json(os.path.join(json_dir, "translations_def.js"), True)
- language_data = prepare_language(lang_, defs_, build_version)
+ if len(args.languageCodes) == 1:
+ lang_ = read_translation(json_dir, args.languageCodes[0])
+ language_data = prepare_language(lang_, defs_, build_version)
+ else:
+ langs_ = [
+ read_translation(json_dir, lang_code)
+ for lang_code in args.languageCodes
+ ]
+ language_data = prepare_languages(langs_, defs_, build_version)
out_ = args.output
write_start(out_)
- if args.strings_bin:
- write_language(
- language_data,
- out_,
- args.strings_bin.read(),
- compress_font=args.compress_font,
- )
+ if len(language_data.langs) == 1:
+ if args.strings_obj:
+ sym_name = objcopy.cpp_var_to_section_name("translation")
+ strings_bin = objcopy.get_binary_from_obj(args.strings_obj.name, sym_name)
+ if len(strings_bin) == 0:
+ raise ValueError(f"Output for {sym_name} is empty")
+ write_language(
+ language_data,
+ out_,
+ strings_bin=strings_bin,
+ compress_font=args.compress_font,
+ )
+ else:
+ write_language(language_data, out_, compress_font=args.compress_font)
else:
- write_language(language_data, out_, compress_font=args.compress_font)
+ if args.strings_obj:
+ write_languages(
+ language_data,
+ out_,
+ strings_obj_path=args.strings_obj.name,
+ compress_font=args.compress_font,
+ )
+ else:
+ write_languages(language_data, out_, compress_font=args.compress_font)
if args.output_pickled:
logging.info(f"Writing pickled data to {args.output_pickled.name}")
diff --git a/Translations/objcopy.py b/Translations/objcopy.py new file mode 100644 index 00000000..be0cc409 --- /dev/null +++ b/Translations/objcopy.py @@ -0,0 +1,25 @@ +import os +import subprocess +import tempfile + + +if "OBJCOPY" in os.environ: + OBJCOPY = os.environ["OBJCOPY"] +else: + OBJCOPY = "objcopy" + + +def get_binary_from_obj(objfile_path: str, section_name: str) -> bytes: + tmpfd, tmpfile = tempfile.mkstemp() + result = subprocess.run( + [OBJCOPY, "-O", "binary", "-j", section_name, objfile_path, tmpfile] + ) + result.check_returncode() + with open(tmpfd, "rb") as f: + bin: bytes = f.read() + os.remove(tmpfile) + return bin + + +def cpp_var_to_section_name(var_name: str) -> str: + return f".rodata._ZL{len(var_name)}{var_name}" diff --git a/Translations/translation_BG.json b/Translations/translation_BG.json index 5c285352..0d746668 100644 --- a/Translations/translation_BG.json +++ b/Translations/translation_BG.json @@ -308,6 +308,13 @@ "duration" ], "desc": "Keep-awake-pulse duration (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " BG Български" + ], + "desc": "" } } } diff --git a/Translations/translation_CS.json b/Translations/translation_CS.json index 645994f8..56811025 100644 --- a/Translations/translation_CS.json +++ b/Translations/translation_CS.json @@ -305,6 +305,13 @@ "duration" ], "desc": "Keep-awake-pulse duration (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " CS Český" + ], + "desc": "" } } } diff --git a/Translations/translation_DA.json b/Translations/translation_DA.json index 1541e74b..88c9ec3a 100644 --- a/Translations/translation_DA.json +++ b/Translations/translation_DA.json @@ -305,6 +305,13 @@ "duration" ], "desc": "Keep-awake-pulse duration (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " DA Dansk" + ], + "desc": "" } } } diff --git a/Translations/translation_DE.json b/Translations/translation_DE.json index efd46499..703e2b16 100644 --- a/Translations/translation_DE.json +++ b/Translations/translation_DE.json @@ -306,6 +306,13 @@ "Dauer" ], "desc": "Dauer des Wachhalteimpulses (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " DE Deutsch" + ], + "desc": "" } } } diff --git a/Translations/translation_EN.json b/Translations/translation_EN.json index d5b67ae4..a17d886c 100644 --- a/Translations/translation_EN.json +++ b/Translations/translation_EN.json @@ -305,6 +305,13 @@ "duration"
],
"desc": "Keep-awake-pulse duration (x 250ms)"
+ },
+ "LanguageSwitch": {
+ "text2": [
+ "Language:",
+ " EN English"
+ ],
+ "desc": ""
}
}
}
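Each translation file gains the same `LanguageSwitch` entry; its two `text2` lines have to fit the widths declared for the new id in `translations_def.js` further down (maxLen 7 / maxLen2 15). A small sanity-check sketch, assuming maxLen2 bounds each of the two `text2` lines:

```python
# Hypothetical width check for a LanguageSwitch entry
# (assumes maxLen2 applies to each of the two text2 lines).
entry = {"text2": ["Language:", " EN English"]}
MAX_LEN2 = 15
assert all(len(line) <= MAX_LEN2 for line in entry["text2"])
```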
diff --git a/Translations/translation_ES.json b/Translations/translation_ES.json index 90afeaa0..69f65d2e 100644 --- a/Translations/translation_ES.json +++ b/Translations/translation_ES.json @@ -306,6 +306,13 @@ "duración" ], "desc": "Duración del impulso de mantenimiento de la vigilia (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " ES Castellano" + ], + "desc": "" } } } diff --git a/Translations/translation_FI.json b/Translations/translation_FI.json index f4ebef8f..5d67d61d 100644 --- a/Translations/translation_FI.json +++ b/Translations/translation_FI.json @@ -302,6 +302,13 @@ "kesto" ], "desc": "Herätyspulssin kesto (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " FI Suomi" + ], + "desc": "" } } } diff --git a/Translations/translation_FR.json b/Translations/translation_FR.json index 1a1cf1f7..fef83c9c 100644 --- a/Translations/translation_FR.json +++ b/Translations/translation_FR.json @@ -305,6 +305,13 @@ "impulsions" ], "desc": "Durée des impulsions pour empêcher la mise en veille (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " FR Français" + ], + "desc": "" } } } diff --git a/Translations/translation_HR.json b/Translations/translation_HR.json index a488a994..a6e1deaa 100644 --- a/Translations/translation_HR.json +++ b/Translations/translation_HR.json @@ -305,6 +305,13 @@ "duration" ], "desc": "Keep-awake-pulse duration (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " HR Hrvatski" + ], + "desc": "" } } } diff --git a/Translations/translation_HU.json b/Translations/translation_HU.json index 13d2e0ed..427a5c54 100644 --- a/Translations/translation_HU.json +++ b/Translations/translation_HU.json @@ -308,6 +308,13 @@ "duration" ], "desc": "Keep-awake-pulse duration (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " HU Magyar" + ], + "desc": "" } } } diff --git a/Translations/translation_IT.json b/Translations/translation_IT.json index dfcebc61..45d9cc65 100644 --- a/Translations/translation_IT.json +++ b/Translations/translation_IT.json @@ -305,6 +305,13 @@ "impulso" ], "desc": "Regola la durata dell'«impulso sveglia» [multipli di 250 ms]" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " IT Italiano" + ], + "desc": "" } } } diff --git a/Translations/translation_JA_JP.json b/Translations/translation_JA_JP.json index 20bde0dd..3dc612ce 100644 --- a/Translations/translation_JA_JP.json +++ b/Translations/translation_JA_JP.json @@ -201,6 +201,10 @@ "PowerPulseDuration": { "text2": "パルス時間長", "desc": "電源供給元をオンに保つために使用される、電力パルスの時間長 <x250ms(ミリ秒)>" + }, + "LanguageSwitch": { + "text2": "言語: 日本語", + "desc": "" } } } diff --git a/Translations/translation_LT.json b/Translations/translation_LT.json index aba3520d..8eda6656 100644 --- a/Translations/translation_LT.json +++ b/Translations/translation_LT.json @@ -308,6 +308,13 @@ "duration" ], "desc": "Keep-awake-pulse duration (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " LT Lietuvių" + ], + "desc": "" } } } diff --git a/Translations/translation_NL.json b/Translations/translation_NL.json index 5d4d5f6a..85060eab 100644 --- a/Translations/translation_NL.json +++ b/Translations/translation_NL.json @@ -314,6 +314,13 @@ "duration"
],
"desc": "Keep-awake-pulse duration (x 250ms)"
+ },
+ "LanguageSwitch": {
+ "text2": [
+ "Language:",
+ " NL Nederlands"
+ ],
+ "desc": ""
}
}
}
diff --git a/Translations/translation_NL_BE.json b/Translations/translation_NL_BE.json index 00ae61bf..9aafcead 100644 --- a/Translations/translation_NL_BE.json +++ b/Translations/translation_NL_BE.json @@ -305,6 +305,13 @@ "duration" ], "desc": "Keep-awake-pulse duration (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " NL_BE Vlaams" + ], + "desc": "" } } } diff --git a/Translations/translation_NO.json b/Translations/translation_NO.json index 90401145..c126f69b 100644 --- a/Translations/translation_NO.json +++ b/Translations/translation_NO.json @@ -305,6 +305,13 @@ "duration" ], "desc": "Keep-awake-pulse duration (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " NO Norsk" + ], + "desc": "" } } } diff --git a/Translations/translation_PL.json b/Translations/translation_PL.json index 4159e226..c9af631a 100644 --- a/Translations/translation_PL.json +++ b/Translations/translation_PL.json @@ -306,6 +306,13 @@ "impulsu mocy" ], "desc": "Długość impulsu mocy zapobiegającego usypianiu powerbanku (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " PL Polski" + ], + "desc": "" } } } diff --git a/Translations/translation_PT.json b/Translations/translation_PT.json index 06b51d37..7bbcc7ea 100644 --- a/Translations/translation_PT.json +++ b/Translations/translation_PT.json @@ -305,6 +305,13 @@ "duration" ], "desc": "Keep-awake-pulse duration (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " PT Português" + ], + "desc": "" } } } diff --git a/Translations/translation_RU.json b/Translations/translation_RU.json index b3de0c81..1827e672 100644 --- a/Translations/translation_RU.json +++ b/Translations/translation_RU.json @@ -306,6 +306,13 @@ "duration" ], "desc": "Keep-awake-pulse duration (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " RU Русский" + ], + "desc": "" } } } diff --git a/Translations/translation_SK.json b/Translations/translation_SK.json index 56e9e19d..05a1624c 100644 --- a/Translations/translation_SK.json +++ b/Translations/translation_SK.json @@ -305,6 +305,13 @@ "duration" ], "desc": "Keep-awake-pulse duration (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " SK Slovenčina" + ], + "desc": "" } } } diff --git a/Translations/translation_SL.json b/Translations/translation_SL.json index 622fb026..c964b03a 100644 --- a/Translations/translation_SL.json +++ b/Translations/translation_SL.json @@ -305,6 +305,13 @@ "duration" ], "desc": "Keep-awake-pulse duration (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " SL Slovenščina" + ], + "desc": "" } } } diff --git a/Translations/translation_SR_CYRL.json b/Translations/translation_SR_CYRL.json index 4c0eb951..84fe1352 100644 --- a/Translations/translation_SR_CYRL.json +++ b/Translations/translation_SR_CYRL.json @@ -305,6 +305,13 @@ "duration" ], "desc": "Keep-awake-pulse duration (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " SR Српски" + ], + "desc": "" } } } diff --git a/Translations/translation_SR_LATN.json b/Translations/translation_SR_LATN.json index d5192fe7..07cf7856 100644 --- a/Translations/translation_SR_LATN.json +++ b/Translations/translation_SR_LATN.json @@ -305,6 +305,13 @@ "duration" ], "desc": "Keep-awake-pulse duration (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " SR Srpski" + ], + "desc": "" } } } diff --git a/Translations/translation_SV.json b/Translations/translation_SV.json index b30449c2..cd66de4c 100644 --- a/Translations/translation_SV.json +++ 
b/Translations/translation_SV.json @@ -305,6 +305,13 @@ "duration" ], "desc": "Keep-awake-pulse duration (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " SV Svenska" + ], + "desc": "" } } } diff --git a/Translations/translation_TR.json b/Translations/translation_TR.json index 50c3a01f..d95961d7 100644 --- a/Translations/translation_TR.json +++ b/Translations/translation_TR.json @@ -329,6 +329,13 @@ "duration" ], "desc": "Keep-awake-pulse duration (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " TR Türkçe" + ], + "desc": "" } } } diff --git a/Translations/translation_UK.json b/Translations/translation_UK.json index fbd44a3a..5ec0021b 100644 --- a/Translations/translation_UK.json +++ b/Translations/translation_UK.json @@ -306,6 +306,13 @@ "duration" ], "desc": "Keep-awake-pulse duration (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " UK Українська" + ], + "desc": "" } } } diff --git a/Translations/translation_YUE_HK.json b/Translations/translation_YUE_HK.json index 19af3103..0bfd24e0 100644 --- a/Translations/translation_YUE_HK.json +++ b/Translations/translation_YUE_HK.json @@ -201,6 +201,10 @@ "PowerPulseDuration": {
"text2": "電源脈衝時長",
"desc": "為保持電源喚醒,每次通電脈衝嘅時間長度 <x250ms(亳秒)>"
+ },
+ "LanguageSwitch": {
+ "text2": "語言: 廣東話",
+ "desc": ""
}
}
}
diff --git a/Translations/translation_ZH_CN.json b/Translations/translation_ZH_CN.json
index 5ef42239..7a024672 100644
--- a/Translations/translation_ZH_CN.json
+++ b/Translations/translation_ZH_CN.json
@@ -201,6 +201,10 @@
     "PowerPulseDuration": {
"text2": "电源脉冲时长",
"desc": "为保持电源唤醒,每次通电脉冲的时间长度 <x250ms(亳秒)>"
+ },
+ "LanguageSwitch": {
+ "text2": "语言:简体中文",
+ "desc": ""
}
}
}
diff --git a/Translations/translation_ZH_TW.json b/Translations/translation_ZH_TW.json
index 5d9aed5f..84f81437 100644
--- a/Translations/translation_ZH_TW.json
+++ b/Translations/translation_ZH_TW.json
@@ -201,6 +201,10 @@
     "PowerPulseDuration": {
"text2": "電源脈衝時長",
"desc": "為保持電源喚醒,每次通電脈衝的時間長度 <x250ms(亳秒)>"
+ },
+ "LanguageSwitch": {
+ "text2": "語言:正體中文",
+ "desc": ""
}
}
}
diff --git a/Translations/translations_def.js b/Translations/translations_def.js
index 2bbdb9d0..6d0f206e 100644
--- a/Translations/translations_def.js
+++ b/Translations/translations_def.js
@@ -339,6 +339,11 @@ var def
       "id": "PowerPulseDuration",
"maxLen": 6,
"maxLen2": 13
+ },
+ {
+ "id": "LanguageSwitch",
+ "maxLen": 7,
+ "maxLen2": 15
}
]
}
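The first firmware-side change below raises the Pinecil's declared RAM, presumably to make room for the new decompression buffers:

```python
# The Pinecil linker script change raises __RAM_SIZE from 0x5000 to 0x6800,
# i.e. from 20 KiB to 26 KiB of RAM made available to the firmware.
assert 0x5000 == 20 * 1024
assert 0x6800 == 26 * 1024
```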
diff --git a/source/Core/BSP/Pine64/Vendor/SoC/gd32vf103/Board/pinecil/Source/GCC/gcc_gd32vf103_flashxip.ld b/source/Core/BSP/Pine64/Vendor/SoC/gd32vf103/Board/pinecil/Source/GCC/gcc_gd32vf103_flashxip.ld
index 819ae05f..710bbdd8 100644
--- a/source/Core/BSP/Pine64/Vendor/SoC/gd32vf103/Board/pinecil/Source/GCC/gcc_gd32vf103_flashxip.ld
+++ b/source/Core/BSP/Pine64/Vendor/SoC/gd32vf103/Board/pinecil/Source/GCC/gcc_gd32vf103_flashxip.ld
@@ -50,7 +50,7 @@ __ILM_RAM_SIZE = 0x00010000;
  * </h>
  */
 __RAM_BASE = 0x20000000;
-__RAM_SIZE = 0x00005000;
+__RAM_SIZE = 0x00006800;
 /********************* Stack / Heap Configuration ****************************
  * <h> Stack / Heap Configuration
diff --git a/source/Core/Drivers/OLED.cpp b/source/Core/Drivers/OLED.cpp
index f1e057bc..0555f838 100644
--- a/source/Core/Drivers/OLED.cpp
+++ b/source/Core/Drivers/OLED.cpp
@@ -113,35 +113,49 @@ void OLED::setFramebuffer(uint8_t *buffer) {
  * Precursor is the command char that is used to select the table.
  */
 void OLED::drawChar(const uint16_t charCode, const FontStyle fontStyle) {
+  const uint8_t *currentFont;
   static uint8_t fontWidth, fontHeight;
+  uint16_t       index;
   switch (fontStyle) {
-  case FontStyle::SMALL:
-    currentFont = Font_6x8;
-    fontHeight  = 8;
-    fontWidth   = 6;
-    break;
   case FontStyle::EXTRAS:
     currentFont = ExtraFontChars;
+    index       = charCode;
     fontHeight  = 16;
     fontWidth   = 12;
     break;
+  case FontStyle::SMALL:
   case FontStyle::LARGE:
   default:
-    currentFont = Font_12x16;
-    fontHeight  = 16;
-    fontWidth   = 12;
+    if (charCode == '\x01' && cursor_y == 0) { // 0x01 is used as new line char
+      setCursor(0, 8);
+      return;
+    } else if (charCode <= 0x01) {
+      return;
+    }
+    currentFont = nullptr;
+    index       = 0;
+    switch (fontStyle) {
+    case FontStyle::SMALL:
+      fontHeight = 8;
+      fontWidth  = 6;
+      break;
+    case FontStyle::LARGE:
+    default:
+      fontHeight = 16;
+      fontWidth  = 12;
+      break;
+    }
+    for (uint32_t i = 0; i < FontSectionsCount; i++) {
+      const auto &section = FontSections[i];
+      if (charCode >= section.symbol_start && charCode < section.symbol_end) {
+        currentFont = fontStyle == FontStyle::SMALL ? section.font06_start_ptr : section.font12_start_ptr;
+        index       = charCode - section.symbol_start;
+        break;
+      }
+    }
     break;
   }
-
-  if (charCode == '\x01' && cursor_y == 0) { // 0x01 is used as new line char
-    setCursor(0, 8);
-    return;
-  } else if (charCode <= 0x01) {
-    return;
-  }
-
-  // First index is \x02
-  const uint16_t index = charCode - 2;
   const uint8_t *charPointer = currentFont + ((fontWidth * (fontHeight / 8)) * index);
   drawArea(cursor_x, cursor_y, fontWidth, fontHeight, charPointer);
   cursor_x += fontWidth;
@@ -348,7 +362,7 @@ void OLED::debugNumber(int32_t val, FontStyle fontStyle) {
 void OLED::drawSymbol(uint8_t symbolID) {
   // draw a symbol to the current cursor location
-  drawChar(symbolID + 2, FontStyle::EXTRAS);
+  drawChar(symbolID, FontStyle::EXTRAS);
 }
 // Draw an area, but y must be aligned on 0/8 offset
diff --git a/source/Core/Inc/Settings.h b/source/Core/Inc/Settings.h
index 1a854366..45e3bb20 100644
--- a/source/Core/Inc/Settings.h
+++ b/source/Core/Inc/Settings.h
@@ -10,7 +10,7 @@
 #ifndef SETTINGS_H_
 #define SETTINGS_H_
 #include <stdint.h>
-#define SETTINGSVERSION (0x29)
+#define SETTINGSVERSION (0x2A)
 /*Change this if you change the struct below to prevent people getting \
   out of sync*/
@@ -59,6 +59,7 @@ typedef struct {
   uint8_t hallEffectSensitivity;      // Operating mode of the hall effect sensor
   uint8_t accelMissingWarningCounter; // Counter of how many times we have warned we cannot detect the accelerometer
   uint8_t pdMissingWarningCounter;    // Counter of how many times we have warned we cannot detect the pd interface
+  char    uiLanguage[8];              // Selected UI Language code, null-terminated *only if* the length is less than 8 chars
   uint32_t padding; // This is here for in case we are not an even divisor so
                     // that nothing gets cut off
diff --git a/source/Core/Inc/Translation.h b/source/Core/Inc/Translation.h
index 62327d7f..84e88844 100644
--- a/source/Core/Inc/Translation.h
+++ b/source/Core/Inc/Translation.h
@@ -57,6 +57,7 @@ enum class SettingsItemIndex : uint8_t {
   AnimSpeed,
PowerPulseWait,
PowerPulseDuration,
+ LanguageSwitch,
NUM_ITEMS,
};
@@ -110,11 +111,28 @@ struct TranslationIndexTable {
   uint16_t SettingsMenuEntriesDescriptions[5]; // unused
};
-extern const TranslationIndexTable *const Tr;
-extern const char *const TranslationStrings;
+extern const TranslationIndexTable *Tr;
+extern const char * TranslationStrings;
-extern const uint8_t *const Font_12x16;
-extern const uint8_t *const Font_6x8;
+struct TranslationData {
+ TranslationIndexTable indices;
+ // The translation strings follow the translation index table.
+ // C++ does not support flexible array members as C does, so we use a 1-element
+ // array as a placeholder.
+ char strings[1];
+};
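To make the intent of the 1-element placeholder concrete, here is a minimal sketch (illustrative only; the helper name is not part of the diff) of how a blob laid out this way is read. It mirrors what `lang_multi.cpp` further down in this patch does with the decompressed buffer:

```
// Illustration, assuming the TranslationData struct above: the blob is the
// index table immediately followed by the string pool.
static void adoptTranslationBlob(const uint8_t *blob) {
  const auto *data   = reinterpret_cast<const TranslationData *>(blob);
  Tr                 = &data->indices; // fixed-size index table at the start
  TranslationStrings = data->strings;  // string pool begins right after it
}
```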
+
+struct FontSection {
+ /// Start index of font section, inclusive
+ uint16_t symbol_start;
+ /// End index of font section, exclusive
+ uint16_t symbol_end;
+ const uint8_t *font12_start_ptr;
+ const uint8_t *font06_start_ptr;
+};
+
+extern const FontSection *const FontSections;
+extern const uint8_t FontSectionsCount;
constexpr uint8_t settings_item_index(const SettingsItemIndex i) { return static_cast<uint8_t>(i); }
// Use a constexpr function for type-checking.
@@ -123,5 +141,7 @@ constexpr uint8_t settings_item_index(const SettingsItemIndex i) { return static const char *translatedString(uint16_t index);
void prepareTranslations();
+bool settings_displayLanguageSwitch(void);
+bool settings_setLanguageSwitch(void);
#endif /* TRANSLATION_H_ */
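For reference, a minimal sketch of how the section table above is meant to be consulted, mirroring the `OLED::drawChar` change earlier in this diff; the helper name and the `nullptr` fallback are illustrative, not part of the patch:

```
// Illustration only: find the 12x16 glyph for a character code by scanning the
// FontSections table declared above. 12x16 glyphs occupy 12 * (16 / 8) = 24 bytes.
static const uint8_t *lookupGlyph12x16(uint16_t charCode) {
  for (uint8_t i = 0; i < FontSectionsCount; i++) {
    const FontSection &section = FontSections[i];
    if (charCode >= section.symbol_start && charCode < section.symbol_end) {
      return section.font12_start_ptr + (charCode - section.symbol_start) * 24;
    }
  }
  return nullptr; // character not covered by any linked-in font section
}
```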
diff --git a/source/Core/Inc/Translation_multi.h b/source/Core/Inc/Translation_multi.h new file mode 100644 index 00000000..bd6b2528 --- /dev/null +++ b/source/Core/Inc/Translation_multi.h @@ -0,0 +1,42 @@ +#ifndef TRANSLATION_MULTI_H_ +#define TRANSLATION_MULTI_H_ + +#include "Translation.h" + +// The compressed translation data will be decompressed to this buffer. These +// data may include: +// - TranslationData (translation index table and translation strings) +// - Font table(s) +// The translation index table consists of uint16_t (half words) which has a +// 2-byte alignment. Therefore, the declaration of this buffer must include +// the alignment specifier `alignas(TranslationData)` to satisfy its alignment. +// TranslationData must always be decompressed to the start of this buffer. +extern uint8_t translation_data_out_buffer[]; +extern const uint16_t translation_data_out_buffer_size; + +struct FontSectionDataInfo { + uint16_t symbol_start; + uint16_t symbol_count; + uint16_t data_size : 15; + bool data_is_compressed : 1; + + // Font12x16 data followed by font6x8 data + const uint8_t *data_ptr; +}; + +extern const FontSectionDataInfo FontSectionDataInfos[]; +extern const uint8_t FontSectionDataCount; + +extern FontSection DynamicFontSections[]; + +struct LanguageMeta { + char code[8]; + const uint8_t *translation_data; + uint16_t translation_size : 15; + bool translation_is_compressed : 1; +}; + +extern const LanguageMeta LanguageMetas[]; +extern const uint8_t LanguageCount; + +#endif /* TRANSLATION_MULTI_H_ */ diff --git a/source/Core/LangSupport/lang_multi.cpp b/source/Core/LangSupport/lang_multi.cpp new file mode 100644 index 00000000..13d7cd89 --- /dev/null +++ b/source/Core/LangSupport/lang_multi.cpp @@ -0,0 +1,92 @@ +#include "OLED.hpp" +#include "Translation.h" +#include "Translation_multi.h" +#include "brieflz.h" +#include "configuration.h" +#include "gui.hpp" + +const TranslationIndexTable *Tr = nullptr; +const char * TranslationStrings = nullptr; + +static uint8_t selectedLangIndex = 255; + +static void initSelectedLanguageIndex() { + if (selectedLangIndex == 255) { + const char *lang = const_cast<char *>(systemSettings.uiLanguage); + for (size_t i = 0; i < LanguageCount; i++) { + if (strncmp(lang, LanguageMetas[i].code, sizeof(systemSettings.uiLanguage)) == 0) { + selectedLangIndex = i; + return; + } + } + // No match, use the first language. + selectedLangIndex = 0; + } +} + +static void writeSelectedLanguageToSettings() { + char *lang = const_cast<char *>(systemSettings.uiLanguage); + strncpy(lang, LanguageMetas[selectedLangIndex].code, sizeof(systemSettings.uiLanguage)); +} + +void prepareTranslations() { + initSelectedLanguageIndex(); + if (selectedLangIndex >= LanguageCount) { + // This shouldn't happen. 
+ return; + } + const LanguageMeta &langMeta = LanguageMetas[selectedLangIndex]; + + const TranslationData *translationData; + uint16_t buffer_remaining_size = translation_data_out_buffer_size; + uint8_t * buffer_next_ptr = translation_data_out_buffer; + if (langMeta.translation_is_compressed) { + unsigned int outsize; + outsize = blz_depack_srcsize(langMeta.translation_data, buffer_next_ptr, langMeta.translation_size); + + translationData = reinterpret_cast<const TranslationData *>(buffer_next_ptr); + buffer_remaining_size -= outsize; + buffer_next_ptr += outsize; + } else { + translationData = reinterpret_cast<const TranslationData *>(langMeta.translation_data); + } + Tr = &translationData->indices; + TranslationStrings = translationData->strings; + + memset(DynamicFontSections, 0, FontSectionsCount * sizeof(DynamicFontSections[0])); + for (int i = 0; i < FontSectionDataCount; i++) { + const auto &fontSectionDataInfo = FontSectionDataInfos[i]; + auto & fontSection = DynamicFontSections[i]; + fontSection.symbol_start = fontSectionDataInfo.symbol_start; + fontSection.symbol_end = fontSection.symbol_start + fontSectionDataInfo.symbol_count; + const uint16_t font12_size = fontSectionDataInfo.symbol_count * (12 * 16 / 8); + uint16_t dataSize; + if (fontSectionDataInfo.data_is_compressed) { + unsigned int outsize; + outsize = blz_depack_srcsize(fontSectionDataInfo.data_ptr, buffer_next_ptr, fontSectionDataInfo.data_size); + + fontSection.font12_start_ptr = buffer_next_ptr; + dataSize = outsize; + buffer_remaining_size -= outsize; + buffer_next_ptr += outsize; + } else { + fontSection.font12_start_ptr = fontSectionDataInfo.data_ptr; + dataSize = fontSectionDataInfo.data_size; + } + if (dataSize > font12_size) { + fontSection.font06_start_ptr = fontSection.font12_start_ptr + font12_size; + } + } +} + +bool settings_setLanguageSwitch(void) { + selectedLangIndex = (selectedLangIndex + 1) % LanguageCount; + writeSelectedLanguageToSettings(); + prepareTranslations(); + return selectedLangIndex == (LanguageCount - 1); +} + +bool settings_displayLanguageSwitch(void) { + OLED::printWholeScreen(translatedString(Tr->SettingsShortNames[static_cast<uint8_t>(SettingsItemIndex::LanguageSwitch)])); + return false; +} diff --git a/source/Core/LangSupport/lang_single.cpp b/source/Core/LangSupport/lang_single.cpp new file mode 100644 index 00000000..75258521 --- /dev/null +++ b/source/Core/LangSupport/lang_single.cpp @@ -0,0 +1,7 @@ +#include "Translation.h" + +bool settings_setLanguageSwitch(void) { return false; } + +bool settings_displayLanguageSwitch(void) { + return true; // skip +} diff --git a/source/Core/Src/gui.cpp b/source/Core/Src/gui.cpp index 2804f2bb..b5a3b331 100644 --- a/source/Core/Src/gui.cpp +++ b/source/Core/Src/gui.cpp @@ -144,6 +144,7 @@ const menuitem rootSettingsMenu[]{ {0, settings_enterPowerSavingMenu, settings_displayPowerSavingMenu}, /*Sleep Options Menu*/ {0, settings_enterUIMenu, settings_displayUIMenu}, /*UI Menu*/ {0, settings_enterAdvancedMenu, settings_displayAdvancedMenu}, /*Advanced Menu*/ + {0, settings_setLanguageSwitch, settings_displayLanguageSwitch}, /*Language Switch*/ {0, nullptr, nullptr} // end of menu marker. DO NOT REMOVE }; diff --git a/source/Core/brieflz/README.md b/source/Core/brieflz/README.md new file mode 100644 index 00000000..06f7f7da --- /dev/null +++ b/source/Core/brieflz/README.md @@ -0,0 +1,41 @@ +This directory contains file originally by other people. 
+ + +## BriefLZ + +- `brieflz_btparse.h` +- `brieflz_hashbucket.h` +- `brieflz_lazy.h` +- `brieflz_leparse.h` +- `brieflz.c` +- `depack.c` + +The above files are originally obtained from https://github.com/jibsen/brieflz +(commit 0ab07a5). + +### License: + +``` +The zlib License (Zlib) + +Copyright (c) 2002-2020 Joergen Ibsen + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any damages +arising from the use of this software. + +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment in the product + documentation would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any source + distribution. +``` diff --git a/source/Core/brieflz/brieflz.c b/source/Core/brieflz/brieflz.c new file mode 100644 index 00000000..e197f573 --- /dev/null +++ b/source/Core/brieflz/brieflz.c @@ -0,0 +1,659 @@ +// +// BriefLZ - small fast Lempel-Ziv +// +// C packer +// +// Copyright (c) 2002-2020 Joergen Ibsen +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must +// not claim that you wrote the original software. If you use this +// software in a product, an acknowledgment in the product +// documentation would be appreciated but is not required. +// +// 2. Altered source versions must be plainly marked as such, and must +// not be misrepresented as being the original software. +// +// 3. This notice may not be removed or altered from any source +// distribution. +// + +#include "brieflz.h" + +#include <assert.h> +#include <limits.h> +#include <stdint.h> + +#if _MSC_VER >= 1400 +# include <intrin.h> +# define BLZ_BUILTIN_MSVC +#elif defined(__clang__) && defined(__has_builtin) +# if __has_builtin(__builtin_clz) +# define BLZ_BUILTIN_GCC +# endif +#elif __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) +# define BLZ_BUILTIN_GCC +#endif + +// Type used to store values in workmem. +// +// This is used to store positions and lengths, so src_size has to be within +// the range of this type. +// +typedef uint32_t blz_word; + +#define BLZ_WORD_MAX UINT32_MAX + +// Number of bits of hash to use for lookup. +// +// The size of the lookup table (and thus workmem) depends on this. +// +// Values between 10 and 18 work well. Lower values generally make compression +// speed faster but ratio worse. The default value 17 (128k entries) is a +// compromise. 
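As a worked example of the sizes involved (my arithmetic, not from the source): with the default `BLZ_HASH_BITS` of 17 and `blz_word` defined as `uint32_t`, the level-1 packer's work memory is `(1 << 17) * 4` bytes = 512 KiB, independent of input size. That is far larger than the 0x6800 bytes (26 KiB) of RAM the Pinecil linker script above provides, which is consistent with compression being done only at build time while the firmware ships just the depacker.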
+// +#ifndef BLZ_HASH_BITS +# define BLZ_HASH_BITS 17 +#endif + +#define LOOKUP_SIZE (1UL << BLZ_HASH_BITS) + +#define NO_MATCH_POS ((blz_word) -1) + +// Internal data structure +struct blz_state { + unsigned char *next_out; + unsigned char *tag_out; + unsigned int tag; + int bits_left; +}; + +#if !defined(BLZ_NO_LUT) +static const unsigned short blz_gamma_lookup[512][2] = { + {0, 0}, + {0, 0}, + + {0x00, 2}, {0x02, 2}, + + {0x04, 4}, {0x06, 4}, {0x0C, 4}, {0x0E, 4}, + + {0x14, 6}, {0x16, 6}, {0x1C, 6}, {0x1E, 6}, + {0x34, 6}, {0x36, 6}, {0x3C, 6}, {0x3E, 6}, + + {0x54, 8}, {0x56, 8}, {0x5C, 8}, {0x5E, 8}, + {0x74, 8}, {0x76, 8}, {0x7C, 8}, {0x7E, 8}, + {0xD4, 8}, {0xD6, 8}, {0xDC, 8}, {0xDE, 8}, + {0xF4, 8}, {0xF6, 8}, {0xFC, 8}, {0xFE, 8}, + + {0x154, 10}, {0x156, 10}, {0x15C, 10}, {0x15E, 10}, + {0x174, 10}, {0x176, 10}, {0x17C, 10}, {0x17E, 10}, + {0x1D4, 10}, {0x1D6, 10}, {0x1DC, 10}, {0x1DE, 10}, + {0x1F4, 10}, {0x1F6, 10}, {0x1FC, 10}, {0x1FE, 10}, + {0x354, 10}, {0x356, 10}, {0x35C, 10}, {0x35E, 10}, + {0x374, 10}, {0x376, 10}, {0x37C, 10}, {0x37E, 10}, + {0x3D4, 10}, {0x3D6, 10}, {0x3DC, 10}, {0x3DE, 10}, + {0x3F4, 10}, {0x3F6, 10}, {0x3FC, 10}, {0x3FE, 10}, + + {0x554, 12}, {0x556, 12}, {0x55C, 12}, {0x55E, 12}, + {0x574, 12}, {0x576, 12}, {0x57C, 12}, {0x57E, 12}, + {0x5D4, 12}, {0x5D6, 12}, {0x5DC, 12}, {0x5DE, 12}, + {0x5F4, 12}, {0x5F6, 12}, {0x5FC, 12}, {0x5FE, 12}, + {0x754, 12}, {0x756, 12}, {0x75C, 12}, {0x75E, 12}, + {0x774, 12}, {0x776, 12}, {0x77C, 12}, {0x77E, 12}, + {0x7D4, 12}, {0x7D6, 12}, {0x7DC, 12}, {0x7DE, 12}, + {0x7F4, 12}, {0x7F6, 12}, {0x7FC, 12}, {0x7FE, 12}, + {0xD54, 12}, {0xD56, 12}, {0xD5C, 12}, {0xD5E, 12}, + {0xD74, 12}, {0xD76, 12}, {0xD7C, 12}, {0xD7E, 12}, + {0xDD4, 12}, {0xDD6, 12}, {0xDDC, 12}, {0xDDE, 12}, + {0xDF4, 12}, {0xDF6, 12}, {0xDFC, 12}, {0xDFE, 12}, + {0xF54, 12}, {0xF56, 12}, {0xF5C, 12}, {0xF5E, 12}, + {0xF74, 12}, {0xF76, 12}, {0xF7C, 12}, {0xF7E, 12}, + {0xFD4, 12}, {0xFD6, 12}, {0xFDC, 12}, {0xFDE, 12}, + {0xFF4, 12}, {0xFF6, 12}, {0xFFC, 12}, {0xFFE, 12}, + + {0x1554, 14}, {0x1556, 14}, {0x155C, 14}, {0x155E, 14}, + {0x1574, 14}, {0x1576, 14}, {0x157C, 14}, {0x157E, 14}, + {0x15D4, 14}, {0x15D6, 14}, {0x15DC, 14}, {0x15DE, 14}, + {0x15F4, 14}, {0x15F6, 14}, {0x15FC, 14}, {0x15FE, 14}, + {0x1754, 14}, {0x1756, 14}, {0x175C, 14}, {0x175E, 14}, + {0x1774, 14}, {0x1776, 14}, {0x177C, 14}, {0x177E, 14}, + {0x17D4, 14}, {0x17D6, 14}, {0x17DC, 14}, {0x17DE, 14}, + {0x17F4, 14}, {0x17F6, 14}, {0x17FC, 14}, {0x17FE, 14}, + {0x1D54, 14}, {0x1D56, 14}, {0x1D5C, 14}, {0x1D5E, 14}, + {0x1D74, 14}, {0x1D76, 14}, {0x1D7C, 14}, {0x1D7E, 14}, + {0x1DD4, 14}, {0x1DD6, 14}, {0x1DDC, 14}, {0x1DDE, 14}, + {0x1DF4, 14}, {0x1DF6, 14}, {0x1DFC, 14}, {0x1DFE, 14}, + {0x1F54, 14}, {0x1F56, 14}, {0x1F5C, 14}, {0x1F5E, 14}, + {0x1F74, 14}, {0x1F76, 14}, {0x1F7C, 14}, {0x1F7E, 14}, + {0x1FD4, 14}, {0x1FD6, 14}, {0x1FDC, 14}, {0x1FDE, 14}, + {0x1FF4, 14}, {0x1FF6, 14}, {0x1FFC, 14}, {0x1FFE, 14}, + {0x3554, 14}, {0x3556, 14}, {0x355C, 14}, {0x355E, 14}, + {0x3574, 14}, {0x3576, 14}, {0x357C, 14}, {0x357E, 14}, + {0x35D4, 14}, {0x35D6, 14}, {0x35DC, 14}, {0x35DE, 14}, + {0x35F4, 14}, {0x35F6, 14}, {0x35FC, 14}, {0x35FE, 14}, + {0x3754, 14}, {0x3756, 14}, {0x375C, 14}, {0x375E, 14}, + {0x3774, 14}, {0x3776, 14}, {0x377C, 14}, {0x377E, 14}, + {0x37D4, 14}, {0x37D6, 14}, {0x37DC, 14}, {0x37DE, 14}, + {0x37F4, 14}, {0x37F6, 14}, {0x37FC, 14}, {0x37FE, 14}, + {0x3D54, 14}, {0x3D56, 14}, {0x3D5C, 14}, {0x3D5E, 14}, + {0x3D74, 14}, {0x3D76, 14}, {0x3D7C, 14}, 
{0x3D7E, 14}, + {0x3DD4, 14}, {0x3DD6, 14}, {0x3DDC, 14}, {0x3DDE, 14}, + {0x3DF4, 14}, {0x3DF6, 14}, {0x3DFC, 14}, {0x3DFE, 14}, + {0x3F54, 14}, {0x3F56, 14}, {0x3F5C, 14}, {0x3F5E, 14}, + {0x3F74, 14}, {0x3F76, 14}, {0x3F7C, 14}, {0x3F7E, 14}, + {0x3FD4, 14}, {0x3FD6, 14}, {0x3FDC, 14}, {0x3FDE, 14}, + {0x3FF4, 14}, {0x3FF6, 14}, {0x3FFC, 14}, {0x3FFE, 14}, + + {0x5554, 16}, {0x5556, 16}, {0x555C, 16}, {0x555E, 16}, + {0x5574, 16}, {0x5576, 16}, {0x557C, 16}, {0x557E, 16}, + {0x55D4, 16}, {0x55D6, 16}, {0x55DC, 16}, {0x55DE, 16}, + {0x55F4, 16}, {0x55F6, 16}, {0x55FC, 16}, {0x55FE, 16}, + {0x5754, 16}, {0x5756, 16}, {0x575C, 16}, {0x575E, 16}, + {0x5774, 16}, {0x5776, 16}, {0x577C, 16}, {0x577E, 16}, + {0x57D4, 16}, {0x57D6, 16}, {0x57DC, 16}, {0x57DE, 16}, + {0x57F4, 16}, {0x57F6, 16}, {0x57FC, 16}, {0x57FE, 16}, + {0x5D54, 16}, {0x5D56, 16}, {0x5D5C, 16}, {0x5D5E, 16}, + {0x5D74, 16}, {0x5D76, 16}, {0x5D7C, 16}, {0x5D7E, 16}, + {0x5DD4, 16}, {0x5DD6, 16}, {0x5DDC, 16}, {0x5DDE, 16}, + {0x5DF4, 16}, {0x5DF6, 16}, {0x5DFC, 16}, {0x5DFE, 16}, + {0x5F54, 16}, {0x5F56, 16}, {0x5F5C, 16}, {0x5F5E, 16}, + {0x5F74, 16}, {0x5F76, 16}, {0x5F7C, 16}, {0x5F7E, 16}, + {0x5FD4, 16}, {0x5FD6, 16}, {0x5FDC, 16}, {0x5FDE, 16}, + {0x5FF4, 16}, {0x5FF6, 16}, {0x5FFC, 16}, {0x5FFE, 16}, + {0x7554, 16}, {0x7556, 16}, {0x755C, 16}, {0x755E, 16}, + {0x7574, 16}, {0x7576, 16}, {0x757C, 16}, {0x757E, 16}, + {0x75D4, 16}, {0x75D6, 16}, {0x75DC, 16}, {0x75DE, 16}, + {0x75F4, 16}, {0x75F6, 16}, {0x75FC, 16}, {0x75FE, 16}, + {0x7754, 16}, {0x7756, 16}, {0x775C, 16}, {0x775E, 16}, + {0x7774, 16}, {0x7776, 16}, {0x777C, 16}, {0x777E, 16}, + {0x77D4, 16}, {0x77D6, 16}, {0x77DC, 16}, {0x77DE, 16}, + {0x77F4, 16}, {0x77F6, 16}, {0x77FC, 16}, {0x77FE, 16}, + {0x7D54, 16}, {0x7D56, 16}, {0x7D5C, 16}, {0x7D5E, 16}, + {0x7D74, 16}, {0x7D76, 16}, {0x7D7C, 16}, {0x7D7E, 16}, + {0x7DD4, 16}, {0x7DD6, 16}, {0x7DDC, 16}, {0x7DDE, 16}, + {0x7DF4, 16}, {0x7DF6, 16}, {0x7DFC, 16}, {0x7DFE, 16}, + {0x7F54, 16}, {0x7F56, 16}, {0x7F5C, 16}, {0x7F5E, 16}, + {0x7F74, 16}, {0x7F76, 16}, {0x7F7C, 16}, {0x7F7E, 16}, + {0x7FD4, 16}, {0x7FD6, 16}, {0x7FDC, 16}, {0x7FDE, 16}, + {0x7FF4, 16}, {0x7FF6, 16}, {0x7FFC, 16}, {0x7FFE, 16}, + {0xD554, 16}, {0xD556, 16}, {0xD55C, 16}, {0xD55E, 16}, + {0xD574, 16}, {0xD576, 16}, {0xD57C, 16}, {0xD57E, 16}, + {0xD5D4, 16}, {0xD5D6, 16}, {0xD5DC, 16}, {0xD5DE, 16}, + {0xD5F4, 16}, {0xD5F6, 16}, {0xD5FC, 16}, {0xD5FE, 16}, + {0xD754, 16}, {0xD756, 16}, {0xD75C, 16}, {0xD75E, 16}, + {0xD774, 16}, {0xD776, 16}, {0xD77C, 16}, {0xD77E, 16}, + {0xD7D4, 16}, {0xD7D6, 16}, {0xD7DC, 16}, {0xD7DE, 16}, + {0xD7F4, 16}, {0xD7F6, 16}, {0xD7FC, 16}, {0xD7FE, 16}, + {0xDD54, 16}, {0xDD56, 16}, {0xDD5C, 16}, {0xDD5E, 16}, + {0xDD74, 16}, {0xDD76, 16}, {0xDD7C, 16}, {0xDD7E, 16}, + {0xDDD4, 16}, {0xDDD6, 16}, {0xDDDC, 16}, {0xDDDE, 16}, + {0xDDF4, 16}, {0xDDF6, 16}, {0xDDFC, 16}, {0xDDFE, 16}, + {0xDF54, 16}, {0xDF56, 16}, {0xDF5C, 16}, {0xDF5E, 16}, + {0xDF74, 16}, {0xDF76, 16}, {0xDF7C, 16}, {0xDF7E, 16}, + {0xDFD4, 16}, {0xDFD6, 16}, {0xDFDC, 16}, {0xDFDE, 16}, + {0xDFF4, 16}, {0xDFF6, 16}, {0xDFFC, 16}, {0xDFFE, 16}, + {0xF554, 16}, {0xF556, 16}, {0xF55C, 16}, {0xF55E, 16}, + {0xF574, 16}, {0xF576, 16}, {0xF57C, 16}, {0xF57E, 16}, + {0xF5D4, 16}, {0xF5D6, 16}, {0xF5DC, 16}, {0xF5DE, 16}, + {0xF5F4, 16}, {0xF5F6, 16}, {0xF5FC, 16}, {0xF5FE, 16}, + {0xF754, 16}, {0xF756, 16}, {0xF75C, 16}, {0xF75E, 16}, + {0xF774, 16}, {0xF776, 16}, {0xF77C, 16}, {0xF77E, 16}, + {0xF7D4, 16}, {0xF7D6, 16}, {0xF7DC, 16}, {0xF7DE, 16}, 
+ {0xF7F4, 16}, {0xF7F6, 16}, {0xF7FC, 16}, {0xF7FE, 16}, + {0xFD54, 16}, {0xFD56, 16}, {0xFD5C, 16}, {0xFD5E, 16}, + {0xFD74, 16}, {0xFD76, 16}, {0xFD7C, 16}, {0xFD7E, 16}, + {0xFDD4, 16}, {0xFDD6, 16}, {0xFDDC, 16}, {0xFDDE, 16}, + {0xFDF4, 16}, {0xFDF6, 16}, {0xFDFC, 16}, {0xFDFE, 16}, + {0xFF54, 16}, {0xFF56, 16}, {0xFF5C, 16}, {0xFF5E, 16}, + {0xFF74, 16}, {0xFF76, 16}, {0xFF7C, 16}, {0xFF7E, 16}, + {0xFFD4, 16}, {0xFFD6, 16}, {0xFFDC, 16}, {0xFFDE, 16}, + {0xFFF4, 16}, {0xFFF6, 16}, {0xFFFC, 16}, {0xFFFE, 16} +}; +#endif + +static int +blz_log2(unsigned long n) +{ + assert(n > 0); + +#if defined(BLZ_BUILTIN_MSVC) + unsigned long msb_pos; + _BitScanReverse(&msb_pos, n); + return (int) msb_pos; +#elif defined(BLZ_BUILTIN_GCC) + return (int) sizeof(n) * CHAR_BIT - 1 - __builtin_clzl(n); +#else + int bits = 0; + + while (n >>= 1) { + ++bits; + } + + return bits; +#endif +} + +static unsigned long +blz_gamma_cost(unsigned long n) +{ + assert(n >= 2); + + return 2 * (unsigned long) blz_log2(n); +} + +static unsigned long +blz_match_cost(unsigned long pos, unsigned long len) +{ + return 1 + blz_gamma_cost(len - 2) + blz_gamma_cost((pos >> 8) + 2) + 8; +} + +// Heuristic to compare matches +static int +blz_match_better(unsigned long cur, unsigned long new_pos, unsigned long new_len, + unsigned long pos, unsigned long len) +{ + const unsigned long offs = cur - pos - 1; + const unsigned long new_offs = cur - new_pos - 1; + + return (new_len > len + 1) + || (new_len >= len + 1 && new_offs / 8 <= offs); +} + +// Heuristic to compare match with match at next position +static int +blz_next_match_better(unsigned long cur, unsigned long new_pos, unsigned long new_len, + unsigned long pos, unsigned long len) +{ + const unsigned long offs = cur - pos - 1; + const unsigned long new_offs = cur + 1 - new_pos - 1; + + return (new_len > len + 1 && new_offs / 8 < offs) + || (new_len > len && new_offs < offs) + || (new_len >= len && new_offs < offs / 4); +} + +static void +blz_putbit(struct blz_state *bs, unsigned int bit) +{ + // Check if tag is full + if (!bs->bits_left--) { + // Store tag + bs->tag_out[0] = bs->tag & 0x00FF; + bs->tag_out[1] = (bs->tag >> 8) & 0x00FF; + + // Init next tag + bs->tag_out = bs->next_out; + bs->next_out += 2; + bs->bits_left = 15; + } + + // Shift bit into tag + bs->tag = (bs->tag << 1) + bit; +} + +static void +blz_putbits(struct blz_state *bs, unsigned long bits, int num) +{ + assert(num >= 0 && num <= 16); + assert((bits & (~0UL << num)) == 0); + + // Shift num bits into tag + unsigned long tag = ((unsigned long) bs->tag << num) | bits; + bs->tag = (unsigned int) tag; + + // Check if tag is full + if (bs->bits_left < num) { + const unsigned int top16 = (unsigned int) (tag >> (num - bs->bits_left)); + + // Store tag + bs->tag_out[0] = top16 & 0x00FF; + bs->tag_out[1] = (top16 >> 8) & 0x00FF; + + // Init next tag + bs->tag_out = bs->next_out; + bs->next_out += 2; + + bs->bits_left += 16; + } + + bs->bits_left -= num; +} + +// Encode val using a universal code based on Elias gamma. +// +// This outputs each bit of val (after the leading one bit) as a pair where +// the first bit is the value, and the second is zero if this was the last +// pair, and one otherwise. +// +// 2 = 10 -> 00 +// 3 = 11 -> 10 +// 4 = 100 -> 01 00 +// 5 = 101 -> 01 10 +// 6 = 110 -> 11 00 +// ... +// +// On modern hardware this variant is slower to decode because we cannot count +// the leading zeroes to get the number of value bits and then read them +// directly. 
However on constrained hardware, it has the advantage of being +// decodable using only one variable (register) and a tiny loop: +// +// result = 1; +// do { result = (result << 1) + getbit(); } while (getbit()); +// +// Strictly speaking, this is order-1 exp-Golomb, where we interleave the +// value bits with the bits of the unary coding of the length, but I've always +// known it as the gamma2 code. I am not sure where it originated from, but I +// can see I used it in aPLib around 1998. +// +static void +blz_putgamma(struct blz_state *bs, unsigned long val) +{ + assert(val >= 2); + +#if !defined(BLZ_NO_LUT) + // Output small values using lookup + if (val < 512) { + const unsigned int bits = blz_gamma_lookup[val][0]; + const unsigned int shift = blz_gamma_lookup[val][1]; + + blz_putbits(bs, bits, (int) shift); + + return; + } +#endif + + // Create a mask for the second-highest bit of val +#if defined(BLZ_BUILTIN_MSVC) + unsigned long msb_pos; + _BitScanReverse(&msb_pos, val); + unsigned long mask = 1UL << (msb_pos - 1); +#elif defined(BLZ_BUILTIN_GCC) + unsigned long mask = 1UL << ((int) sizeof(val) * CHAR_BIT - 2 - __builtin_clzl(val)); +#else + unsigned long mask = val >> 1; + + // Clear bits except highest + while (mask & (mask - 1)) { + mask &= mask - 1; + } +#endif + + // Output gamma2-encoded bits + blz_putbit(bs, (val & mask) ? 1 : 0); + + while (mask >>= 1) { + blz_putbit(bs, 1); + blz_putbit(bs, (val & mask) ? 1 : 0); + } + + blz_putbit(bs, 0); +} + +static unsigned char* +blz_finalize(struct blz_state *bs) +{ + // Trailing one bit to delimit any literal tags + blz_putbit(bs, 1); + + // Shift last tag into position and store + bs->tag <<= bs->bits_left; + bs->tag_out[0] = bs->tag & 0x00FF; + bs->tag_out[1] = (bs->tag >> 8) & 0x00FF; + + // Return pointer one past end of output + return bs->next_out; +} + +// Hash four bytes starting a p. +// +// This is Fibonacci hashing, also known as Knuth's multiplicative hash. The +// constant is a prime close to 2^32/phi. +// +static unsigned long +blz_hash4_bits(const unsigned char *p, int bits) +{ + assert(bits > 0 && bits <= 32); + + uint32_t val = (uint32_t) p[0] + | ((uint32_t) p[1] << 8) + | ((uint32_t) p[2] << 16) + | ((uint32_t) p[3] << 24); + + return (val * UINT32_C(2654435761)) >> (32 - bits); +} + +static unsigned long +blz_hash4(const unsigned char *p) +{ + return blz_hash4_bits(p, BLZ_HASH_BITS); +} + +size_t +blz_max_packed_size(size_t src_size) +{ + return src_size + src_size / 8 + 64; +} + +size_t +blz_workmem_size(size_t src_size) +{ + (void) src_size; + + return LOOKUP_SIZE * sizeof(blz_word); +} + +// Simple LZSS using hashing. +// +// The lookup table stores the previous position in the input that had a given +// hash value, or NO_MATCH_POS if none. +// +unsigned long +blz_pack(const void *src, void *dst, unsigned long src_size, void *workmem) +{ + struct blz_state bs; + blz_word *const lookup = (blz_word *) workmem; + const unsigned char *const in = (const unsigned char *) src; + const unsigned long last_match_pos = src_size > 4 ? 
src_size - 4 : 0; + unsigned long hash_pos = 0; + unsigned long cur = 0; + + assert(src_size < BLZ_WORD_MAX); + + // Check for empty input + if (src_size == 0) { + return 0; + } + + bs.next_out = (unsigned char *) dst; + + // First byte verbatim + *bs.next_out++ = in[0]; + + // Check for 1 byte input + if (src_size == 1) { + return 1; + } + + // Initialize first tag + bs.tag_out = bs.next_out; + bs.next_out += 2; + bs.tag = 0; + bs.bits_left = 16; + + // Initialize lookup + for (unsigned long i = 0; i < LOOKUP_SIZE; ++i) { + lookup[i] = NO_MATCH_POS; + } + + // Main compression loop + for (cur = 1; cur <= last_match_pos; ) { + // Update lookup up to current position + while (hash_pos < cur) { + lookup[blz_hash4(&in[hash_pos])] = hash_pos; + hash_pos++; + } + + // Look up match for current position + const unsigned long pos = lookup[blz_hash4(&in[cur])]; + unsigned long len = 0; + + // Check match + if (pos != NO_MATCH_POS) { + const unsigned long len_limit = src_size - cur; + + while (len < len_limit + && in[pos + len] == in[cur + len]) { + ++len; + } + } + + // Output match or literal + // + // When offs >= 0x1FFE00, encoding a match of length 4 + // (37 bits) is longer than encoding 4 literals (36 bits). + // + // The value 0x7E00 is a heuristic that sacrifices some + // length 4 matches in the hope that there will be a better + // match at the next position. + if (len > 4 || (len == 4 && cur - pos - 1 < 0x7E00UL)) { + const unsigned long offs = cur - pos - 1; + + // Output match tag + blz_putbit(&bs, 1); + + // Output match length + blz_putgamma(&bs, len - 2); + + // Output match offset + blz_putgamma(&bs, (offs >> 8) + 2); + *bs.next_out++ = offs & 0x00FF; + + cur += len; + } + else { + // Output literal tag + blz_putbit(&bs, 0); + + // Copy literal + *bs.next_out++ = in[cur++]; + } + } + + // Output any remaining literals + while (cur < src_size) { + // Output literal tag + blz_putbit(&bs, 0); + + // Copy literal + *bs.next_out++ = in[cur++]; + } + + // Trailing one bit to delimit any literal tags + blz_putbit(&bs, 1); + + // Shift last tag into position and store + bs.tag <<= bs.bits_left; + bs.tag_out[0] = bs.tag & 0x00FF; + bs.tag_out[1] = (bs.tag >> 8) & 0x00FF; + + // Return compressed size + return (unsigned long) (bs.next_out - (unsigned char *) dst); +} + +// Include compression algorithms used by blz_pack_level +#include "brieflz_btparse.h" +#include "brieflz_hashbucket.h" +#include "brieflz_lazy.h" +#include "brieflz_leparse.h" + +size_t +blz_workmem_size_level(size_t src_size, int level) +{ + switch (level) { + case 1: + return blz_workmem_size(src_size); + case 2: + return blz_lazy_workmem_size(src_size); + case 3: + return blz_hashbucket_workmem_size(src_size, 2); + case 4: + return blz_hashbucket_workmem_size(src_size, 4); + case 5: + case 6: + case 7: + return blz_leparse_workmem_size(src_size); + case 8: + case 9: + case 10: + return blz_btparse_workmem_size(src_size); + default: + return (size_t) -1; + } +} + +unsigned long +blz_pack_level(const void *src, void *dst, unsigned long src_size, + void *workmem, int level) +{ + switch (level) { + case 1: + return blz_pack(src, dst, src_size, workmem); + case 2: + return blz_pack_lazy(src, dst, src_size, workmem); + case 3: + return blz_pack_hashbucket(src, dst, src_size, workmem, 2, 16); + case 4: + return blz_pack_hashbucket(src, dst, src_size, workmem, 4, 16); + case 5: + return blz_pack_leparse(src, dst, src_size, workmem, 1, 16); + case 6: + return blz_pack_leparse(src, dst, src_size, workmem, 8, 32); + case 7: + 
return blz_pack_leparse(src, dst, src_size, workmem, 64, 64); + case 8: + return blz_pack_btparse(src, dst, src_size, workmem, 16, 96); + case 9: + return blz_pack_btparse(src, dst, src_size, workmem, 32, 224); + case 10: + return blz_pack_btparse(src, dst, src_size, workmem, ULONG_MAX, ULONG_MAX); + default: + return BLZ_ERROR; + } +} + +// clang -g -O1 -fsanitize=fuzzer,address -DBLZ_FUZZING brieflz.c depack.c +#if defined(BLZ_FUZZING) +#include <limits.h> +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#ifndef BLZ_FUZZ_LEVEL +# define BLZ_FUZZ_LEVEL 1 +#endif + +extern int +LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) +{ + if (size > ULONG_MAX / 2) { return 0; } + void *workmem = malloc(blz_workmem_size_level(size, BLZ_FUZZ_LEVEL)); + void *packed = malloc(blz_max_packed_size(size)); + void *depacked = malloc(size); + if (!workmem || !packed || !depacked) { abort(); } + unsigned long packed_size = blz_pack_level(data, packed, size, workmem, BLZ_FUZZ_LEVEL); + blz_depack(packed, depacked, size); + if (memcmp(data, depacked, size)) { abort(); } + free(depacked); + free(packed); + free(workmem); + return 0; +} +#endif diff --git a/source/Core/brieflz/brieflz.h b/source/Core/brieflz/brieflz.h new file mode 100644 index 00000000..cb6162a4 --- /dev/null +++ b/source/Core/brieflz/brieflz.h @@ -0,0 +1,183 @@ +/* + * BriefLZ - small fast Lempel-Ziv + * + * C/C++ header file + * + * Copyright (c) 2002-2020 Joergen Ibsen + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must + * not claim that you wrote the original software. If you use this + * software in a product, an acknowledgment in the product + * documentation would be appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must + * not be misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source + * distribution. + */ + +#ifndef BRIEFLZ_H_INCLUDED +#define BRIEFLZ_H_INCLUDED + +#include <stddef.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define BLZ_VER_MAJOR 1 /**< Major version number */ +#define BLZ_VER_MINOR 3 /**< Minor version number */ +#define BLZ_VER_PATCH 0 /**< Patch version number */ +#define BLZ_VER_STRING "1.3.0" /**< Version number as a string */ + +#define BLZ_NO_LUT + +#ifdef BLZ_DLL +# if defined(_WIN32) || defined(__CYGWIN__) +# ifdef BLZ_DLL_EXPORTS +# define BLZ_API __declspec(dllexport) +# else +# define BLZ_API __declspec(dllimport) +# endif +# define BLZ_LOCAL +# else +# if __GNUC__ >= 4 +# define BLZ_API __attribute__ ((visibility ("default"))) +# define BLZ_LOCAL __attribute__ ((visibility ("hidden"))) +# else +# define BLZ_API +# define BLZ_LOCAL +# endif +# endif +#else +# define BLZ_API +# define BLZ_LOCAL +#endif + +/** + * Return value on error. + * + * @see blz_depack_safe + */ +#ifndef BLZ_ERROR +# define BLZ_ERROR ((unsigned long) (-1)) +#endif + +/** + * Get bound on compressed data size. 
+ * + * @see blz_pack + * + * @param src_size number of bytes to compress + * @return maximum size of compressed data + */ +BLZ_API size_t +blz_max_packed_size(size_t src_size); + +/** + * Get required size of `workmem` buffer. + * + * @see blz_pack + * + * @param src_size number of bytes to compress + * @return required size in bytes of `workmem` buffer + */ +BLZ_API size_t +blz_workmem_size(size_t src_size); + +/** + * Compress `src_size` bytes of data from `src` to `dst`. + * + * @param src pointer to data + * @param dst pointer to where to place compressed data + * @param src_size number of bytes to compress + * @param workmem pointer to memory for temporary use + * @return size of compressed data + */ +BLZ_API unsigned long +blz_pack(const void *src, void *dst, unsigned long src_size, void *workmem); + +/** + * Get required size of `workmem` buffer. + * + * @see blz_pack_level + * + * @param src_size number of bytes to compress + * @param level compression level + * @return required size in bytes of `workmem` buffer + */ +BLZ_API size_t +blz_workmem_size_level(size_t src_size, int level); + +/** + * Compress `src_size` bytes of data from `src` to `dst`. + * + * Compression levels between 1 and 9 offer a trade-off between + * time/space and ratio. Level 10 is optimal but very slow. + * + * @param src pointer to data + * @param dst pointer to where to place compressed data + * @param src_size number of bytes to compress + * @param workmem pointer to memory for temporary use + * @param level compression level + * @return size of compressed data + */ +BLZ_API unsigned long +blz_pack_level(const void *src, void *dst, unsigned long src_size, + void *workmem, int level); + +/** + * Decompress `depacked_size` bytes of data from `src` to `dst`. + * + * @param src pointer to compressed data + * @param dst pointer to where to place decompressed data + * @param depacked_size size of decompressed data + * @return size of decompressed data + */ +BLZ_API unsigned long +blz_depack(const void *src, void *dst, unsigned long depacked_size); + +/** + * Decompress `src_size` bytes of data from `src` to `dst`. + * + * This function is unsafe. If the provided data is malformed, it may + * read more than `src_size` from the `src` buffer. + * + * @param src pointer to compressed data + * @param dst pointer to where to place decompressed data + * @param src_size size of the compressed data + * @return size of decompressed data + */ +BLZ_API unsigned long +blz_depack_srcsize(const void *src, void *dst, unsigned long src_size); + +/** + * Decompress `depacked_size` bytes of data from `src` to `dst`. + * + * Reads at most `src_size` bytes from `src`. + * Writes at most `depacked_size` bytes to `dst`. 
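A usage sketch of the API declared in this header (illustrative only; the function name and buffer handling are mine, loosely mirroring the fuzzing harness in brieflz.c above):

```
#include <cstring>
#include <vector>
#include "brieflz.h"

// Illustration only: pack a buffer and verify it depacks back to the original.
static bool blzRoundtripCheck(const unsigned char *src, unsigned long src_size) {
  if (src_size == 0) {
    return true;
  }
  std::vector<unsigned char> workmem(blz_workmem_size(src_size));
  std::vector<unsigned char> packed(blz_max_packed_size(src_size));
  std::vector<unsigned char> depacked(src_size);

  const unsigned long packed_size   = blz_pack(src, packed.data(), src_size, workmem.data());
  // The depacker is told the original (decompressed) size, not the packed size.
  const unsigned long depacked_size = blz_depack(packed.data(), depacked.data(), src_size);

  return packed_size != 0 && depacked_size == src_size && std::memcmp(src, depacked.data(), src_size) == 0;
}
```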
+ * + * @param src pointer to compressed data + * @param src_size size of compressed data + * @param dst pointer to where to place decompressed data + * @param depacked_size size of decompressed data + * @return size of decompressed data, `BLZ_ERROR` on error + */ +BLZ_API unsigned long +blz_depack_safe(const void *src, unsigned long src_size, + void *dst, unsigned long depacked_size); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* BRIEFLZ_H_INCLUDED */ diff --git a/source/Core/brieflz/brieflz_btparse.h b/source/Core/brieflz/brieflz_btparse.h new file mode 100644 index 00000000..61d7c175 --- /dev/null +++ b/source/Core/brieflz/brieflz_btparse.h @@ -0,0 +1,332 @@ +// +// BriefLZ - small fast Lempel-Ziv +// +// Forwards dynamic programming parse using binary trees +// +// Copyright (c) 2016-2020 Joergen Ibsen +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must +// not claim that you wrote the original software. If you use this +// software in a product, an acknowledgment in the product +// documentation would be appreciated but is not required. +// +// 2. Altered source versions must be plainly marked as such, and must +// not be misrepresented as being the original software. +// +// 3. This notice may not be removed or altered from any source +// distribution. +// + +#ifndef BRIEFLZ_BTPARSE_H_INCLUDED +#define BRIEFLZ_BTPARSE_H_INCLUDED + +static size_t +blz_btparse_workmem_size(size_t src_size) +{ + return (5 * src_size + 3 + LOOKUP_SIZE) * sizeof(blz_word); +} + +// Forwards dynamic programming parse using binary trees, checking all +// possible matches. +// +// The match search uses a binary tree for each hash entry, which is updated +// dynamically as it is searched by re-rooting the tree at the search string. +// +// This does not result in balanced trees on all inputs, but often works well +// in practice, and has the advantage that we get the matches in order from +// closest and back. +// +// A drawback is the memory requirement of 5 * src_size words, since we cannot +// overlap the arrays in a forwards parse. +// +// This match search method is found in LZMA by Igor Pavlov, libdeflate +// by Eric Biggers, and other libraries. +// +static unsigned long +blz_pack_btparse(const void *src, void *dst, unsigned long src_size, void *workmem, + const unsigned long max_depth, const unsigned long accept_len) +{ + struct blz_state bs; + const unsigned char *const in = (const unsigned char *) src; + const unsigned long last_match_pos = src_size > 4 ? 
src_size - 4 : 0; + + assert(src_size < BLZ_WORD_MAX); + + // Check for empty input + if (src_size == 0) { + return 0; + } + + bs.next_out = (unsigned char *) dst; + + // First byte verbatim + *bs.next_out++ = in[0]; + + // Check for 1 byte input + if (src_size == 1) { + return 1; + } + + // Initialize first tag + bs.tag_out = bs.next_out; + bs.next_out += 2; + bs.tag = 0; + bs.bits_left = 16; + + if (src_size < 4) { + for (unsigned long i = 1; i < src_size; ++i) { + // Output literal tag + blz_putbit(&bs, 0); + + // Copy literal + *bs.next_out++ = in[i]; + } + + // Return compressed size + return (unsigned long) (blz_finalize(&bs) - (unsigned char *) dst); + } + + blz_word *const cost = (blz_word *) workmem; + blz_word *const mpos = cost + src_size + 1; + blz_word *const mlen = mpos + src_size + 1; + blz_word *const nodes = mlen + src_size + 1; + blz_word *const lookup = nodes + 2 * src_size; + + // Initialize lookup + for (unsigned long i = 0; i < LOOKUP_SIZE; ++i) { + lookup[i] = NO_MATCH_POS; + } + + // Since we are not processing the first literal, update tree for + // position 0 + lookup[blz_hash4(&in[0])] = 0; + nodes[0] = NO_MATCH_POS; + nodes[1] = NO_MATCH_POS; + + // Initialize to all literals with infinite cost + for (unsigned long i = 0; i <= src_size; ++i) { + cost[i] = BLZ_WORD_MAX; + mlen[i] = 1; + } + + cost[0] = 0; + cost[1] = 8; + + // Next position where we are going to check matches + // + // This is used to skip matching while still updating the trees when + // we find a match that is accept_len or longer. + // + unsigned long next_match_cur = 1; + + // Phase 1: Find lowest cost path arriving at each position + for (unsigned long cur = 1; cur <= last_match_pos; ++cur) { + // Adjust remaining costs to avoid overflow + if (cost[cur] > BLZ_WORD_MAX - 128) { + blz_word min_cost = BLZ_WORD_MAX; + + for (unsigned long i = cur; i <= src_size; ++i) { + min_cost = cost[i] < min_cost ? cost[i] : min_cost; + } + + for (unsigned long i = cur; i <= src_size; ++i) { + if (cost[i] != BLZ_WORD_MAX) { + cost[i] -= min_cost; + } + } + } + + // Check literal + if (cost[cur + 1] > cost[cur] + 9) { + cost[cur + 1] = cost[cur] + 9; + mlen[cur + 1] = 1; + } + + if (cur > next_match_cur) { + next_match_cur = cur; + } + + unsigned long max_len = 3; + + // Look up first match for current position + // + // pos is the current root of the tree of strings with this + // hash. We are going to re-root the tree so cur becomes the + // new root. + // + const unsigned long hash = blz_hash4(&in[cur]); + unsigned long pos = lookup[hash]; + lookup[hash] = cur; + + blz_word *lt_node = &nodes[2 * cur]; + blz_word *gt_node = &nodes[2 * cur + 1]; + unsigned long lt_len = 0; + unsigned long gt_len = 0; + + assert(pos == NO_MATCH_POS || pos < cur); + + // If we are checking matches, allow lengths up to end of + // input, otherwise compare only up to accept_len + const unsigned long len_limit = cur == next_match_cur ? src_size - cur + : accept_len < src_size - cur ? accept_len + : src_size - cur; + unsigned long num_chain = max_depth; + + // Check matches + for (;;) { + // If at bottom of tree, mark leaf nodes + // + // In case we reached max_depth, this also prunes the + // subtree we have not searched yet and do not know + // where belongs. 
+ // + if (pos == NO_MATCH_POS || num_chain-- == 0) { + *lt_node = NO_MATCH_POS; + *gt_node = NO_MATCH_POS; + + break; + } + + // The string at pos is lexicographically greater than + // a string that matched in the first lt_len positions, + // and less than a string that matched in the first + // gt_len positions, so it must match up to at least + // the minimum of these. + unsigned long len = lt_len < gt_len ? lt_len : gt_len; + + // Find match len + while (len < len_limit && in[pos + len] == in[cur + len]) { + ++len; + } + + // Extend current match if possible + // + // Note that we are checking matches in order from the + // closest and back. This means for a match further + // away, the encoding of all lengths up to the current + // max length will always be longer or equal, so we need + // only consider the extension. + // + if (cur == next_match_cur && len > max_len) { + for (unsigned long i = max_len + 1; i <= len; ++i) { + unsigned long match_cost = blz_match_cost(cur - pos - 1, i); + + assert(match_cost < BLZ_WORD_MAX - cost[cur]); + + unsigned long cost_there = cost[cur] + match_cost; + + if (cost_there < cost[cur + i]) { + cost[cur + i] = cost_there; + mpos[cur + i] = cur - pos - 1; + mlen[cur + i] = i; + } + } + + max_len = len; + + if (len >= accept_len) { + next_match_cur = cur + len; + } + } + + // If we reach maximum match length, the string at pos + // is equal to cur, so we can assign the left and right + // subtrees. + // + // This removes pos from the tree, but we added cur + // which is equal and closer for future matches. + // + if (len >= accept_len || len == len_limit) { + *lt_node = nodes[2 * pos]; + *gt_node = nodes[2 * pos + 1]; + + break; + } + + // Go to previous match and restructure tree + // + // lt_node points to a node that is going to contain + // elements lexicographically less than cur (the search + // string). + // + // If the string at pos is less than cur, we set that + // lt_node to pos. We know that all elements in the + // left subtree are less than pos, and thus less than + // cur, so we point lt_node at the right subtree of + // pos and continue our search there. + // + // The equivalent applies to gt_node when the string at + // pos is greater than cur. 
+ // + if (in[pos + len] < in[cur + len]) { + *lt_node = pos; + lt_node = &nodes[2 * pos + 1]; + assert(*lt_node == NO_MATCH_POS || *lt_node < pos); + pos = *lt_node; + lt_len = len; + } + else { + *gt_node = pos; + gt_node = &nodes[2 * pos]; + assert(*gt_node == NO_MATCH_POS || *gt_node < pos); + pos = *gt_node; + gt_len = len; + } + } + } + + for (unsigned long cur = last_match_pos + 1; cur < src_size; ++cur) { + // Check literal + if (cost[cur + 1] > cost[cur] + 9) { + cost[cur + 1] = cost[cur] + 9; + mlen[cur + 1] = 1; + } + } + + // Phase 2: Follow lowest cost path backwards gathering tokens + unsigned long next_token = src_size; + + for (unsigned long cur = src_size; cur > 1; cur -= mlen[cur], --next_token) { + mlen[next_token] = mlen[cur]; + mpos[next_token] = mpos[cur]; + } + + // Phase 3: Output tokens + unsigned long cur = 1; + + for (unsigned long i = next_token + 1; i <= src_size; cur += mlen[i++]) { + if (mlen[i] == 1) { + // Output literal tag + blz_putbit(&bs, 0); + + // Copy literal + *bs.next_out++ = in[cur]; + } + else { + const unsigned long offs = mpos[i]; + + // Output match tag + blz_putbit(&bs, 1); + + // Output match length + blz_putgamma(&bs, mlen[i] - 2); + + // Output match offset + blz_putgamma(&bs, (offs >> 8) + 2); + *bs.next_out++ = offs & 0x00FF; + } + } + + // Return compressed size + return (unsigned long) (blz_finalize(&bs) - (unsigned char *) dst); +} + +#endif /* BRIEFLZ_BTPARSE_H_INCLUDED */ diff --git a/source/Core/brieflz/brieflz_hashbucket.h b/source/Core/brieflz/brieflz_hashbucket.h new file mode 100644 index 00000000..99441423 --- /dev/null +++ b/source/Core/brieflz/brieflz_hashbucket.h @@ -0,0 +1,262 @@ +// +// BriefLZ - small fast Lempel-Ziv +// +// Lazy parsing with multiple previous positions per hash +// +// Copyright (c) 2016-2020 Joergen Ibsen +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must +// not claim that you wrote the original software. If you use this +// software in a product, an acknowledgment in the product +// documentation would be appreciated but is not required. +// +// 2. Altered source versions must be plainly marked as such, and must +// not be misrepresented as being the original software. +// +// 3. This notice may not be removed or altered from any source +// distribution. +// + +#ifndef BRIEFLZ_HASHBUCKET_H_INCLUDED +#define BRIEFLZ_HASHBUCKET_H_INCLUDED + +static size_t +blz_hashbucket_workmem_size(size_t src_size, unsigned int bucket_size) +{ + (void) src_size; + + assert(bucket_size > 0); + assert(sizeof(bucket_size) < sizeof(size_t) + || bucket_size < SIZE_MAX / (LOOKUP_SIZE * sizeof(blz_word))); + + return (LOOKUP_SIZE * bucket_size) * sizeof(blz_word); +} + +// Lazy parsing with multiple previous positions per hash. +// +// Instead of storing only the previous position a given hash occured at, +// this stores the last bucket_size such positions in lookup. This means we +// can check each of these and choose the "best". +// +// There are multiple options for maintaining the entries of the buckets, we +// simply insert at the front to maintain the order of matches and avoid extra +// variables. 
This gives some overhead for moving elements, but as long as +// bucket_size is small and everything fits in a cache line it is pretty fast. +// +// If we find a match that is accept_len or longer, we stop searching. +// +static unsigned long +blz_pack_hashbucket(const void *src, void *dst, unsigned long src_size, void *workmem, + const unsigned int bucket_size, const unsigned long accept_len) +{ + struct blz_state bs; + blz_word *const lookup = (blz_word *) workmem; + const unsigned char *const in = (const unsigned char *) src; + const unsigned long last_match_pos = src_size > 4 ? src_size - 4 : 0; + unsigned long hash_pos = 0; + unsigned long cur = 0; + + assert(src_size < BLZ_WORD_MAX); + + // Check for empty input + if (src_size == 0) { + return 0; + } + + bs.next_out = (unsigned char *) dst; + + // First byte verbatim + *bs.next_out++ = in[0]; + + // Check for 1 byte input + if (src_size == 1) { + return 1; + } + + // Initialize first tag + bs.tag_out = bs.next_out; + bs.next_out += 2; + bs.tag = 0; + bs.bits_left = 16; + + assert(bucket_size > 0); + assert(sizeof(bucket_size) < sizeof(unsigned long) + || bucket_size < ULONG_MAX / LOOKUP_SIZE); + + // Initialize lookup + for (unsigned long i = 0; i < LOOKUP_SIZE * bucket_size; ++i) { + lookup[i] = NO_MATCH_POS; + } + + // Main compression loop + for (cur = 1; cur <= last_match_pos; ) { + // Update lookup up to current position + while (hash_pos < cur) { + blz_word *const bucket = &lookup[blz_hash4(&in[hash_pos]) * bucket_size]; + unsigned long next = hash_pos; + + // Insert hash_pos at start of bucket + for (unsigned int i = 0; i < bucket_size; ++i) { + unsigned long tmp = bucket[i]; + bucket[i] = next; + next = tmp; + } + + hash_pos++; + } + + unsigned long best_pos = NO_MATCH_POS; + unsigned long best_len = 0; + + // Look up first match for current position + const blz_word *const bucket = &lookup[blz_hash4(&in[cur]) * bucket_size]; + unsigned long pos = bucket[0]; + unsigned int bucket_idx = 0; + + const unsigned long len_limit = src_size - cur; + + // Check matches + while (pos != NO_MATCH_POS) { + unsigned long len = 0; + + // Check match + if (best_len < len_limit + && in[pos + best_len] == in[cur + best_len]) { + while (len < len_limit && in[pos + len] == in[cur + len]) { + ++len; + } + } + + // Update best match + if (blz_match_better(cur, pos, len, best_pos, best_len)) { + best_pos = pos; + best_len = len; + if (best_len >= accept_len) { + break; + } + } + + // Go to previous match + if (++bucket_idx == bucket_size) { + break; + } + pos = bucket[bucket_idx]; + } + + // Check if match at next position is better + if (best_len > 3 && best_len < accept_len && cur < last_match_pos) { + // Update lookup up to next position + { + blz_word *const next_bucket = &lookup[blz_hash4(&in[hash_pos]) * bucket_size]; + unsigned long next = hash_pos; + + // Insert hash_pos at start of bucket + for (unsigned int i = 0; i < bucket_size; ++i) { + unsigned long tmp = next_bucket[i]; + next_bucket[i] = next; + next = tmp; + } + + hash_pos++; + } + + // Look up first match for next position + const blz_word *const next_bucket = &lookup[blz_hash4(&in[cur + 1]) * bucket_size]; + unsigned long next_pos = next_bucket[0]; + unsigned int next_bucket_idx = 0; + + const unsigned long next_len_limit = src_size - (cur + 1); + + // Check matches + while (next_pos != NO_MATCH_POS) { + unsigned long next_len = 0; + + // Check match + if (best_len - 1 < next_len_limit + && in[next_pos + best_len - 1] == in[cur + 1 + best_len - 1]) { + while (next_len < 
next_len_limit + && in[next_pos + next_len] == in[cur + 1 + next_len]) { + ++next_len; + } + } + + if (next_len >= best_len) { + // Replace with next match if it extends backwards + if (next_pos > 0 && in[next_pos - 1] == in[cur]) { + if (blz_match_better(cur, next_pos - 1, next_len + 1, best_pos, best_len)) { + best_pos = next_pos - 1; + best_len = next_len + 1; + } + } + else { + // Drop current match if next match is better + if (blz_next_match_better(cur, next_pos, next_len, best_pos, best_len)) { + best_len = 0; + break; + } + } + } + + // Go to previous match + if (++next_bucket_idx == bucket_size) { + break; + } + next_pos = next_bucket[next_bucket_idx]; + } + } + + // Output match or literal + if (best_len > 4 || (best_len == 4 && cur - best_pos - 1 < 0x3FE00UL)) { + const unsigned long offs = cur - best_pos - 1; + + // Output match tag + blz_putbit(&bs, 1); + + // Output match length + blz_putgamma(&bs, best_len - 2); + + // Output match offset + blz_putgamma(&bs, (offs >> 8) + 2); + *bs.next_out++ = offs & 0x00FF; + + cur += best_len; + } + else { + // Output literal tag + blz_putbit(&bs, 0); + + // Copy literal + *bs.next_out++ = in[cur++]; + } + } + + // Output any remaining literals + while (cur < src_size) { + // Output literal tag + blz_putbit(&bs, 0); + + // Copy literal + *bs.next_out++ = in[cur++]; + } + + // Trailing one bit to delimit any literal tags + blz_putbit(&bs, 1); + + // Shift last tag into position and store + bs.tag <<= bs.bits_left; + bs.tag_out[0] = bs.tag & 0x00FF; + bs.tag_out[1] = (bs.tag >> 8) & 0x00FF; + + // Return compressed size + return (unsigned long) (bs.next_out - (unsigned char *) dst); +} + +#endif /* BRIEFLZ_HASHBUCKET_H_INCLUDED */ diff --git a/source/Core/brieflz/brieflz_lazy.h b/source/Core/brieflz/brieflz_lazy.h new file mode 100644 index 00000000..63a278eb --- /dev/null +++ b/source/Core/brieflz/brieflz_lazy.h @@ -0,0 +1,192 @@ +// +// BriefLZ - small fast Lempel-Ziv +// +// Lazy (non-greedy) parsing with one-byte-lookahead +// +// Copyright (c) 2016-2020 Joergen Ibsen +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must +// not claim that you wrote the original software. If you use this +// software in a product, an acknowledgment in the product +// documentation would be appreciated but is not required. +// +// 2. Altered source versions must be plainly marked as such, and must +// not be misrepresented as being the original software. +// +// 3. This notice may not be removed or altered from any source +// distribution. +// + +#ifndef BRIEFLZ_LAZY_H_INCLUDED +#define BRIEFLZ_LAZY_H_INCLUDED + +static size_t +blz_lazy_workmem_size(size_t src_size) +{ + (void) src_size; + + return LOOKUP_SIZE * sizeof(blz_word); +} + +// Lazy (non-greedy) parsing with one-byte-lookahead. +// +// Each time we find a match, we check if there is a better match at the next +// position, and if so encode a literal instead. 
+// +static unsigned long +blz_pack_lazy(const void *src, void *dst, unsigned long src_size, void *workmem) +{ + struct blz_state bs; + blz_word *const lookup = (blz_word *) workmem; + const unsigned char *const in = (const unsigned char *) src; + const unsigned long last_match_pos = src_size > 4 ? src_size - 4 : 0; + unsigned long hash_pos = 0; + unsigned long cur = 0; + + assert(src_size < BLZ_WORD_MAX); + + // Check for empty input + if (src_size == 0) { + return 0; + } + + bs.next_out = (unsigned char *) dst; + + // First byte verbatim + *bs.next_out++ = in[0]; + + // Check for 1 byte input + if (src_size == 1) { + return 1; + } + + // Initialize first tag + bs.tag_out = bs.next_out; + bs.next_out += 2; + bs.tag = 0; + bs.bits_left = 16; + + // Initialize lookup + for (unsigned long i = 0; i < LOOKUP_SIZE; ++i) { + lookup[i] = NO_MATCH_POS; + } + + // Main compression loop + for (cur = 1; cur <= last_match_pos; ) { + // Update lookup up to current position + while (hash_pos < cur) { + lookup[blz_hash4(&in[hash_pos])] = hash_pos; + hash_pos++; + } + + // Look up match for current position + unsigned long pos = lookup[blz_hash4(&in[cur])]; + unsigned long len = 0; + + // Check match + if (pos != NO_MATCH_POS) { + const unsigned long len_limit = src_size - cur; + + while (len < len_limit + && in[pos + len] == in[cur + len]) { + ++len; + } + } + + // Check if match at next position is better + if (len > 3 && cur < last_match_pos) { + // Update lookup up to next position + lookup[blz_hash4(&in[hash_pos])] = hash_pos; + hash_pos++; + + // Look up match for next position + const unsigned long next_pos = lookup[blz_hash4(&in[cur + 1])]; + unsigned long next_len = 0; + + // Check match + if (next_pos != NO_MATCH_POS && next_pos != pos + 1) { + const unsigned long next_len_limit = src_size - (cur + 1); + + // If last byte matches, so this has a chance to be a better match + if (len - 1 < next_len_limit + && in[next_pos + len - 1] == in[cur + 1 + len - 1]) { + while (next_len < next_len_limit + && in[next_pos + next_len] == in[cur + 1 + next_len]) { + ++next_len; + } + } + } + + if (next_len >= len) { + // Replace with next match if it extends backwards + if (next_pos > 0 && in[next_pos - 1] == in[cur]) { + if (blz_match_better(cur, next_pos - 1, next_len + 1, pos, len)) { + pos = next_pos - 1; + len = next_len + 1; + } + } + else { + // Drop current match if next match is better + if (blz_next_match_better(cur, next_pos, next_len, pos, len)) { + len = 0; + } + } + + } + } + + // Output match or literal + if (len > 4 || (len == 4 && cur - pos - 1 < 0x3FE00UL)) { + const unsigned long offs = cur - pos - 1; + + // Output match tag + blz_putbit(&bs, 1); + + // Output match length + blz_putgamma(&bs, len - 2); + + // Output match offset + blz_putgamma(&bs, (offs >> 8) + 2); + *bs.next_out++ = offs & 0x00FF; + + cur += len; + } + else { + // Output literal tag + blz_putbit(&bs, 0); + + // Copy literal + *bs.next_out++ = in[cur++]; + } + } + + // Output any remaining literals + while (cur < src_size) { + // Output literal tag + blz_putbit(&bs, 0); + + // Copy literal + *bs.next_out++ = in[cur++]; + } + + // Trailing one bit to delimit any literal tags + blz_putbit(&bs, 1); + + // Shift last tag into position and store + bs.tag <<= bs.bits_left; + bs.tag_out[0] = bs.tag & 0x00FF; + bs.tag_out[1] = (bs.tag >> 8) & 0x00FF; + + // Return compressed size + return (unsigned long) (bs.next_out - (unsigned char *) dst); +} + +#endif /* BRIEFLZ_LAZY_H_INCLUDED */ diff --git 
a/source/Core/brieflz/brieflz_leparse.h b/source/Core/brieflz/brieflz_leparse.h new file mode 100644 index 00000000..bb1ecd57 --- /dev/null +++ b/source/Core/brieflz/brieflz_leparse.h @@ -0,0 +1,256 @@ +// +// BriefLZ - small fast Lempel-Ziv +// +// Backwards dynamic programming parse with left-extension of matches +// +// Copyright (c) 2016-2020 Joergen Ibsen +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must +// not claim that you wrote the original software. If you use this +// software in a product, an acknowledgment in the product +// documentation would be appreciated but is not required. +// +// 2. Altered source versions must be plainly marked as such, and must +// not be misrepresented as being the original software. +// +// 3. This notice may not be removed or altered from any source +// distribution. +// + +#ifndef BRIEFLZ_LEPARSE_H_INCLUDED +#define BRIEFLZ_LEPARSE_H_INCLUDED + +static size_t +blz_leparse_workmem_size(size_t src_size) +{ + return (LOOKUP_SIZE < 2 * src_size ? 3 * src_size : src_size + LOOKUP_SIZE) + * sizeof(blz_word); +} + +// Backwards dynamic programming parse with left-extension of matches. +// +// Whenever we find a match that improves the cost at the current position, +// we try to extend this match to the left, and if possible we use that +// left-extension for each position to the left. Since we are processing +// the input from right to left, this matches repeated patterns without +// searching at each position. +// +// Essentially, this improves the worst case for the parsing at a small cost +// in ratio. The match finding is still O(n^2) in number of matches though, +// so may have to limit max_depth on larger block sizes. +// +// This is usually within a few percent of the "optimal" parse with the same +// parameters. +// +static unsigned long +blz_pack_leparse(const void *src, void *dst, unsigned long src_size, void *workmem, + const unsigned long max_depth, const unsigned long accept_len) +{ + struct blz_state bs; + const unsigned char *const in = (const unsigned char *) src; + const unsigned long last_match_pos = src_size > 4 ? src_size - 4 : 0; + + assert(src_size < BLZ_WORD_MAX); + + // Check for empty input + if (src_size == 0) { + return 0; + } + + bs.next_out = (unsigned char *) dst; + + // First byte verbatim + *bs.next_out++ = in[0]; + + // Check for 1 byte input + if (src_size == 1) { + return 1; + } + + // Initialize first tag + bs.tag_out = bs.next_out; + bs.next_out += 2; + bs.tag = 0; + bs.bits_left = 16; + + if (src_size < 4) { + for (unsigned long i = 1; i < src_size; ++i) { + // Output literal tag + blz_putbit(&bs, 0); + + // Copy literal + *bs.next_out++ = in[i]; + } + + // Return compressed size + return (unsigned long) (blz_finalize(&bs) - (unsigned char *) dst); + } + + // With a bit of careful ordering we can fit in 3 * src_size words. + // + // The idea is that the lookup is only used in the first phase to + // build the hash chains, so we overlap it with mpos and mlen. + // Also, since we are using prev from right to left in phase two, + // and that is the order we fill in cost, we can overlap these. 
+ // + // One detail is that we actually use src_size + 1 elements of cost, + // but we put mpos after it, where we do not need the first element. + // + blz_word *const prev = (blz_word *) workmem; + blz_word *const mpos = prev + src_size; + blz_word *const mlen = mpos + src_size; + blz_word *const cost = prev; + blz_word *const lookup = mpos; + + // Phase 1: Build hash chains + const int bits = 2 * src_size < LOOKUP_SIZE ? BLZ_HASH_BITS : blz_log2(src_size); + + // Initialize lookup + for (unsigned long i = 0; i < (1UL << bits); ++i) { + lookup[i] = NO_MATCH_POS; + } + + // Build hash chains in prev + if (last_match_pos > 0) { + for (unsigned long i = 0; i <= last_match_pos; ++i) { + const unsigned long hash = blz_hash4_bits(&in[i], bits); + prev[i] = lookup[hash]; + lookup[hash] = i; + } + } + + // Initialize last three positions as literals + mlen[src_size - 3] = 1; + mlen[src_size - 2] = 1; + mlen[src_size - 1] = 1; + + cost[src_size - 3] = 27; + cost[src_size - 2] = 18; + cost[src_size - 1] = 9; + cost[src_size] = 0; + + // Phase 2: Find lowest cost path from each position to end + for (unsigned long cur = last_match_pos; cur > 0; --cur) { + // Since we updated prev to the end in the first phase, we + // do not need to hash, but can simply look up the previous + // position directly. + unsigned long pos = prev[cur]; + + assert(pos == NO_MATCH_POS || pos < cur); + + // Start with a literal + cost[cur] = cost[cur + 1] + 9; + mlen[cur] = 1; + + unsigned long max_len = 3; + + const unsigned long len_limit = src_size - cur; + unsigned long num_chain = max_depth; + + // Go through the chain of prev matches + for (; pos != NO_MATCH_POS && num_chain--; pos = prev[pos]) { + unsigned long len = 0; + + // If next byte matches, so this has a chance to be a longer match + if (max_len < len_limit && in[pos + max_len] == in[cur + max_len]) { + // Find match len + while (len < len_limit && in[pos + len] == in[cur + len]) { + ++len; + } + } + + // Extend current match if possible + // + // Note that we are checking matches in order from the + // closest and back. This means for a match further + // away, the encoding of all lengths up to the current + // max length will always be longer or equal, so we need + // only consider the extension. 
+ if (len > max_len) { + unsigned long min_cost = ULONG_MAX; + unsigned long min_cost_len = 3; + + // Find lowest cost match length + for (unsigned long i = max_len + 1; i <= len; ++i) { + unsigned long match_cost = blz_match_cost(cur - pos - 1, i); + assert(match_cost < BLZ_WORD_MAX - cost[cur + i]); + unsigned long cost_here = match_cost + cost[cur + i]; + + if (cost_here < min_cost) { + min_cost = cost_here; + min_cost_len = i; + } + } + + max_len = len; + + // Update cost if cheaper + if (min_cost < cost[cur]) { + cost[cur] = min_cost; + mpos[cur] = pos; + mlen[cur] = min_cost_len; + + // Left-extend current match if possible + if (pos > 0 && in[pos - 1] == in[cur - 1]) { + do { + --cur; + --pos; + ++min_cost_len; + unsigned long match_cost = blz_match_cost(cur - pos - 1, min_cost_len); + assert(match_cost < BLZ_WORD_MAX - cost[cur + min_cost_len]); + unsigned long cost_here = match_cost + cost[cur + min_cost_len]; + cost[cur] = cost_here; + mpos[cur] = pos; + mlen[cur] = min_cost_len; + } while (pos > 0 && in[pos - 1] == in[cur - 1]); + break; + } + } + } + + if (len >= accept_len || len == len_limit) { + break; + } + } + } + + mpos[0] = 0; + mlen[0] = 1; + + // Phase 3: Output compressed data, following lowest cost path + for (unsigned long i = 1; i < src_size; i += mlen[i]) { + if (mlen[i] == 1) { + // Output literal tag + blz_putbit(&bs, 0); + + // Copy literal + *bs.next_out++ = in[i]; + } + else { + const unsigned long offs = i - mpos[i] - 1; + + // Output match tag + blz_putbit(&bs, 1); + + // Output match length + blz_putgamma(&bs, mlen[i] - 2); + + // Output match offset + blz_putgamma(&bs, (offs >> 8) + 2); + *bs.next_out++ = offs & 0x00FF; + } + } + + // Return compressed size + return (unsigned long) (blz_finalize(&bs) - (unsigned char *) dst); +} + +#endif /* BRIEFLZ_LEPARSE_H_INCLUDED */ diff --git a/source/Core/brieflz/depack.c b/source/Core/brieflz/depack.c new file mode 100644 index 00000000..b2324cfd --- /dev/null +++ b/source/Core/brieflz/depack.c @@ -0,0 +1,271 @@ +/* + * BriefLZ - small fast Lempel-Ziv + * + * C depacker + * + * Copyright (c) 2002-2018 Joergen Ibsen + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must + * not claim that you wrote the original software. If you use this + * software in a product, an acknowledgment in the product + * documentation would be appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must + * not be misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source + * distribution. 
+ */ + +#include "brieflz.h" + +/* Internal data structure */ +struct blz_state { + const unsigned char *src; + unsigned char *dst; + unsigned int tag; + int bits_left; +}; + +#if !defined(BLZ_NO_LUT) +static const unsigned char blz_gamma_lookup[256][2] = { + /* 00xxxxxx = 2 */ + {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, + {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, + {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, + {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, + {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, + {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, + {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, + {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, + + /* 0100xxxx = 4 */ + {4, 4}, {4, 4}, {4, 4}, {4, 4}, {4, 4}, {4, 4}, {4, 4}, {4, 4}, + {4, 4}, {4, 4}, {4, 4}, {4, 4}, {4, 4}, {4, 4}, {4, 4}, {4, 4}, + + /* 010100xx = 8 */ + {8, 6}, {8, 6}, {8, 6}, {8, 6}, + + /* 01010100 = 16 01010101 = 16+ 01010110 = 17 01010111 = 17+ */ + {16, 8}, {16, 0}, {17, 8}, {17, 0}, + + /* 010110xx = 9 */ + {9, 6}, {9, 6}, {9, 6}, {9, 6}, + + /* 01011100 = 18 01011101 = 18+ 01011110 = 19 01011111 = 19+ */ + {18, 8}, {18, 0}, {19, 8}, {19, 0}, + + /* 0110xxxx = 5 */ + {5, 4}, {5, 4}, {5, 4}, {5, 4}, {5, 4}, {5, 4}, {5, 4}, {5, 4}, + {5, 4}, {5, 4}, {5, 4}, {5, 4}, {5, 4}, {5, 4}, {5, 4}, {5, 4}, + + /* 011100xx = 10 */ + {10, 6}, {10, 6}, {10, 6}, {10, 6}, + + /* 01110100 = 20 01110101 = 20+ 01110110 = 21 01110111 = 21+ */ + {20, 8}, {20, 0}, {21, 8}, {21, 0}, + + /* 011110xx = 11 */ + {11, 6}, {11, 6}, {11, 6}, {11, 6}, + + /* 01111100 = 22 01111101 = 22+ 01111110 = 23 01111111 = 23+ */ + {22, 8}, {22, 0}, {23, 8}, {23, 0}, + + /* 10xxxxxx = 3 */ + {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, + {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, + {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, + {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, + {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, + {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, + {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, + {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, + + /* 1100xxxx = 6 */ + {6, 4}, {6, 4}, {6, 4}, {6, 4}, {6, 4}, {6, 4}, {6, 4}, {6, 4}, + {6, 4}, {6, 4}, {6, 4}, {6, 4}, {6, 4}, {6, 4}, {6, 4}, {6, 4}, + + /* 110100xx = 12 */ + {12, 6}, {12, 6}, {12, 6}, {12, 6}, + + /* 11010100 = 24 11010101 = 24+ 11010110 = 25 11010111 = 25+ */ + {24, 8}, {24, 0}, {25, 8}, {25, 0}, + + /* 110110xx = 13 */ + {13, 6}, {13, 6}, {13, 6}, {13, 6}, + + /* 11011100 = 26 11011101 = 26+ 11011110 = 27 11011111 = 27+ */ + {26, 8}, {26, 0}, {27, 8}, {27, 0}, + + /* 1110xxxx = 7 */ + {7, 4}, {7, 4}, {7, 4}, {7, 4}, {7, 4}, {7, 4}, {7, 4}, {7, 4}, + {7, 4}, {7, 4}, {7, 4}, {7, 4}, {7, 4}, {7, 4}, {7, 4}, {7, 4}, + + /* 111100xx = 14 */ + {14, 6}, {14, 6}, {14, 6}, {14, 6}, + + /* 11110100 = 28 11110101 = 28+ 11110110 = 29 11110111 = 29+ */ + {28, 8}, {28, 0}, {29, 8}, {29, 0}, + + /* 111110xx = 15 */ + {15, 6}, {15, 6}, {15, 6}, {15, 6}, + + /* 11111100 = 30 11111101 = 30+ 11111110 = 31 11111111 = 31+ */ + {30, 8}, {30, 0}, {31, 8}, {31, 0} +}; +#endif + +static unsigned int +blz_getbit(struct blz_state *bs) +{ + unsigned int bit; + + /* Check if tag is empty */ + if (!bs->bits_left--) { + /* Load next tag */ + bs->tag = (unsigned int) bs->src[0] + | ((unsigned int) bs->src[1] << 
8); + bs->src += 2; + bs->bits_left = 15; + } + + /* Shift bit out of tag */ + bit = (bs->tag & 0x8000) ? 1 : 0; + bs->tag <<= 1; + + return bit; +} + +static unsigned long +blz_getgamma(struct blz_state *bs) +{ + unsigned long result = 1; + +#if !defined(BLZ_NO_LUT) + /* Decode up to 8 bits of gamma2 code using lookup if possible */ + if (bs->bits_left >= 8) { + unsigned int top8 = (bs->tag >> 8) & 0x00FF; + int shift; + + result = blz_gamma_lookup[top8][0]; + shift = (int) blz_gamma_lookup[top8][1]; + + if (shift) { + bs->tag <<= shift; + bs->bits_left -= shift; + return result; + } + + bs->tag <<= 8; + bs->bits_left -= 8; + } +#endif + + /* Input gamma2-encoded bits */ + do { + result = (result << 1) + blz_getbit(bs); + } while (blz_getbit(bs)); + + return result; +} + +unsigned long +blz_depack(const void *src, void *dst, unsigned long depacked_size) +{ + struct blz_state bs; + unsigned long dst_size = 0; + + bs.src = (const unsigned char *) src; + bs.dst = (unsigned char *) dst; + + /* Initialise to one bit left in tag; that bit is zero (a literal) */ + bs.bits_left = 1; + bs.tag = 0x4000; + + /* Main decompression loop */ + while (dst_size < depacked_size) { + if (blz_getbit(&bs)) { + /* Input match length and offset */ + unsigned long len = blz_getgamma(&bs) + 2; + unsigned long off = blz_getgamma(&bs) - 2; + + off = (off << 8) + (unsigned long) *bs.src++ + 1; + + /* Copy match */ + { + const unsigned char *p = bs.dst - off; + unsigned long i; + + for (i = len; i > 0; --i) { + *bs.dst++ = *p++; + } + } + + dst_size += len; + } + else { + /* Copy literal */ + *bs.dst++ = *bs.src++; + + dst_size++; + } + } + + /* Return decompressed size */ + return dst_size; +} + +unsigned long +blz_depack_srcsize(const void *src, void *dst, unsigned long src_size) +{ + struct blz_state bs; + unsigned long dst_size = 0; + const unsigned char *src_end = src + src_size; + + bs.src = (const unsigned char *) src; + bs.dst = (unsigned char *) dst; + + /* Initialise to one bit left in tag; that bit is zero (a literal) */ + bs.bits_left = 1; + bs.tag = 0x4000; + + /* Main decompression loop */ + while (bs.src < src_end) { + if (blz_getbit(&bs)) { + /* Input match length and offset */ + unsigned long len = blz_getgamma(&bs) + 2; + unsigned long off = blz_getgamma(&bs) - 2; + + off = (off << 8) + (unsigned long) *bs.src++ + 1; + + /* Copy match */ + { + const unsigned char *p = bs.dst - off; + unsigned long i; + + for (i = len; i > 0; --i) { + *bs.dst++ = *p++; + } + } + + dst_size += len; + } + else { + /* Copy literal */ + *bs.dst++ = *bs.src++; + + dst_size++; + } + } + + /* Return decompressed size */ + return dst_size; +} diff --git a/source/Core/lzfx/README.md b/source/Core/lzfx/README.md deleted file mode 100644 index 82420cf0..00000000 --- a/source/Core/lzfx/README.md +++ /dev/null @@ -1,79 +0,0 @@ -This directory contains file originally by other people. - - -## Simplified LZFX-based compression library - -- `lzfx.c` -- `lzfx.h` - -The above files are obtained from https://github.com/janding/lzfx (commit -448017f). It is a fork by Jan Ding (GitHub user "janding") based on the LZFX -compression library by Andrew Collette. - -### License: - -``` -LZFX is copyright (c) 2009 Andrew Collette and subject to the BSD license -(below). Original LZF copyright statement follows. - -Copyright (c) 2000-2007 Marc Alexander Lehmann <[email protected]> - -Redistribution and use in source and binary forms, with or without modifica- -tion, are permitted provided that the following conditions are met: - - 1. 
Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- -CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO -EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- -CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; -OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, -WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- -ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -OF THE POSSIBILITY OF SUCH DAMAGE. -``` - - -## lzfx-boot - -- `lzfx-host-compress.c` (original: `lzfx-raw.c`) - -The above file is obtained from https://github.com/janding/lzfx-boot (commit -88b1596). - -### License: - -``` -BSD 2-Clause License - -Copyright (c) 2017, Jan Ding -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -``` diff --git a/source/Core/lzfx/lzfx.c b/source/Core/lzfx/lzfx.c deleted file mode 100644 index 81d1560a..00000000 --- a/source/Core/lzfx/lzfx.c +++ /dev/null @@ -1,357 +0,0 @@ -/* - * Copyright (c) 2009 Andrew Collette <andrew.collette at gmail.com> - * http://lzfx.googlecode.com - * - * Implements an LZF-compatible compressor/decompressor based on the liblzf - * codebase written by Marc Lehmann. This code is released under the BSD - * license. License and original copyright statement follow. - * - * - * Copyright (c) 2000-2008 Marc Alexander Lehmann <[email protected]> - * - * Redistribution and use in source and binary forms, with or without modifica- - * tion, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- - * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO - * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- - * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- - * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "lzfx.h" - -#define LZFX_HSIZE (1 << (LZFX_HLOG)) - -/* We need this for memset */ -#ifdef __cplusplus -#include <cstring> -#else -#include <string.h> -#endif - -#if __GNUC__ >= 3 -#define fx_expect_false(expr) __builtin_expect((expr) != 0, 0) -#define fx_expect_true(expr) __builtin_expect((expr) != 0, 1) -#else -#define fx_expect_false(expr) (expr) -#define fx_expect_true(expr) (expr) -#endif - -typedef unsigned char u8; -typedef const u8 * LZSTATE[LZFX_HSIZE]; - -/* Define the hash function */ -#define LZFX_FRST(p) (((p[0]) << 8) | p[1]) -#define LZFX_NEXT(v, p) (((v) << 8) | p[2]) -#define LZFX_IDX(h) (((h >> (3 * 8 - LZFX_HLOG)) - h) & (LZFX_HSIZE - 1)) - -/* These cannot be changed, as they are related to the compressed format. */ -#define LZFX_MAX_LIT (1 << 5) - 1 -#define LZFX_MAX_OFF (1 << 13) -#define LZFX_MAX_REF ((1 << 8) + (1 << 3) - 2) - -static int lzfx_getsize(const void *ibuf, unsigned int ilen, unsigned int *olen); - -/* Compressed format - - There are two kinds of structures in LZF/LZFX: literal runs and back - references. Literals are encoded as follows: - - LLLLL000 <L bytes> - - Back references are encoded as follows. The smallest possible encoded - length value is 1, as otherwise the control byte would be recognized as - a literal run. At least three bytes must match for a back reference - to be inserted. The offset (distance to the desired data in the output - buffer) is encoded as o - 1, as all offsets are at least 1. The binary - format is: - - oooooLLL oooooooo for backrefs of real length < 7 (1 <= L < 7) - ooooo111 LLLLLLLL oooooooo for backrefs of real length >= 7 (L >= 7) -*/ -int lzfx_compress(const void *const ibuf, const unsigned int ilen, void *obuf, unsigned int *const olen) { - - /* Hash table; an array of u8*'s which point - to various locations in the input buffer */ - const u8 *htab[LZFX_HSIZE]; - - const u8 ** hslot; /* Pointer to entry in hash table */ - unsigned int hval; /* Hash value generated by macros above */ - const u8 * ref; /* Pointer to candidate match location in input */ - - const u8 * ip = (const u8 *)ibuf; - const u8 *const in_end = ip + ilen; - - u8 * op = (u8 *)obuf; - const u8 *const out_end = (olen == NULL ? 
NULL : op + *olen); - - int lit; /* # of bytes in current literal run */ - -#if defined(WIN32) && defined(_M_X64) - unsigned _int64 off; /* workaround for missing POSIX compliance */ -#else - unsigned long off; -#endif - - if (olen == NULL) - return LZFX_EARGS; - if (ibuf == NULL) { - if (ilen != 0) - return LZFX_EARGS; - *olen = 0; - return 0; - } - if (obuf == NULL) - return LZFX_EARGS; - - memset(htab, 0, sizeof(htab)); - - /* Start a literal run. Whenever we do this the output pointer is - advanced because the current byte will hold the encoded length. */ - lit = 0; - op++; - - hval = LZFX_FRST(ip); - - while (ip + 2 < in_end) { /* The NEXT macro reads 2 bytes ahead */ - - hval = LZFX_NEXT(hval, ip); - hslot = htab + LZFX_IDX(hval); - - ref = *hslot; - *hslot = ip; - - if (ref < ip && (off = ip - ref - 1) < LZFX_MAX_OFF && ip + 4 < in_end /* Backref takes up to 3 bytes, so don't bother */ - && ref > (u8 *)ibuf && ref[0] == ip[0] && ref[1] == ip[1] && ref[2] == ip[2]) { - - unsigned int len = 3; /* We already know 3 bytes match */ - const unsigned int maxlen = in_end - ip - 2 > LZFX_MAX_REF ? LZFX_MAX_REF : in_end - ip - 2; - - /* lit == 0: op + 3 must be < out_end (because we undo the run) - lit != 0: op + 3 + 1 must be < out_end */ - if (fx_expect_false(op - !lit + 3 + 1 >= out_end)) - return LZFX_ESIZE; - - op[-lit - 1] = lit << 3; /* Terminate literal run */ - op -= !lit; /* Undo run if length is zero */ - - /* Start checking at the fourth byte */ - while (len < maxlen && ref[len] == ip[len]) - len++; - - /* Format 1: [oooooLLL oooooooo] */ - if (len < 7) { - *op++ = ((off >> 8) << 3) + len; - *op++ = off; - - /* Format 2: [ooooo111 LLLLLLLL oooooooo] */ - } else { - *op++ = ((off >> 8) << 3) + 7; - *op++ = len - 7; - *op++ = off; - } - - lit = 0; - op++; - - ip += len - 1; /* ip = initial ip + #octets - 1 */ - - if (fx_expect_false(ip + 3 >= in_end)) { - ip++; /* Code following expects exit at bottom of loop */ - break; - } - - hval = LZFX_FRST(ip); - hval = LZFX_NEXT(hval, ip); - htab[LZFX_IDX(hval)] = ip; - - ip++; /* ip = initial ip + #octets */ - - } else { - /* Keep copying literal bytes */ - - if (fx_expect_false(op >= out_end)) - return LZFX_ESIZE; - - lit++; - *op++ = *ip++; - - if (fx_expect_false(lit == LZFX_MAX_LIT)) { - op[-lit - 1] = lit << 3; /* stop run */ - lit = 0; - op++; /* start run */ - } - - } /* if() found match in htab */ - - } /* while(ip < ilen -2) */ - - /* At most 3 bytes remain in input. We therefore need 4 bytes available - in the output buffer to store them (3 data + ctrl byte).*/ - if (op + 3 > out_end) - return LZFX_ESIZE; - - while (ip < in_end) { - - lit++; - *op++ = *ip++; - - if (fx_expect_false(lit == LZFX_MAX_LIT)) { - op[-lit - 1] = lit << 3; - lit = 0; - op++; - } - } - - op[-lit - 1] = lit << 3; - op -= !lit; - - *olen = op - (u8 *)obuf; - return 0; -} - -/* Decompressor */ -int lzfx_decompress(const void *ibuf, unsigned int ilen, void *obuf, unsigned int *olen) { - - u8 const * ip = (const u8 *)ibuf; - u8 const *const in_end = ip + ilen; - u8 * op = (u8 *)obuf; - u8 const *const out_end = (olen == NULL ? 
NULL : op + *olen); - - unsigned int remain_len = 0; - int rc; - - if (olen == NULL) - return LZFX_EARGS; - if (ibuf == NULL) { - if (ilen != 0) - return LZFX_EARGS; - *olen = 0; - return 0; - } - if (obuf == NULL) { - if (olen != 0) - return LZFX_EARGS; - return lzfx_getsize(ibuf, ilen, olen); - } - - do { - unsigned int ctrl = *ip++; - - /* Format LLLLL000: a literal byte string follows, of length L */ - if ((ctrl & 0x7) == 0) { - unsigned int len = ctrl >> 3; - - if (fx_expect_false(op + len > out_end)) { - --ip; /* Rewind to control byte */ - goto guess; - } - if (fx_expect_false(ip + len > in_end)) - return LZFX_ECORRUPT; - - do - *op++ = *ip++; - while (--len); - - /* Format #1 [oooooLLL oooooooo]: backref of length L+1 - ^^^^^ ^^^^^^^^ - A B - #2 [ooooo111 LLLLLLLL oooooooo] backref of length L+7 - ^^^^^ ^^^^^^^^ - A B - In both cases the location of the backref is computed from the - remaining part of the data as follows: - - location = op - A*256 - B - 1 - */ - } else { - - unsigned int len = ctrl & 0x7; - u8 * ref = op - ((ctrl >> 3) << 8) - 1; - - if (len == 7) - len += *ip++; /* i.e. format #2 */ - - if (fx_expect_false(op + len > out_end)) { - ip -= (len >= 7) ? 2 : 1; /* Rewind to control byte */ - goto guess; - } - if (fx_expect_false(ip >= in_end)) - return LZFX_ECORRUPT; - - ref -= *ip++; - - if (fx_expect_false(ref < (u8 *)obuf)) - return LZFX_ECORRUPT; - - do - *op++ = *ref++; - while (--len); - } - - } while (ip < in_end); - - *olen = op - (u8 *)obuf; - - return 0; - -guess: - rc = lzfx_getsize(ip, ilen - (ip - (u8 *)ibuf), &remain_len); - if (rc >= 0) - *olen = remain_len + (op - (u8 *)obuf); - return rc; -} - -/* Guess len. No parameters may be NULL; this is not checked. */ -static int lzfx_getsize(const void *ibuf, unsigned int ilen, unsigned int *olen) { - - u8 const * ip = (const u8 *)ibuf; - u8 const *const in_end = ip + ilen; - int tot_len = 0; - - while (ip < in_end) { - - unsigned int ctrl = *ip++; - - if ((ctrl & 0x7) == 0) { - - if (ip + (ctrl >> 3) > in_end) - return LZFX_ECORRUPT; - - tot_len += (ctrl >> 3); - ip += (ctrl >> 3); - - } else { - - unsigned int len = ctrl & 0x7; - - if (len == 7) { /* i.e. format #2 */ - len += *ip++; - } - - if (ip >= in_end) - return LZFX_ECORRUPT; - - ip++; /* skip the ref byte */ - - tot_len += len; - } - } - - *olen = tot_len; - - return 0; -} diff --git a/source/Core/lzfx/lzfx.h b/source/Core/lzfx/lzfx.h deleted file mode 100644 index 7b6e472e..00000000 --- a/source/Core/lzfx/lzfx.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2009 Andrew Collette <andrew.collette at gmail.com> - * http://lzfx.googlecode.com - * - * Implements an LZF-compatible compressor/decompressor based on the liblzf - * codebase written by Marc Lehmann. This code is released under the BSD - * license. License and original copyright statement follow. - * - * - * Copyright (c) 2000-2008 Marc Alexander Lehmann <[email protected]> - * - * Redistribution and use in source and binary forms, with or without modifica- - * tion, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- - * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO - * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- - * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- - * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef LZFX_H -#define LZFX_H - -#ifdef __cplusplus -extern "C" { -#endif - -/* Documented behavior, including function signatures and error codes, - is guaranteed to remain unchanged for releases with the same major - version number. Releases of the same major version are also able - to read each other's output, although the output itself is not - guaranteed to be byte-for-byte identical. -*/ -#define LZFX_VERSION_MAJOR 0 -#define LZFX_VERSION_MINOR 1 -#define LZFX_VERSION_STRING "0.1" - -/* Hashtable size (2**LZFX_HLOG entries) */ -#ifndef LZFX_HLOG -#define LZFX_HLOG 16 -#endif - -/* Predefined errors. */ -#define LZFX_ESIZE -1 /* Output buffer too small */ -#define LZFX_ECORRUPT -2 /* Invalid data for decompression */ -#define LZFX_EARGS -3 /* Arguments invalid (NULL) */ - -/* Buffer-to buffer compression. - - Supply pre-allocated input and output buffers via ibuf and obuf, and - their size in bytes via ilen and olen. Buffers may not overlap. - - On success, the function returns a non-negative value and the argument - olen contains the compressed size in bytes. On failure, a negative - value is returned and olen is not modified. -*/ -int lzfx_compress(const void *ibuf, unsigned int ilen, void *obuf, unsigned int *olen); - -/* Buffer-to-buffer decompression. - - Supply pre-allocated input and output buffers via ibuf and obuf, and - their size in bytes via ilen and olen. Buffers may not overlap. - - On success, the function returns a non-negative value and the argument - olen contains the uncompressed size in bytes. On failure, a negative - value is returned. - - If the failure code is LZFX_ESIZE, olen contains the minimum buffer size - required to hold the decompressed data. Otherwise, olen is not modified. - - Supplying a zero *olen is a valid and supported strategy to determine the - required buffer size. This does not require decompression of the entire - stream and is consequently very fast. Argument obuf may be NULL in - this case only. -*/ -int lzfx_decompress(const void *ibuf, unsigned int ilen, void *obuf, unsigned int *olen); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif diff --git a/source/Makefile b/source/Makefile index b86c3c88..c6a4ebad 100644 --- a/source/Makefile +++ b/source/Makefile @@ -12,6 +12,17 @@ endif ALL_LANGUAGES=BG CS DA DE EN ES FI FR HR HU IT JA_JP LT NL NL_BE NO PL PT RU SK SL SR_CYRL SR_LATN SV TR UK YUE_HK ZH_CN ZH_TW
+LANGUAGE_GROUP_CJK_LANGS=EN JA_JP YUE_HK ZH_TW ZH_CN
+LANGUAGE_GROUP_CJK_NAME=Chinese+Japanese
+
+LANGUAGE_GROUP_CYRILLIC_LANGS=EN BG RU SR_CYRL SR_LATN UK
+LANGUAGE_GROUP_CYRILLIC_NAME=Bulgarian+Russian+Serbian+Ukrainian
+
+LANGUAGE_GROUP_EUR_LANGS=EN $(filter-out $(LANGUAGE_GROUP_CJK_LANGS) $(LANGUAGE_GROUP_CYRILLIC_LANGS),$(ALL_LANGUAGES))
+LANGUAGE_GROUP_EUR_NAME=European
+
+LANGUAGE_GROUPS=CJK CYRILLIC EUR
+
# Defines for host tools
ifeq ($(HOST_CC),)
@@ -22,7 +33,7 @@ HOST_OUTPUT_DIR=Objects/host
# Enumerate all of the include directories
APP_INC_DIR = ./Core/Inc
-LZFX_INC_DIR = ./Core/lzfx
+BRIEFLZ_INC_DIR = ./Core/brieflz
MINIWARE_INC_CMSIS_DEVICE = ./Core/BSP/Miniware/Vendor/CMSIS/Device/ST/STM32F1xx/Include
MINIWARE_CMSIS_CORE_INC_DIR = ./Core/BSP/Miniware/Vendor/CMSIS/Include
MINIWARE_HAL_INC_DIR = ./Core/BSP/Miniware/Vendor/STM32F1xx_HAL_Driver/Inc
@@ -42,16 +53,15 @@ PINE_NMSIS_INC_DIR = ./Core/BSP/Pine64/Vendor/NMSIS/Core/Include
PINE_FREERTOS_PORT_INC_DIR = ./Core/BSP/Pine64/Vendor/OS/FreeRTOS/Source/portable/GCC
SOURCE_THREADS_DIR = ./Core/Threads
SOURCE_CORE_DIR = ./Core/Src
-SOURCE_LZFX_DIR = ./Core/lzfx
+SOURCE_BRIEFLZ_DIR = ./Core/brieflz
SOURCE_DRIVERS_DIR = ./Core/Drivers
INC_PD_DRIVERS_DIR = ./Core/Drivers/FUSB302
SOURCE_MIDDLEWARES_DIR = ./Middlewares
# Find-all's used for formatting
-ALL_INCLUDES = $(shell find ./Core -type f -name '*.h') \
- $(shell find ./Core -type f -name '*.hpp')
+ALL_INCLUDES = $(shell find ./Core -path $(BRIEFLZ_INC_DIR) -prune -false -o \( -type f \( -name '*.h' -o -name '*.hpp' \) \) )
+
+ALL_SOURCE = $(shell find ./Core -path $(SOURCE_BRIEFLZ_DIR) -prune -false -o \( -type f \( -name '*.c' -o -name '*.cpp' \) \) )
-ALL_SOURCE = $(shell find ./Core -type f -name '*.c') \
- $(shell find ./Core -type f -name '*.cpp')
# Device dependent settings
ifeq ($(model),$(filter $(model),$(ALL_MINIWARE_MODELS)))
$(info Building for Miniware )
@@ -110,7 +120,7 @@ DEV_CXXFLAGS= -MMD -MP -MF "$(@:%.o=%.d)" -MT "$@"
endif
INCLUDES = -I$(APP_INC_DIR) \
- -I$(LZFX_INC_DIR) \
+ -I$(BRIEFLZ_INC_DIR) \
-I$(FRTOS_CMIS_INC_DIR) \
-I$(FRTOS_INC_DIR) \
-I$(DRIVER_INC_DIR) \
@@ -118,14 +128,13 @@ INCLUDES = -I$(APP_INC_DIR) \
-I$(THREADS_INC_DIR) \
-I$(INC_PD_DRIVERS_DIR) \
$(DEVICE_INCLUDES)
-
-TRANSLATION_FILES=$(wildcard ../../Translations/translation_*.json)
+
SOURCE := $(shell find $(SOURCE_THREADS_DIR) -type f -name '*.c') \
$(shell find $(SOURCE_CORE_DIR) -type f -name '*.c') \
$(shell find $(SOURCE_DRIVERS_DIR) -type f -name '*.c') \
$(shell find $(DEVICE_BSP_DIR) -type f -name '*.c') \
$(shell find $(SOURCE_MIDDLEWARES_DIR) -type f -name '*.c') \
-$(SOURCE_LZFX_DIR)/lzfx.c
+$(SOURCE_BRIEFLZ_DIR)/depack.c
SOURCE_CPP := $(shell find $(SOURCE_THREADS_DIR) -type f -name '*.cpp') \
$(shell find $(SOURCE_CORE_DIR) -type f -name '*.cpp') \
$(shell find $(SOURCE_DRIVERS_DIR) -type f -name '*.cpp') \
@@ -310,31 +319,37 @@ all: $(ALL_FIRMWARE_TARGETS)
$(HEXFILE_DIR)/$(model)_%.elf : \
$(OUT_OBJS_S) $(OUT_OBJS) $(OUT_OBJS_CPP) \
$(OUTPUT_DIR)/Core/Gen/Translation.%.o \
+ $(OUTPUT_DIR)/Core/LangSupport/lang_single.o \
Makefile $(LDSCRIPT)
@test -d $(@D) || mkdir -p $(@D)
@echo Linking $@
@$(CPP) $(CXXFLAGS) $(OUT_OBJS_S) $(OUT_OBJS) $(OUT_OBJS_CPP) \
$(OUTPUT_DIR)/Core/Gen/Translation.$*.o \
+ $(OUTPUT_DIR)/Core/LangSupport/lang_single.o \
$(LIBS) $(LINKER_FLAGS) -o$@ -Wl,[email protected]
$(HEXFILE_DIR)/$(model)_string_compressed_%.elf : \
$(OUT_OBJS_S) $(OUT_OBJS) $(OUT_OBJS_CPP) \
- $(OUTPUT_DIR)/Core/Gen/Translation_lzfx.%.o \
+ $(OUTPUT_DIR)/Core/Gen/Translation_brieflz.%.o \
+ $(OUTPUT_DIR)/Core/LangSupport/lang_single.o \
Makefile $(LDSCRIPT)
@test -d $(@D) || mkdir -p $(@D)
@echo Linking $@
@$(CPP) $(CXXFLAGS) $(OUT_OBJS_S) $(OUT_OBJS) $(OUT_OBJS_CPP) \
- $(OUTPUT_DIR)/Core/Gen/Translation_lzfx.$*.o \
+ $(OUTPUT_DIR)/Core/Gen/Translation_brieflz.$*.o \
+ $(OUTPUT_DIR)/Core/LangSupport/lang_single.o \
$(LIBS) $(LINKER_FLAGS) -o$@ -Wl,[email protected]
$(HEXFILE_DIR)/$(model)_font_compressed_%.elf : \
$(OUT_OBJS_S) $(OUT_OBJS) $(OUT_OBJS_CPP) \
- $(OUTPUT_DIR)/Core/Gen/Translation_lzfx_font.%.o \
+ $(OUTPUT_DIR)/Core/Gen/Translation_brieflz_font.%.o \
+ $(OUTPUT_DIR)/Core/LangSupport/lang_single.o \
Makefile $(LDSCRIPT)
@test -d $(@D) || mkdir -p $(@D)
@echo Linking $@
@$(CPP) $(CXXFLAGS) $(OUT_OBJS_S) $(OUT_OBJS) $(OUT_OBJS_CPP) \
- $(OUTPUT_DIR)/Core/Gen/Translation_lzfx_font.$*.o \
+ $(OUTPUT_DIR)/Core/Gen/Translation_brieflz_font.$*.o \
+ $(OUTPUT_DIR)/Core/LangSupport/lang_single.o \
$(LIBS) $(LINKER_FLAGS) -o$@ -Wl,[email protected]
$(OUT_OBJS): $(OUTPUT_DIR)/%.o : %.c Makefile
@@ -374,38 +389,95 @@ $(OUTPUT_DIR)/Core/Gen/translation.files/%.o: Core/Gen/Translation.%.cpp
@echo Generating $@
@$(CPP) -c $(filter-out -flto -g3,$(CXXFLAGS)) $< -o $@
-$(HOST_OUTPUT_DIR)/lzfx/liblzfx.so: Core/lzfx/lzfx.c
+$(OUTPUT_DIR)/Core/Gen/translation.files/multi.%.o: Core/Gen/Translation_multi.%.cpp
+ @test -d $(@D) || mkdir -p $(@D)
+ @echo Generating $@
+ @$(CPP) -c $(filter-out -flto -g3,$(CXXFLAGS)) $< -o $@
+
+$(HOST_OUTPUT_DIR)/brieflz/libbrieflz.so: Core/brieflz/brieflz.c Core/brieflz/depack.c
@test -d $(@D) || mkdir -p $(@D)
- @echo Building host lzfx shared library $@
- @$(HOST_CC) -Wno-unused-result -fPIC -shared -O $^ -o $@
-
-$(OUTPUT_DIR)/Core/Gen/translation.files/%.strings.bin: $(OUTPUT_DIR)/Core/Gen/translation.files/%.o
- @echo Dumping translation strings data from $<
- @# Extract the raw strings data from the object file
- @$(OBJCOPY) -O binary -j .rodata._ZL18TranslationIndices $< $(@D)/$*.data.TranslationIndices.bin
- @test -s $(@D)/$*.data.TranslationIndices.bin || (rm $(@D)/$*.data.TranslationIndices.bin; echo 'ERROR: Output for .rodata._ZL18TranslationIndices is empty!' >&2; false)
- @$(OBJCOPY) -O binary -j .rodata._ZL22TranslationStringsData $< $(@D)/$*.data.TranslationStrings.bin
- @test -s $(@D)/$*.data.TranslationStrings.bin || (rm $(@D)/$*.data.TranslationStrings.bin; echo 'ERROR: Output for .rodata._ZL22TranslationStringsData is empty!' >&2; false)
- @cat $(@D)/$*.data.TranslationIndices.bin $(@D)/$*.data.TranslationStrings.bin > $@
-
-Core/Gen/Translation_lzfx.%.cpp: $(OUTPUT_DIR)/Core/Gen/translation.files/%.strings.bin $(OUTPUT_DIR)/Core/Gen/translation.files/%.pickle $(HOST_OUTPUT_DIR)/lzfx/liblzfx.so
+ @echo Building host brieflz shared library $@
+ @$(HOST_CC) -fPIC -shared -DBLZ_DLL -DBLZ_DLL_EXPORTS -O $^ -o $@
+
+Core/Gen/Translation_brieflz.%.cpp: $(OUTPUT_DIR)/Core/Gen/translation.files/%.o $(OUTPUT_DIR)/Core/Gen/translation.files/%.pickle $(HOST_OUTPUT_DIR)/brieflz/libbrieflz.so
@test -d $(@D) || mkdir -p $(@D)
- @echo Generating lzfx compressed translation for $*
- @python3 ../Translations/make_translation.py \
- -o $(PWD)/Core/Gen/Translation_lzfx.$*.cpp \
+ @echo Generating BriefLZ compressed translation for $*
+ @OBJCOPY=$(OBJCOPY) python3 ../Translations/make_translation.py \
+ -o $(PWD)/Core/Gen/Translation_brieflz.$*.cpp \
--input-pickled $(OUTPUT_DIR)/Core/Gen/translation.files/$*.pickle \
- --strings-bin $(OUTPUT_DIR)/Core/Gen/translation.files/$*.strings.bin \
+ --strings-obj $(OUTPUT_DIR)/Core/Gen/translation.files/$*.o \
$*
-Core/Gen/Translation_lzfx_font.%.cpp: $(OUTPUT_DIR)/Core/Gen/translation.files/%.pickle $(HOST_OUTPUT_DIR)/lzfx/liblzfx.so
+Core/Gen/Translation_brieflz_font.%.cpp: $(OUTPUT_DIR)/Core/Gen/translation.files/%.pickle $(HOST_OUTPUT_DIR)/brieflz/libbrieflz.so
@test -d $(@D) || mkdir -p $(@D)
- @echo Generating lzfx compressed translation for $*
+ @echo Generating BriefLZ compressed translation for $*
@python3 ../Translations/make_translation.py \
- -o $(PWD)/Core/Gen/Translation_lzfx_font.$*.cpp \
+ -o $(PWD)/Core/Gen/Translation_brieflz_font.$*.cpp \
--input-pickled $(OUTPUT_DIR)/Core/Gen/translation.files/$*.pickle \
--compress-font \
$*
+#
+# The recipes to produce multi-language firmwares:
+#
+
+# Usage: $(eval $(call multi_lang_rule,$(1)=group_code,$(2)=group_name,$(3)=lang_codes))
+define multi_lang_rule
+
+$(HEXFILE_DIR)/$(model)_multi_$(2).elf : \
+ $(OUT_OBJS_S) $(OUT_OBJS) $(OUT_OBJS_CPP) \
+ $(OUTPUT_DIR)/Core/Gen/Translation_multi.$(1).o \
+ $(OUTPUT_DIR)/Core/LangSupport/lang_multi.o \
+ Makefile $(LDSCRIPT)
+ @test -d $$(@D) || mkdir -p $$(@D)
+ @echo Linking $$@
+ @$(CPP) $(CXXFLAGS) $(OUT_OBJS_S) $(OUT_OBJS) $(OUT_OBJS_CPP) \
+ $(OUTPUT_DIR)/Core/Gen/Translation_multi.$(1).o \
+ $(OUTPUT_DIR)/Core/LangSupport/lang_multi.o \
+ $(LIBS) $(LINKER_FLAGS) -o$$@ -Wl,[email protected]
+
+$(HEXFILE_DIR)/$(model)_multi_compressed_$(2).elf : \
+ $(OUT_OBJS_S) $(OUT_OBJS) $(OUT_OBJS_CPP) \
+ $(OUTPUT_DIR)/Core/Gen/Translation_brieflz_multi.$(1).o \
+ $(OUTPUT_DIR)/Core/LangSupport/lang_multi.o \
+ Makefile $(LDSCRIPT)
+ @test -d $$(@D) || mkdir -p $$(@D)
+ @echo Linking $$@
+ @$(CPP) $(CXXFLAGS) $(OUT_OBJS_S) $(OUT_OBJS) $(OUT_OBJS_CPP) \
+ $(OUTPUT_DIR)/Core/Gen/Translation_brieflz_multi.$(1).o \
+ $(OUTPUT_DIR)/Core/LangSupport/lang_multi.o \
+ $(LIBS) $(LINKER_FLAGS) -o$$@ -Wl,[email protected]
+
+Core/Gen/Translation_multi.$(1).cpp: $(patsubst %,../Translations/translation_%.json,$(3)) \
+ ../Translations/make_translation.py \
+ ../Translations/translations_def.js \
+ ../Translations/font_tables.py \
+ Makefile ../Translations/wqy-bitmapsong/wenquanyi_9pt.bdf
+ @test -d Core/Gen || mkdir -p Core/Gen
+ @test -d $(OUTPUT_DIR)/Core/Gen/translation.files || mkdir -p $(OUTPUT_DIR)/Core/Gen/translation.files
+ @echo 'Generating translations for multi-language $(2)'
+ @python3 ../Translations/make_translation.py \
+ -o $(PWD)/Core/Gen/Translation_multi.$(1).cpp \
+ --output-pickled $(OUTPUT_DIR)/Core/Gen/translation.files/multi.$(1).pickle \
+ $(3)
+
+$(OUTPUT_DIR)/Core/Gen/translation.files/multi.$(1).pickle: Core/Gen/Translation_multi.$(1).cpp
+
+Core/Gen/Translation_brieflz_multi.$(1).cpp: $(OUTPUT_DIR)/Core/Gen/translation.files/multi.$(1).o $(OUTPUT_DIR)/Core/Gen/translation.files/multi.$(1).pickle $(HOST_OUTPUT_DIR)/brieflz/libbrieflz.so
+ @test -d $$(@D) || mkdir -p $$(@D)
+ @echo Generating BriefLZ compressed translation for multi-language $(2)
+ @OBJCOPY=$(OBJCOPY) python3 ../Translations/make_translation.py \
+ -o $(PWD)/Core/Gen/Translation_brieflz_multi.$(1).cpp \
+ --input-pickled $(OUTPUT_DIR)/Core/Gen/translation.files/multi.$(1).pickle \
+ --strings-obj $(OUTPUT_DIR)/Core/Gen/translation.files/multi.$(1).o \
+ --compress-font \
+ $(3)
+
+endef # multi_lang_rule
+
+
+# Add multi-language firmware rules:
+$(foreach group_code,$(LANGUAGE_GROUPS),$(eval $(call multi_lang_rule,$(group_code),$(LANGUAGE_GROUP_$(group_code)_NAME),$(LANGUAGE_GROUP_$(group_code)_LANGS))))
clean :
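
Only `depack.c` is linked into the firmware image; the full packer in `brieflz.c` is built solely as the host shared library `libbrieflz.so`, which the `Translation_brieflz*` recipes above require before running `make_translation.py`. The following round-trip sketch is not part of this commit: it assumes the stock BriefLZ packer entry points (`blz_workmem_size`, `blz_max_packed_size`, `blz_pack`) from the imported `brieflz.h`, together with `blz_depack` from `depack.c` above.

```
/* Host-side round-trip sketch (not part of this commit). Assumes the stock
 * BriefLZ packer API from the imported brieflz.h (blz_workmem_size,
 * blz_max_packed_size, blz_pack) plus blz_depack from depack.c above. */
#include <assert.h>
#include <stdlib.h>
#include <string.h>

#include "brieflz.h"

int main(void)
{
    const char text[] = "a translated string, a translated string, a translated string";
    const unsigned long src_size = (unsigned long) sizeof(text);

    /* The packer needs scratch memory; the output buffer must allow for
     * incompressible input, hence blz_max_packed_size. */
    void          *workmem  = malloc(blz_workmem_size(src_size));
    unsigned char *packed   = malloc(blz_max_packed_size(src_size));
    unsigned char *unpacked = malloc(src_size);
    assert(workmem != NULL && packed != NULL && unpacked != NULL);

    /* Compress; the return value is the packed size in bytes. */
    const unsigned long packed_size = blz_pack(text, packed, src_size, workmem);
    assert(packed_size > 0);

    /* Decompress; the caller must know the depacked size up front.
     * (blz_depack_srcsize above instead stops after consuming src_size
     * input bytes.) */
    const unsigned long depacked_size = blz_depack(packed, unpacked, src_size);
    assert(depacked_size == src_size);
    assert(memcmp(text, unpacked, src_size) == 0);

    free(unpacked);
    free(packed);
    free(workmem);
    return 0;
}
```

Building it against `Core/brieflz/brieflz.c` and `Core/brieflz/depack.c` (for example `cc roundtrip.c Core/brieflz/brieflz.c Core/brieflz/depack.c`; the file name here is hypothetical) should be enough to try it on the host.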
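
Relatedly, the cost constants used by the parsers above (the 9-bit literal seeding `cost[]` in `brieflz_leparse.h`, and the length/offset checks in `blz_pack_lazy`) follow from the stream the encoders emit: a literal is 1 tag bit plus 8 data bits, and a match is 1 tag bit, a gamma2 code for `len - 2`, a gamma2 code for `(offs >> 8) + 2`, and one low offset byte. A small sketch of that accounting; the helper names are illustrative, not BriefLZ's own, and `blz_match_cost` in the parser is assumed to compute the same quantity.

```
/* Illustrative bit-accounting helpers (names are not BriefLZ's). They mirror
 * the encoded stream produced above: tag bit + 8 data bits per literal, and
 * tag bit + gamma2(len - 2) + gamma2((offs >> 8) + 2) + 8 offset bits per match. */
#include <stdio.h>

/* Bit length of the interleaved gamma2 code for n (n >= 2): two bits for every
 * significant bit of n below the top one, matching the depacker's lookup table
 * (2..3 -> 2 bits, 4..7 -> 4 bits, 16 -> 8 bits). */
unsigned long gamma2_bits(unsigned long n)
{
    unsigned long bits = 0;

    while (n >= 2) {
        bits += 2;
        n >>= 1;
    }

    return bits;
}

/* A literal is one tag bit plus the byte itself: the 9 bits used to seed
 * cost[] for the last three positions in the leparse phase above. */
unsigned long literal_bits(void)
{
    return 1 + 8;
}

/* A match of length len (len >= 4, so len - 2 >= 2 as the gamma coder needs)
 * at offset offs = cur - pos - 1. */
unsigned long match_bits(unsigned long offs, unsigned long len)
{
    return 1 + gamma2_bits(len - 2) + gamma2_bits((offs >> 8) + 2) + 8;
}

int main(void)
{
    /* A 4-byte match at a small offset costs far less than four literals. */
    printf("match:    %lu bits\n", match_bits(255, 4));   /* 13 */
    printf("literals: %lu bits\n", 4 * literal_bits());   /* 36 */
    return 0;
}
```

For example, `match_bits(255, 4)` is 1 + 2 + 2 + 8 = 13 bits against 4 * 9 = 36 bits for the same four bytes as literals, which is why even short matches are usually worth taking when the offset is small.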