rendello · March 11, 2025 21:53 · rendello · Nov 1, 2024
diff --git a/_utf8_case_data.rs b/_utf8_case_data.rs
 /*
 Copyright (c) 2024 Rendello

 Permission to use, copy, modify, and/or distribute this software for any
 purpose with or without fee is hereby granted.

 THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
 REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
 AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
 INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
 LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
 OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
 PERFORMANCE OF THIS SOFTWARE.
 */

 // ==========================================================================
 //! Unicode codepoints that expand or contract when case is changed in UTF-8.
 // ==========================================================================

 /// Characters whose lowercase form encodes to fewer UTF-8 bytes.
 ///
 /// Each entry's trailing comment gives the lowercase form, the UTF-8 byte
 /// lengths (original->lowercased) and the resulting byte delta.
 ///
 /// NOTE(review): some literals look identical to ordinary letters (see the
 /// 3-byte entry whose lowercase is the 1-byte `k`). Do not retype or
 /// Unicode-normalize them; the byte lengths in the comments depend on the
 /// exact code point.
 pub const LOWERCASING_CONTRACTS: [&str; 22] = [
    "ẞ",	/* ß	(3->2), -1 bytes */
    "Ω",	/* ω	(3->2), -1 bytes */
    "Å",	/* å	(3->2), -1 bytes */
    "Ɫ",	/* ɫ	(3->2), -1 bytes */
    "Ɽ",	/* ɽ	(3->2), -1 bytes */
    "Ɑ",	/* ɑ	(3->2), -1 bytes */
    "Ɱ",	/* ɱ	(3->2), -1 bytes */
    "Ɐ",	/* ɐ	(3->2), -1 bytes */
    "Ɒ",	/* ɒ	(3->2), -1 bytes */
    "Ȿ",	/* ȿ	(3->2), -1 bytes */
    "Ɀ",	/* ɀ	(3->2), -1 bytes */
    "Ɥ",	/* ɥ	(3->2), -1 bytes */
    "Ɦ",	/* ɦ	(3->2), -1 bytes */
    "Ɜ",	/* ɜ	(3->2), -1 bytes */
    "Ɡ",	/* ɡ	(3->2), -1 bytes */
    "Ɬ",	/* ɬ	(3->2), -1 bytes */
    "Ɪ",	/* ɪ	(3->2), -1 bytes */
    "Ʞ",	/* ʞ	(3->2), -1 bytes */
    "Ʇ",	/* ʇ	(3->2), -1 bytes */
    "Ʝ",	/* ʝ	(3->2), -1 bytes */
    "Ʂ",	/* ʂ	(3->2), -1 bytes */
    "K",	/* k	(3->1), -2 bytes */
 ];

 /// Characters whose lowercase form encodes to more UTF-8 bytes.
 ///
 /// Each entry's trailing comment gives the lowercase form, the UTF-8 byte
 /// lengths (original->lowercased) and the resulting byte delta.
 pub const LOWERCASING_EXPANDS: [&str; 2] = [
    "Ⱥ",	/* ⱥ	(2->3), +1 bytes */
    "Ⱦ",	/* ⱦ	(2->3), +1 bytes */
 ];

 /// Characters whose lowercase form encodes to more UTF-8 bytes and is made
 /// of more than one character.
 ///
 /// The trailing comment gives the lowercase form, the UTF-8 byte lengths
 /// (original->lowercased), the byte delta and the character-count delta.
 pub const LOWERCASING_EXPANDS_MULTI_CHAR: [&str; 1] = [
    "İ",	/* i̇	(2->3), +1 bytes, +1 chars */
 ];

 /// Characters whose uppercase form encodes to fewer UTF-8 bytes.
 ///
 /// Each entry's trailing comment gives the uppercase form, the UTF-8 byte
 /// lengths (original->uppercased) and the resulting byte delta.
 ///
 /// NOTE(review): some literals resemble more common letters but have a
 /// different encoded length; do not retype or Unicode-normalize them.
 pub const UPPERCASING_CONTRACTS: [&str; 13] = [
    "ı",	/* I	(2->1), -1 bytes */
    "ſ",	/* S	(2->1), -1 bytes */
    "ᲀ",	/* В	(3->2), -1 bytes */
    "ᲁ",	/* Д	(3->2), -1 bytes */
    "ᲂ",	/* О	(3->2), -1 bytes */
    "ᲃ",	/* С	(3->2), -1 bytes */
    "ᲄ",	/* Т	(3->2), -1 bytes */
    "ᲅ",	/* Т	(3->2), -1 bytes */
    "ᲆ",	/* Ъ	(3->2), -1 bytes */
    "ᲇ",	/* Ѣ	(3->2), -1 bytes */
    "ι",	/* Ι	(3->2), -1 bytes */
    "ⱥ",	/* Ⱥ	(3->2), -1 bytes */
    "ⱦ",	/* Ⱦ	(3->2), -1 bytes */
 ];

 /// Characters whose uppercase form encodes to fewer UTF-8 bytes even though
 /// it is made of more than one character.
 ///
 /// The trailing comment gives the uppercase form, the UTF-8 byte lengths
 /// (original->uppercased), the byte delta and the character-count delta.
 pub const UPPERCASING_CONTRACTS_MULTI_CHAR: [&str; 5] = [
    "ﬀ",	/* FF	(3->2), -1 bytes, +1 chars */
    "ﬁ",	/* FI	(3->2), -1 bytes, +1 chars */
    "ﬂ",	/* FL	(3->2), -1 bytes, +1 chars */
    "ﬅ",	/* ST	(3->2), -1 bytes, +1 chars */
    "ﬆ",	/* ST	(3->2), -1 bytes, +1 chars */
 ];

 /// Characters whose uppercase form encodes to more UTF-8 bytes.
 ///
 /// Each entry's trailing comment gives the uppercase form, the UTF-8 byte
 /// lengths (original->uppercased) and the resulting byte delta.
 pub const UPPERCASING_EXPANDS: [&str; 18] = [
    "ȿ",	/* Ȿ	(2->3), +1 bytes */
    "ɀ",	/* Ɀ	(2->3), +1 bytes */
    "ɐ",	/* Ɐ	(2->3), +1 bytes */
    "ɑ",	/* Ɑ	(2->3), +1 bytes */
    "ɒ",	/* Ɒ	(2->3), +1 bytes */
    "ɜ",	/* Ɜ	(2->3), +1 bytes */
    "ɡ",	/* Ɡ	(2->3), +1 bytes */
    "ɥ",	/* Ɥ	(2->3), +1 bytes */
    "ɦ",	/* Ɦ	(2->3), +1 bytes */
    "ɪ",	/* Ɪ	(2->3), +1 bytes */
    "ɫ",	/* Ɫ	(2->3), +1 bytes */
    "ɬ",	/* Ɬ	(2->3), +1 bytes */
    "ɱ",	/* Ɱ	(2->3), +1 bytes */
    "ɽ",	/* Ɽ	(2->3), +1 bytes */
    "ʂ",	/* Ʂ	(2->3), +1 bytes */
    "ʇ",	/* Ʇ	(2->3), +1 bytes */
    "ʝ",	/* Ʝ	(2->3), +1 bytes */
    "ʞ",	/* Ʞ	(2->3), +1 bytes */
 ];

 /// Characters whose uppercase form encodes to more UTF-8 bytes and is made
 /// of more than one character.
 ///
 /// The trailing comment gives the uppercase form, the UTF-8 byte lengths
 /// (original->uppercased), the byte delta and the character-count delta.
 ///
 /// NOTE(review): a few literals render identically to another entry but are
 /// listed with a different encoded length (compare the `(2->6)` and `(3->6)`
 /// rows). They are presumably distinct code points, so this table must not
 /// be retyped or Unicode-normalized.
 pub const UPPERCASING_EXPANDS_MULTI_CHAR: [&str; 89] = [
    "ΐ",	/* Ϊ́	(2->6), +4 bytes, +2 chars */
    "ΰ",	/* Ϋ́	(2->6), +4 bytes, +2 chars */
    "ὒ",	/* Υ̓̀	(3->6), +3 bytes, +2 chars */
    "ὔ",	/* Υ̓́	(3->6), +3 bytes, +2 chars */
    "ὖ",	/* Υ̓͂	(3->6), +3 bytes, +2 chars */
    "ᾷ",	/* Α͂Ι	(3->6), +3 bytes, +2 chars */
    "ῇ",	/* Η͂Ι	(3->6), +3 bytes, +2 chars */
    "ῒ",	/* Ϊ̀	(3->6), +3 bytes, +2 chars */
    "ΐ",	/* Ϊ́	(3->6), +3 bytes, +2 chars */
    "ῗ",	/* Ϊ͂	(3->6), +3 bytes, +2 chars */
    "ῢ",	/* Ϋ̀	(3->6), +3 bytes, +2 chars */
    "ΰ",	/* Ϋ́	(3->6), +3 bytes, +2 chars */
    "ῧ",	/* Ϋ͂	(3->6), +3 bytes, +2 chars */
    "ῷ",	/* Ω͂Ι	(3->6), +3 bytes, +2 chars */
    "և",	/* ԵՒ	(2->4), +2 bytes, +1 chars */
    "ᾀ",	/* ἈΙ	(3->5), +2 bytes, +1 chars */
    "ᾁ",	/* ἉΙ	(3->5), +2 bytes, +1 chars */
    "ᾂ",	/* ἊΙ	(3->5), +2 bytes, +1 chars */
    "ᾃ",	/* ἋΙ	(3->5), +2 bytes, +1 chars */
    "ᾄ",	/* ἌΙ	(3->5), +2 bytes, +1 chars */
    "ᾅ",	/* ἍΙ	(3->5), +2 bytes, +1 chars */
    "ᾆ",	/* ἎΙ	(3->5), +2 bytes, +1 chars */
    "ᾇ",	/* ἏΙ	(3->5), +2 bytes, +1 chars */
    "ᾈ",	/* ἈΙ	(3->5), +2 bytes, +1 chars */
    "ᾉ",	/* ἉΙ	(3->5), +2 bytes, +1 chars */
    "ᾊ",	/* ἊΙ	(3->5), +2 bytes, +1 chars */
    "ᾋ",	/* ἋΙ	(3->5), +2 bytes, +1 chars */
    "ᾌ",	/* ἌΙ	(3->5), +2 bytes, +1 chars */
    "ᾍ",	/* ἍΙ	(3->5), +2 bytes, +1 chars */
    "ᾎ",	/* ἎΙ	(3->5), +2 bytes, +1 chars */
    "ᾏ",	/* ἏΙ	(3->5), +2 bytes, +1 chars */
    "ᾐ",	/* ἨΙ	(3->5), +2 bytes, +1 chars */
    "ᾑ",	/* ἩΙ	(3->5), +2 bytes, +1 chars */
    "ᾒ",	/* ἪΙ	(3->5), +2 bytes, +1 chars */
    "ᾓ",	/* ἫΙ	(3->5), +2 bytes, +1 chars */
    "ᾔ",	/* ἬΙ	(3->5), +2 bytes, +1 chars */
    "ᾕ",	/* ἭΙ	(3->5), +2 bytes, +1 chars */
    "ᾖ",	/* ἮΙ	(3->5), +2 bytes, +1 chars */
    "ᾗ",	/* ἯΙ	(3->5), +2 bytes, +1 chars */
    "ᾘ",	/* ἨΙ	(3->5), +2 bytes, +1 chars */
    "ᾙ",	/* ἩΙ	(3->5), +2 bytes, +1 chars */
    "ᾚ",	/* ἪΙ	(3->5), +2 bytes, +1 chars */
    "ᾛ",	/* ἫΙ	(3->5), +2 bytes, +1 chars */
    "ᾜ",	/* ἬΙ	(3->5), +2 bytes, +1 chars */
    "ᾝ",	/* ἭΙ	(3->5), +2 bytes, +1 chars */
    "ᾞ",	/* ἮΙ	(3->5), +2 bytes, +1 chars */
    "ᾟ",	/* ἯΙ	(3->5), +2 bytes, +1 chars */
    "ᾠ",	/* ὨΙ	(3->5), +2 bytes, +1 chars */
    "ᾡ",	/* ὩΙ	(3->5), +2 bytes, +1 chars */
    "ᾢ",	/* ὪΙ	(3->5), +2 bytes, +1 chars */
    "ᾣ",	/* ὫΙ	(3->5), +2 bytes, +1 chars */
    "ᾤ",	/* ὬΙ	(3->5), +2 bytes, +1 chars */
    "ᾥ",	/* ὭΙ	(3->5), +2 bytes, +1 chars */
    "ᾦ",	/* ὮΙ	(3->5), +2 bytes, +1 chars */
    "ᾧ",	/* ὯΙ	(3->5), +2 bytes, +1 chars */
    "ᾨ",	/* ὨΙ	(3->5), +2 bytes, +1 chars */
    "ᾩ",	/* ὩΙ	(3->5), +2 bytes, +1 chars */
    "ᾪ",	/* ὪΙ	(3->5), +2 bytes, +1 chars */
    "ᾫ",	/* ὫΙ	(3->5), +2 bytes, +1 chars */
    "ᾬ",	/* ὬΙ	(3->5), +2 bytes, +1 chars */
    "ᾭ",	/* ὭΙ	(3->5), +2 bytes, +1 chars */
    "ᾮ",	/* ὮΙ	(3->5), +2 bytes, +1 chars */
    "ᾯ",	/* ὯΙ	(3->5), +2 bytes, +1 chars */
    "ᾲ",	/* ᾺΙ	(3->5), +2 bytes, +1 chars */
    "ῂ",	/* ῊΙ	(3->5), +2 bytes, +1 chars */
    "ῲ",	/* ῺΙ	(3->5), +2 bytes, +1 chars */
    "ŉ",	/* ʼN	(2->3), +1 bytes, +1 chars */
    "ǰ",	/* J̌	(2->3), +1 bytes, +1 chars */
    "ὐ",	/* Υ̓	(3->4), +1 bytes, +1 chars */
    "ᾳ",	/* ΑΙ	(3->4), +1 bytes, +1 chars */
    "ᾴ",	/* ΆΙ	(3->4), +1 bytes, +1 chars */
    "ᾶ",	/* Α͂	(3->4), +1 bytes, +1 chars */
    "ᾼ",	/* ΑΙ	(3->4), +1 bytes, +1 chars */
    "ῃ",	/* ΗΙ	(3->4), +1 bytes, +1 chars */
    "ῄ",	/* ΉΙ	(3->4), +1 bytes, +1 chars */
    "ῆ",	/* Η͂	(3->4), +1 bytes, +1 chars */
    "ῌ",	/* ΗΙ	(3->4), +1 bytes, +1 chars */
    "ῖ",	/* Ι͂	(3->4), +1 bytes, +1 chars */
    "ῤ",	/* Ρ̓	(3->4), +1 bytes, +1 chars */
    "ῦ",	/* Υ͂	(3->4), +1 bytes, +1 chars */
    "ῳ",	/* ΩΙ	(3->4), +1 bytes, +1 chars */
    "ῴ",	/* ΏΙ	(3->4), +1 bytes, +1 chars */
    "ῶ",	/* Ω͂	(3->4), +1 bytes, +1 chars */
    "ῼ",	/* ΩΙ	(3->4), +1 bytes, +1 chars */
    "ﬓ",	/* ՄՆ	(3->4), +1 bytes, +1 chars */
    "ﬔ",	/* ՄԵ	(3->4), +1 bytes, +1 chars */
    "ﬕ",	/* ՄԻ	(3->4), +1 bytes, +1 chars */
    "ﬖ",	/* ՎՆ	(3->4), +1 bytes, +1 chars */
    "ﬗ",	/* ՄԽ	(3->4), +1 bytes, +1 chars */
 ];
diff --git a/generate_utf8.py b/generate_utf8.py
 """
 Copyright (c) 2024 Rendello

 Permission to use, copy, modify, and/or distribute this software for any
 purpose with or without fee is hereby granted.

 THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
 REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
 AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
 INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
 LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
 OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
 PERFORMANCE OF THIS SOFTWARE.
 """

 import sys
 from dataclasses import dataclass
 from typing import List, Dict

 @dataclass
 class Entry:
    """One case mapping whose UTF-8 encoded length differs from its source.

    Instances are built by `create_entry_map`, which fills the fields as
    described in the comments below.
    """
    a: str                  # source character, `chr(i)`
    b: str                  # `a.upper()` or `a.lower()`
    a_len: int              # UTF-8 byte length of `a`
    b_len: int              # UTF-8 byte length of `b`
    delta: int              # b_len - a_len
    a_char_count: int       # len(a)
    b_char_count: int       # len(b)
    delta_char_count: int   # b_char_count - a_char_count


 def sort_entries(l: List[Entry]) -> List[Entry]:
    """Return a new list holding the entries of `l` in output order.

    Ordering, most significant key first:
      1. change in character count (`delta_char_count`), largest first;
      2. change in UTF-8 byte length (`delta`), largest first;
      3. the source character `a`, ascending by string comparison.

    The input list is left unmodified.
    """
    def ordering(entry):
        # Negate both deltas so the larger change sorts first.
        return (-entry.delta_char_count, -entry.delta, entry.a)

    ranked = list(l)
    ranked.sort(key=ordering)
    return ranked


 def create_entry_map() -> Dict[str, List[Entry]]:
    """Collect every code point whose case mapping changes its UTF-8 length.

    Each code point from 0 through `sys.maxunicode` is passed through
    `str.upper()` and `str.lower()`. A mapping is recorded only when the
    UTF-8 byte length of the result differs from that of the source.
    Code points that cannot be encoded as UTF-8 (e.g. lone surrogates) are
    skipped.

    Returns:
        A dict keyed by "<case>_<direction>" with an optional "_multi_char"
        suffix, where <case> is "uppercasing" or "lowercasing", <direction>
        is "expands" or "contracts", and the suffix is present when the
        mapped form is more than one character long. Each value is a list
        of `Entry` objects in ascending code point order.
    """
    entry_map: Dict[str, List[Entry]] = {}
    for i in range(sys.maxunicode + 1):
        a = chr(i)

        for (case, b) in (('uppercasing', a.upper()), ('lowercasing', a.lower())):
            try:
                a_len = len(a.encode("utf8"))
                b_len = len(b.encode("utf8"))
            except UnicodeEncodeError:
                continue

            delta = b_len - a_len
            if delta == 0:
                # Same encoded length: not interesting for this table.
                continue

            a_char_count = len(a)
            b_char_count = len(b)
            delta_char_count = b_char_count - a_char_count

            # delta is non-zero here, so the mapping either grew or shrank.
            attributes = [case, 'expands' if delta > 0 else 'contracts']
            if b_char_count > 1:
                attributes.append('multi_char')

            key = "_".join(attributes)
            value = Entry(a, b, a_len, b_len, delta, a_char_count, b_char_count, delta_char_count)
            entry_map.setdefault(key, []).append(value)

    return entry_map


 def entry_map_to_string(entry_map: Dict[str, List[Entry]]) -> str:
    """Render the entry map as Rust source text.

    The output starts with a fixed comment header, followed by one
    `pub const NAME: [&str; N]` array per key, with keys visited in sorted
    order and the name upper-cased. Entries inside an array are ordered by
    `sort_entries`. Each element is the source character as a string
    literal, followed by a comment giving the mapped form, the byte lengths,
    the byte delta and, when non-zero, the character-count delta.

    Leading and trailing whitespace is stripped from the returned text.
    """
    pieces = [
        '''// =======================================================================\n''',
        '''//! Automatically generated using `task generate-utf8-case-data`.\n//!\n''',
        '''//! Unicode characters that behave oddly when the case is changed, for use\n''',
        '''//! with property tests.\n''',
        '''// =======================================================================\n\n''',
    ]

    for key in sorted(entry_map):
        entries = sort_entries(entry_map[key])
        pieces.append(f'pub const {key.upper()}: [&str; {len(entries)}] = [\n')

        for e in entries:
            # The character-count note is only shown when the count changes.
            ds = f", {e.delta_char_count:+} chars" if e.delta_char_count != 0 else ""
            pieces.append(
                f'    "{e.a}",\t/* {e.b}\t({e.a_len}->{e.b_len}), {e.delta:+} bytes{ds} */\n'
            )

        pieces.append("];\n\n")

    return "".join(pieces).strip()


 def generate_utf8_case_data():
    """Build the entry map and return it rendered as Rust source text."""
    entry_map = create_entry_map()
    return entry_map_to_string(entry_map)
	/*
	Copyright (c) 2024 Rendello

	Permission to use, copy, modify, and/or distribute this software for any
	purpose with or without fee is hereby granted.

	THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
	REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
	AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
	INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
	LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
	OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
	PERFORMANCE OF THIS SOFTWARE.
	*/

	// ==========================================================================
	//! Unicode codepoints that expand or contract when case is changed in UTF-8.
	// ==========================================================================

	/// Characters whose lowercase form encodes to fewer UTF-8 bytes.
	///
	/// Each entry's trailing comment gives the lowercase form, the UTF-8 byte
	/// lengths (original->lowercased) and the resulting byte delta.
	///
	/// NOTE(review): some literals look identical to ordinary letters (see the
	/// 3-byte entry whose lowercase is the 1-byte `k`). Do not retype or
	/// Unicode-normalize them; the byte lengths in the comments depend on the
	/// exact code point.
	pub const LOWERCASING_CONTRACTS: [&str; 22] = [
	"ẞ", /* ß (3->2), -1 bytes */
	"Ω", /* ω (3->2), -1 bytes */
	"Å", /* å (3->2), -1 bytes */
	"Ɫ", /* ɫ (3->2), -1 bytes */
	"Ɽ", /* ɽ (3->2), -1 bytes */
	"Ɑ", /* ɑ (3->2), -1 bytes */
	"Ɱ", /* ɱ (3->2), -1 bytes */
	"Ɐ", /* ɐ (3->2), -1 bytes */
	"Ɒ", /* ɒ (3->2), -1 bytes */
	"Ȿ", /* ȿ (3->2), -1 bytes */
	"Ɀ", /* ɀ (3->2), -1 bytes */
	"Ɥ", /* ɥ (3->2), -1 bytes */
	"Ɦ", /* ɦ (3->2), -1 bytes */
	"Ɜ", /* ɜ (3->2), -1 bytes */
	"Ɡ", /* ɡ (3->2), -1 bytes */
	"Ɬ", /* ɬ (3->2), -1 bytes */
	"Ɪ", /* ɪ (3->2), -1 bytes */
	"Ʞ", /* ʞ (3->2), -1 bytes */
	"Ʇ", /* ʇ (3->2), -1 bytes */
	"Ʝ", /* ʝ (3->2), -1 bytes */
	"Ʂ", /* ʂ (3->2), -1 bytes */
	"K", /* k (3->1), -2 bytes */
	];

	/// Characters whose lowercase form encodes to more UTF-8 bytes.
	///
	/// Each entry's trailing comment gives the lowercase form, the UTF-8 byte
	/// lengths (original->lowercased) and the resulting byte delta.
	pub const LOWERCASING_EXPANDS: [&str; 2] = [
	"Ⱥ", /* ⱥ (2->3), +1 bytes */
	"Ⱦ", /* ⱦ (2->3), +1 bytes */
	];

	/// Characters whose lowercase form encodes to more UTF-8 bytes and is made
	/// of more than one character.
	///
	/// The trailing comment gives the lowercase form, the UTF-8 byte lengths
	/// (original->lowercased), the byte delta and the character-count delta.
	pub const LOWERCASING_EXPANDS_MULTI_CHAR: [&str; 1] = [
	"İ", /* i̇ (2->3), +1 bytes, +1 chars */
	];

	/// Characters whose uppercase form encodes to fewer UTF-8 bytes.
	///
	/// Each entry's trailing comment gives the uppercase form, the UTF-8 byte
	/// lengths (original->uppercased) and the resulting byte delta.
	///
	/// NOTE(review): some literals resemble more common letters but have a
	/// different encoded length; do not retype or Unicode-normalize them.
	pub const UPPERCASING_CONTRACTS: [&str; 13] = [
	"ı", /* I (2->1), -1 bytes */
	"ſ", /* S (2->1), -1 bytes */
	"ᲀ", /* В (3->2), -1 bytes */
	"ᲁ", /* Д (3->2), -1 bytes */
	"ᲂ", /* О (3->2), -1 bytes */
	"ᲃ", /* С (3->2), -1 bytes */
	"ᲄ", /* Т (3->2), -1 bytes */
	"ᲅ", /* Т (3->2), -1 bytes */
	"ᲆ", /* Ъ (3->2), -1 bytes */
	"ᲇ", /* Ѣ (3->2), -1 bytes */
	"ι", /* Ι (3->2), -1 bytes */
	"ⱥ", /* Ⱥ (3->2), -1 bytes */
	"ⱦ", /* Ⱦ (3->2), -1 bytes */
	];

	/// Characters whose uppercase form encodes to fewer UTF-8 bytes even though
	/// it is made of more than one character.
	///
	/// The trailing comment gives the uppercase form, the UTF-8 byte lengths
	/// (original->uppercased), the byte delta and the character-count delta.
	pub const UPPERCASING_CONTRACTS_MULTI_CHAR: [&str; 5] = [
	"ﬀ", /* FF (3->2), -1 bytes, +1 chars */
	"ﬁ", /* FI (3->2), -1 bytes, +1 chars */
	"ﬂ", /* FL (3->2), -1 bytes, +1 chars */
	"ﬅ", /* ST (3->2), -1 bytes, +1 chars */
	"ﬆ", /* ST (3->2), -1 bytes, +1 chars */
	];

	/// Characters whose uppercase form encodes to more UTF-8 bytes.
	///
	/// Each entry's trailing comment gives the uppercase form, the UTF-8 byte
	/// lengths (original->uppercased) and the resulting byte delta.
	pub const UPPERCASING_EXPANDS: [&str; 18] = [
	"ȿ", /* Ȿ (2->3), +1 bytes */
	"ɀ", /* Ɀ (2->3), +1 bytes */
	"ɐ", /* Ɐ (2->3), +1 bytes */
	"ɑ", /* Ɑ (2->3), +1 bytes */
	"ɒ", /* Ɒ (2->3), +1 bytes */
	"ɜ", /* Ɜ (2->3), +1 bytes */
	"ɡ", /* Ɡ (2->3), +1 bytes */
	"ɥ", /* Ɥ (2->3), +1 bytes */
	"ɦ", /* Ɦ (2->3), +1 bytes */
	"ɪ", /* Ɪ (2->3), +1 bytes */
	"ɫ", /* Ɫ (2->3), +1 bytes */
	"ɬ", /* Ɬ (2->3), +1 bytes */
	"ɱ", /* Ɱ (2->3), +1 bytes */
	"ɽ", /* Ɽ (2->3), +1 bytes */
	"ʂ", /* Ʂ (2->3), +1 bytes */
	"ʇ", /* Ʇ (2->3), +1 bytes */
	"ʝ", /* Ʝ (2->3), +1 bytes */
	"ʞ", /* Ʞ (2->3), +1 bytes */
	];

	/// Characters whose uppercase form encodes to more UTF-8 bytes and is made
	/// of more than one character.
	///
	/// The trailing comment gives the uppercase form, the UTF-8 byte lengths
	/// (original->uppercased), the byte delta and the character-count delta.
	///
	/// NOTE(review): a few literals render identically to another entry but are
	/// listed with a different encoded length (compare the `(2->6)` and `(3->6)`
	/// rows). They are presumably distinct code points, so this table must not
	/// be retyped or Unicode-normalized.
	pub const UPPERCASING_EXPANDS_MULTI_CHAR: [&str; 89] = [
	"ΐ", /* Ϊ́ (2->6), +4 bytes, +2 chars */
	"ΰ", /* Ϋ́ (2->6), +4 bytes, +2 chars */
	"ὒ", /* Υ̓̀ (3->6), +3 bytes, +2 chars */
	"ὔ", /* Υ̓́ (3->6), +3 bytes, +2 chars */
	"ὖ", /* Υ̓͂ (3->6), +3 bytes, +2 chars */
	"ᾷ", /* Α͂Ι (3->6), +3 bytes, +2 chars */
	"ῇ", /* Η͂Ι (3->6), +3 bytes, +2 chars */
	"ῒ", /* Ϊ̀ (3->6), +3 bytes, +2 chars */
	"ΐ", /* Ϊ́ (3->6), +3 bytes, +2 chars */
	"ῗ", /* Ϊ͂ (3->6), +3 bytes, +2 chars */
	"ῢ", /* Ϋ̀ (3->6), +3 bytes, +2 chars */
	"ΰ", /* Ϋ́ (3->6), +3 bytes, +2 chars */
	"ῧ", /* Ϋ͂ (3->6), +3 bytes, +2 chars */
	"ῷ", /* Ω͂Ι (3->6), +3 bytes, +2 chars */
	"և", /* ԵՒ (2->4), +2 bytes, +1 chars */
	"ᾀ", /* ἈΙ (3->5), +2 bytes, +1 chars */
	"ᾁ", /* ἉΙ (3->5), +2 bytes, +1 chars */
	"ᾂ", /* ἊΙ (3->5), +2 bytes, +1 chars */
	"ᾃ", /* ἋΙ (3->5), +2 bytes, +1 chars */
	"ᾄ", /* ἌΙ (3->5), +2 bytes, +1 chars */
	"ᾅ", /* ἍΙ (3->5), +2 bytes, +1 chars */
	"ᾆ", /* ἎΙ (3->5), +2 bytes, +1 chars */
	"ᾇ", /* ἏΙ (3->5), +2 bytes, +1 chars */
	"ᾈ", /* ἈΙ (3->5), +2 bytes, +1 chars */
	"ᾉ", /* ἉΙ (3->5), +2 bytes, +1 chars */
	"ᾊ", /* ἊΙ (3->5), +2 bytes, +1 chars */
	"ᾋ", /* ἋΙ (3->5), +2 bytes, +1 chars */
	"ᾌ", /* ἌΙ (3->5), +2 bytes, +1 chars */
	"ᾍ", /* ἍΙ (3->5), +2 bytes, +1 chars */
	"ᾎ", /* ἎΙ (3->5), +2 bytes, +1 chars */
	"ᾏ", /* ἏΙ (3->5), +2 bytes, +1 chars */
	"ᾐ", /* ἨΙ (3->5), +2 bytes, +1 chars */
	"ᾑ", /* ἩΙ (3->5), +2 bytes, +1 chars */
	"ᾒ", /* ἪΙ (3->5), +2 bytes, +1 chars */
	"ᾓ", /* ἫΙ (3->5), +2 bytes, +1 chars */
	"ᾔ", /* ἬΙ (3->5), +2 bytes, +1 chars */
	"ᾕ", /* ἭΙ (3->5), +2 bytes, +1 chars */
	"ᾖ", /* ἮΙ (3->5), +2 bytes, +1 chars */
	"ᾗ", /* ἯΙ (3->5), +2 bytes, +1 chars */
	"ᾘ", /* ἨΙ (3->5), +2 bytes, +1 chars */
	"ᾙ", /* ἩΙ (3->5), +2 bytes, +1 chars */
	"ᾚ", /* ἪΙ (3->5), +2 bytes, +1 chars */
	"ᾛ", /* ἫΙ (3->5), +2 bytes, +1 chars */
	"ᾜ", /* ἬΙ (3->5), +2 bytes, +1 chars */
	"ᾝ", /* ἭΙ (3->5), +2 bytes, +1 chars */
	"ᾞ", /* ἮΙ (3->5), +2 bytes, +1 chars */
	"ᾟ", /* ἯΙ (3->5), +2 bytes, +1 chars */
	"ᾠ", /* ὨΙ (3->5), +2 bytes, +1 chars */
	"ᾡ", /* ὩΙ (3->5), +2 bytes, +1 chars */
	"ᾢ", /* ὪΙ (3->5), +2 bytes, +1 chars */
	"ᾣ", /* ὫΙ (3->5), +2 bytes, +1 chars */
	"ᾤ", /* ὬΙ (3->5), +2 bytes, +1 chars */
	"ᾥ", /* ὭΙ (3->5), +2 bytes, +1 chars */
	"ᾦ", /* ὮΙ (3->5), +2 bytes, +1 chars */
	"ᾧ", /* ὯΙ (3->5), +2 bytes, +1 chars */
	"ᾨ", /* ὨΙ (3->5), +2 bytes, +1 chars */
	"ᾩ", /* ὩΙ (3->5), +2 bytes, +1 chars */
	"ᾪ", /* ὪΙ (3->5), +2 bytes, +1 chars */
	"ᾫ", /* ὫΙ (3->5), +2 bytes, +1 chars */
	"ᾬ", /* ὬΙ (3->5), +2 bytes, +1 chars */
	"ᾭ", /* ὭΙ (3->5), +2 bytes, +1 chars */
	"ᾮ", /* ὮΙ (3->5), +2 bytes, +1 chars */
	"ᾯ", /* ὯΙ (3->5), +2 bytes, +1 chars */
	"ᾲ", /* ᾺΙ (3->5), +2 bytes, +1 chars */
	"ῂ", /* ῊΙ (3->5), +2 bytes, +1 chars */
	"ῲ", /* ῺΙ (3->5), +2 bytes, +1 chars */
	"ŉ", /* ʼN (2->3), +1 bytes, +1 chars */
	"ǰ", /* J̌ (2->3), +1 bytes, +1 chars */
	"ὐ", /* Υ̓ (3->4), +1 bytes, +1 chars */
	"ᾳ", /* ΑΙ (3->4), +1 bytes, +1 chars */
	"ᾴ", /* ΆΙ (3->4), +1 bytes, +1 chars */
	"ᾶ", /* Α͂ (3->4), +1 bytes, +1 chars */
	"ᾼ", /* ΑΙ (3->4), +1 bytes, +1 chars */
	"ῃ", /* ΗΙ (3->4), +1 bytes, +1 chars */
	"ῄ", /* ΉΙ (3->4), +1 bytes, +1 chars */
	"ῆ", /* Η͂ (3->4), +1 bytes, +1 chars */
	"ῌ", /* ΗΙ (3->4), +1 bytes, +1 chars */
	"ῖ", /* Ι͂ (3->4), +1 bytes, +1 chars */
	"ῤ", /* Ρ̓ (3->4), +1 bytes, +1 chars */
	"ῦ", /* Υ͂ (3->4), +1 bytes, +1 chars */
	"ῳ", /* ΩΙ (3->4), +1 bytes, +1 chars */
	"ῴ", /* ΏΙ (3->4), +1 bytes, +1 chars */
	"ῶ", /* Ω͂ (3->4), +1 bytes, +1 chars */
	"ῼ", /* ΩΙ (3->4), +1 bytes, +1 chars */
	"ﬓ", /* ՄՆ (3->4), +1 bytes, +1 chars */
	"ﬔ", /* ՄԵ (3->4), +1 bytes, +1 chars */
	"ﬕ", /* ՄԻ (3->4), +1 bytes, +1 chars */
	"ﬖ", /* ՎՆ (3->4), +1 bytes, +1 chars */
	"ﬗ", /* ՄԽ (3->4), +1 bytes, +1 chars */
	];