Merge pull request #2159 from ZyX-I/auto-unicode

Generate unicode tables with script
This commit is contained in:
Justin M. Keyes 2015-08-19 23:41:12 -04:00
commit 2647677618
8 changed files with 33136 additions and 962 deletions

View File

@ -0,0 +1,35 @@
#!/bin/sh
# Downloads the Unicode data files listed in $files into a target directory
# and commits them with git.
#
# Usage: see the --help text below. Both arguments are optional.

# Abort on the first failing command (failed download, failed git call, ...).
set -e

files="UnicodeData.txt CaseFolding.txt EastAsianWidth.txt"

UNIDIR_DEFAULT=unicode
DOWNLOAD_URL_BASE_DEFAULT='http://unicode.org/Public/UNIDATA'

# "$1" is quoted so that an argument containing spaces does not break `test`.
if test "x$1" = 'x--help' ; then
  echo 'Usage:'
  echo " $0[ TARGET_DIRECTORY[ URL_BASE]]"
  echo
  echo "Downloads files $files to TARGET_DIRECTORY."
  echo "Each file is downloaded from URL_BASE/\$filename."
  echo
  echo "Default target directory is $PWD/${UNIDIR_DEFAULT}."
  echo "Default URL base is ${DOWNLOAD_URL_BASE_DEFAULT}."
  # Stop here: without this the script would go on and treat "--help" as the
  # target directory.
  exit 0
fi

UNIDIR=${1:-$UNIDIR_DEFAULT}
DOWNLOAD_URL_BASE=${2:-$DOWNLOAD_URL_BASE_DEFAULT}

# $files is intentionally unquoted: it is a space-separated list.
for filename in $files ; do
  curl -o "$UNIDIR/$filename" "$DOWNLOAD_URL_BASE/$filename"
  (
    # Subshell keeps the directory change local to the git call.
    cd "$UNIDIR"
    git add "$filename"
  )
done

(
  cd "$UNIDIR"
  git commit -m "Update unicode files" -- $files
)

View File

@ -0,0 +1,239 @@
-- Script creates the following tables in unicode_tables.generated.h:
--
-- 1. doublewidth and ambiguous tables: sorted list of non-overlapping closed
-- intervals. Codepoints in these intervals have double (W or F) or ambiguous
-- (A) east asian width respectively.
-- 2. combining table: same as the above, but characters inside are combining
-- characters (i.e. have general categories equal to Mn, Mc or Me).
-- 3. foldCase, toLower and toUpper tables used to convert characters to
-- folded/lower/upper variants. In these tables the first two values delimit
-- a character range: as in the previous tables, ranges are sorted and must
-- be non-overlapping. The third value is the step inside the range: e.g. if
-- it is 2 then the interval applies only to the first, third, fifth, …
-- character in the range (a range holding a single character is written
-- with step -1). The fourth value is the number that should be added to the
-- codepoint to yield the folded/lower/upper codepoint.
-- Command-line handling.
--
-- Expects four positional arguments: the three Unicode data files to read
-- and the header file to write. `--help` prints the usage text and exits
-- successfully; too few arguments print the same text to stderr and exit
-- with status 1 instead of failing later inside io.open.
local usage_lines = {
  'Usage:',
  ' genunicodetables.lua UnicodeData.txt CaseFolding.txt ' ..
  'EastAsianWidth.txt',
  ' unicode_tables.generated.h',
}

if arg[1] == '--help' then
  for _, usage_line in ipairs(usage_lines) do
    print(usage_line)
  end
  os.exit(0)
elseif #arg < 4 then
  io.stderr:write(table.concat(usage_lines, '\n') .. '\n')
  os.exit(1)
end

local unicodedata_fname = arg[1]
local casefolding_fname = arg[2]
local eastasianwidth_fname = arg[3]
local utf_tables_fname = arg[4]
-- Splits `s` on ';' and returns the fields as a list of strings.
--
-- Leading and trailing whitespace is removed from every field. Empty fields
-- are kept, so a string with k semicolons always yields k + 1 items (the
-- empty string yields one empty item).
local split_on_semicolons = function(s)
  local ret = {}
  local idx = 1
  -- idx == #s + 1 is still visited so that a trailing ';' produces a final
  -- empty field.
  while idx <= #s + 1 do
    -- `local` is required here: without it `item` becomes a global variable.
    local item = s:match('^[^;]*', idx)
    idx = idx + #item + 1
    if idx <= #s + 1 then
      -- The character that ended the field must be the separator.
      assert(s:sub(idx - 1, idx - 1) == ';')
    end
    item = item:gsub('^%s*', '')
    item = item:gsub('%s*$', '')
    table.insert(ret, item)
  end
  return ret
end
-- Reads every line from `fp` and splits it into exactly `n` fields.
--
-- When `has_comments` is true, lines starting with '#' and lines holding
-- only whitespace are skipped. Returns the list of field lists, or nil
-- (after reporting the offending line on stderr) when a line does not have
-- `n` fields.
local fp_lines_to_lists = function(fp, n, has_comments)
  local lists = {}
  local lineno = 0
  local line = fp:read('*l')
  while line do
    lineno = lineno + 1
    local skip = has_comments
                 and (line:sub(1, 1) == '#' or line:match('^%s*$'))
    if not skip then
      local fields = split_on_semicolons(line)
      if #fields ~= n then
        io.stderr:write(('Found %s items in line %u, expected %u\n'):format(
          #fields, lineno, n))
        io.stderr:write('Line: ' .. line .. '\n')
        return nil
      end
      table.insert(lists, fields)
    end
    line = fp:read('*l')
  end
  return lists
end
-- Parses the UnicodeData file handle: 15 fields per line, no comment lines.
local function parse_data_to_props(ud_fp)
  local fields_per_line = 15
  return fp_lines_to_lists(ud_fp, fields_per_line, false)
end
-- Parses the CaseFolding file handle: 4 fields per line, comments skipped.
local function parse_fold_props(cf_fp)
  local fields_per_line = 4
  return fp_lines_to_lists(cf_fp, fields_per_line, true)
end
-- Parses the EastAsianWidth file handle: 2 fields per line, comments skipped.
local function parse_width_props(eaw_fp)
  local fields_per_line = 2
  return fp_lines_to_lists(eaw_fp, fields_per_line, true)
end
-- Formats one table entry as a line of C initializer text.
--
-- With both `step` and `add` given, produces a four-value convert entry in
-- which a step of 0 is written as -1. Otherwise produces a two-value
-- interval entry with the bounds zero-padded to at least four hex digits.
local make_range = function(start, end_, step, add)
  if not (step and add) then
    return (' {0x%04x, 0x%04x},\n'):format(start, end_)
  end
  local shown_step = step
  if step == 0 then
    shown_step = -1
  end
  return (' {0x%x, 0x%x, %d, %d},\n'):format(start, end_, shown_step, add)
end
-- Writes a `convertStruct` array named `table_name` to `ut_fp`.
--
-- Every entry of `props` accepted by `cond_func` contributes a mapping from
-- the codepoint in field 1 to the codepoint in field `nl_index` (both hex).
-- Consecutive mappings with the same offset and a constant distance between
-- codepoints are merged into a single {start, end, step, offset} entry.
local build_convert_table = function(ut_fp, props, cond_func, nl_index,
                                     table_name)
  ut_fp:write('static const convertStruct ' .. table_name .. '[] = {\n')
  -- first < 0 means that no range has been started yet.
  local first, last, stride, delta = -1, -1, 0, -1
  for _, p in ipairs(props) do
    if cond_func(p) then
      local cp = tonumber(p[1], 16)
      local diff = tonumber(p[nl_index], 16) - cp
      -- A stride of 0 marks a range that so far holds one codepoint; the
      -- second codepoint added to it fixes the stride.
      local extends = first >= 0 and delta == diff
                      and (stride == 0 or cp - last == stride)
      if extends then
        stride = cp - last
        last = cp
      else
        if first >= 0 then
          -- Flush the range collected so far.
          ut_fp:write(make_range(first, last, stride, delta))
        end
        first, last, stride, delta = cp, cp, 0, diff
      end
    end
  end
  if first >= 0 then
    ut_fp:write(make_range(first, last, stride, delta))
  end
  ut_fp:write('};\n')
end
-- Writes the convert table 'to' .. table_name, mapping each codepoint to the
-- codepoint stored in field `index` of its data entry. Entries whose field
-- `index` is empty are left out.
local build_case_table = function(ut_fp, dataprops, table_name, index)
  local c_name = 'to' .. table_name
  local has_mapping = function(p)
    return p[index] ~= ''
  end
  return build_convert_table(ut_fp, dataprops, has_mapping, index, c_name)
end
-- Writes the `foldCase` convert table from the case folding entries, using
-- only the entries whose second field is 'C' or 'S' and taking the target
-- codepoint from the third field.
local build_fold_table = function(ut_fp, foldprops)
  local is_c_or_s = function(p)
    local status = p[2]
    return (status == 'C' or status == 'S')
  end
  return build_convert_table(ut_fp, foldprops, is_c_or_s, 3, 'foldCase')
end
-- Writes the `combining` interval table: maximal runs of adjacent codepoints
-- whose third data field is Mn, Mc or Me.
local build_combining_table = function(ut_fp, dataprops)
  local combining_categories = {Mn=true, Mc=true, Me=true}
  ut_fp:write('static const struct interval combining[] = {\n')
  -- first < 0 means that no interval has been started yet.
  local first, last = -1, -1
  for _, p in ipairs(dataprops) do
    if combining_categories[p[3]] then
      local cp = tonumber(p[1], 16)
      if first < 0 then
        first = cp
      elseif last + 1 ~= cp then
        -- Gap found: flush the finished interval and start a new one.
        ut_fp:write(make_range(first, last))
        first = cp
      end
      last = cp
    end
  end
  if first >= 0 then
    ut_fp:write(make_range(first, last))
  end
  ut_fp:write('};\n')
end
-- Writes the interval table `table_name` for the width entries whose width
-- code starts with a letter that is a key of `widths`.
--
-- Single codepoints whose third data field is Mn, Mc or Me are left out;
-- explicit "first..last" ranges are always included. Adjacent results are
-- merged into one interval. `dataprops` is walked forward only, in step
-- with `widthprops`.
local build_width_table = function(ut_fp, dataprops, widthprops, widths,
                                   table_name)
  local combining_categories = {Mn=true, Mc=true, Me=true}
  ut_fp:write('static const struct interval ' .. table_name .. '[] = {\n')
  -- first < 0 means that no interval has been started yet.
  local first, last = -1, -1
  local dataidx = 1
  for _, p in ipairs(widthprops) do
    if widths[p[2]:sub(1, 1)] then
      local dots_start, dots_end = p[1]:find('%.%.')
      local cp, cp_last
      if dots_start then
        -- It is a range. We don't check for composing chars then.
        cp = tonumber(p[1]:sub(1, dots_start - 1), 16)
        cp_last = tonumber(p[1]:sub(dots_end + 1), 16)
      else
        cp = tonumber(p[1], 16)
        cp_last = cp
      end
      -- Advance to the first data entry that is not below cp.
      local data_cp = tonumber(dataprops[dataidx][1], 16)
      while data_cp < cp do
        dataidx = dataidx + 1
        data_cp = tonumber(dataprops[dataidx][1], 16)
      end
      if data_cp ~= cp and cp_last == cp then
        io.stderr:write('Cannot find character ' .. cp .. ' in data table.\n')
      end
      -- Only use the char when it's not a composing char.
      -- But use all chars from a range.
      local is_range = cp_last > cp
      if is_range or not combining_categories[dataprops[dataidx][3]] then
        if not (first >= 0 and last + 1 == cp) then
          if first >= 0 then
            ut_fp:write(make_range(first, last))
          end
          first = cp
        end
        last = cp_last
      end
    end
  end
  if first >= 0 then
    ut_fp:write(make_range(first, last))
  end
  ut_fp:write('};\n')
end
-- Driver: parse the three input files, then write all tables to the output
-- header in the order toLower, toUpper, combining, foldCase, doublewidth,
-- ambiguous.
--
-- io.open returns nil plus a message on failure; assert() turns that into an
-- error carrying the message instead of a later "attempt to index nil".
-- All inputs are parsed before the output file is opened, so a missing or
-- malformed input no longer leaves a truncated header behind.
local ud_fp = assert(io.open(unicodedata_fname, 'r'))
local dataprops = parse_data_to_props(ud_fp)
ud_fp:close()
assert(dataprops, 'Failed to parse ' .. unicodedata_fname)

local cf_fp = assert(io.open(casefolding_fname, 'r'))
local foldprops = parse_fold_props(cf_fp)
cf_fp:close()
assert(foldprops, 'Failed to parse ' .. casefolding_fname)

local eaw_fp = assert(io.open(eastasianwidth_fname, 'r'))
local widthprops = parse_width_props(eaw_fp)
eaw_fp:close()
assert(widthprops, 'Failed to parse ' .. eastasianwidth_fname)

local ut_fp = assert(io.open(utf_tables_fname, 'w'))

build_case_table(ut_fp, dataprops, 'Lower', 14)
build_case_table(ut_fp, dataprops, 'Upper', 13)
build_combining_table(ut_fp, dataprops)

build_fold_table(ut_fp, foldprops)

build_width_table(ut_fp, dataprops, widthprops, {W=true, F=true}, 'doublewidth')
build_width_table(ut_fp, dataprops, widthprops, {A=true}, 'ambiguous')

ut_fp:close()

View File

@ -27,6 +27,12 @@ set(OPTIONS_GENERATOR ${PROJECT_SOURCE_DIR}/scripts/genoptions.lua)
set(EVENTS_LIST_FILE ${PROJECT_SOURCE_DIR}/src/nvim/auevents.lua) set(EVENTS_LIST_FILE ${PROJECT_SOURCE_DIR}/src/nvim/auevents.lua)
set(EX_CMDS_DEFS_FILE ${PROJECT_SOURCE_DIR}/src/nvim/ex_cmds.lua) set(EX_CMDS_DEFS_FILE ${PROJECT_SOURCE_DIR}/src/nvim/ex_cmds.lua)
set(OPTIONS_LIST_FILE ${PROJECT_SOURCE_DIR}/src/nvim/options.lua) set(OPTIONS_LIST_FILE ${PROJECT_SOURCE_DIR}/src/nvim/options.lua)
# Unicode table generation: the Lua generator script, the directory holding
# the Unicode data files, the three data files the generator reads, and the
# header it writes into the generated-sources directory.
set(UNICODE_TABLES_GENERATOR ${PROJECT_SOURCE_DIR}/scripts/genunicodetables.lua)
set(UNICODE_DIR ${PROJECT_SOURCE_DIR}/unicode)
set(UNICODEDATA_FILE ${UNICODE_DIR}/UnicodeData.txt)
set(CASEFOLDING_FILE ${UNICODE_DIR}/CaseFolding.txt)
set(EASTASIANWIDTH_FILE ${UNICODE_DIR}/EastAsianWidth.txt)
set(GENERATED_UNICODE_TABLES ${GENERATED_DIR}/unicode_tables.generated.h)
include_directories(${GENERATED_DIR}) include_directories(${GENERATED_DIR})
include_directories(${GENERATED_INCLUDES_DIR}) include_directories(${GENERATED_INCLUDES_DIR})
@ -143,6 +149,19 @@ foreach(sfile ${NEOVIM_SOURCES}
endif() endif()
endforeach() endforeach()
# Produce the Unicode tables header by running the generator script with the
# three data files and the output path as arguments. The script and the data
# files are listed as dependencies of the output.
add_custom_command(OUTPUT ${GENERATED_UNICODE_TABLES}
COMMAND ${LUA_PRG} ${UNICODE_TABLES_GENERATOR}
${UNICODEDATA_FILE}
${CASEFOLDING_FILE}
${EASTASIANWIDTH_FILE}
${GENERATED_UNICODE_TABLES}
DEPENDS
${UNICODE_TABLES_GENERATOR}
${UNICODEDATA_FILE}
${CASEFOLDING_FILE}
${EASTASIANWIDTH_FILE}
)
add_custom_command(OUTPUT ${MSGPACK_DISPATCH} add_custom_command(OUTPUT ${MSGPACK_DISPATCH}
COMMAND ${LUA_PRG} ${DISPATCH_GENERATOR} ${API_HEADERS} ${MSGPACK_DISPATCH} COMMAND ${LUA_PRG} ${DISPATCH_GENERATOR} ${API_HEADERS} ${MSGPACK_DISPATCH}
DEPENDS DEPENDS
@ -159,6 +178,7 @@ list(APPEND NEOVIM_GENERATED_SOURCES
"${GENERATED_EVENTS_ENUM}" "${GENERATED_EVENTS_ENUM}"
"${GENERATED_EVENTS_NAMES_MAP}" "${GENERATED_EVENTS_NAMES_MAP}"
"${GENERATED_OPTIONS}" "${GENERATED_OPTIONS}"
"${GENERATED_UNICODE_TABLES}"
) )
add_custom_command(OUTPUT ${GENERATED_EX_CMDS_ENUM} ${GENERATED_EX_CMDS_DEFS} add_custom_command(OUTPUT ${GENERATED_EX_CMDS_ENUM} ${GENERATED_EX_CMDS_DEFS}

View File

@ -115,6 +115,7 @@ struct interval {
#ifdef INCLUDE_GENERATED_DECLARATIONS #ifdef INCLUDE_GENERATED_DECLARATIONS
# include "mbyte.c.generated.h" # include "mbyte.c.generated.h"
# include "unicode_tables.generated.h"
#endif #endif
/* /*
@ -944,225 +945,6 @@ static bool intable(const struct interval *table, size_t n_items, int c)
*/ */
int utf_char2cells(int c) int utf_char2cells(int c)
{ {
/* Sorted list of non-overlapping intervals of East Asian double width
* characters, generated with ../runtime/tools/unicode.vim. */
static struct interval doublewidth[] =
{
{0x1100, 0x115f},
{0x2329, 0x232a},
{0x2e80, 0x2e99},
{0x2e9b, 0x2ef3},
{0x2f00, 0x2fd5},
{0x2ff0, 0x2ffb},
{0x3000, 0x303e},
{0x3041, 0x3096},
{0x3099, 0x30ff},
{0x3105, 0x312d},
{0x3131, 0x318e},
{0x3190, 0x31ba},
{0x31c0, 0x31e3},
{0x31f0, 0x321e},
{0x3220, 0x3247},
{0x3250, 0x32fe},
{0x3300, 0x4dbf},
{0x4e00, 0xa48c},
{0xa490, 0xa4c6},
{0xa960, 0xa97c},
{0xac00, 0xd7a3},
{0xf900, 0xfaff},
{0xfe10, 0xfe19},
{0xfe30, 0xfe52},
{0xfe54, 0xfe66},
{0xfe68, 0xfe6b},
{0xff01, 0xff60},
{0xffe0, 0xffe6},
{0x1b000, 0x1b001},
{0x1f200, 0x1f202},
{0x1f210, 0x1f23a},
{0x1f240, 0x1f248},
{0x1f250, 0x1f251},
{0x20000, 0x2fffd},
{0x30000, 0x3fffd}
};
/* Sorted list of non-overlapping intervals of East Asian Ambiguous
* characters, generated with ../runtime/tools/unicode.vim. */
static struct interval ambiguous[] =
{
{0x00a1, 0x00a1},
{0x00a4, 0x00a4},
{0x00a7, 0x00a8},
{0x00aa, 0x00aa},
{0x00ad, 0x00ae},
{0x00b0, 0x00b4},
{0x00b6, 0x00ba},
{0x00bc, 0x00bf},
{0x00c6, 0x00c6},
{0x00d0, 0x00d0},
{0x00d7, 0x00d8},
{0x00de, 0x00e1},
{0x00e6, 0x00e6},
{0x00e8, 0x00ea},
{0x00ec, 0x00ed},
{0x00f0, 0x00f0},
{0x00f2, 0x00f3},
{0x00f7, 0x00fa},
{0x00fc, 0x00fc},
{0x00fe, 0x00fe},
{0x0101, 0x0101},
{0x0111, 0x0111},
{0x0113, 0x0113},
{0x011b, 0x011b},
{0x0126, 0x0127},
{0x012b, 0x012b},
{0x0131, 0x0133},
{0x0138, 0x0138},
{0x013f, 0x0142},
{0x0144, 0x0144},
{0x0148, 0x014b},
{0x014d, 0x014d},
{0x0152, 0x0153},
{0x0166, 0x0167},
{0x016b, 0x016b},
{0x01ce, 0x01ce},
{0x01d0, 0x01d0},
{0x01d2, 0x01d2},
{0x01d4, 0x01d4},
{0x01d6, 0x01d6},
{0x01d8, 0x01d8},
{0x01da, 0x01da},
{0x01dc, 0x01dc},
{0x0251, 0x0251},
{0x0261, 0x0261},
{0x02c4, 0x02c4},
{0x02c7, 0x02c7},
{0x02c9, 0x02cb},
{0x02cd, 0x02cd},
{0x02d0, 0x02d0},
{0x02d8, 0x02db},
{0x02dd, 0x02dd},
{0x02df, 0x02df},
{0x0300, 0x036f},
{0x0391, 0x03a1},
{0x03a3, 0x03a9},
{0x03b1, 0x03c1},
{0x03c3, 0x03c9},
{0x0401, 0x0401},
{0x0410, 0x044f},
{0x0451, 0x0451},
{0x2010, 0x2010},
{0x2013, 0x2016},
{0x2018, 0x2019},
{0x201c, 0x201d},
{0x2020, 0x2022},
{0x2024, 0x2027},
{0x2030, 0x2030},
{0x2032, 0x2033},
{0x2035, 0x2035},
{0x203b, 0x203b},
{0x203e, 0x203e},
{0x2074, 0x2074},
{0x207f, 0x207f},
{0x2081, 0x2084},
{0x20ac, 0x20ac},
{0x2103, 0x2103},
{0x2105, 0x2105},
{0x2109, 0x2109},
{0x2113, 0x2113},
{0x2116, 0x2116},
{0x2121, 0x2122},
{0x2126, 0x2126},
{0x212b, 0x212b},
{0x2153, 0x2154},
{0x215b, 0x215e},
{0x2160, 0x216b},
{0x2170, 0x2179},
{0x2189, 0x2189},
{0x2190, 0x2199},
{0x21b8, 0x21b9},
{0x21d2, 0x21d2},
{0x21d4, 0x21d4},
{0x21e7, 0x21e7},
{0x2200, 0x2200},
{0x2202, 0x2203},
{0x2207, 0x2208},
{0x220b, 0x220b},
{0x220f, 0x220f},
{0x2211, 0x2211},
{0x2215, 0x2215},
{0x221a, 0x221a},
{0x221d, 0x2220},
{0x2223, 0x2223},
{0x2225, 0x2225},
{0x2227, 0x222c},
{0x222e, 0x222e},
{0x2234, 0x2237},
{0x223c, 0x223d},
{0x2248, 0x2248},
{0x224c, 0x224c},
{0x2252, 0x2252},
{0x2260, 0x2261},
{0x2264, 0x2267},
{0x226a, 0x226b},
{0x226e, 0x226f},
{0x2282, 0x2283},
{0x2286, 0x2287},
{0x2295, 0x2295},
{0x2299, 0x2299},
{0x22a5, 0x22a5},
{0x22bf, 0x22bf},
{0x2312, 0x2312},
{0x2460, 0x24e9},
{0x24eb, 0x254b},
{0x2550, 0x2573},
{0x2580, 0x258f},
{0x2592, 0x2595},
{0x25a0, 0x25a1},
{0x25a3, 0x25a9},
{0x25b2, 0x25b3},
{0x25b6, 0x25b7},
{0x25bc, 0x25bd},
{0x25c0, 0x25c1},
{0x25c6, 0x25c8},
{0x25cb, 0x25cb},
{0x25ce, 0x25d1},
{0x25e2, 0x25e5},
{0x25ef, 0x25ef},
{0x2605, 0x2606},
{0x2609, 0x2609},
{0x260e, 0x260f},
{0x2614, 0x2615},
{0x261c, 0x261c},
{0x261e, 0x261e},
{0x2640, 0x2640},
{0x2642, 0x2642},
{0x2660, 0x2661},
{0x2663, 0x2665},
{0x2667, 0x266a},
{0x266c, 0x266d},
{0x266f, 0x266f},
{0x269e, 0x269f},
{0x26be, 0x26bf},
{0x26c4, 0x26cd},
{0x26cf, 0x26e1},
{0x26e3, 0x26e3},
{0x26e8, 0x26ff},
{0x273d, 0x273d},
{0x2757, 0x2757},
{0x2776, 0x277f},
{0x2b55, 0x2b59},
{0x3248, 0x324f},
{0xe000, 0xf8ff},
{0xfe00, 0xfe0f},
{0xfffd, 0xfffd},
{0x1f100, 0x1f10a},
{0x1f110, 0x1f12d},
{0x1f130, 0x1f169},
{0x1f170, 0x1f19a},
{0xe0100, 0xe01ef},
{0xf0000, 0xffffd},
{0x100000, 0x10fffd}
};
if (c >= 0x100) { if (c >= 0x100) {
#ifdef USE_WCHAR_FUNCTIONS #ifdef USE_WCHAR_FUNCTIONS
/* /*
@ -1806,241 +1588,6 @@ int utf_char2bytes(int c, char_u *buf)
*/ */
bool utf_iscomposing(int c) bool utf_iscomposing(int c)
{ {
/* Sorted list of non-overlapping intervals.
* Generated by ../runtime/tools/unicode.vim. */
static struct interval combining[] =
{
{0x0300, 0x036f},
{0x0483, 0x0489},
{0x0591, 0x05bd},
{0x05bf, 0x05bf},
{0x05c1, 0x05c2},
{0x05c4, 0x05c5},
{0x05c7, 0x05c7},
{0x0610, 0x061a},
{0x064b, 0x065f},
{0x0670, 0x0670},
{0x06d6, 0x06dc},
{0x06df, 0x06e4},
{0x06e7, 0x06e8},
{0x06ea, 0x06ed},
{0x0711, 0x0711},
{0x0730, 0x074a},
{0x07a6, 0x07b0},
{0x07eb, 0x07f3},
{0x0816, 0x0819},
{0x081b, 0x0823},
{0x0825, 0x0827},
{0x0829, 0x082d},
{0x0859, 0x085b},
{0x08e4, 0x0903},
{0x093a, 0x093c},
{0x093e, 0x094f},
{0x0951, 0x0957},
{0x0962, 0x0963},
{0x0981, 0x0983},
{0x09bc, 0x09bc},
{0x09be, 0x09c4},
{0x09c7, 0x09c8},
{0x09cb, 0x09cd},
{0x09d7, 0x09d7},
{0x09e2, 0x09e3},
{0x0a01, 0x0a03},
{0x0a3c, 0x0a3c},
{0x0a3e, 0x0a42},
{0x0a47, 0x0a48},
{0x0a4b, 0x0a4d},
{0x0a51, 0x0a51},
{0x0a70, 0x0a71},
{0x0a75, 0x0a75},
{0x0a81, 0x0a83},
{0x0abc, 0x0abc},
{0x0abe, 0x0ac5},
{0x0ac7, 0x0ac9},
{0x0acb, 0x0acd},
{0x0ae2, 0x0ae3},
{0x0b01, 0x0b03},
{0x0b3c, 0x0b3c},
{0x0b3e, 0x0b44},
{0x0b47, 0x0b48},
{0x0b4b, 0x0b4d},
{0x0b56, 0x0b57},
{0x0b62, 0x0b63},
{0x0b82, 0x0b82},
{0x0bbe, 0x0bc2},
{0x0bc6, 0x0bc8},
{0x0bca, 0x0bcd},
{0x0bd7, 0x0bd7},
{0x0c00, 0x0c03},
{0x0c3e, 0x0c44},
{0x0c46, 0x0c48},
{0x0c4a, 0x0c4d},
{0x0c55, 0x0c56},
{0x0c62, 0x0c63},
{0x0c81, 0x0c83},
{0x0cbc, 0x0cbc},
{0x0cbe, 0x0cc4},
{0x0cc6, 0x0cc8},
{0x0cca, 0x0ccd},
{0x0cd5, 0x0cd6},
{0x0ce2, 0x0ce3},
{0x0d01, 0x0d03},
{0x0d3e, 0x0d44},
{0x0d46, 0x0d48},
{0x0d4a, 0x0d4d},
{0x0d57, 0x0d57},
{0x0d62, 0x0d63},
{0x0d82, 0x0d83},
{0x0dca, 0x0dca},
{0x0dcf, 0x0dd4},
{0x0dd6, 0x0dd6},
{0x0dd8, 0x0ddf},
{0x0df2, 0x0df3},
{0x0e31, 0x0e31},
{0x0e34, 0x0e3a},
{0x0e47, 0x0e4e},
{0x0eb1, 0x0eb1},
{0x0eb4, 0x0eb9},
{0x0ebb, 0x0ebc},
{0x0ec8, 0x0ecd},
{0x0f18, 0x0f19},
{0x0f35, 0x0f35},
{0x0f37, 0x0f37},
{0x0f39, 0x0f39},
{0x0f3e, 0x0f3f},
{0x0f71, 0x0f84},
{0x0f86, 0x0f87},
{0x0f8d, 0x0f97},
{0x0f99, 0x0fbc},
{0x0fc6, 0x0fc6},
{0x102b, 0x103e},
{0x1056, 0x1059},
{0x105e, 0x1060},
{0x1062, 0x1064},
{0x1067, 0x106d},
{0x1071, 0x1074},
{0x1082, 0x108d},
{0x108f, 0x108f},
{0x109a, 0x109d},
{0x135d, 0x135f},
{0x1712, 0x1714},
{0x1732, 0x1734},
{0x1752, 0x1753},
{0x1772, 0x1773},
{0x17b4, 0x17d3},
{0x17dd, 0x17dd},
{0x180b, 0x180d},
{0x18a9, 0x18a9},
{0x1920, 0x192b},
{0x1930, 0x193b},
{0x19b0, 0x19c0},
{0x19c8, 0x19c9},
{0x1a17, 0x1a1b},
{0x1a55, 0x1a5e},
{0x1a60, 0x1a7c},
{0x1a7f, 0x1a7f},
{0x1ab0, 0x1abe},
{0x1b00, 0x1b04},
{0x1b34, 0x1b44},
{0x1b6b, 0x1b73},
{0x1b80, 0x1b82},
{0x1ba1, 0x1bad},
{0x1be6, 0x1bf3},
{0x1c24, 0x1c37},
{0x1cd0, 0x1cd2},
{0x1cd4, 0x1ce8},
{0x1ced, 0x1ced},
{0x1cf2, 0x1cf4},
{0x1cf8, 0x1cf9},
{0x1dc0, 0x1df5},
{0x1dfc, 0x1dff},
{0x20d0, 0x20f0},
{0x2cef, 0x2cf1},
{0x2d7f, 0x2d7f},
{0x2de0, 0x2dff},
{0x302a, 0x302f},
{0x3099, 0x309a},
{0xa66f, 0xa672},
{0xa674, 0xa67d},
{0xa69f, 0xa69f},
{0xa6f0, 0xa6f1},
{0xa802, 0xa802},
{0xa806, 0xa806},
{0xa80b, 0xa80b},
{0xa823, 0xa827},
{0xa880, 0xa881},
{0xa8b4, 0xa8c4},
{0xa8e0, 0xa8f1},
{0xa926, 0xa92d},
{0xa947, 0xa953},
{0xa980, 0xa983},
{0xa9b3, 0xa9c0},
{0xa9e5, 0xa9e5},
{0xaa29, 0xaa36},
{0xaa43, 0xaa43},
{0xaa4c, 0xaa4d},
{0xaa7b, 0xaa7d},
{0xaab0, 0xaab0},
{0xaab2, 0xaab4},
{0xaab7, 0xaab8},
{0xaabe, 0xaabf},
{0xaac1, 0xaac1},
{0xaaeb, 0xaaef},
{0xaaf5, 0xaaf6},
{0xabe3, 0xabea},
{0xabec, 0xabed},
{0xfb1e, 0xfb1e},
{0xfe00, 0xfe0f},
{0xfe20, 0xfe2d},
{0x101fd, 0x101fd},
{0x102e0, 0x102e0},
{0x10376, 0x1037a},
{0x10a01, 0x10a03},
{0x10a05, 0x10a06},
{0x10a0c, 0x10a0f},
{0x10a38, 0x10a3a},
{0x10a3f, 0x10a3f},
{0x10ae5, 0x10ae6},
{0x11000, 0x11002},
{0x11038, 0x11046},
{0x1107f, 0x11082},
{0x110b0, 0x110ba},
{0x11100, 0x11102},
{0x11127, 0x11134},
{0x11173, 0x11173},
{0x11180, 0x11182},
{0x111b3, 0x111c0},
{0x1122c, 0x11237},
{0x112df, 0x112ea},
{0x11301, 0x11303},
{0x1133c, 0x1133c},
{0x1133e, 0x11344},
{0x11347, 0x11348},
{0x1134b, 0x1134d},
{0x11357, 0x11357},
{0x11362, 0x11363},
{0x11366, 0x1136c},
{0x11370, 0x11374},
{0x114b0, 0x114c3},
{0x115af, 0x115b5},
{0x115b8, 0x115c0},
{0x11630, 0x11640},
{0x116ab, 0x116b7},
{0x16af0, 0x16af4},
{0x16b30, 0x16b36},
{0x16f51, 0x16f7e},
{0x16f8f, 0x16f92},
{0x1bc9d, 0x1bc9e},
{0x1d165, 0x1d169},
{0x1d16d, 0x1d172},
{0x1d17b, 0x1d182},
{0x1d185, 0x1d18b},
{0x1d1aa, 0x1d1ad},
{0x1d242, 0x1d244},
{0x1e8d0, 0x1e8d6},
{0xe0100, 0xe01ef}
};
return intable(combining, ARRAY_SIZE(combining), c); return intable(combining, ARRAY_SIZE(combining), c);
} }
@ -2187,193 +1734,12 @@ int utf_class(int c)
* Last updated for Unicode 5.2. * Last updated for Unicode 5.2.
*/ */
/*
* The following tables are built by ../runtime/tools/unicode.vim.
* They must be in numeric order, because we use binary search.
* An entry such as {0x41,0x5a,1,32} means that Unicode characters in the
* range from 0x41 to 0x5a inclusive, stepping by 1, are changed to
* folded/upper/lower by adding 32.
*/
static convertStruct foldCase[] =
{
{0x41,0x5a,1,32},
{0xb5,0xb5,-1,775},
{0xc0,0xd6,1,32},
{0xd8,0xde,1,32},
{0x100,0x12e,2,1},
{0x132,0x136,2,1},
{0x139,0x147,2,1},
{0x14a,0x176,2,1},
{0x178,0x178,-1,-121},
{0x179,0x17d,2,1},
{0x17f,0x17f,-1,-268},
{0x181,0x181,-1,210},
{0x182,0x184,2,1},
{0x186,0x186,-1,206},
{0x187,0x187,-1,1},
{0x189,0x18a,1,205},
{0x18b,0x18b,-1,1},
{0x18e,0x18e,-1,79},
{0x18f,0x18f,-1,202},
{0x190,0x190,-1,203},
{0x191,0x191,-1,1},
{0x193,0x193,-1,205},
{0x194,0x194,-1,207},
{0x196,0x196,-1,211},
{0x197,0x197,-1,209},
{0x198,0x198,-1,1},
{0x19c,0x19c,-1,211},
{0x19d,0x19d,-1,213},
{0x19f,0x19f,-1,214},
{0x1a0,0x1a4,2,1},
{0x1a6,0x1a6,-1,218},
{0x1a7,0x1a7,-1,1},
{0x1a9,0x1a9,-1,218},
{0x1ac,0x1ac,-1,1},
{0x1ae,0x1ae,-1,218},
{0x1af,0x1af,-1,1},
{0x1b1,0x1b2,1,217},
{0x1b3,0x1b5,2,1},
{0x1b7,0x1b7,-1,219},
{0x1b8,0x1bc,4,1},
{0x1c4,0x1c4,-1,2},
{0x1c5,0x1c5,-1,1},
{0x1c7,0x1c7,-1,2},
{0x1c8,0x1c8,-1,1},
{0x1ca,0x1ca,-1,2},
{0x1cb,0x1db,2,1},
{0x1de,0x1ee,2,1},
{0x1f1,0x1f1,-1,2},
{0x1f2,0x1f4,2,1},
{0x1f6,0x1f6,-1,-97},
{0x1f7,0x1f7,-1,-56},
{0x1f8,0x21e,2,1},
{0x220,0x220,-1,-130},
{0x222,0x232,2,1},
{0x23a,0x23a,-1,10795},
{0x23b,0x23b,-1,1},
{0x23d,0x23d,-1,-163},
{0x23e,0x23e,-1,10792},
{0x241,0x241,-1,1},
{0x243,0x243,-1,-195},
{0x244,0x244,-1,69},
{0x245,0x245,-1,71},
{0x246,0x24e,2,1},
{0x345,0x345,-1,116},
{0x370,0x372,2,1},
{0x376,0x376,-1,1},
{0x37f,0x37f,-1,116},
{0x386,0x386,-1,38},
{0x388,0x38a,1,37},
{0x38c,0x38c,-1,64},
{0x38e,0x38f,1,63},
{0x391,0x3a1,1,32},
{0x3a3,0x3ab,1,32},
{0x3c2,0x3c2,-1,1},
{0x3cf,0x3cf,-1,8},
{0x3d0,0x3d0,-1,-30},
{0x3d1,0x3d1,-1,-25},
{0x3d5,0x3d5,-1,-15},
{0x3d6,0x3d6,-1,-22},
{0x3d8,0x3ee,2,1},
{0x3f0,0x3f0,-1,-54},
{0x3f1,0x3f1,-1,-48},
{0x3f4,0x3f4,-1,-60},
{0x3f5,0x3f5,-1,-64},
{0x3f7,0x3f7,-1,1},
{0x3f9,0x3f9,-1,-7},
{0x3fa,0x3fa,-1,1},
{0x3fd,0x3ff,1,-130},
{0x400,0x40f,1,80},
{0x410,0x42f,1,32},
{0x460,0x480,2,1},
{0x48a,0x4be,2,1},
{0x4c0,0x4c0,-1,15},
{0x4c1,0x4cd,2,1},
{0x4d0,0x52e,2,1},
{0x531,0x556,1,48},
{0x10a0,0x10c5,1,7264},
{0x10c7,0x10cd,6,7264},
{0x1e00,0x1e94,2,1},
{0x1e9b,0x1e9b,-1,-58},
{0x1e9e,0x1e9e,-1,-7615},
{0x1ea0,0x1efe,2,1},
{0x1f08,0x1f0f,1,-8},
{0x1f18,0x1f1d,1,-8},
{0x1f28,0x1f2f,1,-8},
{0x1f38,0x1f3f,1,-8},
{0x1f48,0x1f4d,1,-8},
{0x1f59,0x1f5f,2,-8},
{0x1f68,0x1f6f,1,-8},
{0x1f88,0x1f8f,1,-8},
{0x1f98,0x1f9f,1,-8},
{0x1fa8,0x1faf,1,-8},
{0x1fb8,0x1fb9,1,-8},
{0x1fba,0x1fbb,1,-74},
{0x1fbc,0x1fbc,-1,-9},
{0x1fbe,0x1fbe,-1,-7173},
{0x1fc8,0x1fcb,1,-86},
{0x1fcc,0x1fcc,-1,-9},
{0x1fd8,0x1fd9,1,-8},
{0x1fda,0x1fdb,1,-100},
{0x1fe8,0x1fe9,1,-8},
{0x1fea,0x1feb,1,-112},
{0x1fec,0x1fec,-1,-7},
{0x1ff8,0x1ff9,1,-128},
{0x1ffa,0x1ffb,1,-126},
{0x1ffc,0x1ffc,-1,-9},
{0x2126,0x2126,-1,-7517},
{0x212a,0x212a,-1,-8383},
{0x212b,0x212b,-1,-8262},
{0x2132,0x2132,-1,28},
{0x2160,0x216f,1,16},
{0x2183,0x2183,-1,1},
{0x24b6,0x24cf,1,26},
{0x2c00,0x2c2e,1,48},
{0x2c60,0x2c60,-1,1},
{0x2c62,0x2c62,-1,-10743},
{0x2c63,0x2c63,-1,-3814},
{0x2c64,0x2c64,-1,-10727},
{0x2c67,0x2c6b,2,1},
{0x2c6d,0x2c6d,-1,-10780},
{0x2c6e,0x2c6e,-1,-10749},
{0x2c6f,0x2c6f,-1,-10783},
{0x2c70,0x2c70,-1,-10782},
{0x2c72,0x2c75,3,1},
{0x2c7e,0x2c7f,1,-10815},
{0x2c80,0x2ce2,2,1},
{0x2ceb,0x2ced,2,1},
{0x2cf2,0xa640,31054,1},
{0xa642,0xa66c,2,1},
{0xa680,0xa69a,2,1},
{0xa722,0xa72e,2,1},
{0xa732,0xa76e,2,1},
{0xa779,0xa77b,2,1},
{0xa77d,0xa77d,-1,-35332},
{0xa77e,0xa786,2,1},
{0xa78b,0xa78b,-1,1},
{0xa78d,0xa78d,-1,-42280},
{0xa790,0xa792,2,1},
{0xa796,0xa7a8,2,1},
{0xa7aa,0xa7aa,-1,-42308},
{0xa7ab,0xa7ab,-1,-42319},
{0xa7ac,0xa7ac,-1,-42315},
{0xa7ad,0xa7ad,-1,-42305},
{0xa7b0,0xa7b0,-1,-42258},
{0xa7b1,0xa7b1,-1,-42282},
{0xff21,0xff3a,1,32},
{0x10400,0x10427,1,40},
{0x118a0,0x118bf,1,32}
};
/* /*
* Generic conversion function for case operations. * Generic conversion function for case operations.
* Return the converted equivalent of "a", which is a UCS-4 character. Use * Return the converted equivalent of "a", which is a UCS-4 character. Use
* the given conversion "table". Uses binary search on "table". * the given conversion "table". Uses binary search on "table".
*/ */
static int utf_convert(int a, convertStruct *table, size_t n_items) static int utf_convert(int a, const convertStruct *const table, size_t n_items)
{ {
size_t start, mid, end; /* indices into table */ size_t start, mid, end; /* indices into table */
@ -2405,332 +1771,6 @@ int utf_fold(int a)
return utf_convert(a, foldCase, ARRAY_SIZE(foldCase)); return utf_convert(a, foldCase, ARRAY_SIZE(foldCase));
} }
static convertStruct toLower[] =
{
{0x41,0x5a,1,32},
{0xc0,0xd6,1,32},
{0xd8,0xde,1,32},
{0x100,0x12e,2,1},
{0x130,0x130,-1,-199},
{0x132,0x136,2,1},
{0x139,0x147,2,1},
{0x14a,0x176,2,1},
{0x178,0x178,-1,-121},
{0x179,0x17d,2,1},
{0x181,0x181,-1,210},
{0x182,0x184,2,1},
{0x186,0x186,-1,206},
{0x187,0x187,-1,1},
{0x189,0x18a,1,205},
{0x18b,0x18b,-1,1},
{0x18e,0x18e,-1,79},
{0x18f,0x18f,-1,202},
{0x190,0x190,-1,203},
{0x191,0x191,-1,1},
{0x193,0x193,-1,205},
{0x194,0x194,-1,207},
{0x196,0x196,-1,211},
{0x197,0x197,-1,209},
{0x198,0x198,-1,1},
{0x19c,0x19c,-1,211},
{0x19d,0x19d,-1,213},
{0x19f,0x19f,-1,214},
{0x1a0,0x1a4,2,1},
{0x1a6,0x1a6,-1,218},
{0x1a7,0x1a7,-1,1},
{0x1a9,0x1a9,-1,218},
{0x1ac,0x1ac,-1,1},
{0x1ae,0x1ae,-1,218},
{0x1af,0x1af,-1,1},
{0x1b1,0x1b2,1,217},
{0x1b3,0x1b5,2,1},
{0x1b7,0x1b7,-1,219},
{0x1b8,0x1bc,4,1},
{0x1c4,0x1c4,-1,2},
{0x1c5,0x1c5,-1,1},
{0x1c7,0x1c7,-1,2},
{0x1c8,0x1c8,-1,1},
{0x1ca,0x1ca,-1,2},
{0x1cb,0x1db,2,1},
{0x1de,0x1ee,2,1},
{0x1f1,0x1f1,-1,2},
{0x1f2,0x1f4,2,1},
{0x1f6,0x1f6,-1,-97},
{0x1f7,0x1f7,-1,-56},
{0x1f8,0x21e,2,1},
{0x220,0x220,-1,-130},
{0x222,0x232,2,1},
{0x23a,0x23a,-1,10795},
{0x23b,0x23b,-1,1},
{0x23d,0x23d,-1,-163},
{0x23e,0x23e,-1,10792},
{0x241,0x241,-1,1},
{0x243,0x243,-1,-195},
{0x244,0x244,-1,69},
{0x245,0x245,-1,71},
{0x246,0x24e,2,1},
{0x370,0x372,2,1},
{0x376,0x376,-1,1},
{0x37f,0x37f,-1,116},
{0x386,0x386,-1,38},
{0x388,0x38a,1,37},
{0x38c,0x38c,-1,64},
{0x38e,0x38f,1,63},
{0x391,0x3a1,1,32},
{0x3a3,0x3ab,1,32},
{0x3cf,0x3cf,-1,8},
{0x3d8,0x3ee,2,1},
{0x3f4,0x3f4,-1,-60},
{0x3f7,0x3f7,-1,1},
{0x3f9,0x3f9,-1,-7},
{0x3fa,0x3fa,-1,1},
{0x3fd,0x3ff,1,-130},
{0x400,0x40f,1,80},
{0x410,0x42f,1,32},
{0x460,0x480,2,1},
{0x48a,0x4be,2,1},
{0x4c0,0x4c0,-1,15},
{0x4c1,0x4cd,2,1},
{0x4d0,0x52e,2,1},
{0x531,0x556,1,48},
{0x10a0,0x10c5,1,7264},
{0x10c7,0x10cd,6,7264},
{0x1e00,0x1e94,2,1},
{0x1e9e,0x1e9e,-1,-7615},
{0x1ea0,0x1efe,2,1},
{0x1f08,0x1f0f,1,-8},
{0x1f18,0x1f1d,1,-8},
{0x1f28,0x1f2f,1,-8},
{0x1f38,0x1f3f,1,-8},
{0x1f48,0x1f4d,1,-8},
{0x1f59,0x1f5f,2,-8},
{0x1f68,0x1f6f,1,-8},
{0x1f88,0x1f8f,1,-8},
{0x1f98,0x1f9f,1,-8},
{0x1fa8,0x1faf,1,-8},
{0x1fb8,0x1fb9,1,-8},
{0x1fba,0x1fbb,1,-74},
{0x1fbc,0x1fbc,-1,-9},
{0x1fc8,0x1fcb,1,-86},
{0x1fcc,0x1fcc,-1,-9},
{0x1fd8,0x1fd9,1,-8},
{0x1fda,0x1fdb,1,-100},
{0x1fe8,0x1fe9,1,-8},
{0x1fea,0x1feb,1,-112},
{0x1fec,0x1fec,-1,-7},
{0x1ff8,0x1ff9,1,-128},
{0x1ffa,0x1ffb,1,-126},
{0x1ffc,0x1ffc,-1,-9},
{0x2126,0x2126,-1,-7517},
{0x212a,0x212a,-1,-8383},
{0x212b,0x212b,-1,-8262},
{0x2132,0x2132,-1,28},
{0x2160,0x216f,1,16},
{0x2183,0x2183,-1,1},
{0x24b6,0x24cf,1,26},
{0x2c00,0x2c2e,1,48},
{0x2c60,0x2c60,-1,1},
{0x2c62,0x2c62,-1,-10743},
{0x2c63,0x2c63,-1,-3814},
{0x2c64,0x2c64,-1,-10727},
{0x2c67,0x2c6b,2,1},
{0x2c6d,0x2c6d,-1,-10780},
{0x2c6e,0x2c6e,-1,-10749},
{0x2c6f,0x2c6f,-1,-10783},
{0x2c70,0x2c70,-1,-10782},
{0x2c72,0x2c75,3,1},
{0x2c7e,0x2c7f,1,-10815},
{0x2c80,0x2ce2,2,1},
{0x2ceb,0x2ced,2,1},
{0x2cf2,0xa640,31054,1},
{0xa642,0xa66c,2,1},
{0xa680,0xa69a,2,1},
{0xa722,0xa72e,2,1},
{0xa732,0xa76e,2,1},
{0xa779,0xa77b,2,1},
{0xa77d,0xa77d,-1,-35332},
{0xa77e,0xa786,2,1},
{0xa78b,0xa78b,-1,1},
{0xa78d,0xa78d,-1,-42280},
{0xa790,0xa792,2,1},
{0xa796,0xa7a8,2,1},
{0xa7aa,0xa7aa,-1,-42308},
{0xa7ab,0xa7ab,-1,-42319},
{0xa7ac,0xa7ac,-1,-42315},
{0xa7ad,0xa7ad,-1,-42305},
{0xa7b0,0xa7b0,-1,-42258},
{0xa7b1,0xa7b1,-1,-42282},
{0xff21,0xff3a,1,32},
{0x10400,0x10427,1,40},
{0x118a0,0x118bf,1,32}
};
static convertStruct toUpper[] =
{
{0x61,0x7a,1,-32},
{0xb5,0xb5,-1,743},
{0xe0,0xf6,1,-32},
{0xf8,0xfe,1,-32},
{0xff,0xff,-1,121},
{0x101,0x12f,2,-1},
{0x131,0x131,-1,-232},
{0x133,0x137,2,-1},
{0x13a,0x148,2,-1},
{0x14b,0x177,2,-1},
{0x17a,0x17e,2,-1},
{0x17f,0x17f,-1,-300},
{0x180,0x180,-1,195},
{0x183,0x185,2,-1},
{0x188,0x18c,4,-1},
{0x192,0x192,-1,-1},
{0x195,0x195,-1,97},
{0x199,0x199,-1,-1},
{0x19a,0x19a,-1,163},
{0x19e,0x19e,-1,130},
{0x1a1,0x1a5,2,-1},
{0x1a8,0x1ad,5,-1},
{0x1b0,0x1b4,4,-1},
{0x1b6,0x1b9,3,-1},
{0x1bd,0x1bd,-1,-1},
{0x1bf,0x1bf,-1,56},
{0x1c5,0x1c5,-1,-1},
{0x1c6,0x1c6,-1,-2},
{0x1c8,0x1c8,-1,-1},
{0x1c9,0x1c9,-1,-2},
{0x1cb,0x1cb,-1,-1},
{0x1cc,0x1cc,-1,-2},
{0x1ce,0x1dc,2,-1},
{0x1dd,0x1dd,-1,-79},
{0x1df,0x1ef,2,-1},
{0x1f2,0x1f2,-1,-1},
{0x1f3,0x1f3,-1,-2},
{0x1f5,0x1f9,4,-1},
{0x1fb,0x21f,2,-1},
{0x223,0x233,2,-1},
{0x23c,0x23c,-1,-1},
{0x23f,0x240,1,10815},
{0x242,0x247,5,-1},
{0x249,0x24f,2,-1},
{0x250,0x250,-1,10783},
{0x251,0x251,-1,10780},
{0x252,0x252,-1,10782},
{0x253,0x253,-1,-210},
{0x254,0x254,-1,-206},
{0x256,0x257,1,-205},
{0x259,0x259,-1,-202},
{0x25b,0x25b,-1,-203},
{0x25c,0x25c,-1,42319},
{0x260,0x260,-1,-205},
{0x261,0x261,-1,42315},
{0x263,0x263,-1,-207},
{0x265,0x265,-1,42280},
{0x266,0x266,-1,42308},
{0x268,0x268,-1,-209},
{0x269,0x269,-1,-211},
{0x26b,0x26b,-1,10743},
{0x26c,0x26c,-1,42305},
{0x26f,0x26f,-1,-211},
{0x271,0x271,-1,10749},
{0x272,0x272,-1,-213},
{0x275,0x275,-1,-214},
{0x27d,0x27d,-1,10727},
{0x280,0x283,3,-218},
{0x287,0x287,-1,42282},
{0x288,0x288,-1,-218},
{0x289,0x289,-1,-69},
{0x28a,0x28b,1,-217},
{0x28c,0x28c,-1,-71},
{0x292,0x292,-1,-219},
{0x29e,0x29e,-1,42258},
{0x345,0x345,-1,84},
{0x371,0x373,2,-1},
{0x377,0x377,-1,-1},
{0x37b,0x37d,1,130},
{0x3ac,0x3ac,-1,-38},
{0x3ad,0x3af,1,-37},
{0x3b1,0x3c1,1,-32},
{0x3c2,0x3c2,-1,-31},
{0x3c3,0x3cb,1,-32},
{0x3cc,0x3cc,-1,-64},
{0x3cd,0x3ce,1,-63},
{0x3d0,0x3d0,-1,-62},
{0x3d1,0x3d1,-1,-57},
{0x3d5,0x3d5,-1,-47},
{0x3d6,0x3d6,-1,-54},
{0x3d7,0x3d7,-1,-8},
{0x3d9,0x3ef,2,-1},
{0x3f0,0x3f0,-1,-86},
{0x3f1,0x3f1,-1,-80},
{0x3f2,0x3f2,-1,7},
{0x3f3,0x3f3,-1,-116},
{0x3f5,0x3f5,-1,-96},
{0x3f8,0x3fb,3,-1},
{0x430,0x44f,1,-32},
{0x450,0x45f,1,-80},
{0x461,0x481,2,-1},
{0x48b,0x4bf,2,-1},
{0x4c2,0x4ce,2,-1},
{0x4cf,0x4cf,-1,-15},
{0x4d1,0x52f,2,-1},
{0x561,0x586,1,-48},
{0x1d79,0x1d79,-1,35332},
{0x1d7d,0x1d7d,-1,3814},
{0x1e01,0x1e95,2,-1},
{0x1e9b,0x1e9b,-1,-59},
{0x1ea1,0x1eff,2,-1},
{0x1f00,0x1f07,1,8},
{0x1f10,0x1f15,1,8},
{0x1f20,0x1f27,1,8},
{0x1f30,0x1f37,1,8},
{0x1f40,0x1f45,1,8},
{0x1f51,0x1f57,2,8},
{0x1f60,0x1f67,1,8},
{0x1f70,0x1f71,1,74},
{0x1f72,0x1f75,1,86},
{0x1f76,0x1f77,1,100},
{0x1f78,0x1f79,1,128},
{0x1f7a,0x1f7b,1,112},
{0x1f7c,0x1f7d,1,126},
{0x1f80,0x1f87,1,8},
{0x1f90,0x1f97,1,8},
{0x1fa0,0x1fa7,1,8},
{0x1fb0,0x1fb1,1,8},
{0x1fb3,0x1fb3,-1,9},
{0x1fbe,0x1fbe,-1,-7205},
{0x1fc3,0x1fc3,-1,9},
{0x1fd0,0x1fd1,1,8},
{0x1fe0,0x1fe1,1,8},
{0x1fe5,0x1fe5,-1,7},
{0x1ff3,0x1ff3,-1,9},
{0x214e,0x214e,-1,-28},
{0x2170,0x217f,1,-16},
{0x2184,0x2184,-1,-1},
{0x24d0,0x24e9,1,-26},
{0x2c30,0x2c5e,1,-48},
{0x2c61,0x2c61,-1,-1},
{0x2c65,0x2c65,-1,-10795},
{0x2c66,0x2c66,-1,-10792},
{0x2c68,0x2c6c,2,-1},
{0x2c73,0x2c76,3,-1},
{0x2c81,0x2ce3,2,-1},
{0x2cec,0x2cee,2,-1},
{0x2cf3,0x2cf3,-1,-1},
{0x2d00,0x2d25,1,-7264},
{0x2d27,0x2d2d,6,-7264},
{0xa641,0xa66d,2,-1},
{0xa681,0xa69b,2,-1},
{0xa723,0xa72f,2,-1},
{0xa733,0xa76f,2,-1},
{0xa77a,0xa77c,2,-1},
{0xa77f,0xa787,2,-1},
{0xa78c,0xa791,5,-1},
{0xa793,0xa797,4,-1},
{0xa799,0xa7a9,2,-1},
{0xff41,0xff5a,1,-32},
{0x10428,0x1044f,1,-40},
{0x118c0,0x118df,1,-32}
};
/* /*
* Return the upper-case equivalent of "a", which is a UCS-4 character. Use * Return the upper-case equivalent of "a", which is a UCS-4 character. Use
* simple case folding. * simple case folding.

1414
unicode/CaseFolding.txt Normal file

File diff suppressed because it is too large Load Diff

37
unicode/Copyright.txt Normal file
View File

@ -0,0 +1,37 @@
COPYRIGHT AND PERMISSION NOTICE
Copyright © 1991-2015 Unicode, Inc. All rights reserved.
Distributed under the Terms of Use in
http://www.unicode.org/copyright.html.
Permission is hereby granted, free of charge, to any person obtaining
a copy of the Unicode data files and any associated documentation
(the "Data Files") or Unicode software and any associated documentation
(the "Software") to deal in the Data Files or Software
without restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, and/or sell copies of
the Data Files or Software, and to permit persons to whom the Data Files
or Software are furnished to do so, provided that
(a) this copyright and permission notice appear with all copies
of the Data Files or Software,
(b) this copyright and permission notice appear in associated
documentation, and
(c) there is clear notice in each modified Data File or in the Software
as well as in the documentation associated with the Data File(s) or
Software that the data or software has been modified.
THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT OF THIRD PARTY RIGHTS.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THE DATA FILES OR SOFTWARE.
Except as contained in this notice, the name of a copyright holder
shall not be used in advertising or otherwise to promote the sale,
use or other dealings in these Data Files or Software without prior
written authorization of the copyright holder.

2174
unicode/EastAsianWidth.txt Normal file

File diff suppressed because it is too large Load Diff

29215
unicode/UnicodeData.txt Normal file

File diff suppressed because it is too large Load Diff