diff options
author | Brad King <brad.king@kitware.com> | 2011-12-20 16:50:51 (GMT) |
---|---|---|
committer | Brad King <brad.king@kitware.com> | 2011-12-20 16:54:25 (GMT) |
commit | 3e5a80f063859aafe4fd8969ad18af5300a23a50 (patch) | |
tree | e052440cdad541a1b4c70dd5e77907e1df31a2e8 /Utilities/cmlibarchive/build/utils | |
parent | f6ac86d70e582a9b089a357ddfbfb621a8b6e6ea (diff) | |
parent | 2f4a3792bbfdb4e05cf7468059b3f6308f5ed91f (diff) | |
download | CMake-3e5a80f063859aafe4fd8969ad18af5300a23a50.zip CMake-3e5a80f063859aafe4fd8969ad18af5300a23a50.tar.gz CMake-3e5a80f063859aafe4fd8969ad18af5300a23a50.tar.bz2 |
Merge branch 'libarchive-upstream' into update-libarchive
Add Utilities/cmlibarchive using upstream libarchive 3.0.0-r3950
snapshot.
Diffstat (limited to 'Utilities/cmlibarchive/build/utils')
-rw-r--r-- | Utilities/cmlibarchive/build/utils/gen_archive_string_composition_h.sh | 418 |
1 files changed, 418 insertions, 0 deletions
#!/bin/sh
#
# Generate archive_string_composition.h from the Unicode Character Database.
#
# Originally added as
#   Utilities/cmlibarchive/build/utils/gen_archive_string_composition_h.sh
#
# Usage: gen_archive_string_composition_h.sh UnicodeData.txt
#
# This needs http://unicode.org/Public/UNIDATA/UnicodeData.txt
# When no file argument is given, awk reads the data from standard input.
#
# The script works in three steps:
#   1. write the awk program below to a private temporary file,
#   2. write the licence/header part of ${outfile} (append_copyright),
#   3. run the awk program over the input and append its output to ${outfile}.
#
inputfile="$1"	# Expect UnicodeData.txt
outfile=archive_string_composition.h

# Use mktemp instead of a predictable /tmp/name$$ path so another user cannot
# pre-create or symlink the file, and always remove it on exit or interrupt.
pickout=$(mktemp "${TMPDIR:-/tmp}/mk_unicode_composition_tbl.XXXXXX") || exit 1
trap 'rm -f "${pickout}"' EXIT
trap 'exit 1' HUP INT TERM

#################################################################################
#
# Append the file header of "archive_string_composition.h"
#
#################################################################################
# Truncates ${outfile} and writes the licence block, the include guard and the
# declaration of struct unicode_composition_table.
# The here-document delimiter is unquoted, so the shell still performs
# expansion inside it: dollar signs and backquotes that must reach the output
# literally are escaped with a backslash.  (Unescaped, the two backquotes in
# front of "AS IS" form an empty command substitution and disappear.)
append_copyright()
{
cat > "${outfile}" <<CR_END
/*-
 * Copyright (c) 2011 libarchive Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) \`\`AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * \$FreeBSD\$
 *
 */

/*
 * ATTENTION!
 * This file is generated by build/utils/gen_archive_string_composition_h.sh
 * from http://unicode.org/Public/UNIDATA/UnicodeData.txt
 *
 * See also http://unicode.org/report/tr15/
 */

#ifndef __LIBARCHIVE_BUILD
#error This header is only to be used internally to libarchive.
#endif

#ifndef ARCHIVE_STRING_COMPOSITION_H_INCLUDED
#define ARCHIVE_STRING_COMPOSITION_H_INCLUDED

struct unicode_composition_table {
 uint32_t cp1;
 uint32_t cp2;
 uint32_t nfc;
};

CR_END
}
#################################################################################
#
# awk script
#
#################################################################################
# The delimiter is unquoted here as well: every dollar sign meant for awk is
# written with a leading backslash, and a doubled backslash reaches awk as a
# single one.
cat > "${pickout}" <<AWK_END
#
# BEGIN: split records on semicolons, prepare the sort/format pipeline that
# receives the composition triples, and open the composition table.
#
BEGIN {
    FS = ";"
    min = "";
    max = "";
    cmd="sort | awk -F ' ' '{printf \"\\\\t{ 0x%s , 0x%s , 0x%s },\\\\n\",\$1,\$2,\$3}'"
    print "static const struct unicode_composition_table u_composition_table[] = {"
}
#
# END: flush the sorted composition entries, close the table and then emit
# the Canonical Combining Class lookup tables.  highnum, min and max hold the
# values left by the last record with a non-zero combining class.
#
END {
    # Closing the pipe makes sort emit its output before "};" is printed.
    close(cmd)
    print "};"
    print ""
    #
    # Output Canonical Combining Class tables used for translating NFD to NFC.
    #
    printf "#define CANONICAL_CLASS_MIN\\t0x%s\\n", min
    printf "#define CANONICAL_CLASS_MAX\\t0x%s\\n", max
    print ""
    printf "#define IS_DECOMPOSABLE_BLOCK(uc)\\t\\\\\n"
    printf "\\t(((uc)>>8) <= 0x%X && u_decomposable_blocks[(uc)>>8])\\n", highnum
    printf "static const char u_decomposable_blocks[0x%X+1] = {\\n\\t", highnum
    #
    # Output blockmap
    for (i = 0; i <= highnum; i++) {
        if (i != 0 && i % 32 == 0)
            printf "\\n\\t"
        # Additionally Hangul[11XX(17), AC00(172) - D7FF(215)] is decomposable.
        if (blockmap[i] || i == 17 || (i >= 172 && i <= 215))
            printf "1,"
        else
            printf "0,"
    }
    printf "\\n};\\n\\n"
    #
    # Output a macro to get a canonical combining class.
    #
    print "/* Get Canonical Combining Class(CCC). */"
    printf "#define CCC(uc)\\t\\\\\n"
    printf "\\t(((uc) > 0x%s)?0:\\\\\\n", max
    printf "\\tccc_val[ccc_val_index[ccc_index[(uc)>>8]][((uc)>>4)&0x0F]][(uc)&0x0F])\\n"
    print ""
    #
    # Output a canonical combining class value table.
    #
    # NOTE(review): "Cimbining" is misspelled in the emitted comment below; it
    # is left unchanged so that the generator output does not change.
    #
    midcnt = 0
    printf "/* The table of the value of Canonical Cimbining Class */\\n"
    print "static const unsigned char ccc_val[][16] = {"
    print " /* idx=0: XXXX0 - XXXXF */"
    print " { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },"
    for (h = 0; h <= highnum; h++) {
        if (!blockmap[h])
            continue;
        for (m = 0; m < 16; m++) {
            if (!xx_blockmap[h, m])
                continue;
            midcnt++
            printf " /* idx=%d: %03X%1X0 - %03X%1XF */\\n {", midcnt, h, m, h, m
            for (l = 0; l < 15; l++) {
                printf "%d, ", xxx_blockmap[h, m, l]
            }
            printf "%d },\n", xxx_blockmap[h, m, 15]
        }
    }
    printf "};\n"
    #
    # Output the index table of the canonical combining class value table.
    #
    cnt = 0
    midcnt = 0
    printf "\\n/* The index table to ccc_val[*][16] */\\n"
    print "static const unsigned char ccc_val_index[][16] = {"
    print " /* idx=0: XXX00 - XXXFF */"
    print " { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },"
    for (h = 0; h <= highnum; h++) {
        if (!blockmap[h])
            continue;
        cnt++
        printf " /* idx=%d: %03X00 - %03XFF */\\n {", cnt, h, h
        for (m = 0; m < 16; m++) {
            if (m != 0)
                printf ","
            if (xx_blockmap[h, m]) {
                midcnt++
                printf "%2d", midcnt
            } else
                printf " 0"
        }
        printf " },\\n"
    }
    printf "};\\n"
    #
    # Output the index table to the index table of the canonical combining
    # class value table.
    #
    printf "\\n/* The index table to ccc_val_index[*][16] */\\n"
    printf "static const unsigned char ccc_index[] = {\\n "
    cnt = 0
    for (h = 0; h <= highnum; h++) {
        if (h != 0 && h % 24 == 0)
            printf "\\n "
        if (blockmap[h]) {
            cnt++;
            printf "%2d,", cnt
        } else
            printf " 0,"
    }
    print "};"
    print ""
    print "#endif /* ARCHIVE_STRING_COMPOSITION_H_INCLUDED */"
}
#
# hextoi(hex): return the numeric value of an upper-case hexadecimal string.
# Characters other than 0-9 and A-F are skipped; an empty string yields 0.
# The names after "hex" are locals (awk idiom), so the function does not
# clobber global variables such as the loop counter "i".
#
function hextoi(hex,    dec, i, x, digit)
{
    dec = 0
    for (i = 1; i <= length(hex); i++) {
        x = substr(hex, i, 1)
        digit = index("0123456789ABCDEF", x)
        if (digit > 0)
            dec = dec * 16 + digit - 1
    }
    return dec
}
#
# Collect Canonical Combining Class values.
#
# The code point in field 1 is split into a high part (all but the last two
# hex digits), a middle digit and a low digit; these index the three block
# maps that the END block turns into lookup tables.
#
\$4 ~/^[0-9A-F]+\$/ {
    if (\$4 !~/^0\$/) {
        if (min == "") {
            min = \$1
        }
        max = \$1
        high = substr(\$1, 1, length(\$1) -2)
        highnum = hextoi(high)
        mid = substr(\$1, length(\$1) -1, 1)
        midnum = hextoi(mid)
        low = substr(\$1, length(\$1), 1)
        lownum = hextoi(low)
        blockmap[highnum] = 1
        xx_blockmap[highnum, midnum] = 1
        xxx_blockmap[highnum, midnum, lownum] = \$4
    }
}
#
# Following code points are not decomposed in MAC OS.
#   U+2000 - U+2FFF
#   U+F900 - U+FAFF
#   U+2F800 - U+2FAFF
#
#\$1 ~/^2[0-9A-F][0-9A-F][0-9A-F]\$/ {
#    next
#}
#\$1 ~/^F[9A][0-9A-F][0-9A-F]\$/ {
#    next
#}
#\$1 ~/^2F[89A][0-9A-F][0-9A-F]\$/ {
#    next
#}
#
# Exclusion code points specified by
# http://unicode.org/Public/UNIDATA/CompositionExclusions.txt
##
# 1. Script Specifics
##
\$1 ~/^095[89ABCDEF]\$/ { next }
\$1 ~/^09D[CDF]\$/ { next }
\$1 ~/^0A3[36]\$/ { next }
\$1 ~/^0A5[9ABE]\$/ { next }
\$1 ~/^0B5[CD]\$/ { next }
\$1 ~/^0F4[3D]\$/ { next }
\$1 ~/^0F5[27C]\$/ { next }
\$1 ~/^0F69\$/ { next }
\$1 ~/^0F7[68]\$/ { next }
\$1 ~/^0F9[3D]\$/ { next }
\$1 ~/^0FA[27C]\$/ { next }
\$1 ~/^0FB9\$/ { next }
\$1 ~/^FB1[DF]\$/ { next }
\$1 ~/^FB2[ABCDEF]\$/ { next }
\$1 ~/^FB3[012345689ABCE]\$/ { next }
\$1 ~/^FB4[01346789ABCDE]\$/ { next }
##
# 2. Post Composition Version precomposed characters
##
\$1 ~/^2ADC\$/ { next }
\$1 ~/^1D15[EF]\$/ { next }
\$1 ~/^1D16[01234]\$/ { next }
\$1 ~/^1D1B[BCDEF]\$/ { next }
\$1 ~/^1D1C0\$/ { next }
##
# 3. Singleton Decompositions
##
\$1 ~/^034[01]\$/ { next }
\$1 ~/^037[4E]\$/ { next }
\$1 ~/^0387\$/ { next }
\$1 ~/^1F7[13579BD]\$/ { next }
\$1 ~/^1FB[BE]\$/ { next }
\$1 ~/^1FC[9B]\$/ { next }
\$1 ~/^1FD[3B]\$/ { next }
\$1 ~/^1FE[3BEF]\$/ { next }
\$1 ~/^1FF[9BD]\$/ { next }
\$1 ~/^200[01]\$/ { next }
\$1 ~/^212[6AB]\$/ { next }
\$1 ~/^232[9A]\$/ { next }
\$1 ~/^F9[0-9A-F][0-9A-F]\$/ { next }
\$1 ~/^FA0[0-9A-D]\$/ { next }
\$1 ~/^FA1[025-9A-E]\$/ { next }
\$1 ~/^FA2[0256A-D]\$/ { next }
\$1 ~/^FA[3-5][0-9A-F]\$/ { next }
\$1 ~/^FA6[0-9A-D]\$/ { next }
\$1 ~/^FA[7-9A-C][0-9A-F]\$/ { next }
\$1 ~/^FAD[0-9]\$/ { next }
\$1 ~/^2F[89][0-9A-F][0-9A-F]\$/ { next }
\$1 ~/^2FA0[0-9A-F]\$/ { next }
\$1 ~/^2FA1[0-9A-D]\$/ { next }
##
# 4. Non-Starter Decompositions
##
\$1 ~/^0344\$/ { next }
\$1 ~/^0F7[35]\$/ { next }
\$1 ~/^0F81\$/ { next }
#
# Output combinations for NFD ==> NFC.
#
# Only records whose field 6 is exactly two hex code points are used.  For a
# four-digit composed code point all three values get a leading zero before
# they are sent to the sort pipeline.
#
\$6 ~/^[0-9A-F]+ [0-9A-F]+\$/ {
    split(\$6, cp, " ")
    if (length(\$1) == 4)
        print "0"cp[1], "0"cp[2], "0"\$1 | cmd
    else
        print cp[1], cp[2], \$1 | cmd
}
AWK_END
#################################################################################
#
# Run the awk script.
#
#################################################################################
append_copyright
# An empty ${inputfile} expands to no argument at all, so awk falls back to
# reading standard input exactly as before.
awk -f "${pickout}" ${inputfile:+"${inputfile}"} >> "${outfile}"
status=$?
#
# The temporary awk script is removed by the EXIT trap installed above.
# Report awk's status so that callers can detect a failed generation.
exit "${status}"