summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGregorio Litenstein <g.litenstein@gmail.com>2019-10-21 05:41:12 (GMT)
committerTony Theodore <tonyt@logyst.com>2019-10-23 13:17:41 (GMT)
commitf594f30f9f83c92a752ef6193adadc0b8ec6c5ab (patch)
tree3cc248f6add18737a01bfff0e3286851244a1c1f
parentccea7d82385517f1fe4ca85b8c39317f91a1d97c (diff)
downloadmxe-f594f30f9f83c92a752ef6193adadc0b8ec6c5ab.zip
mxe-f594f30f9f83c92a752ef6193adadc0b8ec6c5ab.tar.gz
mxe-f594f30f9f83c92a752ef6193adadc0b8ec6c5ab.tar.bz2
icu4c: add fixes and test from #2361
-rw-r--r--src/icu4c-test.c126
-rw-r--r--src/icu4c.mk18
2 files changed, 140 insertions, 4 deletions
diff --git a/src/icu4c-test.c b/src/icu4c-test.c
new file mode 100644
index 0000000..3fa4a22
--- /dev/null
+++ b/src/icu4c-test.c
@@ -0,0 +1,126 @@
+/*
+ * This file is part of MXE. See LICENSE.md for licensing information.
+ */
+
+/*** ugrep.c ***/
+
+/*** https://begriffs.com/posts/2019-05-23-unicode-icu.html ***/
+
+#include <locale.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <unicode/ucol.h>
+#include <unicode/usearch.h>
+#include <unicode/ustdio.h>
+#include <unicode/ustring.h>
+
+#define BUFSZ 1024
+
+/*
+ * Collation-aware grep, built as the MXE smoke test for icu4c.
+ *
+ * Usage: <prog> {1,2,@,3}[i] pattern
+ *   argv[1]  first character selects the collation strength:
+ *              '1' UCOL_PRIMARY, '2' UCOL_SECONDARY, '3' UCOL_TERTIARY,
+ *              '@' UCOL_SECONDARY plus the
+ *                  USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD comparison mode;
+ *            a trailing 'i' enables UCOL_SHIFTED alternate handling.
+ *   argv[2]  the search pattern, encoded as UTF-8.
+ *
+ * Reads lines from stdin and prints "<line number>: <line>" for every line
+ * in which the pattern is found, using the LC_COLLATE locale's collator.
+ *
+ * Returns EXIT_SUCCESS when all input was processed, EXIT_FAILURE on bad
+ * arguments, allocation failure, or any failing ICU call.
+ *
+ * Kept C89-clean on purpose: the test is compiled with -ansi -pedantic.
+ */
+int main(int argc, char **argv)
+{
+    char *locale;
+    UFILE *in = NULL;
+    UCollator *col = NULL;
+    UStringSearch *srch = NULL;
+    UErrorCode status = U_ZERO_ERROR;
+    UChar *needle = NULL, line[BUFSZ];
+    UColAttributeValue strength;
+    int ignoreInsignificant = 0, asymmetric = 0;
+    int rc = EXIT_FAILURE;
+    size_t n;
+    long i;
+
+    if (argc != 3)
+    {
+        fprintf(stderr,
+            "Usage: %s {1,2,@,3}[i] pattern\n", argv[0]);
+        return EXIT_FAILURE;
+    }
+
+    /* cryptic parsing for our cryptic options */
+    switch (*argv[1])
+    {
+    case '1':
+        strength = UCOL_PRIMARY;
+        break;
+    case '2':
+        strength = UCOL_SECONDARY;
+        break;
+    case '@':
+        strength = UCOL_SECONDARY;
+        asymmetric = 1;
+        break;
+    case '3':
+        strength = UCOL_TERTIARY;
+        break;
+    default:
+        fprintf(stderr,
+            "Unknown strength: %s\n", argv[1]);
+        return EXIT_FAILURE;
+    }
+    /* the switch rejected an empty argv[1], so strlen()-1 is in range */
+    ignoreInsignificant = argv[1][strlen(argv[1])-1] == 'i';
+
+    n = strlen(argv[2]) + 1;
+    /* if UTF-8 could encode it in n, then UTF-16
+     * should be able to as well */
+    needle = malloc(n * sizeof(*needle));
+    if (!needle)
+    {
+        fputs("Out of memory\n", stderr);
+        goto cleanup;
+    }
+    u_strFromUTF8(needle, (int32_t)n, NULL, argv[2], -1, &status);
+    if (U_FAILURE(status))
+    {
+        fprintf(stderr, "Cannot convert pattern to UTF-16: %s\n",
+            u_errorName(status));
+        goto cleanup;
+    }
+
+    /* searching is a degenerate case of collation,
+     * so we read the LC_COLLATE locale */
+    if (!(locale = setlocale(LC_COLLATE, "")))
+    {
+        fputs("Cannot determine system collation locale\n",
+            stderr);
+        goto cleanup;
+    }
+
+    if (!(in = u_finit(stdin, NULL, NULL)))
+    {
+        fputs("Error opening stdin as UFILE\n", stderr);
+        goto cleanup;
+    }
+
+    /* ucol_open() hands back NULL on failure; bail out before the
+     * collator is passed to anything else */
+    col = ucol_open(locale, &status);
+    if (U_FAILURE(status) || !col)
+    {
+        fprintf(stderr, "Cannot open collator for locale %s: %s\n",
+            locale, u_errorName(status));
+        goto cleanup;
+    }
+    ucol_setStrength(col, strength);
+
+    if (ignoreInsignificant)
+    {
+        /* shift ignorable characters down to
+         * quaternary level */
+        ucol_setAttribute(col, UCOL_ALTERNATE_HANDLING,
+            UCOL_SHIFTED, &status);
+        if (U_FAILURE(status))
+        {
+            fprintf(stderr, "Cannot set alternate handling: %s\n",
+                u_errorName(status));
+            goto cleanup;
+        }
+    }
+
+    /* Assumes all lines fit in BUFSZ. Should
+     * fix this in real code and not increment i */
+    for (i = 1; u_fgets(line, BUFSZ, in); ++i)
+    {
+        int32_t pos;
+
+        /* first time through, set up all options */
+        if (!srch)
+        {
+            srch = usearch_openFromCollator(
+                needle, -1, line, -1,
+                col, NULL, &status
+            );
+            if (asymmetric)
+                usearch_setAttribute(
+                    srch, USEARCH_ELEMENT_COMPARISON,
+                    USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD,
+                    &status
+                );
+        }
+        /* afterward just switch text */
+        else
+            usearch_setText(srch, line, -1, &status);
+
+        /* a failed status is sticky: without this check every later
+         * line would silently report "no match" */
+        if (U_FAILURE(status) || !srch)
+        {
+            fprintf(stderr, "Cannot set up search on line %ld: %s\n",
+                i, u_errorName(status));
+            goto cleanup;
+        }
+
+        /* check if keyword appears in line */
+        pos = usearch_first(srch, &status);
+        if (U_FAILURE(status))
+        {
+            fprintf(stderr, "Search failed on line %ld: %s\n",
+                i, u_errorName(status));
+            goto cleanup;
+        }
+        if (pos != USEARCH_DONE)
+            u_printf("%ld: %S", i, line);
+    }
+
+    rc = EXIT_SUCCESS;
+
+cleanup:
+    /* single exit path: release whatever was acquired, in reverse order */
+    if (srch)
+        usearch_close(srch);
+    if (col)
+        ucol_close(col);
+    if (in)
+        u_fclose(in);
+    free(needle);
+
+    return rc;
+}
diff --git a/src/icu4c.mk b/src/icu4c.mk
index 133bffc..b1b9b4c 100644
--- a/src/icu4c.mk
+++ b/src/icu4c.mk
@@ -33,23 +33,32 @@ define $(PKG)_BUILD_COMMON
--with-cross-build='$(PREFIX)/$(BUILD)/$(PKG)' \
CFLAGS=-DU_USING_ICU_NAMESPACE=0 \
CXXFLAGS='--std=gnu++0x' \
- SHELL=bash \
+ SHELL=$(SHELL) \
+ LIBS='-lstdc++' \
$($(PKG)_CONFIGURE_OPTS)
- $(MAKE) -C '$(BUILD_DIR)' -j '$(JOBS)'
- $(MAKE) -C '$(BUILD_DIR)' -j 1 install
+ $(MAKE) -C '$(BUILD_DIR)' -j '$(JOBS)' VERBOSE=1
+ $(MAKE) -C '$(BUILD_DIR)' -j 1 install VERBOSE=1
ln -sf '$(PREFIX)/$(TARGET)/bin/icu-config' '$(PREFIX)/bin/$(TARGET)-icu-config'
endef
+define $(PKG)_BUILD_TEST
+ '$(TARGET)-gcc' \
+ -W -Wall -Werror -ansi -pedantic \
+ '$(TEST_FILE)' -o '$(PREFIX)/$(TARGET)/bin/test-$(PKG).exe' \
+ `'$(TARGET)-pkg-config' icu-uc icu-io --cflags --libs`
+endef
+
define $(PKG)_BUILD_SHARED
$($(PKG)_BUILD_COMMON)
# icu4c installs its DLLs to lib/. Move them to bin/.
mv -fv $(PREFIX)/$(TARGET)/lib/icu*.dll '$(PREFIX)/$(TARGET)/bin/'
# add symlinks icu*<version>.dll.a to icu*.dll.a
- for lib in `ls '$(PREFIX)/$(TARGET)/lib/' | grep 'icu.*\.dll\.a' | cut -d '.' -f 1 | tr '\n' ' '`; \
+ for lib in $$(ls '$(PREFIX)/$(TARGET)/lib/' | grep 'icu.*\.dll\.a' | cut -d '.' -f 1 | tr '\n' ' '); \
do \
ln -fs "$(PREFIX)/$(TARGET)/lib/$${lib}.dll.a" "$(PREFIX)/$(TARGET)/lib/$${lib}$($(PKG)_MAJOR).dll.a"; \
done
+ $($(PKG)_BUILD_TEST)
endef
define $(PKG)_BUILD
@@ -57,4 +66,5 @@ define $(PKG)_BUILD
# Static libs are prefixed with an `s` but the config script
# doesn't detect it properly, despite the STATIC_PREFIX="s" line
$(SED) -i 's,ICUPREFIX="icu",ICUPREFIX="sicu",' '$(PREFIX)/$(TARGET)/bin/icu-config'
+ $($(PKG)_BUILD_TEST)
endef