diff options
Diffstat (limited to 'libxslt/xsltlocale.c')
-rw-r--r-- | libxslt/xsltlocale.c | 525 |
1 files changed, 525 insertions, 0 deletions
diff --git a/libxslt/xsltlocale.c b/libxslt/xsltlocale.c new file mode 100644 index 0000000..b5fe986 --- /dev/null +++ b/libxslt/xsltlocale.c @@ -0,0 +1,525 @@ +/* + * xsltlocale.c: locale handling + * + * Reference: + * RFC 3066: Tags for the Identification of Languages + * http://www.ietf.org/rfc/rfc3066.txt + * ISO 639-1, ISO 3166-1 + * + * Author: Nick Wellnhofer + * winapi port: Roumen Petrov + */ + +#define IN_LIBXSLT +#include "libxslt.h" + +#include <string.h> +#include <libxml/xmlmemory.h> + +#include "xsltlocale.h" +#include "xsltutils.h" + +#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 2 +#define newlocale __newlocale +#define freelocale __freelocale +#define strxfrm_l __strxfrm_l +#define LC_COLLATE_MASK (1 << LC_COLLATE) +#endif + +#define TOUPPER(c) (c & ~0x20) +#define TOLOWER(c) (c | 0x20) +#define ISALPHA(c) ((unsigned)(TOUPPER(c) - 'A') < 26) + +/*without terminating null character*/ +#define XSLTMAX_ISO639LANGLEN 8 +#define XSLTMAX_ISO3166CNTRYLEN 8 + /* <lang>-<cntry> */ +#define XSLTMAX_LANGTAGLEN (XSLTMAX_ISO639LANGLEN+1+XSLTMAX_ISO3166CNTRYLEN) + +static const xmlChar* xsltDefaultRegion(const xmlChar *localeName); + +#ifdef XSLT_LOCALE_WINAPI +xmlRMutexPtr xsltLocaleMutex = NULL; + +struct xsltRFC1766Info_s { + /*note typedef unsigned char xmlChar !*/ + xmlChar tag[XSLTMAX_LANGTAGLEN+1]; + /*note typedef LCID xsltLocale !*/ + xsltLocale lcid; +}; +typedef struct xsltRFC1766Info_s xsltRFC1766Info; + +static int xsltLocaleListSize = 0; +static xsltRFC1766Info *xsltLocaleList = NULL; + + +static xsltLocale +xslt_locale_WINAPI(const xmlChar *languageTag) { + int k; + xsltRFC1766Info *p = xsltLocaleList; + + for (k=0; k<xsltLocaleListSize; k++, p++) + if (xmlStrcmp(p->tag, languageTag) == 0) return p->lcid; + return((xsltLocale)0); +} + +static void xsltEnumSupportedLocales(void); +#endif + +/** + * xsltFreeLocales: + * + * Cleanup function for the locale support on shutdown + */ +void +xsltFreeLocales(void) { +#ifdef XSLT_LOCALE_WINAPI + xmlRMutexLock(xsltLocaleMutex); + xmlFree(xsltLocaleList); + xsltLocaleList = NULL; + xmlRMutexUnlock(xsltLocaleMutex); +#endif +} + +/** + * xsltNewLocale: + * @languageTag: RFC 3066 language tag + * + * Creates a new locale of an opaque system dependent type based on the + * language tag. + * + * Returns the locale or NULL on error or if no matching locale was found + */ +xsltLocale +xsltNewLocale(const xmlChar *languageTag) { +#ifdef XSLT_LOCALE_XLOCALE + xsltLocale locale; + char localeName[XSLTMAX_LANGTAGLEN+6]; /* 6 chars for ".utf8\0" */ + const xmlChar *p = languageTag; + const char *region = NULL; + char *q = localeName; + int i, llen; + + /* Convert something like "pt-br" to "pt_BR.utf8" */ + + if (languageTag == NULL) + return(NULL); + + for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i) + *q++ = TOLOWER(*p++); + + if (i == 0) + return(NULL); + + llen = i; + + if (*p) { + if (*p++ != '-') + return(NULL); + *q++ = '_'; + + for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i) + *q++ = TOUPPER(*p++); + + if (i == 0 || *p) + return(NULL); + + memcpy(q, ".utf8", 6); + locale = newlocale(LC_COLLATE_MASK, localeName, NULL); + if (locale != NULL) + return(locale); + + /* Continue without using country code */ + + q = localeName + llen; + } + + /* Try locale without territory, e.g. for Esperanto (eo) */ + + memcpy(q, ".utf8", 6); + locale = newlocale(LC_COLLATE_MASK, localeName, NULL); + if (locale != NULL) + return(locale); + + /* Try to find most common country for language */ + + if (llen != 2) + return(NULL); + + region = (char *)xsltDefaultRegion((xmlChar *)localeName); + if (region == NULL) + return(NULL); + + q = localeName + llen; + *q++ = '_'; + *q++ = region[0]; + *q++ = region[1]; + memcpy(q, ".utf8", 6); + locale = newlocale(LC_COLLATE_MASK, localeName, NULL); + + return(locale); +#endif + +#ifdef XSLT_LOCALE_WINAPI +{ + xsltLocale locale = (xsltLocale)0; + xmlChar localeName[XSLTMAX_LANGTAGLEN+1]; + xmlChar *q = localeName; + const xmlChar *p = languageTag; + int i, llen; + const xmlChar *region = NULL; + + if (languageTag == NULL) goto end; + + xsltEnumSupportedLocales(); + + for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i) + *q++ = TOLOWER(*p++); + if (i == 0) goto end; + + llen = i; + *q++ = '-'; + if (*p) { /*if country tag is given*/ + if (*p++ != '-') goto end; + + for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i) + *q++ = TOUPPER(*p++); + if (i == 0 || *p) goto end; + + *q = '\0'; + locale = xslt_locale_WINAPI(localeName); + if (locale != (xsltLocale)0) goto end; + } + /* Try to find most common country for language */ + region = xsltDefaultRegion(localeName); + if (region == NULL) goto end; + + strcpy(localeName + llen + 1, region); + locale = xslt_locale_WINAPI(localeName); +end: + return(locale); +} +#endif + +#ifdef XSLT_LOCALE_NONE + return(NULL); +#endif +} + +static const xmlChar* +xsltDefaultRegion(const xmlChar *localeName) { + xmlChar c; + /* region should be xmlChar, but gcc warns on all string assignments */ + const char *region = NULL; + + c = localeName[1]; + /* This is based on the locales from glibc 2.3.3 */ + + switch (localeName[0]) { + case 'a': + if (c == 'a' || c == 'm') region = "ET"; + else if (c == 'f') region = "ZA"; + else if (c == 'n') region = "ES"; + else if (c == 'r') region = "AE"; + else if (c == 'z') region = "AZ"; + break; + case 'b': + if (c == 'e') region = "BY"; + else if (c == 'g') region = "BG"; + else if (c == 'n') region = "BD"; + else if (c == 'r') region = "FR"; + else if (c == 's') region = "BA"; + break; + case 'c': + if (c == 'a') region = "ES"; + else if (c == 's') region = "CZ"; + else if (c == 'y') region = "GB"; + break; + case 'd': + if (c == 'a') region = "DK"; + else if (c == 'e') region = "DE"; + break; + case 'e': + if (c == 'l') region = "GR"; + else if (c == 'n' || c == 'o') region = "US"; + else if (c == 's' || c == 'u') region = "ES"; + else if (c == 't') region = "EE"; + break; + case 'f': + if (c == 'a') region = "IR"; + else if (c == 'i') region = "FI"; + else if (c == 'o') region = "FO"; + else if (c == 'r') region = "FR"; + break; + case 'g': + if (c == 'a') region = "IE"; + else if (c == 'l') region = "ES"; + else if (c == 'v') region = "GB"; + break; + case 'h': + if (c == 'e') region = "IL"; + else if (c == 'i') region = "IN"; + else if (c == 'r') region = "HT"; + else if (c == 'u') region = "HU"; + break; + case 'i': + if (c == 'd') region = "ID"; + else if (c == 's') region = "IS"; + else if (c == 't') region = "IT"; + else if (c == 'w') region = "IL"; + break; + case 'j': + if (c == 'a') region = "JP"; + break; + case 'k': + if (c == 'l') region = "GL"; + else if (c == 'o') region = "KR"; + else if (c == 'w') region = "GB"; + break; + case 'l': + if (c == 't') region = "LT"; + else if (c == 'v') region = "LV"; + break; + case 'm': + if (c == 'k') region = "MK"; + else if (c == 'l' || c == 'r') region = "IN"; + else if (c == 'n') region = "MN"; + else if (c == 's') region = "MY"; + else if (c == 't') region = "MT"; + break; + case 'n': + if (c == 'b' || c == 'n' || c == 'o') region = "NO"; + else if (c == 'e') region = "NP"; + else if (c == 'l') region = "NL"; + break; + case 'o': + if (c == 'm') region = "ET"; + break; + case 'p': + if (c == 'a') region = "IN"; + else if (c == 'l') region = "PL"; + else if (c == 't') region = "PT"; + break; + case 'r': + if (c == 'o') region = "RO"; + else if (c == 'u') region = "RU"; + break; + case 's': + switch (c) { + case 'e': region = "NO"; break; + case 'h': region = "YU"; break; + case 'k': region = "SK"; break; + case 'l': region = "SI"; break; + case 'o': region = "ET"; break; + case 'q': region = "AL"; break; + case 't': region = "ZA"; break; + case 'v': region = "SE"; break; + } + break; + case 't': + if (c == 'a' || c == 'e') region = "IN"; + else if (c == 'h') region = "TH"; + else if (c == 'i') region = "ER"; + else if (c == 'r') region = "TR"; + else if (c == 't') region = "RU"; + break; + case 'u': + if (c == 'k') region = "UA"; + else if (c == 'r') region = "PK"; + break; + case 'v': + if (c == 'i') region = "VN"; + break; + case 'w': + if (c == 'a') region = "BE"; + break; + case 'x': + if (c == 'h') region = "ZA"; + break; + case 'z': + if (c == 'h') region = "CN"; + else if (c == 'u') region = "ZA"; + break; + } + return((xmlChar *)region); +} + +/** + * xsltFreeLocale: + * @locale: the locale to free + * + * Frees a locale created with xsltNewLocale + */ +void +xsltFreeLocale(xsltLocale locale) { +#ifdef XSLT_LOCALE_XLOCALE + freelocale(locale); +#endif +} + +/** + * xsltStrxfrm: + * @locale: locale created with xsltNewLocale + * @string: UTF-8 string to transform + * + * Transforms a string according to locale. The transformed string must then be + * compared with xsltLocaleStrcmp and freed with xmlFree. + * + * Returns the transformed string or NULL on error + */ +xsltLocaleChar * +xsltStrxfrm(xsltLocale locale, const xmlChar *string) +{ +#ifdef XSLT_LOCALE_NONE + return(NULL); +#else + size_t xstrlen, r; + xsltLocaleChar *xstr; + +#ifdef XSLT_LOCALE_XLOCALE + xstrlen = strxfrm_l(NULL, (const char *)string, 0, locale) + 1; + xstr = (xsltLocaleChar *) xmlMalloc(xstrlen); + if (xstr == NULL) { + xsltTransformError(NULL, NULL, NULL, + "xsltStrxfrm : out of memory error\n"); + return(NULL); + } + + r = strxfrm_l((char *)xstr, (const char *)string, xstrlen, locale); +#endif + +#ifdef XSLT_LOCALE_WINAPI + xstrlen = MultiByteToWideChar(CP_UTF8, 0, string, -1, NULL, 0); + if (xstrlen == 0) { + xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : MultiByteToWideChar check failed\n"); + return(NULL); + } + xstr = (xsltLocaleChar*) xmlMalloc(xstrlen * sizeof(xsltLocaleChar)); + if (xstr == NULL) { + xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : out of memory\n"); + return(NULL); + } + r = MultiByteToWideChar(CP_UTF8, 0, string, -1, xstr, xstrlen); + if (r == 0) { + xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : MultiByteToWideChar failed\n"); + xmlFree(xstr); + return(NULL); + } + return(xstr); +#endif /* XSLT_LOCALE_WINAPI */ + + if (r >= xstrlen) { + xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : strxfrm failed\n"); + xmlFree(xstr); + return(NULL); + } + + return(xstr); +#endif /* XSLT_LOCALE_NONE */ +} + +/** + * xsltLocaleStrcmp: + * @locale: a locale identifier + * @str1: a string transformed with xsltStrxfrm + * @str2: a string transformed with xsltStrxfrm + * + * Compares two strings transformed with xsltStrxfrm + * + * Returns a value < 0 if str1 sorts before str2, + * a value > 0 if str1 sorts after str2, + * 0 if str1 and str2 are equal wrt sorting + */ +int +xsltLocaleStrcmp(xsltLocale locale, const xsltLocaleChar *str1, const xsltLocaleChar *str2) { + (void)locale; +#ifdef XSLT_LOCALE_WINAPI +{ + int ret; + if (str1 == str2) return(0); + if (str1 == NULL) return(-1); + if (str2 == NULL) return(1); + ret = CompareStringW(locale, 0, str1, -1, str2, -1); + if (ret == 0) { + xsltTransformError(NULL, NULL, NULL, "xsltLocaleStrcmp : CompareStringW fail\n"); + return(0); + } + return(ret - 2); +} +#else + return(xmlStrcmp(str1, str2)); +#endif +} + +#ifdef XSLT_LOCALE_WINAPI +/** + * xsltCountSupportedLocales: + * @lcid: not used + * + * callback used to count locales + * + * Returns TRUE + */ +BOOL CALLBACK +xsltCountSupportedLocales(LPSTR lcid) { + (void) lcid; + ++xsltLocaleListSize; + return(TRUE); +} + +/** + * xsltIterateSupportedLocales: + * @lcid: not used + * + * callback used to track locales + * + * Returns TRUE if not at the end of the array + */ +BOOL CALLBACK +xsltIterateSupportedLocales(LPSTR lcid) { + static int count = 0; + xmlChar iso639lang [XSLTMAX_ISO639LANGLEN +1]; + xmlChar iso3136ctry[XSLTMAX_ISO3166CNTRYLEN+1]; + int k, l; + xsltRFC1766Info *p = xsltLocaleList + count; + + k = sscanf(lcid, "%lx", (long*)&p->lcid); + if (k < 1) goto end; + /*don't count terminating null character*/ + k = GetLocaleInfoA(p->lcid, LOCALE_SISO639LANGNAME , iso639lang , sizeof(iso639lang )); + if (--k < 1) goto end; + l = GetLocaleInfoA(p->lcid, LOCALE_SISO3166CTRYNAME, iso3136ctry, sizeof(iso3136ctry)); + if (--l < 1) goto end; + + { /*fill results*/ + xmlChar *q = p->tag; + memcpy(q, iso639lang, k); + q += k; + *q++ = '-'; + memcpy(q, iso3136ctry, l); + q += l; + *q = '\0'; + } + ++count; +end: + return((count < xsltLocaleListSize) ? TRUE : FALSE); +} + + +static void +xsltEnumSupportedLocales(void) { + xmlRMutexLock(xsltLocaleMutex); + if (xsltLocaleListSize <= 0) { + size_t len; + + EnumSystemLocalesA(xsltCountSupportedLocales, LCID_SUPPORTED); + + len = xsltLocaleListSize * sizeof(xsltRFC1766Info); + xsltLocaleList = xmlMalloc(len); + memset(xsltLocaleList, 0, len); + EnumSystemLocalesA(xsltIterateSupportedLocales, LCID_SUPPORTED); + } + xmlRMutexUnlock(xsltLocaleMutex); +} + +#endif /*def XSLT_LOCALE_WINAPI*/ |