From b556c1feccfb6f7985b80cc4fc906ab529ea6f01 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 6 Nov 2019 09:29:29 +0000 Subject: A few post-1.2.0 commits, extracted from support/1.x branch --- libtommath/appveyor.yml | 1 + libtommath/bn_mp_mul.c | 2 +- libtommath/bn_s_mp_mul_high_digs_fast.c | 4 +- libtommath/changes.txt | 2 +- libtommath/demo/shared.c | 42 + libtommath/demo/shared.h | 21 + libtommath/demo/test.c | 2522 +++++++++++++++++++++++++++++++ libtommath/helper.pl | 2 +- libtommath/makefile_include.mk | 8 +- libtommath/testme.sh | 394 +++++ libtommath/tommath.h | 15 +- 11 files changed, 3003 insertions(+), 10 deletions(-) create mode 100644 libtommath/demo/shared.c create mode 100644 libtommath/demo/shared.h create mode 100644 libtommath/demo/test.c create mode 100755 libtommath/testme.sh diff --git a/libtommath/appveyor.yml b/libtommath/appveyor.yml index 187a09a..08bb013 100644 --- a/libtommath/appveyor.yml +++ b/libtommath/appveyor.yml @@ -4,6 +4,7 @@ branches: - master - develop - /^release/ + - /^support/ - /^travis/ image: - Visual Studio 2019 diff --git a/libtommath/bn_mp_mul.c b/libtommath/bn_mp_mul.c index 561913a..91707cd 100644 --- a/libtommath/bn_mp_mul.c +++ b/libtommath/bn_mp_mul.c @@ -17,7 +17,7 @@ mp_err mp_mul(const mp_int *a, const mp_int *b, mp_int *c) * The bigger one needs to be at least about one MP_KARATSUBA_MUL_CUTOFF bigger * to make some sense, but it depends on architecture, OS, position of the * stars... so YMMV. - * Using it to cut the input into slices small enough for fast_s_mp_mul_digs + * Using it to cut the input into slices small enough for s_mp_mul_digs_fast * was actually slower on the author's machine, but YMMV. */ (min_len >= MP_KARATSUBA_MUL_CUTOFF) && diff --git a/libtommath/bn_s_mp_mul_high_digs_fast.c b/libtommath/bn_s_mp_mul_high_digs_fast.c index a2c4fb6..a0513b4 100644 --- a/libtommath/bn_s_mp_mul_high_digs_fast.c +++ b/libtommath/bn_s_mp_mul_high_digs_fast.c @@ -3,8 +3,8 @@ /* LibTomMath, multiple-precision integer library -- Tom St Denis */ /* SPDX-License-Identifier: Unlicense */ -/* this is a modified version of fast_s_mul_digs that only produces - * output digits *above* digs. See the comments for fast_s_mul_digs +/* this is a modified version of s_mp_mul_digs_fast that only produces + * output digits *above* digs. See the comments for s_mp_mul_digs_fast * to see how it works. * * This is used in the Barrett reduction since for one of the multiplications diff --git a/libtommath/changes.txt b/libtommath/changes.txt index ebf7382..1b3a7a3 100644 --- a/libtommath/changes.txt +++ b/libtommath/changes.txt @@ -1,4 +1,4 @@ -XXX XXth, 2019 +Oct 22nd, 2019 v1.2.0 -- A huge refactoring of the library happened - renaming, deprecating and replacing existing functions by improved API's. diff --git a/libtommath/demo/shared.c b/libtommath/demo/shared.c new file mode 100644 index 0000000..dc8e05a --- /dev/null +++ b/libtommath/demo/shared.c @@ -0,0 +1,42 @@ +#include "shared.h" + +void ndraw(mp_int *a, const char *name) +{ + char *buf = NULL; + int size; + + mp_radix_size(a, 10, &size); + buf = (char *)malloc((size_t) size); + if (buf == NULL) { + fprintf(stderr, "\nndraw: malloc(%d) failed\n", size); + exit(EXIT_FAILURE); + } + + printf("%s: ", name); + mp_to_decimal(a, buf, (size_t) size); + printf("%s\n", buf); + mp_to_hex(a, buf, (size_t) size); + printf("0x%s\n", buf); + + free(buf); +} + +void print_header(void) +{ +#ifdef MP_8BIT + printf("Digit size 8 Bit \n"); +#endif +#ifdef MP_16BIT + printf("Digit size 16 Bit \n"); +#endif +#ifdef MP_32BIT + printf("Digit size 32 Bit \n"); +#endif +#ifdef MP_64BIT + printf("Digit size 64 Bit \n"); +#endif + printf("Size of mp_digit: %u\n", (unsigned int)sizeof(mp_digit)); + printf("Size of mp_word: %u\n", (unsigned int)sizeof(mp_word)); + printf("MP_DIGIT_BIT: %d\n", MP_DIGIT_BIT); + printf("MP_PREC: %d\n", MP_PREC); +} diff --git a/libtommath/demo/shared.h b/libtommath/demo/shared.h new file mode 100644 index 0000000..4d5eb53 --- /dev/null +++ b/libtommath/demo/shared.h @@ -0,0 +1,21 @@ +#include +#include +#include + +/* + * Configuration + */ +#ifndef LTM_DEMO_TEST_REDUCE_2K_L +/* This test takes a moment so we disable it by default, but it can be: + * 0 to disable testing + * 1 to make the test with P = 2^1024 - 0x2A434 B9FDEC95 D8F9D550 FFFFFFFF FFFFFFFF + * 2 to make the test with P = 2^2048 - 0x1 00000000 00000000 00000000 00000000 4945DDBF 8EA2A91D 5776399B B83E188F + */ +#define LTM_DEMO_TEST_REDUCE_2K_L 0 +#endif + +#define MP_WUR /* TODO: result checks disabled for now */ +#include "tommath_private.h" + +extern void ndraw(mp_int* a, const char* name); +extern void print_header(void); diff --git a/libtommath/demo/test.c b/libtommath/demo/test.c new file mode 100644 index 0000000..7b29a4c --- /dev/null +++ b/libtommath/demo/test.c @@ -0,0 +1,2522 @@ +#include +#include "shared.h" + +static long rand_long(void) +{ + long x; + if (s_mp_rand_source(&x, sizeof(x)) != MP_OKAY) { + fprintf(stderr, "s_mp_rand_source failed\n"); + exit(EXIT_FAILURE); + } + return x; +} + +static int rand_int(void) +{ + int x; + if (s_mp_rand_source(&x, sizeof(x)) != MP_OKAY) { + fprintf(stderr, "s_mp_rand_source failed\n"); + exit(EXIT_FAILURE); + } + return x; +} + +static int32_t rand_int32(void) +{ + int32_t x; + if (s_mp_rand_source(&x, sizeof(x)) != MP_OKAY) { + fprintf(stderr, "s_mp_rand_source failed\n"); + exit(EXIT_FAILURE); + } + return x; +} + +static int64_t rand_int64(void) +{ + int64_t x; + if (s_mp_rand_source(&x, sizeof(x)) != MP_OKAY) { + fprintf(stderr, "s_mp_rand_source failed\n"); + exit(EXIT_FAILURE); + } + return x; +} + +static uint32_t uabs32(int32_t x) +{ + return x > 0 ? (uint32_t)x : -(uint32_t)x; +} + +static uint64_t uabs64(int64_t x) +{ + return x > 0 ? (uint64_t)x : -(uint64_t)x; +} + +/* This function prototype is needed + * to test dead code elimination + * which is used for feature detection. + * + * If the feature detection does not + * work as desired we will get a linker error. + */ +void does_not_exist(void); + +static int test_feature_detection(void) +{ +#define BN_TEST_FEATURE1_C + if (!MP_HAS(TEST_FEATURE1)) { + does_not_exist(); + return EXIT_FAILURE; + } + +#define BN_TEST_FEATURE2_C 1 + if (MP_HAS(TEST_FEATURE2)) { + does_not_exist(); + return EXIT_FAILURE; + } + +#define BN_TEST_FEATURE3_C 0 + if (MP_HAS(TEST_FEATURE3)) { + does_not_exist(); + return EXIT_FAILURE; + } + +#define BN_TEST_FEATURE4_C something + if (MP_HAS(TEST_FEATURE4)) { + does_not_exist(); + return EXIT_FAILURE; + } + + if (MP_HAS(TEST_FEATURE5)) { + does_not_exist(); + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} + +static int test_trivial_stuff(void) +{ + mp_int a, b, c, d; + mp_err e; + if ((e = mp_init_multi(&a, &b, &c, &d, NULL)) != MP_OKAY) { + return EXIT_FAILURE; + } + (void)mp_error_to_string(e); + + /* a: 0->5 */ + mp_set(&a, 5u); + /* a: 5-> b: -5 */ + mp_neg(&a, &b); + if (mp_cmp(&a, &b) != MP_GT) { + goto LBL_ERR; + } + if (mp_cmp(&b, &a) != MP_LT) { + goto LBL_ERR; + } + /* a: 5-> a: -5 */ + mp_neg(&a, &a); + if (mp_cmp(&b, &a) != MP_EQ) { + goto LBL_ERR; + } + /* a: -5-> b: 5 */ + mp_abs(&a, &b); + if (mp_isneg(&b) != MP_NO) { + goto LBL_ERR; + } + /* a: -5-> b: -4 */ + mp_add_d(&a, 1uL, &b); + if (mp_isneg(&b) != MP_YES) { + goto LBL_ERR; + } + if (mp_get_i32(&b) != -4) { + goto LBL_ERR; + } + if (mp_get_u32(&b) != (uint32_t)-4) { + goto LBL_ERR; + } + if (mp_get_mag_u32(&b) != 4) { + goto LBL_ERR; + } + /* a: -5-> b: 1 */ + mp_add_d(&a, 6uL, &b); + if (mp_get_u32(&b) != 1) { + goto LBL_ERR; + } + /* a: -5-> a: 1 */ + mp_add_d(&a, 6uL, &a); + if (mp_get_u32(&a) != 1) { + goto LBL_ERR; + } + mp_zero(&a); + /* a: 0-> a: 6 */ + mp_add_d(&a, 6uL, &a); + if (mp_get_u32(&a) != 6) { + goto LBL_ERR; + } + + mp_set(&a, 42u); + mp_set(&b, 1u); + mp_neg(&b, &b); + mp_set(&c, 1u); + mp_exptmod(&a, &b, &c, &d); + + mp_set(&c, 7u); + mp_exptmod(&a, &b, &c, &d); + + if (mp_iseven(&a) == mp_isodd(&a)) { + goto LBL_ERR; + } + + mp_clear_multi(&a, &b, &c, &d, NULL); + return EXIT_SUCCESS; +LBL_ERR: + mp_clear_multi(&a, &b, &c, &d, NULL); + return EXIT_FAILURE; +} + +static int check_get_set_i32(mp_int *a, int32_t b) +{ + mp_clear(a); + if (mp_shrink(a) != MP_OKAY) return EXIT_FAILURE; + + mp_set_i32(a, b); + if (mp_shrink(a) != MP_OKAY) return EXIT_FAILURE; + if (mp_get_i32(a) != b) return EXIT_FAILURE; + if (mp_get_u32(a) != (uint32_t)b) return EXIT_FAILURE; + if (mp_get_mag_u32(a) != uabs32(b)) return EXIT_FAILURE; + + mp_set_u32(a, (uint32_t)b); + if (mp_get_u32(a) != (uint32_t)b) return EXIT_FAILURE; + if (mp_get_i32(a) != (int32_t)(uint32_t)b) return EXIT_FAILURE; + + return EXIT_SUCCESS; +} + +static int test_mp_get_set_i32(void) +{ + int i; + mp_int a; + + if (mp_init(&a) != MP_OKAY) { + return EXIT_FAILURE; + } + + check_get_set_i32(&a, 0); + check_get_set_i32(&a, -1); + check_get_set_i32(&a, 1); + check_get_set_i32(&a, INT32_MIN); + check_get_set_i32(&a, INT32_MAX); + + for (i = 0; i < 1000; ++i) { + int32_t b = rand_int32(); + if (check_get_set_i32(&a, b) != EXIT_SUCCESS) { + goto LBL_ERR; + } + } + + mp_clear(&a); + return EXIT_SUCCESS; +LBL_ERR: + mp_clear(&a); + return EXIT_FAILURE; +} + +static int check_get_set_i64(mp_int *a, int64_t b) +{ + mp_clear(a); + if (mp_shrink(a) != MP_OKAY) return EXIT_FAILURE; + + mp_set_i64(a, b); + if (mp_shrink(a) != MP_OKAY) return EXIT_FAILURE; + if (mp_get_i64(a) != b) return EXIT_FAILURE; + if (mp_get_u64(a) != (uint64_t)b) return EXIT_FAILURE; + if (mp_get_mag_u64(a) != uabs64(b)) return EXIT_FAILURE; + + mp_set_u64(a, (uint64_t)b); + if (mp_get_u64(a) != (uint64_t)b) return EXIT_FAILURE; + if (mp_get_i64(a) != (int64_t)(uint64_t)b) return EXIT_FAILURE; + + return EXIT_SUCCESS; +} + +static int test_mp_get_set_i64(void) +{ + int i; + mp_int a; + + if (mp_init(&a) != MP_OKAY) { + return EXIT_FAILURE; + } + + check_get_set_i64(&a, 0); + check_get_set_i64(&a, -1); + check_get_set_i64(&a, 1); + check_get_set_i64(&a, INT64_MIN); + check_get_set_i64(&a, INT64_MAX); + + for (i = 0; i < 1000; ++i) { + int64_t b = rand_int64(); + if (check_get_set_i64(&a, b) != EXIT_SUCCESS) { + goto LBL_ERR; + } + } + + mp_clear(&a); + return EXIT_SUCCESS; +LBL_ERR: + mp_clear(&a); + return EXIT_FAILURE; +} + +static int test_mp_fread_fwrite(void) +{ + mp_int a, b; + mp_err e; + FILE *tmp = NULL; + if ((e = mp_init_multi(&a, &b, NULL)) != MP_OKAY) { + return EXIT_FAILURE; + } + + mp_set_ul(&a, 123456uL); + tmp = tmpfile(); + if ((e = mp_fwrite(&a, 64, tmp)) != MP_OKAY) { + goto LBL_ERR; + } + rewind(tmp); + if ((e = mp_fread(&b, 64, tmp)) != MP_OKAY) { + goto LBL_ERR; + } + if (mp_get_u32(&b) != 123456uL) { + goto LBL_ERR; + } + fclose(tmp); + + mp_clear_multi(&a, &b, NULL); + return EXIT_SUCCESS; +LBL_ERR: + if (tmp != NULL) fclose(tmp); + mp_clear_multi(&a, &b, NULL); + return EXIT_FAILURE; +} + +static mp_err very_random_source(void *out, size_t size) +{ + memset(out, 0xff, size); + return MP_OKAY; +} + +static int test_mp_rand(void) +{ + mp_int a, b; + int n; + mp_err err; + if (mp_init_multi(&a, &b, NULL)!= MP_OKAY) { + return EXIT_FAILURE; + } + mp_rand_source(very_random_source); + for (n = 1; n < 1024; ++n) { + if ((err = mp_rand(&a, n)) != MP_OKAY) { + printf("Failed mp_rand() %s.\n", mp_error_to_string(err)); + break; + } + if ((err = mp_incr(&a)) != MP_OKAY) { + printf("Failed mp_incr() %s.\n", mp_error_to_string(err)); + break; + } + if ((err = mp_div_2d(&a, n * MP_DIGIT_BIT, &b, NULL)) != MP_OKAY) { + printf("Failed mp_div_2d() %s.\n", mp_error_to_string(err)); + break; + } + if (mp_cmp_d(&b, 1) != MP_EQ) { + ndraw(&a, "mp_rand() a"); + ndraw(&b, "mp_rand() b"); + err = MP_ERR; + break; + } + } + mp_rand_source(s_mp_rand_jenkins); + mp_clear_multi(&a, &b, NULL); + return err == MP_OKAY ? EXIT_SUCCESS : EXIT_FAILURE; +} + +static int test_mp_kronecker(void) +{ + struct mp_kronecker_st { + long n; + int c[21]; + }; + static struct mp_kronecker_st kronecker[] = { + /*-10, -9, -8, -7,-6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10*/ + { -10, { 0, -1, 0, -1, 0, 0, 0, 1, 0, -1, 0, 1, 0, -1, 0, 0, 0, 1, 0, 1, 0 } }, + { -9, { -1, 0, -1, 1, 0, -1, -1, 0, -1, -1, 0, 1, 1, 0, 1, 1, 0, -1, 1, 0, 1 } }, + { -8, { 0, -1, 0, 1, 0, 1, 0, -1, 0, -1, 0, 1, 0, 1, 0, -1, 0, -1, 0, 1, 0 } }, + { -7, { 1, -1, -1, 0, 1, 1, -1, 1, -1, -1, 0, 1, 1, -1, 1, -1, -1, 0, 1, 1, -1 } }, + { -6, { 0, 0, 0, -1, 0, -1, 0, 0, 0, -1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0 } }, + { -5, { 0, -1, 1, -1, 1, 0, -1, -1, 1, -1, 0, 1, -1, 1, 1, 0, -1, 1, -1, 1, 0 } }, + { -4, { 0, -1, 0, 1, 0, -1, 0, 1, 0, -1, 0, 1, 0, -1, 0, 1, 0, -1, 0, 1, 0 } }, + { -3, { -1, 0, 1, -1, 0, 1, -1, 0, 1, -1, 0, 1, -1, 0, 1, -1, 0, 1, -1, 0, 1 } }, + { -2, { 0, -1, 0, 1, 0, 1, 0, -1, 0, -1, 0, 1, 0, 1, 0, -1, 0, -1, 0, 1, 0 } }, + { -1, { -1, -1, -1, 1, 1, -1, -1, 1, -1, -1, 1, 1, 1, -1, 1, 1, -1, -1, 1, 1, 1 } }, + { 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { 1, { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 } }, + { 2, { 0, 1, 0, 1, 0, -1, 0, -1, 0, 1, 0, 1, 0, -1, 0, -1, 0, 1, 0, 1, 0 } }, + { 3, { 1, 0, -1, -1, 0, -1, 1, 0, -1, 1, 0, 1, -1, 0, 1, -1, 0, -1, -1, 0, 1 } }, + { 4, { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 } }, + { 5, { 0, 1, -1, -1, 1, 0, 1, -1, -1, 1, 0, 1, -1, -1, 1, 0, 1, -1, -1, 1, 0 } }, + { 6, { 0, 0, 0, -1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, -1, 0, 0, 0 } }, + { 7, { -1, 1, 1, 0, 1, -1, 1, 1, 1, 1, 0, 1, 1, 1, 1, -1, 1, 0, 1, 1, -1 } }, + { 8, { 0, 1, 0, 1, 0, -1, 0, -1, 0, 1, 0, 1, 0, -1, 0, -1, 0, 1, 0, 1, 0 } }, + { 9, { 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 } }, + { 10, { 0, 1, 0, -1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, -1, 0, 1, 0 } } + }; + + long k, m; + int i, cnt; + mp_err err; + mp_int a, b; + if (mp_init_multi(&a, &b, NULL)!= MP_OKAY) { + return EXIT_FAILURE; + } + + mp_set_ul(&a, 0uL); + mp_set_ul(&b, 1uL); + if ((err = mp_kronecker(&a, &b, &i)) != MP_OKAY) { + printf("Failed executing mp_kronecker(0 | 1) %s.\n", mp_error_to_string(err)); + goto LBL_ERR; + } + if (i != 1) { + printf("Failed trivial mp_kronecker(0 | 1) %d != 1\n", i); + goto LBL_ERR; + } + for (cnt = 0; cnt < (int)(sizeof(kronecker)/sizeof(kronecker[0])); ++cnt) { + k = kronecker[cnt].n; + mp_set_l(&a, k); + /* only test positive values of a */ + for (m = -10; m <= 10; m++) { + mp_set_l(&b, m); + if ((err = mp_kronecker(&a, &b, &i)) != MP_OKAY) { + printf("Failed executing mp_kronecker(%ld | %ld) %s.\n", kronecker[cnt].n, m, mp_error_to_string(err)); + goto LBL_ERR; + } + if ((err == MP_OKAY) && (i != kronecker[cnt].c[m + 10])) { + printf("Failed trivial mp_kronecker(%ld | %ld) %d != %d\n", kronecker[cnt].n, m, i, kronecker[cnt].c[m + 10]); + goto LBL_ERR; + } + } + } + + mp_clear_multi(&a, &b, NULL); + return EXIT_SUCCESS; +LBL_ERR: + mp_clear_multi(&a, &b, NULL); + return EXIT_FAILURE; +} + +static int test_mp_complement(void) +{ + int i; + + mp_int a, b, c; + if (mp_init_multi(&a, &b, &c, NULL)!= MP_OKAY) { + return EXIT_FAILURE; + } + + for (i = 0; i < 1000; ++i) { + long l = rand_long(); + mp_set_l(&a, l); + mp_complement(&a, &b); + + l = ~l; + mp_set_l(&c, l); + + if (mp_cmp(&b, &c) != MP_EQ) { + printf("\nmp_complement() bad result!"); + goto LBL_ERR; + } + } + + mp_clear_multi(&a, &b, &c, NULL); + return EXIT_SUCCESS; +LBL_ERR: + mp_clear_multi(&a, &b, &c, NULL); + return EXIT_FAILURE; +} + +static int test_mp_signed_rsh(void) +{ + int i; + + mp_int a, b, d; + if (mp_init_multi(&a, &b, &d, NULL)!= MP_OKAY) { + return EXIT_FAILURE; + } + + for (i = 0; i < 1000; ++i) { + long l; + int em; + + l = rand_long(); + mp_set_l(&a, l); + + em = abs(rand_int()) % 32; + + mp_set_l(&d, l >> em); + + mp_signed_rsh(&a, em, &b); + if (mp_cmp(&b, &d) != MP_EQ) { + printf("\nmp_signed_rsh() bad result!"); + goto LBL_ERR; + } + } + + mp_clear_multi(&a, &b, &d, NULL); + return EXIT_SUCCESS; +LBL_ERR: + mp_clear_multi(&a, &b, &d, NULL); + return EXIT_FAILURE; + +} + +static int test_mp_xor(void) +{ + int i; + + mp_int a, b, c, d; + if (mp_init_multi(&a, &b, &c, &d, NULL)!= MP_OKAY) { + return EXIT_FAILURE; + } + + for (i = 0; i < 1000; ++i) { + long l, em; + + l = rand_long(); + mp_set_l(&a,l); + + em = rand_long(); + mp_set_l(&b, em); + + mp_set_l(&d, l ^ em); + + mp_xor(&a, &b, &c); + if (mp_cmp(&c, &d) != MP_EQ) { + printf("\nmp_xor() bad result!"); + goto LBL_ERR; + } + } + + mp_clear_multi(&a, &b, &c, &d, NULL); + return EXIT_SUCCESS; +LBL_ERR: + mp_clear_multi(&a, &b, &c, &d, NULL); + return EXIT_FAILURE; + +} + +static int test_mp_or(void) +{ + int i; + + mp_int a, b, c, d; + if (mp_init_multi(&a, &b, &c, &d, NULL)!= MP_OKAY) { + return EXIT_FAILURE; + } + + for (i = 0; i < 1000; ++i) { + long l, em; + + l = rand_long(); + mp_set_l(&a, l); + + em = rand_long(); + mp_set_l(&b, em); + + mp_set_l(&d, l | em); + + mp_or(&a, &b, &c); + if (mp_cmp(&c, &d) != MP_EQ) { + printf("\nmp_or() bad result!"); + goto LBL_ERR; + } + } + + mp_clear_multi(&a, &b, &c, &d, NULL); + return EXIT_SUCCESS; +LBL_ERR: + mp_clear_multi(&a, &b, &c, &d, NULL); + return EXIT_FAILURE; +} + +static int test_mp_and(void) +{ + int i; + + mp_int a, b, c, d; + if (mp_init_multi(&a, &b, &c, &d, NULL)!= MP_OKAY) { + return EXIT_FAILURE; + } + + for (i = 0; i < 1000; ++i) { + long l, em; + + l = rand_long(); + mp_set_l(&a, l); + + em = rand_long(); + mp_set_l(&b, em); + + mp_set_l(&d, l & em); + + mp_and(&a, &b, &c); + if (mp_cmp(&c, &d) != MP_EQ) { + printf("\nmp_and() bad result!"); + goto LBL_ERR; + } + } + + mp_clear_multi(&a, &b, &c, &d, NULL); + return EXIT_SUCCESS; +LBL_ERR: + mp_clear_multi(&a, &b, &c, &d, NULL); + return EXIT_FAILURE; +} + +static int test_mp_invmod(void) +{ + mp_int a, b, c, d; + if (mp_init_multi(&a, &b, &c, &d, NULL)!= MP_OKAY) { + return EXIT_FAILURE; + } + + /* mp_invmod corner-case of https://github.com/libtom/libtommath/issues/118 */ + { + const char *a_ = "47182BB8DF0FFE9F61B1F269BACC066B48BA145D35137D426328DC3F88A5EA44"; + const char *b_ = "FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF"; + const char *should_ = "0521A82E10376F8E4FDEF9A32A427AC2A0FFF686E00290D39E3E4B5522409596"; + + if (mp_read_radix(&a, a_, 16) != MP_OKAY) { + printf("\nmp_read_radix(a) failed!"); + goto LBL_ERR; + } + if (mp_read_radix(&b, b_, 16) != MP_OKAY) { + printf("\nmp_read_radix(b) failed!"); + goto LBL_ERR; + } + if (mp_read_radix(&c, should_, 16) != MP_OKAY) { + printf("\nmp_read_radix(should) failed!"); + goto LBL_ERR; + } + + if (mp_invmod(&a, &b, &d) != MP_OKAY) { + printf("\nmp_invmod() failed!"); + goto LBL_ERR; + } + + if (mp_cmp(&c, &d) != MP_EQ) { + printf("\nmp_invmod() bad result!"); + goto LBL_ERR; + } + } + + mp_clear_multi(&a, &b, &c, &d, NULL); + return EXIT_SUCCESS; +LBL_ERR: + mp_clear_multi(&a, &b, &c, &d, NULL); + return EXIT_FAILURE; + +} + +#if defined(__STDC_IEC_559__) || defined(__GCC_IEC_559) +static int test_mp_set_double(void) +{ + int i; + + mp_int a, b; + if (mp_init_multi(&a, &b, NULL)!= MP_OKAY) { + return EXIT_FAILURE; + } + + /* test mp_get_double/mp_set_double */ + if (mp_set_double(&a, +1.0/0.0) != MP_VAL) { + printf("\nmp_set_double should return MP_VAL for +inf"); + goto LBL_ERR; + } + if (mp_set_double(&a, -1.0/0.0) != MP_VAL) { + printf("\nmp_set_double should return MP_VAL for -inf"); + goto LBL_ERR; + } + if (mp_set_double(&a, +0.0/0.0) != MP_VAL) { + printf("\nmp_set_double should return MP_VAL for NaN"); + goto LBL_ERR; + } + if (mp_set_double(&a, -0.0/0.0) != MP_VAL) { + printf("\nmp_set_double should return MP_VAL for NaN"); + goto LBL_ERR; + } + + for (i = 0; i < 1000; ++i) { + int tmp = rand_int(); + double dbl = (double)tmp * rand_int() + 1; + if (mp_set_double(&a, dbl) != MP_OKAY) { + printf("\nmp_set_double() failed"); + goto LBL_ERR; + } + if (dbl != mp_get_double(&a)) { + printf("\nmp_get_double() bad result!"); + goto LBL_ERR; + } + if (mp_set_double(&a, -dbl) != MP_OKAY) { + printf("\nmp_set_double() failed"); + goto LBL_ERR; + } + if (-dbl != mp_get_double(&a)) { + printf("\nmp_get_double() bad result!"); + goto LBL_ERR; + } + } + + mp_clear_multi(&a, &b, NULL); + return EXIT_SUCCESS; +LBL_ERR: + mp_clear_multi(&a, &b, NULL); + return EXIT_FAILURE; + +} +#endif + +static int test_mp_get_u32(void) +{ + unsigned long t; + int i; + + mp_int a, b; + if (mp_init_multi(&a, &b, NULL)!= MP_OKAY) { + return EXIT_FAILURE; + } + + for (i = 0; i < 1000; ++i) { + t = (unsigned long)rand_long() & 0xFFFFFFFFuL; + mp_set_ul(&a, t); + if (t != mp_get_u32(&a)) { + printf("\nmp_get_u32() bad result!"); + goto LBL_ERR; + } + } + mp_set_ul(&a, 0uL); + if (mp_get_u32(&a) != 0) { + printf("\nmp_get_u32() bad result!"); + goto LBL_ERR; + } + mp_set_ul(&a, 0xFFFFFFFFuL); + if (mp_get_u32(&a) != 0xFFFFFFFFuL) { + printf("\nmp_get_u32() bad result!"); + goto LBL_ERR; + } + + mp_clear_multi(&a, &b, NULL); + return EXIT_SUCCESS; +LBL_ERR: + mp_clear_multi(&a, &b, NULL); + return EXIT_FAILURE; +} + +static int test_mp_get_ul(void) +{ + unsigned long s, t; + int i; + + mp_int a, b; + if (mp_init_multi(&a, &b, NULL)!= MP_OKAY) { + return EXIT_FAILURE; + } + + for (i = 0; i < ((int)MP_SIZEOF_BITS(unsigned long) - 1); ++i) { + t = (1UL << (i+1)) - 1; + if (!t) + t = ~0UL; + printf(" t = 0x%lx i = %d\r", t, i); + do { + mp_set_ul(&a, t); + s = mp_get_ul(&a); + if (s != t) { + printf("\nmp_get_ul() bad result! 0x%lx != 0x%lx", s, t); + goto LBL_ERR; + } + t <<= 1; + } while (t != 0uL); + } + + mp_clear_multi(&a, &b, NULL); + return EXIT_SUCCESS; +LBL_ERR: + mp_clear_multi(&a, &b, NULL); + return EXIT_FAILURE; +} + +static int test_mp_get_u64(void) +{ + unsigned long long q, r; + int i; + + mp_int a, b; + if (mp_init_multi(&a, &b, NULL)!= MP_OKAY) { + return EXIT_FAILURE; + } + + for (i = 0; i < (int)(MP_SIZEOF_BITS(unsigned long long) - 1); ++i) { + r = (1ULL << (i+1)) - 1; + if (!r) + r = ~0ULL; + printf(" r = 0x%llx i = %d\r", r, i); + do { + mp_set_u64(&a, r); + q = mp_get_u64(&a); + if (q != r) { + printf("\nmp_get_u64() bad result! 0x%llx != 0x%llx", q, r); + goto LBL_ERR; + } + r <<= 1; + } while (r != 0uLL); + } + + mp_clear_multi(&a, &b, NULL); + return EXIT_SUCCESS; +LBL_ERR: + mp_clear_multi(&a, &b, NULL); + return EXIT_FAILURE; + +} + +static int test_mp_sqrt(void) +{ + int i, n; + + mp_int a, b, c; + if (mp_init_multi(&a, &b, &c, NULL)!= MP_OKAY) { + return EXIT_FAILURE; + } + + for (i = 0; i < 1000; ++i) { + printf("%6d\r", i); + fflush(stdout); + n = (rand_int() & 15) + 1; + mp_rand(&a, n); + if (mp_sqrt(&a, &b) != MP_OKAY) { + printf("\nmp_sqrt() error!"); + goto LBL_ERR; + } + mp_root_u32(&a, 2uL, &c); + if (mp_cmp_mag(&b, &c) != MP_EQ) { + printf("mp_sqrt() bad result!\n"); + goto LBL_ERR; + } + } + + mp_clear_multi(&a, &b, &c, NULL); + return EXIT_SUCCESS; +LBL_ERR: + mp_clear_multi(&a, &b, &c, NULL); + return EXIT_FAILURE; +} + +static int test_mp_is_square(void) +{ + int i, n; + + mp_int a, b; + mp_bool res; + + if (mp_init_multi(&a, &b, NULL)!= MP_OKAY) { + return EXIT_FAILURE; + } + + for (i = 0; i < 1000; ++i) { + printf("%6d\r", i); + fflush(stdout); + + /* test mp_is_square false negatives */ + n = (rand_int() & 7) + 1; + mp_rand(&a, n); + mp_sqr(&a, &a); + if (mp_is_square(&a, &res) != MP_OKAY) { + printf("\nfn:mp_is_square() error!"); + goto LBL_ERR; + } + if (res == MP_NO) { + printf("\nfn:mp_is_square() bad result!"); + goto LBL_ERR; + } + + /* test for false positives */ + mp_add_d(&a, 1uL, &a); + if (mp_is_square(&a, &res) != MP_OKAY) { + printf("\nfp:mp_is_square() error!"); + goto LBL_ERR; + } + if (res == MP_YES) { + printf("\nfp:mp_is_square() bad result!"); + goto LBL_ERR; + } + + } + printf("\n\n"); + + mp_clear_multi(&a, &b, NULL); + return EXIT_SUCCESS; +LBL_ERR: + mp_clear_multi(&a, &b, NULL); + return EXIT_FAILURE; +} + +static int test_mp_sqrtmod_prime(void) +{ + struct mp_sqrtmod_prime_st { + unsigned long p; + unsigned long n; + mp_digit r; + }; + + static struct mp_sqrtmod_prime_st sqrtmod_prime[] = { + { 5, 14, 3 }, + { 7, 9, 4 }, + { 113, 2, 62 } + }; + int i; + + mp_int a, b, c; + if (mp_init_multi(&a, &b, &c, NULL)!= MP_OKAY) { + return EXIT_FAILURE; + } + + /* r^2 = n (mod p) */ + for (i = 0; i < (int)(sizeof(sqrtmod_prime)/sizeof(sqrtmod_prime[0])); ++i) { + mp_set_ul(&a, sqrtmod_prime[i].p); + mp_set_ul(&b, sqrtmod_prime[i].n); + if (mp_sqrtmod_prime(&b, &a, &c) != MP_OKAY) { + printf("Failed executing %d. mp_sqrtmod_prime\n", (i+1)); + goto LBL_ERR; + } + if (mp_cmp_d(&c, sqrtmod_prime[i].r) != MP_EQ) { + printf("Failed %d. trivial mp_sqrtmod_prime\n", (i+1)); + ndraw(&c, "r"); + goto LBL_ERR; + } + } + + mp_clear_multi(&a, &b, &c, NULL); + return EXIT_SUCCESS; +LBL_ERR: + mp_clear_multi(&a, &b, &c, NULL); + return EXIT_FAILURE; +} + +static int test_mp_prime_rand(void) +{ + int ix; + mp_err err; + mp_int a, b; + if (mp_init_multi(&a, &b, NULL)!= MP_OKAY) { + return EXIT_FAILURE; + } + + /* test for size */ + for (ix = 10; ix < 128; ix++) { + printf("Testing (not safe-prime): %9d bits \r", ix); + fflush(stdout); + err = mp_prime_rand(&a, 8, ix, (rand_int() & 1) ? 0 : MP_PRIME_2MSB_ON); + if (err != MP_OKAY) { + printf("\nfailed with error: %s\n", mp_error_to_string(err)); + goto LBL_ERR; + } + if (mp_count_bits(&a) != ix) { + printf("Prime is %d not %d bits!!!\n", mp_count_bits(&a), ix); + goto LBL_ERR; + } + } + printf("\n"); + + mp_clear_multi(&a, &b, NULL); + return EXIT_SUCCESS; +LBL_ERR: + mp_clear_multi(&a, &b, NULL); + return EXIT_FAILURE; +} + +static int test_mp_prime_is_prime(void) +{ + int ix; + mp_err err; + mp_bool cnt, fu; + + mp_int a, b; + if (mp_init_multi(&a, &b, NULL)!= MP_OKAY) { + return EXIT_FAILURE; + } + + /* strong Miller-Rabin pseudoprime to the first 200 primes (F. Arnault) */ + puts("Testing mp_prime_is_prime() with Arnault's pseudoprime 803...901 \n"); + mp_read_radix(&a, + "91xLNF3roobhzgTzoFIG6P13ZqhOVYSN60Fa7Cj2jVR1g0k89zdahO9/kAiRprpfO1VAp1aBHucLFV/qLKLFb+zonV7R2Vxp1K13ClwUXStpV0oxTNQVjwybmFb5NBEHImZ6V7P6+udRJuH8VbMEnS0H8/pSqQrg82OoQQ2fPpAk6G1hkjqoCv5s/Yr", + 64); + mp_prime_is_prime(&a, mp_prime_rabin_miller_trials(mp_count_bits(&a)), &cnt); + if (cnt == MP_YES) { + printf("Arnault's pseudoprime is not prime but mp_prime_is_prime says it is.\n"); + goto LBL_ERR; + } + /* About the same size as Arnault's pseudoprime */ + puts("Testing mp_prime_is_prime() with certified prime 2^1119 + 53\n"); + mp_set(&a, 1uL); + mp_mul_2d(&a,1119,&a); + mp_add_d(&a, 53uL, &a); + err = mp_prime_is_prime(&a, mp_prime_rabin_miller_trials(mp_count_bits(&a)), &cnt); + /* small problem */ + if (err != MP_OKAY) { + printf("\nfailed with error: %s\n", mp_error_to_string(err)); + } + /* large problem */ + if (cnt == MP_NO) { + printf("A certified prime is a prime but mp_prime_is_prime says it is not.\n"); + } + if ((err != MP_OKAY) || (cnt == MP_NO)) { + printf("prime tested was: 0x"); + mp_fwrite(&a,16,stdout); + putchar('\n'); + goto LBL_ERR; + } + for (ix = 16; ix < 128; ix++) { + printf("Testing ( safe-prime): %9d bits \r", ix); + fflush(stdout); + err = mp_prime_rand(&a, 8, ix, ((rand_int() & 1) ? 0 : MP_PRIME_2MSB_ON) | MP_PRIME_SAFE); + if (err != MP_OKAY) { + printf("\nfailed with error: %s\n", mp_error_to_string(err)); + goto LBL_ERR; + } + if (mp_count_bits(&a) != ix) { + printf("Prime is %d not %d bits!!!\n", mp_count_bits(&a), ix); + goto LBL_ERR; + } + /* let's see if it's really a safe prime */ + mp_sub_d(&a, 1uL, &b); + mp_div_2(&b, &b); + err = mp_prime_is_prime(&b, mp_prime_rabin_miller_trials(mp_count_bits(&b)), &cnt); + /* small problem */ + if (err != MP_OKAY) { + printf("\nfailed with error: %s\n", mp_error_to_string(err)); + } + /* large problem */ + if (cnt == MP_NO) { + printf("\nsub is not prime!\n"); + } + mp_prime_frobenius_underwood(&b, &fu); + if (fu == MP_NO) { + printf("\nfrobenius-underwood says sub is not prime!\n"); + } + if ((err != MP_OKAY) || (cnt == MP_NO)) { + printf("prime tested was: 0x"); + mp_fwrite(&a,16,stdout); + putchar('\n'); + printf("sub tested was: 0x"); + mp_fwrite(&b,16,stdout); + putchar('\n'); + goto LBL_ERR; + } + + } + /* Check regarding problem #143 */ +#ifndef MP_8BIT + mp_read_radix(&a, + "FFFFFFFFFFFFFFFFC90FDAA22168C234C4C6628B80DC1CD129024E088A67CC74020BBEA63B139B22514A08798E3404DDEF9519B3CD3A431B302B0A6DF25F14374FE1356D6D51C245E485B576625E7EC6F44C42E9A63A3620FFFFFFFFFFFFFFFF", + 16); + err = mp_prime_strong_lucas_selfridge(&a, &cnt); + /* small problem */ + if (err != MP_OKAY) { + printf("\nmp_prime_strong_lucas_selfridge failed with error: %s\n", mp_error_to_string(err)); + } + /* large problem */ + if (cnt == MP_NO) { + printf("\n\nissue #143 - mp_prime_strong_lucas_selfridge FAILED!\n"); + } + if ((err != MP_OKAY) || (cnt == MP_NO)) { + printf("prime tested was: 0x"); + mp_fwrite(&a,16,stdout); + putchar('\n'); + goto LBL_ERR; + } +#endif + + printf("\n\n"); + + mp_clear_multi(&a, &b, NULL); + return EXIT_SUCCESS; +LBL_ERR: + mp_clear_multi(&a, &b, NULL); + return EXIT_FAILURE; + +} + + +static int test_mp_prime_next_prime(void) +{ + mp_err err; + mp_int a, b, c; + + mp_init_multi(&a, &b, &c, NULL); + + + /* edge cases */ + mp_set(&a, 0u); + if ((err = mp_prime_next_prime(&a, 5, 0)) != MP_OKAY) { + goto LBL_ERR; + } + if (mp_cmp_d(&a, 2u) != MP_EQ) { + printf("mp_prime_next_prime: output should have been 2 but was: "); + mp_fwrite(&a,10,stdout); + putchar('\n'); + goto LBL_ERR; + } + + mp_set(&a, 0u); + if ((err = mp_prime_next_prime(&a, 5, 1)) != MP_OKAY) { + goto LBL_ERR; + } + if (mp_cmp_d(&a, 3u) != MP_EQ) { + printf("mp_prime_next_prime: output should have been 3 but was: "); + mp_fwrite(&a,10,stdout); + putchar('\n'); + goto LBL_ERR; + } + + mp_set(&a, 2u); + if ((err = mp_prime_next_prime(&a, 5, 0)) != MP_OKAY) { + goto LBL_ERR; + } + if (mp_cmp_d(&a, 3u) != MP_EQ) { + printf("mp_prime_next_prime: output should have been 3 but was: "); + mp_fwrite(&a,10,stdout); + putchar('\n'); + goto LBL_ERR; + } + + mp_set(&a, 2u); + if ((err = mp_prime_next_prime(&a, 5, 1)) != MP_OKAY) { + goto LBL_ERR; + } + if (mp_cmp_d(&a, 3u) != MP_EQ) { + printf("mp_prime_next_prime: output should have been 3 but was: "); + mp_fwrite(&a,10,stdout); + putchar('\n'); + goto LBL_ERR; + } + mp_set(&a, 8); + if ((err = mp_prime_next_prime(&a, 5, 1)) != MP_OKAY) { + goto LBL_ERR; + } + if (mp_cmp_d(&a, 11u) != MP_EQ) { + printf("mp_prime_next_prime: output should have been 11 but was: "); + mp_fwrite(&a,10,stdout); + putchar('\n'); + goto LBL_ERR; + } + /* 2^300 + 157 is a 300 bit large prime to guarantee a multi-limb bigint */ + if ((err = mp_2expt(&a, 300)) != MP_OKAY) { + goto LBL_ERR; + } + mp_set_u32(&b, 157); + if ((err = mp_add(&a, &b, &a)) != MP_OKAY) { + goto LBL_ERR; + } + if ((err = mp_copy(&a, &b)) != MP_OKAY) { + goto LBL_ERR; + } + + /* 2^300 + 385 is the next prime */ + mp_set_u32(&c, 228); + if ((err = mp_add(&b, &c, &b)) != MP_OKAY) { + goto LBL_ERR; + } + if ((err = mp_prime_next_prime(&a, 5, 0)) != MP_OKAY) { + goto LBL_ERR; + } + if (mp_cmp(&a, &b) != MP_EQ) { + printf("mp_prime_next_prime: output should have been\n"); + mp_fwrite(&b,10,stdout); + putchar('\n'); + printf("but was:\n"); + mp_fwrite(&a,10,stdout); + putchar('\n'); + goto LBL_ERR; + } + + /* Use another temporary variable or recompute? Mmh... */ + if ((err = mp_2expt(&a, 300)) != MP_OKAY) { + goto LBL_ERR; + } + mp_set_u32(&b, 157); + if ((err = mp_add(&a, &b, &a)) != MP_OKAY) { + goto LBL_ERR; + } + if ((err = mp_copy(&a, &b)) != MP_OKAY) { + goto LBL_ERR; + } + + /* 2^300 + 631 is the next prime congruent to 3 mod 4*/ + mp_set_u32(&c, 474); + if ((err = mp_add(&b, &c, &b)) != MP_OKAY) { + goto LBL_ERR; + } + if ((err = mp_prime_next_prime(&a, 5, 1)) != MP_OKAY) { + goto LBL_ERR; + } + if (mp_cmp(&a, &b) != MP_EQ) { + printf("mp_prime_next_prime (bbs): output should have been\n"); + mp_fwrite(&b,10,stdout); + putchar('\n'); + printf("but was:\n"); + mp_fwrite(&a,10,stdout); + putchar('\n'); + goto LBL_ERR; + } + + mp_clear_multi(&a, &b, &c, NULL); + return EXIT_SUCCESS; +LBL_ERR: + mp_clear_multi(&a, &b, &c, NULL); + return EXIT_FAILURE; +} + +static int test_mp_montgomery_reduce(void) +{ + mp_digit mp; + int ix, i, n; + char buf[4096]; + + /* size_t written; */ + + mp_int a, b, c, d, e; + if (mp_init_multi(&a, &b, &c, &d, &e, NULL)!= MP_OKAY) { + return EXIT_FAILURE; + } + + /* test montgomery */ + for (i = 1; i <= 10; i++) { + if (i == 10) + i = 1000; + printf(" digit size: %2d\r", i); + fflush(stdout); + for (n = 0; n < 1000; n++) { + mp_rand(&a, i); + a.dp[0] |= 1; + + /* let's see if R is right */ + mp_montgomery_calc_normalization(&b, &a); + mp_montgomery_setup(&a, &mp); + + /* now test a random reduction */ + for (ix = 0; ix < 100; ix++) { + mp_rand(&c, 1 + abs(rand_int()) % (2*i)); + mp_copy(&c, &d); + mp_copy(&c, &e); + + mp_mod(&d, &a, &d); + mp_montgomery_reduce(&c, &a, mp); + mp_mulmod(&c, &b, &a, &c); + + if (mp_cmp(&c, &d) != MP_EQ) { +/* *INDENT-OFF* */ + printf("d = e mod a, c = e MOD a\n"); + mp_to_decimal(&a, buf, sizeof(buf)); printf("a = %s\n", buf); + mp_to_decimal(&e, buf, sizeof(buf)); printf("e = %s\n", buf); + mp_to_decimal(&d, buf, sizeof(buf)); printf("d = %s\n", buf); + mp_to_decimal(&c, buf, sizeof(buf)); printf("c = %s\n", buf); + + printf("compare no compare!\n"); goto LBL_ERR; +/* *INDENT-ON* */ + } + /* only one big montgomery reduction */ + if (i > 10) { + n = 1000; + ix = 100; + } + } + } + } + + printf("\n\n"); + + mp_clear_multi(&a, &b, &c, &d, &e, NULL); + return EXIT_SUCCESS; +LBL_ERR: + mp_clear_multi(&a, &b, &c, &d, &e, NULL); + return EXIT_FAILURE; + +} + +static int test_mp_read_radix(void) +{ + char buf[4096]; + size_t written; + mp_err err; + + mp_int a; + if (mp_init_multi(&a, NULL)!= MP_OKAY) goto LTM_ERR; + + if ((err = mp_read_radix(&a, "123456", 10)) != MP_OKAY) goto LTM_ERR; + + if ((err = mp_to_radix(&a, buf, SIZE_MAX, &written, 10)) != MP_OKAY) goto LTM_ERR; + printf(" '123456' a == %s, length = %zu\n", buf, written); + + /* See comment in bn_mp_to_radix.c */ + /* + if( (err = mp_to_radix(&a, buf, 3u, &written, 10) ) != MP_OKAY) goto LTM_ERR; + printf(" '56' a == %s, length = %zu\n", buf, written); + + if( (err = mp_to_radix(&a, buf, 4u, &written, 10) ) != MP_OKAY) goto LTM_ERR; + printf(" '456' a == %s, length = %zu\n", buf, written); + if( (err = mp_to_radix(&a, buf, 30u, &written, 10) ) != MP_OKAY) goto LTM_ERR; + printf(" '123456' a == %s, length = %zu, error = %s\n", + buf, written, mp_error_to_string(err)); + */ + if ((err = mp_read_radix(&a, "-123456", 10)) != MP_OKAY) goto LTM_ERR; + if ((err = mp_to_radix(&a, buf, SIZE_MAX, &written, 10)) != MP_OKAY) goto LTM_ERR; + printf(" '-123456' a == %s, length = %zu\n", buf, written); + + if ((err = mp_read_radix(&a, "0", 10)) != MP_OKAY) goto LTM_ERR; + if ((err = mp_to_radix(&a, buf, SIZE_MAX, &written, 10)) != MP_OKAY) goto LTM_ERR; + printf(" '0' a == %s, length = %zu\n", buf, written); + + + + /* Although deprecated it needs to function as long as it isn't dropped */ + /* + printf("Testing deprecated mp_toradix_n\n"); + if( (err = mp_read_radix(&a, "-123456", 10) ) != MP_OKAY) goto LTM_ERR; + if( (err = mp_toradix_n(&a, buf, 10, 3) ) != MP_OKAY) goto LTM_ERR; + printf("a == %s\n", buf); + if( (err = mp_toradix_n(&a, buf, 10, 4) ) != MP_OKAY) goto LTM_ERR; + printf("a == %s\n", buf); + if( (err = mp_toradix_n(&a, buf, 10, 30) ) != MP_OKAY) goto LTM_ERR; + printf("a == %s\n", buf); + */ + + + while (0) { + char *s = fgets(buf, sizeof(buf), stdin); + if (s != buf) break; + mp_read_radix(&a, buf, 10); + mp_prime_next_prime(&a, 5, 1); + mp_to_radix(&a, buf, sizeof(buf), NULL, 10); + printf("%s, %lu\n", buf, (unsigned long)a.dp[0] & 3uL); + } + + mp_clear(&a); + return EXIT_SUCCESS; +LTM_ERR: + mp_clear(&a); + return EXIT_FAILURE; +} + +static int test_mp_cnt_lsb(void) +{ + int ix; + + mp_int a, b; + if (mp_init_multi(&a, &b, NULL)!= MP_OKAY) { + return EXIT_FAILURE; + } + + mp_set(&a, 1uL); + for (ix = 0; ix < 1024; ix++) { + if (mp_cnt_lsb(&a) != ix) { + printf("Failed at %d, %d\n", ix, mp_cnt_lsb(&a)); + goto LBL_ERR; + } + mp_mul_2(&a, &a); + } + + mp_clear_multi(&a, &b, NULL); + return EXIT_SUCCESS; +LBL_ERR: + mp_clear_multi(&a, &b, NULL); + return EXIT_FAILURE; + +} + +static int test_mp_reduce_2k(void) +{ + int ix, cnt; + + mp_int a, b, c, d; + if (mp_init_multi(&a, &b, &c, &d, NULL)!= MP_OKAY) { + return EXIT_FAILURE; + } + + /* test mp_reduce_2k */ + for (cnt = 3; cnt <= 128; ++cnt) { + mp_digit tmp; + + mp_2expt(&a, cnt); + mp_sub_d(&a, 2uL, &a); /* a = 2**cnt - 2 */ + + printf("\r %4d bits", cnt); + printf("(%d)", mp_reduce_is_2k(&a)); + mp_reduce_2k_setup(&a, &tmp); + printf("(%lu)", (unsigned long) tmp); + for (ix = 0; ix < 1000; ix++) { + if (!(ix & 127)) { + printf("."); + fflush(stdout); + } + mp_rand(&b, (cnt / MP_DIGIT_BIT + 1) * 2); + mp_copy(&c, &b); + mp_mod(&c, &a, &c); + mp_reduce_2k(&b, &a, 2uL); + if (mp_cmp(&c, &b) != MP_EQ) { + printf("FAILED\n"); + goto LBL_ERR; + } + } + } + + mp_clear_multi(&a, &b, &c, &d, NULL); + return EXIT_SUCCESS; +LBL_ERR: + mp_clear_multi(&a, &b, &c, &d, NULL); + return EXIT_FAILURE; +} + +static int test_mp_div_3(void) +{ + int cnt; + + mp_int a, b, c, d, e; + if (mp_init_multi(&a, &b, &c, &d, &e, NULL)!= MP_OKAY) { + return EXIT_FAILURE; + } + + /* test mp_div_3 */ + mp_set(&d, 3uL); + for (cnt = 0; cnt < 10000;) { + mp_digit r2; + + if (!(++cnt & 127)) { + printf("%9d\r", cnt); + fflush(stdout); + } + mp_rand(&a, abs(rand_int()) % 128 + 1); + mp_div(&a, &d, &b, &e); + mp_div_3(&a, &c, &r2); + + if (mp_cmp(&b, &c) || mp_cmp_d(&e, r2)) { + printf("\nmp_div_3 => Failure\n"); + goto LBL_ERR; + } + } + printf("\nPassed div_3 testing"); + + mp_clear_multi(&a, &b, &c, &d, &e, NULL); + return EXIT_SUCCESS; +LBL_ERR: + mp_clear_multi(&a, &b, &c, &d, &e, NULL); + return EXIT_FAILURE; +} + +static int test_mp_dr_reduce(void) +{ + mp_digit mp; + int cnt; + unsigned rr; + int ix; + + mp_int a, b, c; + if (mp_init_multi(&a, &b, &c, NULL)!= MP_OKAY) { + return EXIT_FAILURE; + } + + /* test the DR reduction */ + for (cnt = 2; cnt < 32; cnt++) { + printf("\r%d digit modulus", cnt); + mp_grow(&a, cnt); + mp_zero(&a); + for (ix = 1; ix < cnt; ix++) { + a.dp[ix] = MP_MASK; + } + a.used = cnt; + a.dp[0] = 3; + + mp_rand(&b, cnt - 1); + mp_copy(&b, &c); + + rr = 0; + do { + if (!(rr & 127)) { + printf("."); + fflush(stdout); + } + mp_sqr(&b, &b); + mp_add_d(&b, 1uL, &b); + mp_copy(&b, &c); + + mp_mod(&b, &a, &b); + mp_dr_setup(&a, &mp); + mp_dr_reduce(&c, &a, mp); + + if (mp_cmp(&b, &c) != MP_EQ) { + printf("Failed on trial %u\n", rr); + goto LBL_ERR; + } + } while (++rr < 500); + printf(" passed"); + fflush(stdout); + } + + mp_clear_multi(&a, &b, &c, NULL); + return EXIT_SUCCESS; +LBL_ERR: + mp_clear_multi(&a, &b, &c, NULL); + return EXIT_FAILURE; +} + +static int test_mp_reduce_2k_l(void) +{ +# if LTM_DEMO_TEST_REDUCE_2K_L + mp_int a, b, c, d; + int cnt; + char buf[4096]; + size_t length[1]; + if (mp_init_multi(&a, &b, NULL)!= MP_OKAY) { + return EXIT_FAILURE; + } + /* test the mp_reduce_2k_l code */ +# if LTM_DEMO_TEST_REDUCE_2K_L == 1 + /* first load P with 2^1024 - 0x2A434 B9FDEC95 D8F9D550 FFFFFFFF FFFFFFFF */ + mp_2expt(&a, 1024); + mp_read_radix(&b, "2A434B9FDEC95D8F9D550FFFFFFFFFFFFFFFF", 16); + mp_sub(&a, &b, &a); +# elif LTM_DEMO_TEST_REDUCE_2K_L == 2 + /* p = 2^2048 - 0x1 00000000 00000000 00000000 00000000 4945DDBF 8EA2A91D 5776399B B83E188F */ + mp_2expt(&a, 2048); + mp_read_radix(&b, + "1000000000000000000000000000000004945DDBF8EA2A91D5776399BB83E188F", + 16); + mp_sub(&a, &b, &a); +# else +# error oops +# endif + *length = sizeof(buf); + mp_to_radix(&a, buf, length, 10); + printf("\n\np==%s, length = %zu\n", buf, *length); + /* now mp_reduce_is_2k_l() should return */ + if (mp_reduce_is_2k_l(&a) != 1) { + printf("mp_reduce_is_2k_l() return 0, should be 1\n"); + goto LBL_ERR; + } + mp_reduce_2k_setup_l(&a, &d); + /* now do a million square+1 to see if it varies */ + mp_rand(&b, 64); + mp_mod(&b, &a, &b); + mp_copy(&b, &c); + printf("Testing: mp_reduce_2k_l..."); + fflush(stdout); + for (cnt = 0; cnt < (int)(1uL << 20); cnt++) { + mp_sqr(&b, &b); + mp_add_d(&b, 1uL, &b); + mp_reduce_2k_l(&b, &a, &d); + mp_sqr(&c, &c); + mp_add_d(&c, 1uL, &c); + mp_mod(&c, &a, &c); + if (mp_cmp(&b, &c) != MP_EQ) { + printf("mp_reduce_2k_l() failed at step %d\n", cnt); + mp_to_hex(&b, buf, sizeof(buf)); + printf("b == %s\n", buf); + mp_to_hex(&c, buf, sizeof(buf)); + printf("c == %s\n", buf); + goto LBL_ERR; + } + } + + mp_clear_multi(&a, &b, NULL); + return EXIT_SUCCESS; +LBL_ERR: + mp_clear_multi(&a, &b, NULL); + return EXIT_FAILURE; +#else + return EXIT_SUCCESS; +# endif /* LTM_DEMO_TEST_REDUCE_2K_L */ +} +/* stripped down version of mp_radix_size. The faster version can be off by up t +o +3 */ +/* TODO: This function should be removed, replaced by mp_radix_size, mp_radix_size_overestimate in 2.0 */ +static mp_err s_rs(const mp_int *a, int radix, uint32_t *size) +{ + mp_err res; + uint32_t digs = 0u; + mp_int t; + mp_digit d; + *size = 0u; + if (mp_iszero(a) == MP_YES) { + *size = 2u; + return MP_OKAY; + } + if (radix == 2) { + *size = (uint32_t)mp_count_bits(a) + 1u; + return MP_OKAY; + } + if ((res = mp_init_copy(&t, a)) != MP_OKAY) { + return res; + } + t.sign = MP_ZPOS; + while (mp_iszero(&t) == MP_NO) { + if ((res = mp_div_d(&t, (mp_digit)radix, &t, &d)) != MP_OKAY) { + mp_clear(&t); + return res; + } + ++digs; + } + mp_clear(&t); + *size = digs + 1; + return MP_OKAY; +} +static int test_mp_log_u32(void) +{ + mp_int a; + mp_digit d; + uint32_t base, lb, size; + const uint32_t max_base = MP_MIN(UINT32_MAX, MP_DIGIT_MAX); + + if (mp_init(&a) != MP_OKAY) { + goto LBL_ERR; + } + + /* + base a result + 0 x MP_VAL + 1 x MP_VAL + */ + mp_set(&a, 42uL); + base = 0u; + if (mp_log_u32(&a, base, &lb) != MP_VAL) { + goto LBL_ERR; + } + base = 1u; + if (mp_log_u32(&a, base, &lb) != MP_VAL) { + goto LBL_ERR; + } + /* + base a result + 2 0 MP_VAL + 2 1 0 + 2 2 1 + 2 3 1 + */ + base = 2u; + mp_zero(&a); + if (mp_log_u32(&a, base, &lb) != MP_VAL) { + goto LBL_ERR; + } + + for (d = 1; d < 4; d++) { + mp_set(&a, d); + if (mp_log_u32(&a, base, &lb) != MP_OKAY) { + goto LBL_ERR; + } + if (lb != ((d == 1)?0uL:1uL)) { + goto LBL_ERR; + } + } + /* + base a result + 3 0 MP_VAL + 3 1 0 + 3 2 0 + 3 3 1 + */ + base = 3u; + mp_zero(&a); + if (mp_log_u32(&a, base, &lb) != MP_VAL) { + goto LBL_ERR; + } + for (d = 1; d < 4; d++) { + mp_set(&a, d); + if (mp_log_u32(&a, base, &lb) != MP_OKAY) { + goto LBL_ERR; + } + if (lb != ((d < base)?0uL:1uL)) { + goto LBL_ERR; + } + } + + /* + bases 2..64 with "a" a random large constant. + The range of bases tested allows to check with + radix_size. + */ + if (mp_rand(&a, 10) != MP_OKAY) { + goto LBL_ERR; + } + for (base = 2u; base < 65u; base++) { + if (mp_log_u32(&a, base, &lb) != MP_OKAY) { + goto LBL_ERR; + } + if (s_rs(&a,(int)base, &size) != MP_OKAY) { + goto LBL_ERR; + } + /* radix_size includes the memory needed for '\0', too*/ + size -= 2; + if (lb != size) { + goto LBL_ERR; + } + } + + /* + bases 2..64 with "a" a random small constant to + test the part of mp_ilogb that uses native types. + */ + if (mp_rand(&a, 1) != MP_OKAY) { + goto LBL_ERR; + } + for (base = 2u; base < 65u; base++) { + if (mp_log_u32(&a, base, &lb) != MP_OKAY) { + goto LBL_ERR; + } + if (s_rs(&a,(int)base, &size) != MP_OKAY) { + goto LBL_ERR; + } + size -= 2; + if (lb != size) { + goto LBL_ERR; + } + } + + /*Test upper edgecase with base UINT32_MAX and number (UINT32_MAX/2)*UINT32_MAX^10 */ + mp_set(&a, max_base); + if (mp_expt_u32(&a, 10uL, &a) != MP_OKAY) { + goto LBL_ERR; + } + if (mp_add_d(&a, max_base / 2, &a) != MP_OKAY) { + goto LBL_ERR; + } + if (mp_log_u32(&a, max_base, &lb) != MP_OKAY) { + goto LBL_ERR; + } + if (lb != 10u) { + goto LBL_ERR; + } + + mp_clear(&a); + return EXIT_SUCCESS; +LBL_ERR: + mp_clear(&a); + return EXIT_FAILURE; +} + +static int test_mp_incr(void) +{ + mp_int a, b; + mp_err e = MP_OKAY; + + if ((e = mp_init_multi(&a, &b, NULL)) != MP_OKAY) { + goto LTM_ERR; + } + + /* Does it increment inside the limits of a MP_xBIT limb? */ + mp_set(&a, MP_MASK/2); + if ((e = mp_incr(&a)) != MP_OKAY) { + goto LTM_ERR; + } + if (mp_cmp_d(&a, (MP_MASK/2uL) + 1uL) != MP_EQ) { + goto LTM_ERR; + } + + /* Does it increment outside of the limits of a MP_xBIT limb? */ + mp_set(&a, MP_MASK); + mp_set(&b, MP_MASK); + if ((e = mp_incr(&a)) != MP_OKAY) { + goto LTM_ERR; + } + if ((e = mp_add_d(&b, 1uL, &b)) != MP_OKAY) { + goto LTM_ERR; + } + if (mp_cmp(&a, &b) != MP_EQ) { + goto LTM_ERR; + } + + /* Does it increment from -1 to 0? */ + mp_set(&a, 1uL); + a.sign = MP_NEG; + if ((e = mp_incr(&a)) != MP_OKAY) { + goto LTM_ERR; + } + if (mp_cmp_d(&a, 0uL) != MP_EQ) { + goto LTM_ERR; + } + + /* Does it increment from -(MP_MASK + 1) to -MP_MASK? */ + mp_set(&a, MP_MASK); + if ((e = mp_add_d(&a, 1uL, &a)) != MP_OKAY) { + goto LTM_ERR; + } + a.sign = MP_NEG; + if ((e = mp_incr(&a)) != MP_OKAY) { + goto LTM_ERR; + } + if (a.sign != MP_NEG) { + goto LTM_ERR; + } + a.sign = MP_ZPOS; + if (mp_cmp_d(&a, MP_MASK) != MP_EQ) { + goto LTM_ERR; + } + + mp_clear_multi(&a, &b, NULL); + return EXIT_SUCCESS; +LTM_ERR: + mp_clear_multi(&a, &b, NULL); + return EXIT_FAILURE; +} + +static int test_mp_decr(void) +{ + mp_int a, b; + mp_err e = MP_OKAY; + + if ((e = mp_init_multi(&a, &b, NULL)) != MP_OKAY) { + goto LTM_ERR; + } + + /* Does it decrement inside the limits of a MP_xBIT limb? */ + mp_set(&a, MP_MASK/2); + if ((e = mp_decr(&a)) != MP_OKAY) { + goto LTM_ERR; + } + if (mp_cmp_d(&a, (MP_MASK/2uL) - 1uL) != MP_EQ) { + goto LTM_ERR; + } + + /* Does it decrement outside of the limits of a MP_xBIT limb? */ + mp_set(&a, MP_MASK); + if ((e = mp_add_d(&a, 1uL, &a)) != MP_OKAY) { + goto LTM_ERR; + } + if ((e = mp_decr(&a)) != MP_OKAY) { + goto LTM_ERR; + } + if (mp_cmp_d(&a, MP_MASK) != MP_EQ) { + goto LTM_ERR; + } + + /* Does it decrement from 0 to -1? */ + mp_zero(&a); + if ((e = mp_decr(&a)) != MP_OKAY) { + goto LTM_ERR; + } + if (a.sign == MP_NEG) { + a.sign = MP_ZPOS; + if (mp_cmp_d(&a, 1uL) != MP_EQ) { + goto LTM_ERR; + } + } else { + goto LTM_ERR; + } + + + /* Does it decrement from -MP_MASK to -(MP_MASK + 1)? */ + mp_set(&a, MP_MASK); + a.sign = MP_NEG; + mp_set(&b, MP_MASK); + b.sign = MP_NEG; + if ((e = mp_sub_d(&b, 1uL, &b)) != MP_OKAY) { + goto LTM_ERR; + } + if ((e = mp_decr(&a)) != MP_OKAY) { + goto LTM_ERR; + } + if (mp_cmp(&a, &b) != MP_EQ) { + goto LTM_ERR; + } + + mp_clear_multi(&a, &b, NULL); + return EXIT_SUCCESS; +LTM_ERR: + mp_clear_multi(&a, &b, NULL); + return EXIT_FAILURE; +} + +/* + Cannot test mp_exp(_d) without mp_root and vice versa. + So one of the two has to be tested from scratch. + + Numbers generated by + for i in {1..10} + do + seed=$(head -c 10000 /dev/urandom | tr -dc '[:digit:]' | head -c 120); + echo $seed; + convertbase $seed 10 64; + done + + (The program "convertbase" uses libtommath's to/from_radix functions) + + Roots were precalculated with Pari/GP + + default(realprecision,1000); + for(n=3,100,r = floor(a^(1/n));printf("\"" r "\", ")) + + All numbers as strings to simplifiy things, especially for the + low-mp branch. +*/ + +static int test_mp_root_u32(void) +{ + mp_int a, c, r; + mp_err e; + int i, j; + + const char *input[] = { + "4n9cbk886QtLQmofprid3l2Q0GD8Yv979Lh8BdZkFE8g2pDUUSMBET/+M/YFyVZ3mBp", + "5NlgzHhmIX05O5YoW5yW5reAlVNtRAlIcN2dfoATnNdc1Cw5lHZUTwNthmK6/ZLKfY6", + "3gweiHDX+ji5utraSe46IJX+uuh7iggs63xIpMP5MriU4Np+LpHI5are8RzS9pKh9xP", + "5QOJUSKMrfe7LkeyJOlupS8h7bjT+TXmZkDzOjZtfj7mdA7cbg0lRX3CuafhjIrpK8S", + "4HtYFldVkyVbrlg/s7kmaA7j45PvLQm+1bbn6ehgP8tVoBmGbv2yDQI1iQQze4AlHyN", + "3bwCUx79NAR7c68OPSp5ZabhZ9aBEr7rWNTO2oMY7zhbbbw7p6shSMxqE9K9nrTNucf", + "4j5RGb78TfuYSzrXn0z6tiAoWiRI81hGY3el9AEa9S+gN4x/AmzotHT2Hvj6lyBpE7q", + "4lwg30SXqZhEHNsl5LIXdyu7UNt0VTWebP3m7+WUL+hsnFW9xJe7UnzYngZsvWh14IE", + "1+tcqFeRuGqjRADRoRUJ8gL4UUSFQVrVVoV6JpwVcKsuBq5G0pABn0dLcQQQMViiVRj", + "hXwxuFySNSFcmbrs/coz4FUAaUYaOEt+l4V5V8vY71KyBvQPxRq/6lsSrG2FHvWDax" + }; + /* roots 3-100 of the above */ + const char *root[10][100] = { + { + "9163694094944489658600517465135586130944", + "936597377180979771960755204040", "948947857956884030956907", + "95727185767390496595", "133844854039712620", "967779611885360", + "20926191452627", "974139547476", "79203891950", "9784027073", + "1667309744", "365848129", "98268452", "31109156", "11275351", + "4574515", "2040800", "986985", "511525", "281431", "163096", + "98914", "62437", "40832", "27556", "19127", "13614", "9913", + "7367", "5577", "4294", "3357", "2662", "2138", "1738", "1428", + "1185", "993", "839", "715", "613", "530", "461", "403", "355", + "314", "279", "249", "224", "202", "182", "166", "151", "138", + "126", "116", "107", "99", "92", "85", "79", "74", "69", "65", "61", + "57", "54", "51", "48", "46", "43", "41", "39", "37", "36", "34", + "32", "31", "30", "28", "27", "26", "25", "24", "23", "23", "22", + "21", "20", "20", "19", "18", "18", "17", "17", "16", "16", "15" + }, { + "9534798256755061606359588498764080011382", + "964902943621813525741417593772", "971822399862464674540423", + "97646291566833512831", "136141536090599560", "982294733581430", + "21204945933335", "985810529393", "80066084985", "9881613813", + "1682654547", "368973625", "99051783", "31341581", "11354620", + "4604882", "2053633", "992879", "514434", "282959", "163942", + "99406", "62736", "41020", "27678", "19208", "13670", "9952", + "7395", "5598", "4310", "3369", "2671", "2145", "1744", "1433", + "1189", "996", "842", "717", "615", "531", "462", "404", "356", + "315", "280", "250", "224", "202", "183", "166", "151", "138", + "127", "116", "107", "99", "92", "85", "80", "74", "70", "65", "61", + "58", "54", "51", "48", "46", "43", "41", "39", "37", "36", "34", + "32", "31", "30", "29", "27", "26", "25", "24", "23", "23", "22", + "21", "20", "20", "19", "18", "18", "17", "17", "16", "16", "15" + }, { + "8398539113202579297642815367509019445624", + "877309458945432597462853440936", "900579899458998599215071", + "91643543761699761637", "128935656335800903", "936647990947203", + "20326748623514", "948988882684", "77342677787", "9573063447", + "1634096832", "359076114", "96569670", "30604705", "11103188", + "4508519", "2012897", "974160", "505193", "278105", "161251", + "97842", "61788", "40423", "27291", "18949", "13492", "9826", + "7305", "5532", "4260", "3332", "2642", "2123", "1726", "1418", + "1177", "986", "834", "710", "610", "527", "458", "401", "353", + "312", "278", "248", "223", "201", "181", "165", "150", "137", + "126", "116", "107", "99", "91", "85", "79", "74", "69", "65", "61", + "57", "54", "51", "48", "46", "43", "41", "39", "37", "35", "34", + "32", "31", "30", "28", "27", "26", "25", "24", "23", "22", "22", + "21", "20", "20", "19", "18", "18", "17", "17", "16", "16", "15" + }, { + "9559098494021810340217797724866627755195", + "966746709063325235560830083787", "973307706084821682248292", + "97770642291138756434", "136290128605981259", "983232784778520", + "21222944848922", "986563584410", "80121684894", "9887903837", + "1683643206", "369174929", "99102220", "31356542", "11359721", + "4606836", "2054458", "993259", "514621", "283057", "163997", + "99437", "62755", "41032", "27686", "19213", "13674", "9955", + "7397", "5599", "4311", "3370", "2672", "2146", "1744", "1433", + "1189", "996", "842", "717", "615", "532", "462", "404", "356", + "315", "280", "250", "224", "202", "183", "166", "151", "138", + "127", "116", "107", "99", "92", "86", "80", "74", "70", "65", "61", + "58", "54", "51", "48", "46", "43", "41", "39", "37", "36", "34", + "32", "31", "30", "29", "27", "26", "25", "24", "23", "23", "22", + "21", "20", "20", "19", "18", "18", "17", "17", "16", "16", "15" + }, { + "8839202025813295923132694443541993309220", + "911611499784863252820288596270", "928640961450376817534853", + "94017030509441723821", "131792686685970629", "954783483196511", + "20676214073400", "963660189823", "78428929840", "9696237956", + "1653495486", "363032624", "97562430", "30899570", "11203842", + "4547110", "2029216", "981661", "508897", "280051", "162331", + "98469", "62168", "40663", "27446", "19053", "13563", "9877", + "7341", "5558", "4280", "3347", "2654", "2132", "1733", "1424", + "1182", "990", "837", "713", "612", "529", "460", "402", "354", + "313", "279", "249", "223", "201", "182", "165", "150", "138", + "126", "116", "107", "99", "92", "85", "79", "74", "69", "65", "61", + "57", "54", "51", "48", "46", "43", "41", "39", "37", "36", "34", + "32", "31", "30", "28", "27", "26", "25", "24", "23", "23", "22", + "21", "20", "20", "19", "18", "18", "17", "17", "16", "16", "15" + }, { + "8338442683973420410660145045849076963795", + "872596990706967613912664152945", "896707843885562730147307", + "91315073695274540969", "128539440806486007", "934129001105825", + "20278149285734", "946946589774", "77191347471", "9555892093", + "1631391010", "358523975", "96431070", "30563524", "11089126", + "4503126", "2010616", "973111", "504675", "277833", "161100", + "97754", "61734", "40390", "27269", "18934", "13482", "9819", + "7300", "5528", "4257", "3330", "2641", "2122", "1725", "1417", + "1177", "986", "833", "710", "609", "527", "458", "401", "353", + "312", "278", "248", "222", "200", "181", "165", "150", "137", + "126", "116", "107", "99", "91", "85", "79", "74", "69", "65", "61", + "57", "54", "51", "48", "46", "43", "41", "39", "37", "35", "34", + "32", "31", "30", "28", "27", "26", "25", "24", "23", "22", "22", + "21", "20", "20", "19", "18", "18", "17", "17", "16", "16", "15" + }, { + "9122818552483814953977703257848970704164", + "933462289569511464780529972314", "946405863353935713909178", + "95513446972056321834", "133588658082928446", + "966158521967027", "20895030642048", "972833934108", + "79107381638", "9773098125", "1665590516", "365497822", + "98180628", "31083090", "11266459", "4571108", "2039360", + "986323", "511198", "281260", "163001", "98858", + "62404", "40811", "27543", "19117", "13608", "9908", + "7363", "5575", "4292", "3356", "2661", "2138", + "1737", "1428", "1185", "993", "839", "714", "613", + "530", "461", "403", "355", "314", "279", "249", + "224", "202", "182", "165", "151", "138", "126", + "116", "107", "99", "92", "85", "79", "74", "69", + "65", "61", "57", "54", "51", "48", "46", "43", + "41", "39", "37", "36", "34", "32", "31", "30", + "28", "27", "26", "25", "24", "23", "23", "22", + "21", "20", "20", "19", "18", "18", "17", "17", + "16", "16", "15" + }, { + "9151329724083804100369546479681933027521", + "935649419557299174433860420387", "948179413831316112751907", + "95662582675170358900", "133767426788182384", + "967289728859610", "20916775466497", "973745045600", + "79174731802", "9780725058", "1666790321", "365742295", + "98241919", "31101281", "11272665", "4573486", "2040365", + "986785", "511426", "281380", "163067", "98897", + "62427", "40826", "27552", "19124", "13612", "9911", + "7366", "5576", "4294", "3357", "2662", "2138", + "1738", "1428", "1185", "993", "839", "715", "613", + "530", "461", "403", "355", "314", "279", "249", + "224", "202", "182", "165", "151", "138", "126", + "116", "107", "99", "92", "85", "79", "74", "69", + "65", "61", "57", "54", "51", "48", "46", "43", + "41", "39", "37", "36", "34", "32", "31", "30", + "28", "27", "26", "25", "24", "23", "23", "22", + "21", "20", "20", "19", "18", "18", "17", "17", + "16", "16", "15" + }, { + "6839396355168045468586008471269923213531", + "752078770083218822016981965090", "796178899357307807726034", + "82700643015444840424", "118072966296549115", + "867224751770392", "18981881485802", "892288574037", + "73130030771", "9093989389", "1558462688", "343617470", + "92683740", "29448679", "10708016", "4356820", "1948676", + "944610", "490587", "270425", "156989", "95362", + "60284", "39477", "26675", "18536", "13208", "9627", + "7161", "5426", "4181", "3272", "2596", "2087", + "1697", "1395", "1159", "971", "821", "700", "601", + "520", "452", "396", "348", "308", "274", "245", + "220", "198", "179", "163", "148", "136", "124", + "114", "106", "98", "91", "84", "78", "73", "68", + "64", "60", "57", "53", "50", "48", "45", "43", + "41", "39", "37", "35", "34", "32", "31", "29", + "28", "27", "26", "25", "24", "23", "22", "22", + "21", "20", "19", "19", "18", "18", "17", "17", + "16", "16", "15" + }, { + "4788090721380022347683138981782307670424", + "575601315594614059890185238256", "642831903229558719812840", + "69196031110028430211", "101340693763170691", + "758683936560287", "16854690815260", "801767985909", + "66353290503", "8318415180", "1435359033", "318340531", + "86304307", "27544217", "10054988", "4105446", "1841996", + "895414", "466223", "257591", "149855", "91205", + "57758", "37886", "25639", "17842", "12730", "9290", + "6918", "5248", "4048", "3170", "2518", "2026", + "1649", "1357", "1128", "946", "800", "682", "586", + "507", "441", "387", "341", "302", "268", "240", + "215", "194", "176", "160", "146", "133", "122", + "112", "104", "96", "89", "83", "77", "72", "67", + "63", "59", "56", "53", "50", "47", "45", "42", + "40", "38", "36", "35", "33", "32", "30", "29", + "28", "27", "26", "25", "24", "23", "22", "21", + "21", "20", "19", "19", "18", "17", "17", "16", + "16", "15", "15" + } + }; + + if ((e = mp_init_multi(&a, &c, &r, NULL)) != MP_OKAY) { + return EXIT_FAILURE; + } +#ifdef MP_8BIT + for (i = 0; i < 1; i++) { +#else + for (i = 0; i < 10; i++) { +#endif + mp_read_radix(&a, input[i], 64); +#ifdef MP_8BIT + for (j = 3; j < 10; j++) { +#else + for (j = 3; j < 100; j++) { +#endif + mp_root_u32(&a, (uint32_t)j, &c); + mp_read_radix(&r, root[i][j-3], 10); + if (mp_cmp(&r, &c) != MP_EQ) { + fprintf(stderr, "mp_root_u32 failed at input #%d, root #%d\n", i, j); + goto LTM_ERR; + } + } + } + mp_clear_multi(&a, &c, &r, NULL); + return EXIT_SUCCESS; +LTM_ERR: + mp_clear_multi(&a, &c, &r, NULL); + return EXIT_FAILURE; +} + +static int test_s_mp_balance_mul(void) +{ + mp_int a, b, c; + mp_err e = MP_OKAY; + + const char *na = + "4b0I5uMTujCysw+1OOuOyH2FX2WymrHUqi8BBDb7XpkV/4i7vXTbEYUy/kdIfCKu5jT5JEqYkdmnn3jAYo8XShPzNLxZx9yoLjxYRyptSuOI2B1DspvbIVYXY12sxPZ4/HCJ4Usm2MU5lO/006KnDMxuxiv1rm6YZJZ0eZU"; + const char *nb = "3x9vs0yVi4hIq7poAeVcggC3WoRt0zRLKO"; + const char *nc = + "HzrSq9WVt1jDTVlwUxSKqxctu2GVD+N8+SVGaPFRqdxyld6IxDBbj27BPJzYUdR96k3sWpkO8XnDBvupGPnehpQe4KlO/KmN1PjFov/UTZYM+LYzkFcBPyV6hkkL8ePC1rlFLAHzgJMBCXVp4mRqtkQrDsZXXlcqlbTFu69wF6zDEysiX2cAtn/kP9ldblJiwYPCD8hG"; + + if ((e = mp_init_multi(&a, &b, &c, NULL)) != MP_OKAY) { + goto LTM_ERR; + } + + if ((e = mp_read_radix(&a, na, 64)) != MP_OKAY) { + goto LTM_ERR; + } + if ((e = mp_read_radix(&b, nb, 64)) != MP_OKAY) { + goto LTM_ERR; + } + + if ((e = s_mp_balance_mul(&a, &b, &c)) != MP_OKAY) { + goto LTM_ERR; + } + + if ((e = mp_read_radix(&b, nc, 64)) != MP_OKAY) { + goto LTM_ERR; + } + + if (mp_cmp(&b, &c) != MP_EQ) { + goto LTM_ERR; + } + + mp_clear_multi(&a, &b, &c, NULL); + return EXIT_SUCCESS; +LTM_ERR: + mp_clear_multi(&a, &b, &c, NULL); + return EXIT_FAILURE; +} + +#define s_mp_mul(a, b, c) s_mp_mul_digs(a, b, c, (a)->used + (b)->used + 1) +static int test_s_mp_karatsuba_mul(void) +{ + mp_int a, b, c, d; + int size, err; + + if ((err = mp_init_multi(&a, &b, &c, &d, NULL)) != MP_OKAY) { + goto LTM_ERR; + } + for (size = MP_KARATSUBA_MUL_CUTOFF; size < MP_KARATSUBA_MUL_CUTOFF + 20; size++) { + if ((err = mp_rand(&a, size)) != MP_OKAY) { + goto LTM_ERR; + } + if ((err = mp_rand(&b, size)) != MP_OKAY) { + goto LTM_ERR; + } + if ((err = s_mp_karatsuba_mul(&a, &b, &c)) != MP_OKAY) { + goto LTM_ERR; + } + if ((err = s_mp_mul(&a,&b,&d)) != MP_OKAY) { + goto LTM_ERR; + } + if (mp_cmp(&c, &d) != MP_EQ) { + fprintf(stderr, "Karatsuba multiplication failed at size %d\n", size); + goto LTM_ERR; + } + } + + mp_clear_multi(&a, &b, &c, &d, NULL); + return EXIT_SUCCESS; +LTM_ERR: + mp_clear_multi(&a, &b, &c, &d, NULL); + return EXIT_FAILURE; +} + +static int test_s_mp_karatsuba_sqr(void) +{ + mp_int a, b, c; + int size, err; + + if ((err = mp_init_multi(&a, &b, &c, NULL)) != MP_OKAY) { + goto LTM_ERR; + } + for (size = MP_KARATSUBA_SQR_CUTOFF; size < MP_KARATSUBA_SQR_CUTOFF + 20; size++) { + if ((err = mp_rand(&a, size)) != MP_OKAY) { + goto LTM_ERR; + } + if ((err = s_mp_karatsuba_sqr(&a, &b)) != MP_OKAY) { + goto LTM_ERR; + } + if ((err = s_mp_sqr(&a, &c)) != MP_OKAY) { + goto LTM_ERR; + } + if (mp_cmp(&b, &c) != MP_EQ) { + fprintf(stderr, "Karatsuba squaring failed at size %d\n", size); + goto LTM_ERR; + } + } + + mp_clear_multi(&a, &b, &c, NULL); + return EXIT_SUCCESS; +LTM_ERR: + mp_clear_multi(&a, &b, &c, NULL); + return EXIT_FAILURE; +} + +static int test_s_mp_toom_mul(void) +{ + mp_int a, b, c, d; + int size, err; + +#if (MP_DIGIT_BIT == 60) + int tc_cutoff; +#endif + + if ((err = mp_init_multi(&a, &b, &c, &d, NULL)) != MP_OKAY) { + goto LTM_ERR; + } + /* This number construction is limb-size specific */ +#if (MP_DIGIT_BIT == 60) + if ((err = mp_rand(&a, 1196)) != MP_OKAY) { + goto LTM_ERR; + } + if ((err = mp_mul_2d(&a,71787 - mp_count_bits(&a), &a)) != MP_OKAY) { + goto LTM_ERR; + } + + if ((err = mp_rand(&b, 1338)) != MP_OKAY) { + goto LTM_ERR; + } + if ((err = mp_mul_2d(&b, 80318 - mp_count_bits(&b), &b)) != MP_OKAY) { + goto LTM_ERR; + } + if ((err = mp_mul_2d(&b, 6310, &b)) != MP_OKAY) { + goto LTM_ERR; + } + if ((err = mp_2expt(&c, 99000 - 1000)) != MP_OKAY) { + goto LTM_ERR; + } + if ((err = mp_add(&b, &c, &b)) != MP_OKAY) { + goto LTM_ERR; + } + + tc_cutoff = TOOM_MUL_CUTOFF; + TOOM_MUL_CUTOFF = INT_MAX; + if ((err = mp_mul(&a, &b, &c)) != MP_OKAY) { + goto LTM_ERR; + } + TOOM_MUL_CUTOFF = tc_cutoff; + if ((err = mp_mul(&a, &b, &d)) != MP_OKAY) { + goto LTM_ERR; + } + if (mp_cmp(&c, &d) != MP_EQ) { + fprintf(stderr, "Toom-Cook 3-way multiplication failed for edgecase f1 * f2\n"); + goto LTM_ERR; + } +#endif + + for (size = MP_TOOM_MUL_CUTOFF; size < MP_TOOM_MUL_CUTOFF + 20; size++) { + if ((err = mp_rand(&a, size)) != MP_OKAY) { + goto LTM_ERR; + } + if ((err = mp_rand(&b, size)) != MP_OKAY) { + goto LTM_ERR; + } + if ((err = s_mp_toom_mul(&a, &b, &c)) != MP_OKAY) { + goto LTM_ERR; + } + if ((err = s_mp_mul(&a,&b,&d)) != MP_OKAY) { + goto LTM_ERR; + } + if (mp_cmp(&c, &d) != MP_EQ) { + fprintf(stderr, "Toom-Cook 3-way multiplication failed at size %d\n", size); + goto LTM_ERR; + } + } + + mp_clear_multi(&a, &b, &c, &d, NULL); + return EXIT_SUCCESS; +LTM_ERR: + mp_clear_multi(&a, &b, &c, &d, NULL); + return EXIT_FAILURE; +} + +static int test_s_mp_toom_sqr(void) +{ + mp_int a, b, c; + int size, err; + + if ((err = mp_init_multi(&a, &b, &c, NULL)) != MP_OKAY) { + goto LTM_ERR; + } + for (size = MP_TOOM_SQR_CUTOFF; size < MP_TOOM_SQR_CUTOFF + 20; size++) { + if ((err = mp_rand(&a, size)) != MP_OKAY) { + goto LTM_ERR; + } + if ((err = s_mp_toom_sqr(&a, &b)) != MP_OKAY) { + goto LTM_ERR; + } + if ((err = s_mp_sqr(&a, &c)) != MP_OKAY) { + goto LTM_ERR; + } + if (mp_cmp(&b, &c) != MP_EQ) { + fprintf(stderr, "Toom-Cook 3-way squaring failed at size %d\n", size); + goto LTM_ERR; + } + } + + mp_clear_multi(&a, &b, &c, NULL); + return EXIT_SUCCESS; +LTM_ERR: + mp_clear_multi(&a, &b, &c, NULL); + return EXIT_FAILURE; +} + +static int test_mp_read_write_ubin(void) +{ + mp_int a, b, c; + int err; + size_t size, len; + unsigned char *buf = NULL; + + if ((err = mp_init_multi(&a, &b, &c, NULL)) != MP_OKAY) { + goto LTM_ERR; + } + + if ((err = mp_rand(&a, 15)) != MP_OKAY) goto LTM_ERR; + if ((err = mp_neg(&a, &b)) != MP_OKAY) goto LTM_ERR; + + size = mp_ubin_size(&a); + printf("mp_to_ubin_size %zu\n", size); + buf = malloc(sizeof(*buf) * size); + if (buf == NULL) { + fprintf(stderr, "test_read_write_binaries (u) failed to allocate %zu bytes\n", + sizeof(*buf) * size); + goto LTM_ERR; + } + + if ((err = mp_to_ubin(&a, buf, size, &len)) != MP_OKAY) goto LTM_ERR; + printf("mp_to_ubin len = %zu\n", len); + + if ((err = mp_from_ubin(&c, buf, len)) != MP_OKAY) goto LTM_ERR; + + if (mp_cmp(&a, &c) != MP_EQ) { + fprintf(stderr, "to/from ubin cycle failed\n"); + goto LTM_ERR; + } + free(buf); + mp_clear_multi(&a, &b, &c, NULL); + return EXIT_SUCCESS; +LTM_ERR: + free(buf); + mp_clear_multi(&a, &b, &c, NULL); + return EXIT_FAILURE; +} + +static int test_mp_read_write_sbin(void) +{ + mp_int a, b, c; + int err; + size_t size, len; + unsigned char *buf = NULL; + + if ((err = mp_init_multi(&a, &b, &c, NULL)) != MP_OKAY) { + goto LTM_ERR; + } + + if ((err = mp_rand(&a, 15)) != MP_OKAY) goto LTM_ERR; + if ((err = mp_neg(&a, &b)) != MP_OKAY) goto LTM_ERR; + + size = mp_sbin_size(&a); + printf("mp_to_sbin_size %zu\n", size); + buf = malloc(sizeof(*buf) * size); + if (buf == NULL) { + fprintf(stderr, "test_read_write_binaries (s) failed to allocate %zu bytes\n", + sizeof(*buf) * size); + goto LTM_ERR; + } + + if ((err = mp_to_sbin(&b, buf, size, &len)) != MP_OKAY) goto LTM_ERR; + printf("mp_to_sbin len = %zu\n", len); + + if ((err = mp_from_sbin(&c, buf, len)) != MP_OKAY) goto LTM_ERR; + + if (mp_cmp(&b, &c) != MP_EQ) { + fprintf(stderr, "to/from ubin cycle failed\n"); + goto LTM_ERR; + } + + free(buf); + mp_clear_multi(&a, &b, &c, NULL); + return EXIT_SUCCESS; +LTM_ERR: + free(buf); + mp_clear_multi(&a, &b, &c, NULL); + return EXIT_FAILURE; +} + +static int test_mp_pack_unpack(void) +{ + mp_int a, b; + int err; + size_t written, count; + unsigned char *buf = NULL; + + mp_order order = MP_LSB_FIRST; + mp_endian endianess = MP_NATIVE_ENDIAN; + + if ((err = mp_init_multi(&a, &b, NULL)) != MP_OKAY) goto LTM_ERR; + if ((err = mp_rand(&a, 15)) != MP_OKAY) goto LTM_ERR; + + count = mp_pack_count(&a, 0, 1); + + buf = malloc(count); + if (buf == NULL) { + fprintf(stderr, "test_pack_unpack failed to allocate\n"); + goto LTM_ERR; + } + + if ((err = mp_pack((void *)buf, count, &written, order, 1, + endianess, 0, &a)) != MP_OKAY) goto LTM_ERR; + if ((err = mp_unpack(&b, count, order, 1, + endianess, 0, (const void *)buf)) != MP_OKAY) goto LTM_ERR; + + if (mp_cmp(&a, &b) != MP_EQ) { + fprintf(stderr, "pack/unpack cycle failed\n"); + goto LTM_ERR; + } + + free(buf); + mp_clear_multi(&a, &b, NULL); + return EXIT_SUCCESS; +LTM_ERR: + free(buf); + mp_clear_multi(&a, &b, NULL); + return EXIT_FAILURE; +} + +static int unit_tests(int argc, char **argv) +{ + static const struct { + const char *name; + int (*fn)(void); + } test[] = { +#define T0(n) { #n, test_##n } +#define T1(n, o) { #n, MP_HAS(o) ? test_##n : NULL } +#define T2(n, o1, o2) { #n, MP_HAS(o1) && MP_HAS(o2) ? test_##n : NULL } + T0(feature_detection), + T0(trivial_stuff), + T2(mp_get_set_i32, MP_GET_I32, MP_GET_MAG_U32), + T2(mp_get_set_i64, MP_GET_I64, MP_GET_MAG_U64), + T1(mp_and, MP_AND), + T1(mp_cnt_lsb, MP_CNT_LSB), + T1(mp_complement, MP_COMPLEMENT), + T1(mp_decr, MP_DECR), + T1(mp_div_3, MP_DIV_3), + T1(mp_dr_reduce, MP_DR_REDUCE), + T2(mp_pack_unpack,MP_PACK, MP_UNPACK), + T2(mp_fread_fwrite, MP_FREAD, MP_FWRITE), + T1(mp_get_u32, MP_GET_I32), + T1(mp_get_u64, MP_GET_I64), + T1(mp_get_ul, MP_GET_L), + T1(mp_log_u32, MP_LOG_U32), + T1(mp_incr, MP_INCR), + T1(mp_invmod, MP_INVMOD), + T1(mp_is_square, MP_IS_SQUARE), + T1(mp_kronecker, MP_KRONECKER), + T1(mp_montgomery_reduce, MP_MONTGOMERY_REDUCE), + T1(mp_root_u32, MP_ROOT_U32), + T1(mp_or, MP_OR), + T1(mp_prime_is_prime, MP_PRIME_IS_PRIME), + T1(mp_prime_next_prime, MP_PRIME_NEXT_PRIME), + T1(mp_prime_rand, MP_PRIME_RAND), + T1(mp_rand, MP_RAND), + T1(mp_read_radix, MP_READ_RADIX), + T1(mp_read_write_ubin, MP_TO_UBIN), + T1(mp_read_write_sbin, MP_TO_SBIN), + T1(mp_reduce_2k, MP_REDUCE_2K), + T1(mp_reduce_2k_l, MP_REDUCE_2K_L), +#if defined(__STDC_IEC_559__) || defined(__GCC_IEC_559) + T1(mp_set_double, MP_SET_DOUBLE), +#endif + T1(mp_signed_rsh, MP_SIGNED_RSH), + T1(mp_sqrt, MP_SQRT), + T1(mp_sqrtmod_prime, MP_SQRTMOD_PRIME), + T1(mp_xor, MP_XOR), + T1(s_mp_balance_mul, S_MP_BALANCE_MUL), + T1(s_mp_karatsuba_mul, S_MP_KARATSUBA_MUL), + T1(s_mp_karatsuba_sqr, S_MP_KARATSUBA_SQR), + T1(s_mp_toom_mul, S_MP_TOOM_MUL), + T1(s_mp_toom_sqr, S_MP_TOOM_SQR) +#undef T2 +#undef T1 + }; + unsigned long i, ok, fail, nop; + uint64_t t; + int j; + + ok = fail = nop = 0; + + t = (uint64_t)time(NULL); + printf("SEED: 0x%"PRIx64"\n\n", t); + s_mp_rand_jenkins_init(t); + mp_rand_source(s_mp_rand_jenkins); + + for (i = 0; i < sizeof(test) / sizeof(test[0]); ++i) { + if (argc > 1) { + for (j = 1; j < argc; ++j) { + if (strstr(test[i].name, argv[j]) != NULL) { + break; + } + } + if (j == argc) continue; + } + printf("TEST %s\n\n", test[i].name); + if (test[i].fn == NULL) { + nop++; + printf("NOP %s\n\n", test[i].name); + } else if (test[i].fn() == EXIT_SUCCESS) { + ok++; + printf("\n\n"); + } else { + fail++; + printf("\n\nFAIL %s\n\n", test[i].name); + } + } + printf("Tests OK/NOP/FAIL: %lu/%lu/%lu\n", ok, nop, fail); + + if (fail != 0) return EXIT_FAILURE; + else return EXIT_SUCCESS; +} + +int main(int argc, char **argv) +{ + print_header(); + + return unit_tests(argc, argv); +} diff --git a/libtommath/helper.pl b/libtommath/helper.pl index e60c1a7..c624b7c 100755 --- a/libtommath/helper.pl +++ b/libtommath/helper.pl @@ -51,7 +51,7 @@ sub check_source { push @{$troubles->{tab}}, $lineno if $l =~ /\t/ && basename($file) !~ /^makefile/i; push @{$troubles->{non_ascii_char}}, $lineno if $l =~ /[^[:ascii:]]/; push @{$troubles->{cpp_comment}}, $lineno if $file =~ /\.(c|h)$/ && ($l =~ /\s\/\// || $l =~ /\/\/\s/); - # we prefer using XMALLOC, XFREE, XREALLOC, XCALLOC ... + # we prefer using MP_MALLOC, MP_FREE, MP_REALLOC, MP_CALLOC ... push @{$troubles->{unwanted_malloc}}, $lineno if $file =~ /^[^\/]+\.c$/ && $l =~ /\bmalloc\s*\(/; push @{$troubles->{unwanted_realloc}}, $lineno if $file =~ /^[^\/]+\.c$/ && $l =~ /\brealloc\s*\(/; push @{$troubles->{unwanted_calloc}}, $lineno if $file =~ /^[^\/]+\.c$/ && $l =~ /\bcalloc\s*\(/; diff --git a/libtommath/makefile_include.mk b/libtommath/makefile_include.mk index 7b025e8..452d37d 100644 --- a/libtommath/makefile_include.mk +++ b/libtommath/makefile_include.mk @@ -116,10 +116,10 @@ endif # adjust coverage set ifneq ($(filter $(_ARCH), i386 i686 x86_64 amd64 ia64),) - COVERAGE = test_standalone timing + COVERAGE = test timing COVERAGE_APP = ./test && ./timing else - COVERAGE = test_standalone + COVERAGE = test COVERAGE_APP = ./test endif @@ -135,6 +135,10 @@ LIBPATH ?= $(PREFIX)/lib INCPATH ?= $(PREFIX)/include DATAPATH ?= $(PREFIX)/share/doc/libtommath/pdf +# build & run test-suite +check: test + ./test + #make the code coverage of the library # coverage: LTM_CFLAGS += -fprofile-arcs -ftest-coverage -DTIMING_NO_LOGS diff --git a/libtommath/testme.sh b/libtommath/testme.sh new file mode 100755 index 0000000..40fa32d --- /dev/null +++ b/libtommath/testme.sh @@ -0,0 +1,394 @@ +#!/bin/bash +# +# return values of this script are: +# 0 success +# 128 a test failed +# >0 the number of timed-out tests +# 255 parsing of parameters failed + +set -e + +if [ -f /proc/cpuinfo ] +then + MAKE_JOBS=$(( ($(cat /proc/cpuinfo | grep -E '^processor[[:space:]]*:' | tail -n -1 | cut -d':' -f2) + 1) * 2 + 1 )) +else + MAKE_JOBS=8 +fi + +ret=0 +TEST_CFLAGS="" + +_help() +{ + echo "Usage options for $(basename $0) [--with-cc=arg [other options]]" + echo + echo "Executing this script without any parameter will only run the default" + echo "configuration that has automatically been determined for the" + echo "architecture you're running." + echo + echo " --with-cc=* The compiler(s) to use for the tests" + echo " This is an option that will be iterated." + echo + echo " --test-vs-mtest=* Run test vs. mtest for '*' operations." + echo " Only the first of each options will be" + echo " taken into account." + echo + echo "To be able to specify options a compiler has to be given with" + echo "the option --with-cc=compilername" + echo "All other options will be tested with all MP_xBIT configurations." + echo + echo " --with-{m64,m32,mx32} The architecture(s) to build and test" + echo " for, e.g. --with-mx32." + echo " This is an option that will be iterated," + echo " multiple selections are possible." + echo " The mx32 architecture is not supported" + echo " by clang and will not be executed." + echo + echo " --cflags=* Give an option to the compiler," + echo " e.g. --cflags=-g" + echo " This is an option that will always be" + echo " passed as parameter to CC." + echo + echo " --make-option=* Give an option to make," + echo " e.g. --make-option=\"-f makefile.shared\"" + echo " This is an option that will always be" + echo " passed as parameter to make." + echo + echo " --with-low-mp Also build&run tests with -DMP_{8,16,32}BIT." + echo + echo " --mtest-real-rand Use real random data when running mtest." + echo + echo " --with-valgrind" + echo " --with-valgrind=* Run in valgrind (slow!)." + echo + echo " --with-travis-valgrind Run with valgrind on Travis on specific branches." + echo + echo " --valgrind-options Additional Valgrind options" + echo " Some of the options like e.g.:" + echo " --track-origins=yes add a lot of extra" + echo " runtime and may trigger the 30 minutes" + echo " timeout." + echo + echo "Godmode:" + echo + echo " --all Choose all architectures and gcc and clang" + echo " as compilers but does not run valgrind." + echo + echo " --format Runs the various source-code formatters" + echo " and generators and checks if the sources" + echo " are clean." + echo + echo " -h" + echo " --help This message" + echo + echo " -v" + echo " --version Prints the version. It is just the number" + echo " of git commits to this file, no deeper" + echo " meaning attached" + exit 0 +} + +_die() +{ + echo "error $2 while $1" + if [ "$2" != "124" ] + then + exit 128 + else + echo "assuming timeout while running test - continue" + local _tail="" + which tail >/dev/null && _tail="tail -n 1 test_${suffix}.log" && \ + echo "last line of test_"${suffix}".log was:" && $_tail && echo "" + ret=$(( $ret + 1 )) + fi +} + +_make() +{ + echo -ne " Compile $1 $2" + suffix=$(echo ${1}${2} | tr ' ' '_') + CC="$1" CFLAGS="$2 $TEST_CFLAGS" make -j$MAKE_JOBS $3 $MAKE_OPTIONS > /dev/null 2>gcc_errors_${suffix}.log + errcnt=$(wc -l < gcc_errors_${suffix}.log) + if [[ ${errcnt} -gt 1 ]]; then + echo " failed" + cat gcc_errors_${suffix}.log + exit 128 + fi +} + + +_runtest() +{ + make clean > /dev/null + local _timeout="" + which timeout >/dev/null && _timeout="timeout --foreground 90" + if [[ "$MAKE_OPTIONS" =~ "tune" ]] + then + # "make tune" will run "tune_it.sh" automatically, hence "autotune", but it cannot + # get switched off without some effort, so we just let it run twice for testing purposes + echo -e "\rRun autotune $1 $2" + _make "$1" "$2" "" + $_timeout $TUNE_CMD > test_${suffix}.log || _die "running autotune" $? + else + _make "$1" "$2" "test" + echo -e "\rRun test $1 $2" + $_timeout ./test > test_${suffix}.log || _die "running tests" $? + fi +} + +# This is not much more of a C&P of _runtest with a different timeout +# and the additional valgrind call. +# TODO: merge +_runvalgrind() +{ + make clean > /dev/null + local _timeout="" + # 30 minutes? Yes. Had it at 20 minutes and the Valgrind run needed over 25 minutes. + # A bit too close for comfort. + which timeout >/dev/null && _timeout="timeout --foreground 1800" +echo "MAKE_OPTIONS = \"$MAKE_OPTIONS\"" + if [[ "$MAKE_OPTIONS" =~ "tune" ]] + then +echo "autotune branch" + _make "$1" "$2" "" + # The shell used for /bin/sh is DASH 0.5.7-4ubuntu1 on the author's machine which fails valgrind, so + # we just run on instance of etc/tune with the same options as in etc/tune_it.sh + echo -e "\rRun etc/tune $1 $2 once inside valgrind" + $_timeout $VALGRIND_BIN $VALGRIND_OPTS $TUNE_CMD > test_${suffix}.log || _die "running etc/tune" $? + else + _make "$1" "$2" "test" + echo -e "\rRun test $1 $2 inside valgrind" + $_timeout $VALGRIND_BIN $VALGRIND_OPTS ./test > test_${suffix}.log || _die "running tests" $? + fi +} + + +_banner() +{ + echo "uname="$(uname -a) + [[ "$#" != "0" ]] && (echo $1=$($1 -dumpversion)) || true +} + +_exit() +{ + if [ "$ret" == "0" ] + then + echo "Tests successful" + else + echo "$ret tests timed out" + fi + + exit $ret +} + +ARCHFLAGS="" +COMPILERS="" +CFLAGS="" +WITH_LOW_MP="" +TEST_VS_MTEST="" +MTEST_RAND="" +# timed with an AMD A8-6600K +# 25 minutes +#VALGRIND_OPTS=" --track-origins=yes --leak-check=full --show-leak-kinds=all --error-exitcode=1 " +# 9 minutes (14 minutes with --test-vs-mtest=333333 --mtest-real-rand) +VALGRIND_OPTS=" --leak-check=full --show-leak-kinds=all --error-exitcode=1 " +#VALGRIND_OPTS="" +VALGRIND_BIN="" +CHECK_FORMAT="" +TUNE_CMD="./etc/tune -t -r 10 -L 3" + +alive_pid=0 + +function kill_alive() { + disown $alive_pid || true + kill $alive_pid 2>/dev/null +} + +function start_alive_printing() { + [ "$alive_pid" == "0" ] || return 0; + for i in `seq 1 10` ; do sleep 300 && echo "Tests still in Progress..."; done & + alive_pid=$! + trap kill_alive EXIT +} + +while [ $# -gt 0 ]; +do + case $1 in + "--with-m64" | "--with-m32" | "--with-mx32") + ARCHFLAGS="$ARCHFLAGS ${1:6}" + ;; + --with-cc=*) + COMPILERS="$COMPILERS ${1#*=}" + ;; + --cflags=*) + CFLAGS="$CFLAGS ${1#*=}" + ;; + --valgrind-options=*) + VALGRIND_OPTS="$VALGRIND_OPTS ${1#*=}" + ;; + --with-valgrind*) + if [[ ${1#*d} != "" ]] + then + VALGRIND_BIN="${1#*=}" + else + VALGRIND_BIN="valgrind" + fi + start_alive_printing + ;; + --with-travis-valgrind*) + if [[ ("$TRAVIS_BRANCH" == "develop" && "$TRAVIS_PULL_REQUEST" == "false") || "$TRAVIS_BRANCH" == *"valgrind"* || "$TRAVIS_COMMIT_MESSAGE" == *"valgrind"* ]] + then + if [[ ${1#*d} != "" ]] + then + VALGRIND_BIN="${1#*=}" + else + VALGRIND_BIN="valgrind" + fi + start_alive_printing + fi + ;; + --make-option=*) + MAKE_OPTIONS="$MAKE_OPTIONS ${1#*=}" + ;; + --with-low-mp) + WITH_LOW_MP="1" + ;; + --test-vs-mtest=*) + TEST_VS_MTEST="${1#*=}" + if ! [ "$TEST_VS_MTEST" -eq "$TEST_VS_MTEST" ] 2> /dev/null + then + echo "--test-vs-mtest Parameter has to be int" + exit 255 + fi + start_alive_printing + ;; + --mtest-real-rand) + MTEST_RAND="-DLTM_MTEST_REAL_RAND" + ;; + --format) + CHECK_FORMAT="1" + ;; + --all) + COMPILERS="gcc clang" + ARCHFLAGS="-m64 -m32 -mx32" + ;; + --help | -h) + _help + ;; + --version | -v) + echo $(git rev-list HEAD --count -- testme.sh) || echo "Unknown. Please run in original libtommath git repository." + exit 0 + ;; + *) + echo "Ignoring option ${1}" + ;; + esac + shift +done + +function _check_git() { + git update-index --refresh >/dev/null || true + git diff-index --quiet HEAD -- . || ( echo "FAILURE: $*" && exit 1 ) +} + +if [[ "$CHECK_FORMAT" == "1" ]] +then + make astyle + _check_git "make astyle" + perl helper.pl --update-files + _check_git "helper.pl --update-files" + perl helper.pl --check-all + _check_git "helper.pl --check-all" + exit $? +fi + +[[ "$VALGRIND_BIN" == "" ]] && VALGRIND_OPTS="" + +# default to CC environment variable if no compiler is defined but some other options +if [[ "$COMPILERS" == "" ]] && [[ "$ARCHFLAGS$MAKE_OPTIONS$CFLAGS" != "" ]] +then + COMPILERS="$CC" +# default to CC environment variable and run only default config if no option is given +elif [[ "$COMPILERS" == "" ]] +then + _banner "$CC" + if [[ "$VALGRIND_BIN" != "" ]] + then + _runvalgrind "$CC" "" + else + _runtest "$CC" "" + fi + _exit +fi + + +archflags=( $ARCHFLAGS ) +compilers=( $COMPILERS ) + +# choosing a compiler without specifying an architecture will use the default architecture +if [ "${#archflags[@]}" == "0" ] +then + archflags[0]=" " +fi + +_banner + +if [[ "$TEST_VS_MTEST" != "" ]] +then + make clean > /dev/null + _make "${compilers[0]} ${archflags[0]}" "$CFLAGS" "mtest_opponent" + echo + _make "gcc" "$MTEST_RAND" "mtest" + echo + echo "Run test vs. mtest for $TEST_VS_MTEST iterations" + _timeout="" + which timeout >/dev/null && _timeout="timeout --foreground 1800" + $_timeout ./mtest/mtest $TEST_VS_MTEST | $VALGRIND_BIN $VALGRIND_OPTS ./mtest_opponent > valgrind_test.log 2> test_vs_mtest_err.log + retval=$? + head -n 5 valgrind_test.log + tail -n 2 valgrind_test.log + exit $retval +fi + +for i in "${compilers[@]}" +do + if [ -z "$(which $i)" ] + then + echo "Skipped compiler $i, file not found" + continue + fi + compiler_version=$(echo "$i="$($i -dumpversion)) + if [ "$compiler_version" == "clang=4.2.1" ] + then + # one of my versions of clang complains about some stuff in stdio.h and stdarg.h ... + TEST_CFLAGS="-Wno-typedef-redefinition" + else + TEST_CFLAGS="" + fi + echo $compiler_version + + for a in "${archflags[@]}" + do + if [[ $(expr "$i" : "clang") -ne 0 && "$a" == "-mx32" ]] + then + echo "clang -mx32 tests skipped" + continue + fi + if [[ "$VALGRIND_BIN" != "" ]] + then + _runvalgrind "$i $a" "$CFLAGS" + [ "$WITH_LOW_MP" != "1" ] && continue + _runvalgrind "$i $a" "-DMP_8BIT $CFLAGS" + _runvalgrind "$i $a" "-DMP_16BIT $CFLAGS" + _runvalgrind "$i $a" "-DMP_32BIT $CFLAGS" + else + _runtest "$i $a" "$CFLAGS" + [ "$WITH_LOW_MP" != "1" ] && continue + _runtest "$i $a" "-DMP_8BIT $CFLAGS" + _runtest "$i $a" "-DMP_16BIT $CFLAGS" + _runtest "$i $a" "-DMP_32BIT $CFLAGS" + fi + done +done + +_exit diff --git a/libtommath/tommath.h b/libtommath/tommath.h index e87bb08..2c4023c 100644 --- a/libtommath/tommath.h +++ b/libtommath/tommath.h @@ -234,13 +234,22 @@ TOOM_SQR_CUTOFF; #if defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 405) # define MP_DEPRECATED(x) __attribute__((deprecated("replaced by " #x))) +#elif defined(_MSC_VER) && _MSC_VER >= 1500 +# define MP_DEPRECATED(x) __declspec(deprecated("replaced by " #x)) +#else +# define MP_DEPRECATED(x) +#endif + +#ifndef MP_NO_DEPRECATED_PRAGMA +#if defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 301) # define PRIVATE_MP_DEPRECATED_PRAGMA(s) _Pragma(#s) # define MP_DEPRECATED_PRAGMA(s) PRIVATE_MP_DEPRECATED_PRAGMA(GCC warning s) #elif defined(_MSC_VER) && _MSC_VER >= 1500 -# define MP_DEPRECATED(x) __declspec(deprecated("replaced by " #x)) # define MP_DEPRECATED_PRAGMA(s) __pragma(message(s)) -#else -# define MP_DEPRECATED(s) +#endif +#endif + +#ifndef MP_DEPRECATED_PRAGMA # define MP_DEPRECATED_PRAGMA(s) #endif -- cgit v0.12 From 0ff0a94010dcb27842e2f78515d4cfd9ae70ed26 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 22 Nov 2019 12:38:13 +0000 Subject: Take over recent commmits on the support/1.x branch --- libtommath/bn_mp_log_u32.c | 8 ++++---- libtommath/bn_mp_set_double.c | 4 ++-- libtommath/bn_s_mp_rand_jenkins.c | 4 ++-- libtommath/demo/test.c | 22 +++++++++++----------- libtommath/tommath.h | 2 +- 5 files changed, 20 insertions(+), 20 deletions(-) diff --git a/libtommath/bn_mp_log_u32.c b/libtommath/bn_mp_log_u32.c index f7bca01..b86d789 100644 --- a/libtommath/bn_mp_log_u32.c +++ b/libtommath/bn_mp_log_u32.c @@ -6,7 +6,7 @@ /* Compute log_{base}(a) */ static mp_word s_pow(mp_word base, mp_word exponent) { - mp_word result = 1uLL; + mp_word result = 1u; while (exponent != 0u) { if ((exponent & 1u) == 1u) { result *= base; @@ -20,8 +20,8 @@ static mp_word s_pow(mp_word base, mp_word exponent) static mp_digit s_digit_ilogb(mp_digit base, mp_digit n) { - mp_word bracket_low = 1uLL, bracket_mid, bracket_high, N; - mp_digit ret, high = 1uL, low = 0uL, mid; + mp_word bracket_low = 1u, bracket_mid, bracket_high, N; + mp_digit ret, high = 1u, low = 0uL, mid; if (n < base) { return 0uL; @@ -40,7 +40,7 @@ static mp_digit s_digit_ilogb(mp_digit base, mp_digit n) bracket_high *= bracket_high; } - while (((mp_digit)(high - low)) > 1uL) { + while (((mp_digit)(high - low)) > 1u) { mid = (low + high) >> 1; bracket_mid = bracket_low * s_pow(base, (mp_word)(mid - low)); diff --git a/libtommath/bn_mp_set_double.c b/libtommath/bn_mp_set_double.c index a42fc70..7f1ab75 100644 --- a/libtommath/bn_mp_set_double.c +++ b/libtommath/bn_mp_set_double.c @@ -16,7 +16,7 @@ mp_err mp_set_double(mp_int *a, double b) cast.dbl = b; exp = (int)((unsigned)(cast.bits >> 52) & 0x7FFu); - frac = (cast.bits & ((1uLL << 52) - 1uLL)) | (1uLL << 52); + frac = (cast.bits & (((uint64_t)1 << 52) - (uint64_t)1)) | ((uint64_t)1 << 52); if (exp == 0x7FF) { /* +-inf, NaN */ return MP_VAL; @@ -30,7 +30,7 @@ mp_err mp_set_double(mp_int *a, double b) return err; } - if (((cast.bits >> 63) != 0uLL) && !MP_IS_ZERO(a)) { + if (((cast.bits >> 63) != 0u) && !MP_IS_ZERO(a)) { a->sign = MP_NEG; } diff --git a/libtommath/bn_s_mp_rand_jenkins.c b/libtommath/bn_s_mp_rand_jenkins.c index da0771c..c64afac 100644 --- a/libtommath/bn_s_mp_rand_jenkins.c +++ b/libtommath/bn_s_mp_rand_jenkins.c @@ -27,10 +27,10 @@ static uint64_t s_rand_jenkins_val(void) void s_mp_rand_jenkins_init(uint64_t seed) { - uint64_t i; + int i; jenkins_x.a = 0xf1ea5eedULL; jenkins_x.b = jenkins_x.c = jenkins_x.d = seed; - for (i = 0uLL; i < 20uLL; ++i) { + for (i = 0; i < 20; ++i) { (void)s_rand_jenkins_val(); } } diff --git a/libtommath/demo/test.c b/libtommath/demo/test.c index 7b29a4c..14b0c58 100644 --- a/libtommath/demo/test.c +++ b/libtommath/demo/test.c @@ -754,7 +754,7 @@ LBL_ERR: static int test_mp_get_u64(void) { - unsigned long long q, r; + uint64_t q, r; int i; mp_int a, b; @@ -762,20 +762,20 @@ static int test_mp_get_u64(void) return EXIT_FAILURE; } - for (i = 0; i < (int)(MP_SIZEOF_BITS(unsigned long long) - 1); ++i) { - r = (1ULL << (i+1)) - 1; + for (i = 0; i < (int)(MP_SIZEOF_BITS(uint64_t) - 1); ++i) { + r = ((uint64_t)1 << (i+1)) - 1; if (!r) - r = ~0ULL; - printf(" r = 0x%llx i = %d\r", r, i); + r = UINT64_MAX; + printf(" r = 0x%" PRIx64 " i = %d\r", r, i); do { mp_set_u64(&a, r); q = mp_get_u64(&a); if (q != r) { - printf("\nmp_get_u64() bad result! 0x%llx != 0x%llx", q, r); + printf("\nmp_get_u64() bad result! 0x%" PRIx64 " != 0x%" PRIx64, q, r); goto LBL_ERR; } r <<= 1; - } while (r != 0uLL); + } while (r != 0u); } mp_clear_multi(&a, &b, NULL); @@ -2313,7 +2313,7 @@ static int test_mp_read_write_ubin(void) size = mp_ubin_size(&a); printf("mp_to_ubin_size %zu\n", size); - buf = malloc(sizeof(*buf) * size); + buf = (unsigned char *)malloc(sizeof(*buf) * size); if (buf == NULL) { fprintf(stderr, "test_read_write_binaries (u) failed to allocate %zu bytes\n", sizeof(*buf) * size); @@ -2354,7 +2354,7 @@ static int test_mp_read_write_sbin(void) size = mp_sbin_size(&a); printf("mp_to_sbin_size %zu\n", size); - buf = malloc(sizeof(*buf) * size); + buf = (unsigned char *)malloc(sizeof(*buf) * size); if (buf == NULL) { fprintf(stderr, "test_read_write_binaries (s) failed to allocate %zu bytes\n", sizeof(*buf) * size); @@ -2395,7 +2395,7 @@ static int test_mp_pack_unpack(void) count = mp_pack_count(&a, 0, 1); - buf = malloc(count); + buf = (unsigned char *)malloc(count); if (buf == NULL) { fprintf(stderr, "test_pack_unpack failed to allocate\n"); goto LTM_ERR; @@ -2483,7 +2483,7 @@ static int unit_tests(int argc, char **argv) ok = fail = nop = 0; t = (uint64_t)time(NULL); - printf("SEED: 0x%"PRIx64"\n\n", t); + printf("SEED: 0x%" PRIx64 "\n\n", t); s_mp_rand_jenkins_init(t); mp_rand_source(s_mp_rand_jenkins); diff --git a/libtommath/tommath.h b/libtommath/tommath.h index 2c4023c..fd28894 100644 --- a/libtommath/tommath.h +++ b/libtommath/tommath.h @@ -200,7 +200,7 @@ TOOM_SQR_CUTOFF; #endif /* size of comba arrays, should be at least 2 * 2**(BITS_PER_WORD - BITS_PER_DIGIT*2) */ -#define PRIVATE_MP_WARRAY (int)(1uLL << (((CHAR_BIT * sizeof(private_mp_word)) - (2 * MP_DIGIT_BIT)) + 1)) +#define PRIVATE_MP_WARRAY (int)(1 << (((CHAR_BIT * (int)sizeof(private_mp_word)) - (2 * MP_DIGIT_BIT)) + 1)) #define MP_WARRAY (MP_DEPRECATED_PRAGMA("MP_WARRAY is an internal macro") PRIVATE_MP_WARRAY) #if defined(__GNUC__) && __GNUC__ >= 4 -- cgit v0.12 From c7157e0bbdcf351b626e52b5b521a8a68d6334d6 Mon Sep 17 00:00:00 2001 From: oehhar Date: Fri, 3 Dec 2021 12:04:40 +0000 Subject: RFE [eb64b1520] http: be tolerant against invalid content encoding header responses --- library/http/http.tcl | 8 ++++++-- library/http/pkgIndex.tcl | 2 +- unix/Makefile.in | 4 ++-- win/Makefile.in | 4 ++-- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/library/http/http.tcl b/library/http/http.tcl index b0f87de..9f8d7ff 100644 --- a/library/http/http.tcl +++ b/library/http/http.tcl @@ -11,7 +11,7 @@ package require Tcl 8.6- # Keep this in sync with pkgIndex.tcl and with the install directories in # Makefiles -package provide http 2.9.5 +package provide http 2.9.6 namespace eval http { # Allow resourcing to not clobber existing data @@ -3457,8 +3457,12 @@ proc http::ContentEncoding {token} { gzip - x-gzip { lappend r gunzip } compress - x-compress { lappend r decompress } identity {} + br { + return -code error\ + "content-encoding \"br\" not implemented" + } default { - return -code error "unsupported content-encoding \"$coding\"" + Log "unknown content-encoding \"$coding\" ignored" } } } diff --git a/library/http/pkgIndex.tcl b/library/http/pkgIndex.tcl index 74c4841..7249547 100644 --- a/library/http/pkgIndex.tcl +++ b/library/http/pkgIndex.tcl @@ -1,2 +1,2 @@ if {![package vsatisfies [package provide Tcl] 8.6-]} {return} -package ifneeded http 2.9.5 [list tclPkgSetup $dir http 2.9.5 {{http.tcl source {::http::config ::http::formatQuery ::http::geturl ::http::reset ::http::wait ::http::register ::http::unregister ::http::mapReply}}}] +package ifneeded http 2.9.6 [list tclPkgSetup $dir http 2.9.6 {{http.tcl source {::http::config ::http::formatQuery ::http::geturl ::http::reset ::http::wait ::http::register ::http::unregister ::http::mapReply}}}] diff --git a/unix/Makefile.in b/unix/Makefile.in index d187c84..de18d5d 100644 --- a/unix/Makefile.in +++ b/unix/Makefile.in @@ -946,8 +946,8 @@ install-libraries: libraries do \ $(INSTALL_DATA) $$i "$(SCRIPT_INSTALL_DIR)/http1.0"; \ done - @echo "Installing package http 2.9.5 as a Tcl Module"; - @$(INSTALL_DATA) $(TOP_DIR)/library/http/http.tcl "$(MODULE_INSTALL_DIR)/8.6/http-2.9.5.tm" + @echo "Installing package http 2.9.6 as a Tcl Module"; + @$(INSTALL_DATA) $(TOP_DIR)/library/http/http.tcl "$(MODULE_INSTALL_DIR)/8.6/http-2.9.6.tm" @echo "Installing package opt0.4 files to $(SCRIPT_INSTALL_DIR)/opt0.4/"; @for i in $(TOP_DIR)/library/opt/*.tcl; do \ $(INSTALL_DATA) $$i "$(SCRIPT_INSTALL_DIR)/opt0.4"; \ diff --git a/win/Makefile.in b/win/Makefile.in index 41ab5b1..6a67db1 100644 --- a/win/Makefile.in +++ b/win/Makefile.in @@ -726,8 +726,8 @@ install-libraries: libraries install-tzdata install-msgs do \ $(COPY) "$$j" "$(SCRIPT_INSTALL_DIR)/http1.0"; \ done; - @echo "Installing package http 2.9.5 as a Tcl Module"; - @$(COPY) $(ROOT_DIR)/library/http/http.tcl "$(MODULE_INSTALL_DIR)/8.6/http-2.9.5.tm"; + @echo "Installing package http 2.9.6 as a Tcl Module"; + @$(COPY) $(ROOT_DIR)/library/http/http.tcl "$(MODULE_INSTALL_DIR)/8.6/http-2.9.6.tm"; @echo "Installing library opt0.4 directory"; @for j in $(ROOT_DIR)/library/opt/*.tcl; \ do \ -- cgit v0.12 From c079597c246630f1982fc12887cb5b199fe98961 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 11 Mar 2022 14:52:27 +0000 Subject: Experiment: full UTF for 8.7. (WIP) --- .github/workflows/linux-build.yml | 2 +- .github/workflows/win-build.yml | 3 +-- generic/tcl.h | 6 +++++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/.github/workflows/linux-build.yml b/.github/workflows/linux-build.yml index b410aab..cb93bd4 100644 --- a/.github/workflows/linux-build.yml +++ b/.github/workflows/linux-build.yml @@ -7,7 +7,7 @@ jobs: matrix: cfgopt: - "" - - "CFLAGS=-DTCL_UTF_MAX=4" + - "CFLAGS=-DTCL_UTF_MAX=3" - "CFLAGS=-DTCL_NO_DEPRECATED=1" - "--disable-shared" - "--enable-symbols" diff --git a/.github/workflows/win-build.yml b/.github/workflows/win-build.yml index a8019ee..0ff696c 100644 --- a/.github/workflows/win-build.yml +++ b/.github/workflows/win-build.yml @@ -13,7 +13,6 @@ jobs: matrix: cfgopt: - "" - - "OPTS=utfmax" - "CHECKS=nodep" - "OPTS=static" - "OPTS=symbols" @@ -52,7 +51,7 @@ jobs: matrix: cfgopt: - "" - - "CFLAGS=-DTCL_UTF_MAX=4" + - "CFLAGS=-DTCL_UTF_MAX=3" - "CFLAGS=-DTCL_NO_DEPRECATED=1" - "--disable-shared" - "--enable-symbols" diff --git a/generic/tcl.h b/generic/tcl.h index b82cf0a..742a548 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -2128,7 +2128,11 @@ typedef struct Tcl_EncodingType { */ #ifndef TCL_UTF_MAX -#define TCL_UTF_MAX 3 +# ifdef BUILD_tcl +# define TCL_UTF_MAX 4 +# else +# define TCL_UTF_MAX 3 +# endif #endif /* -- cgit v0.12 From 3ab43f6d833639cadd33faec67aefb712f5a1798 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 11 Mar 2022 22:11:45 +0000 Subject: Handle TclUniCharNcmp() --- generic/tcl.decls | 2 +- generic/tclCmdMZ.c | 4 ++-- generic/tclDecls.h | 6 +++--- generic/tclInt.h | 28 ++++++++++------------------ generic/tclStringObj.c | 2 +- generic/tclStubInit.c | 15 --------------- generic/tclUtf.c | 34 +++++++++++++++++++++++++++++----- 7 files changed, 46 insertions(+), 45 deletions(-) diff --git a/generic/tcl.decls b/generic/tcl.decls index 8e21b1d..4a637ad 100644 --- a/generic/tcl.decls +++ b/generic/tcl.decls @@ -1248,7 +1248,7 @@ declare 352 { int Tcl_Char16Len(const unsigned short *uniStr) } declare 353 {deprecated {Use Tcl_UtfNcmp}} { - int Tcl_UniCharNcmp(const Tcl_UniChar *ucs, const Tcl_UniChar *uct, + int Tcl_UniCharNcmp(const unsigned short *ucs, const unsigned short *uct, unsigned long numChars) } declare 354 { diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index f394035..5e95217 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -604,7 +604,7 @@ Tcl_RegsubObjCmd( numMatches = 0; nocase = (cflags & TCL_REG_NOCASE); - strCmpFn = nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp; + strCmpFn = nocase ? Tcl_UniCharNcasecmp : TclUniCharNcmp; wsrc = Tcl_GetUnicodeFromObj(objv[0], &slen); wstring = Tcl_GetUnicodeFromObj(objv[1], &wlen); @@ -2070,7 +2070,7 @@ StringMapCmd( } end = ustring1 + length1; - strCmpFn = (nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp); + strCmpFn = (nocase ? Tcl_UniCharNcasecmp : TclUniCharNcmp); /* * Force result to be Unicode diff --git a/generic/tclDecls.h b/generic/tclDecls.h index e84a7e8..49ce21d 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -1066,8 +1066,8 @@ EXTERN int Tcl_UniCharIsWordChar(int ch); EXTERN int Tcl_Char16Len(const unsigned short *uniStr); /* 353 */ TCL_DEPRECATED("Use Tcl_UtfNcmp") -int Tcl_UniCharNcmp(const Tcl_UniChar *ucs, - const Tcl_UniChar *uct, +int Tcl_UniCharNcmp(const unsigned short *ucs, + const unsigned short *uct, unsigned long numChars); /* 354 */ EXTERN char * Tcl_Char16ToUtfDString(const unsigned short *uniStr, @@ -2345,7 +2345,7 @@ typedef struct TclStubs { int (*tcl_UniCharIsUpper) (int ch); /* 350 */ int (*tcl_UniCharIsWordChar) (int ch); /* 351 */ int (*tcl_Char16Len) (const unsigned short *uniStr); /* 352 */ - TCL_DEPRECATED_API("Use Tcl_UtfNcmp") int (*tcl_UniCharNcmp) (const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned long numChars); /* 353 */ + TCL_DEPRECATED_API("Use Tcl_UtfNcmp") int (*tcl_UniCharNcmp) (const unsigned short *ucs, const unsigned short *uct, unsigned long numChars); /* 353 */ char * (*tcl_Char16ToUtfDString) (const unsigned short *uniStr, int uniLength, Tcl_DString *dsPtr); /* 354 */ unsigned short * (*tcl_UtfToChar16DString) (const char *src, int length, Tcl_DString *dsPtr); /* 355 */ Tcl_RegExp (*tcl_GetRegExpFromObj) (Tcl_Interp *interp, Tcl_Obj *patObj, int flags); /* 356 */ diff --git a/generic/tclInt.h b/generic/tclInt.h index 2873ad3..a0f9622 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3319,6 +3319,16 @@ MODULE_SCOPE void TclErrorStackResetIf(Tcl_Interp *interp, MODULE_SCOPE int TclZipfs_Init(Tcl_Interp *interp); +MODULE_SCOPE int *TclGetUnicode(Tcl_Obj *); +MODULE_SCOPE int *TclGetUnicodeFromObj_(Tcl_Obj *, int *); +MODULE_SCOPE Tcl_Obj *TclNewUnicodeObj(const int *, int); +MODULE_SCOPE void TclSetUnicodeObj(Tcl_Obj *, const int *, int); +MODULE_SCOPE void TclAppendUnicodeToObj(Tcl_Obj *, const int *, int); +MODULE_SCOPE int TclUniCharNcasecmp(const int *, const int *, unsigned long); +MODULE_SCOPE int TclUniCharCaseMatch(const int *, const int *, int); +MODULE_SCOPE int TclUniCharNcmp(const int *, const int *, unsigned long); + + /* * Many parsing tasks need a common definition of whitespace. * Use this routine and macro to achieve that and place @@ -4777,24 +4787,6 @@ MODULE_SCOPE int TclIsPureByteArray(Tcl_Obj *objPtr); /* *---------------------------------------------------------------- - * Macro used by the Tcl core to compare Unicode strings. On big-endian - * systems we can use the more efficient memcmp, but this would not be - * lexically correct on little-endian systems. The ANSI C "prototype" for - * this macro is: - * - * MODULE_SCOPE int TclUniCharNcmp(const Tcl_UniChar *cs, - * const Tcl_UniChar *ct, unsigned long n); - *---------------------------------------------------------------- - */ - -#if defined(WORDS_BIGENDIAN) && (TCL_UTF_MAX > 3) -# define TclUniCharNcmp(cs,ct,n) memcmp((cs),(ct),(n)*sizeof(Tcl_UniChar)) -#else /* !WORDS_BIGENDIAN */ -# define TclUniCharNcmp Tcl_UniCharNcmp -#endif /* WORDS_BIGENDIAN */ - -/* - *---------------------------------------------------------------- * Macro used by the Tcl core to increment a namespace's export epoch * counter. The ANSI C "prototype" for this macro is: * diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 7d4aef3..521d13b 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -3492,7 +3492,7 @@ TclStringCmp( s1len *= sizeof(Tcl_UniChar); s2len *= sizeof(Tcl_UniChar); } else { - memCmpFn = (memCmpFn_t)(void *)Tcl_UniCharNcmp; + memCmpFn = (memCmpFn_t)(void *)TclUniCharNcmp; } } } diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c index 1aec652..f9430cb 100644 --- a/generic/tclStubInit.c +++ b/generic/tclStubInit.c @@ -81,21 +81,6 @@ #define TclWinConvertError (void (*)(DWORD))(void *)Tcl_WinConvertError #endif - -#if TCL_UTF_MAX > 3 -static void uniCodePanic(void) { - Tcl_Panic("This extension uses a deprecated function, not available now: Tcl is compiled with -DTCL_UTF_MAX==%d", TCL_UTF_MAX); -} -# define Tcl_GetUnicode (int *(*)(Tcl_Obj *))(void *)uniCodePanic -# define Tcl_GetUnicodeFromObj (int *(*)(Tcl_Obj *, Tcl_UniChar *))(void *)uniCodePanic -# define Tcl_NewUnicodeObj (Tcl_Obj *(*)(const int *, Tcl_UniChar))(void *)uniCodePanic -# define Tcl_SetUnicodeObj (void(*)(Tcl_Obj *, const Tcl_UniChar *, int))(void *)uniCodePanic -# define Tcl_AppendUnicodeToObj (void(*)(Tcl_Obj *, const Tcl_UniChar *, int))(void *)uniCodePanic -# define Tcl_UniCharNcasecmp (int(*)(const Tcl_UniChar *, const Tcl_UniChar *, unsigned long))(void *)uniCodePanic -# define Tcl_UniCharCaseMatch (int(*)(const Tcl_UniChar *, const Tcl_UniChar *, int))(void *)uniCodePanic -# define Tcl_UniCharNcmp (int(*)(const Tcl_UniChar *, const Tcl_UniChar *, unsigned long))(void *)uniCodePanic -#endif - #define TclUtfCharComplete UtfCharComplete #define TclUtfNext UtfNext #define TclUtfPrev UtfPrev diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 169f240..e024b65 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -1849,9 +1849,36 @@ Tcl_UniCharLen( */ int +TclUniCharNcmp( + const int *ucs, /* Unicode string to compare to uct. */ + const int *uct, /* Unicode string ucs is compared to. */ + unsigned long numChars) /* Number of unichars to compare. */ +{ +#if defined(WORDS_BIGENDIAN) && (TCL_UTF_MAX > 3) + /* + * We are definitely on a big-endian machine; memcmp() is safe + */ + + return memcmp(ucs, uct, numChars*sizeof(Tcl_UniChar)); + +#else /* !WORDS_BIGENDIAN */ + /* + * We can't simply call memcmp() because that is not lexically correct. + */ + + for ( ; numChars != 0; ucs++, uct++, numChars--) { + if (*ucs != *uct) { + return (*ucs - *uct); + } + } + return 0; +#endif /* WORDS_BIGENDIAN */ +} + +int Tcl_UniCharNcmp( - const Tcl_UniChar *ucs, /* Unicode string to compare to uct. */ - const Tcl_UniChar *uct, /* Unicode string ucs is compared to. */ + const unsigned short *ucs, /* Unicode string to compare to uct. */ + const unsigned short *uct, /* Unicode string ucs is compared to. */ unsigned long numChars) /* Number of unichars to compare. */ { #if defined(WORDS_BIGENDIAN) && (TCL_UTF_MAX > 3) @@ -1868,21 +1895,18 @@ Tcl_UniCharNcmp( for ( ; numChars != 0; ucs++, uct++, numChars--) { if (*ucs != *uct) { -#if TCL_UTF_MAX < 4 /* special case for handling upper surrogates */ if (((*ucs & 0xFC00) == 0xD800) && ((*uct & 0xFC00) != 0xD800)) { return 1; } else if (((*uct & 0xFC00) == 0xD800)) { return -1; } -#endif return (*ucs - *uct); } } return 0; #endif /* WORDS_BIGENDIAN */ } - /* *---------------------------------------------------------------------- * -- cgit v0.12 From 5f4d25842d6e58ab6e37998a654b80487ae80c29 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 11 Mar 2022 22:43:51 +0000 Subject: 2 more functions --- generic/tcl.decls | 6 +- generic/tclCmdMZ.c | 4 +- generic/tclDecls.h | 12 +-- generic/tclStringObj.c | 2 +- generic/tclUtf.c | 206 ++++++++++++++++++++++++++++++++++++++++++++++--- 5 files changed, 208 insertions(+), 22 deletions(-) diff --git a/generic/tcl.decls b/generic/tcl.decls index 4a637ad..5a32bfd 100644 --- a/generic/tcl.decls +++ b/generic/tcl.decls @@ -1483,12 +1483,12 @@ declare 418 { int Tcl_IsChannelExisting(const char *channelName) } declare 419 {deprecated {Use Tcl_UtfNcasecmp}} { - int Tcl_UniCharNcasecmp(const Tcl_UniChar *ucs, const Tcl_UniChar *uct, + int Tcl_UniCharNcasecmp(const unsigned short *ucs, const unsigned short *uct, unsigned long numChars) } declare 420 {deprecated {Use Tcl_StringCaseMatch}} { - int Tcl_UniCharCaseMatch(const Tcl_UniChar *uniStr, - const Tcl_UniChar *uniPattern, int nocase) + int Tcl_UniCharCaseMatch(const unsigned short *uniStr, + const unsigned short *uniPattern, int nocase) } declare 421 { Tcl_HashEntry *Tcl_FindHashEntry(Tcl_HashTable *tablePtr, const void *key) diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 5e95217..b178085 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -604,7 +604,7 @@ Tcl_RegsubObjCmd( numMatches = 0; nocase = (cflags & TCL_REG_NOCASE); - strCmpFn = nocase ? Tcl_UniCharNcasecmp : TclUniCharNcmp; + strCmpFn = nocase ? TclUniCharNcasecmp : TclUniCharNcmp; wsrc = Tcl_GetUnicodeFromObj(objv[0], &slen); wstring = Tcl_GetUnicodeFromObj(objv[1], &wlen); @@ -2070,7 +2070,7 @@ StringMapCmd( } end = ustring1 + length1; - strCmpFn = (nocase ? Tcl_UniCharNcasecmp : TclUniCharNcmp); + strCmpFn = (nocase ? TclUniCharNcasecmp : TclUniCharNcmp); /* * Force result to be Unicode diff --git a/generic/tclDecls.h b/generic/tclDecls.h index 49ce21d..0a336f1 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -1256,13 +1256,13 @@ EXTERN void Tcl_ClearChannelHandlers(Tcl_Channel channel); EXTERN int Tcl_IsChannelExisting(const char *channelName); /* 419 */ TCL_DEPRECATED("Use Tcl_UtfNcasecmp") -int Tcl_UniCharNcasecmp(const Tcl_UniChar *ucs, - const Tcl_UniChar *uct, +int Tcl_UniCharNcasecmp(const unsigned short *ucs, + const unsigned short *uct, unsigned long numChars); /* 420 */ TCL_DEPRECATED("Use Tcl_StringCaseMatch") -int Tcl_UniCharCaseMatch(const Tcl_UniChar *uniStr, - const Tcl_UniChar *uniPattern, int nocase); +int Tcl_UniCharCaseMatch(const unsigned short *uniStr, + const unsigned short *uniPattern, int nocase); /* 421 */ EXTERN Tcl_HashEntry * Tcl_FindHashEntry(Tcl_HashTable *tablePtr, const void *key); @@ -2411,8 +2411,8 @@ typedef struct TclStubs { void (*tcl_SpliceChannel) (Tcl_Channel channel); /* 416 */ void (*tcl_ClearChannelHandlers) (Tcl_Channel channel); /* 417 */ int (*tcl_IsChannelExisting) (const char *channelName); /* 418 */ - TCL_DEPRECATED_API("Use Tcl_UtfNcasecmp") int (*tcl_UniCharNcasecmp) (const Tcl_UniChar *ucs, const Tcl_UniChar *uct, unsigned long numChars); /* 419 */ - TCL_DEPRECATED_API("Use Tcl_StringCaseMatch") int (*tcl_UniCharCaseMatch) (const Tcl_UniChar *uniStr, const Tcl_UniChar *uniPattern, int nocase); /* 420 */ + TCL_DEPRECATED_API("Use Tcl_UtfNcasecmp") int (*tcl_UniCharNcasecmp) (const unsigned short *ucs, const unsigned short *uct, unsigned long numChars); /* 419 */ + TCL_DEPRECATED_API("Use Tcl_StringCaseMatch") int (*tcl_UniCharCaseMatch) (const unsigned short *uniStr, const unsigned short *uniPattern, int nocase); /* 420 */ Tcl_HashEntry * (*tcl_FindHashEntry) (Tcl_HashTable *tablePtr, const void *key); /* 421 */ Tcl_HashEntry * (*tcl_CreateHashEntry) (Tcl_HashTable *tablePtr, const void *key, int *newPtr); /* 422 */ void (*tcl_InitCustomHashTable) (Tcl_HashTable *tablePtr, int keyType, const Tcl_HashKeyType *typePtr); /* 423 */ diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 521d13b..4ab595f 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -3467,7 +3467,7 @@ TclStringCmp( if (nocase) { s1 = (char *) Tcl_GetUnicodeFromObj(value1Ptr, &s1len); s2 = (char *) Tcl_GetUnicodeFromObj(value2Ptr, &s2len); - memCmpFn = (memCmpFn_t)(void *)Tcl_UniCharNcasecmp; + memCmpFn = (memCmpFn_t)(void *)TclUniCharNcasecmp; } else { s1len = Tcl_GetCharLength(value1Ptr); s2len = Tcl_GetCharLength(value2Ptr); diff --git a/generic/tclUtf.c b/generic/tclUtf.c index e024b65..68a0e32 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -1927,30 +1927,48 @@ Tcl_UniCharNcmp( int Tcl_UniCharNcasecmp( - const Tcl_UniChar *ucs, /* Unicode string to compare to uct. */ - const Tcl_UniChar *uct, /* Unicode string ucs is compared to. */ + const unsigned short *ucs, /* Unicode string to compare to uct. */ + const unsigned short *uct, /* Unicode string ucs is compared to. */ unsigned long numChars) /* Number of unichars to compare. */ { for ( ; numChars != 0; numChars--, ucs++, uct++) { if (*ucs != *uct) { - Tcl_UniChar lcs = Tcl_UniCharToLower(*ucs); - Tcl_UniChar lct = Tcl_UniCharToLower(*uct); + unsigned short lcs = Tcl_UniCharToLower(*ucs); + unsigned short lct = Tcl_UniCharToLower(*uct); if (lcs != lct) { -#if TCL_UTF_MAX < 4 /* special case for handling upper surrogates */ if (((lcs & 0xFC00) == 0xD800) && ((lct & 0xFC00) != 0xD800)) { return 1; } else if (((lct & 0xFC00) == 0xD800)) { return -1; } -#endif return (lcs - lct); } } } return 0; } + +int +TclUniCharNcasecmp( + const int *ucs, /* Unicode string to compare to uct. */ + const int *uct, /* Unicode string ucs is compared to. */ + unsigned long numChars) /* Number of unichars to compare. */ +{ + for ( ; numChars != 0; numChars--, ucs++, uct++) { + if (*ucs != *uct) { + int lcs = Tcl_UniCharToLower(*ucs); + int lct = Tcl_UniCharToLower(*uct); + + if (lcs != lct) { + return (lcs - lct); + } + } + } + return 0; +} + /* *---------------------------------------------------------------------- @@ -2314,14 +2332,181 @@ Tcl_UniCharIsWordChar( */ int +TclUniCharCaseMatch( + const int *uniStr, /* Unicode String. */ + const int *uniPattern, + /* Pattern, which may contain special + * characters. */ + int nocase) /* 0 for case sensitive, 1 for insensitive */ +{ + int ch1 = 0, p; + + while (1) { + p = *uniPattern; + + /* + * See if we're at the end of both the pattern and the string. If so, + * we succeeded. If we're at the end of the pattern but not at the end + * of the string, we failed. + */ + + if (p == 0) { + return (*uniStr == 0); + } + if ((*uniStr == 0) && (p != '*')) { + return 0; + } + + /* + * Check for a "*" as the next pattern character. It matches any + * substring. We handle this by skipping all the characters up to the + * next matching one in the pattern, and then calling ourselves + * recursively for each postfix of string, until either we match or we + * reach the end of the string. + */ + + if (p == '*') { + /* + * Skip all successive *'s in the pattern + */ + + while (*(++uniPattern) == '*') { + /* empty body */ + } + p = *uniPattern; + if (p == 0) { + return 1; + } + if (nocase) { + p = Tcl_UniCharToLower(p); + } + while (1) { + /* + * Optimization for matching - cruise through the string + * quickly if the next char in the pattern isn't a special + * character + */ + + if ((p != '[') && (p != '?') && (p != '\\')) { + if (nocase) { + while (*uniStr && (p != *uniStr) + && (p != Tcl_UniCharToLower(*uniStr))) { + uniStr++; + } + } else { + while (*uniStr && (p != *uniStr)) { + uniStr++; + } + } + } + if (TclUniCharCaseMatch(uniStr, uniPattern, nocase)) { + return 1; + } + if (*uniStr == 0) { + return 0; + } + uniStr++; + } + } + + /* + * Check for a "?" as the next pattern character. It matches any + * single character. + */ + + if (p == '?') { + uniPattern++; + uniStr++; + continue; + } + + /* + * Check for a "[" as the next pattern character. It is followed by a + * list of characters that are acceptable, or by a range (two + * characters separated by "-"). + */ + + if (p == '[') { + int startChar, endChar; + + uniPattern++; + ch1 = (nocase ? Tcl_UniCharToLower(*uniStr) : *uniStr); + uniStr++; + while (1) { + if ((*uniPattern == ']') || (*uniPattern == 0)) { + return 0; + } + startChar = (nocase ? Tcl_UniCharToLower(*uniPattern) + : *uniPattern); + uniPattern++; + if (*uniPattern == '-') { + uniPattern++; + if (*uniPattern == 0) { + return 0; + } + endChar = (nocase ? Tcl_UniCharToLower(*uniPattern) + : *uniPattern); + uniPattern++; + if (((startChar <= ch1) && (ch1 <= endChar)) + || ((endChar <= ch1) && (ch1 <= startChar))) { + /* + * Matches ranges of form [a-z] or [z-a]. + */ + break; + } + } else if (startChar == ch1) { + break; + } + } + while (*uniPattern != ']') { + if (*uniPattern == 0) { + uniPattern--; + break; + } + uniPattern++; + } + uniPattern++; + continue; + } + + /* + * If the next pattern character is '\', just strip off the '\' so we + * do exact matching on the character that follows. + */ + + if (p == '\\') { + if (*(++uniPattern) == '\0') { + return 0; + } + } + + /* + * There's no special character. Just make sure that the next bytes of + * each string match. + */ + + if (nocase) { + if (Tcl_UniCharToLower(*uniStr) != + Tcl_UniCharToLower(*uniPattern)) { + return 0; + } + } else if (*uniStr != *uniPattern) { + return 0; + } + uniStr++; + uniPattern++; + } +} + +int Tcl_UniCharCaseMatch( - const Tcl_UniChar *uniStr, /* Unicode String. */ - const Tcl_UniChar *uniPattern, + const unsigned short *uniStr, /* Unicode String. */ + const unsigned short *uniPattern, /* Pattern, which may contain special * characters. */ int nocase) /* 0 for case sensitive, 1 for insensitive */ { - Tcl_UniChar ch1 = 0, p; + unsigned short ch1 = 0, p; while (1) { p = *uniPattern; @@ -2409,7 +2594,7 @@ Tcl_UniCharCaseMatch( */ if (p == '[') { - Tcl_UniChar startChar, endChar; + unsigned short startChar, endChar; uniPattern++; ch1 = (nocase ? Tcl_UniCharToLower(*uniStr) : *uniStr); @@ -2480,6 +2665,7 @@ Tcl_UniCharCaseMatch( } } + /* *---------------------------------------------------------------------- * -- cgit v0.12 From e709f7c5392a8506414be6727a3f8b6bdd7fbae4 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sat, 12 Mar 2022 22:30:12 +0000 Subject: More progress --- .github/workflows/linux-build.yml | 2 +- .github/workflows/win-build.yml | 3 +- generic/tcl.decls | 8 ++-- generic/tclCmdMZ.c | 10 ++--- generic/tclDecls.h | 16 +++---- generic/tclExecute.c | 2 +- generic/tclInt.h | 2 - generic/tclStringObj.c | 88 ++++++++++++++++++++------------------- generic/tclTestObj.c | 4 +- 9 files changed, 68 insertions(+), 67 deletions(-) diff --git a/.github/workflows/linux-build.yml b/.github/workflows/linux-build.yml index cb93bd4..b410aab 100644 --- a/.github/workflows/linux-build.yml +++ b/.github/workflows/linux-build.yml @@ -7,7 +7,7 @@ jobs: matrix: cfgopt: - "" - - "CFLAGS=-DTCL_UTF_MAX=3" + - "CFLAGS=-DTCL_UTF_MAX=4" - "CFLAGS=-DTCL_NO_DEPRECATED=1" - "--disable-shared" - "--enable-symbols" diff --git a/.github/workflows/win-build.yml b/.github/workflows/win-build.yml index 0ff696c..a8019ee 100644 --- a/.github/workflows/win-build.yml +++ b/.github/workflows/win-build.yml @@ -13,6 +13,7 @@ jobs: matrix: cfgopt: - "" + - "OPTS=utfmax" - "CHECKS=nodep" - "OPTS=static" - "OPTS=symbols" @@ -51,7 +52,7 @@ jobs: matrix: cfgopt: - "" - - "CFLAGS=-DTCL_UTF_MAX=3" + - "CFLAGS=-DTCL_UTF_MAX=4" - "CFLAGS=-DTCL_NO_DEPRECATED=1" - "--disable-shared" - "--enable-symbols" diff --git a/generic/tcl.decls b/generic/tcl.decls index 5a32bfd..9c83e81 100644 --- a/generic/tcl.decls +++ b/generic/tcl.decls @@ -1338,10 +1338,10 @@ declare 377 { void Tcl_RegExpGetInfo(Tcl_RegExp regexp, Tcl_RegExpInfo *infoPtr) } declare 378 { - Tcl_Obj *Tcl_NewUnicodeObj(const Tcl_UniChar *unicode, int numChars) + Tcl_Obj *Tcl_NewUnicodeObj(const unsigned char *unicode, int numChars) } declare 379 { - void Tcl_SetUnicodeObj(Tcl_Obj *objPtr, const Tcl_UniChar *unicode, + void Tcl_SetUnicodeObj(Tcl_Obj *objPtr, const unsigned short *unicode, int numChars) } declare 380 { @@ -1351,7 +1351,7 @@ declare 381 { int Tcl_GetUniChar(Tcl_Obj *objPtr, int index) } declare 382 {deprecated {No longer in use, changed to macro}} { - Tcl_UniChar *Tcl_GetUnicode(Tcl_Obj *objPtr) + unsigned short *Tcl_GetUnicode(Tcl_Obj *objPtr) } declare 383 { Tcl_Obj *Tcl_GetRange(Tcl_Obj *objPtr, int first, int last) @@ -2417,7 +2417,7 @@ declare 651 { char *TclGetStringFromObj(Tcl_Obj *objPtr, size_t *lengthPtr) } declare 652 { - Tcl_UniChar *TclGetUnicodeFromObj(Tcl_Obj *objPtr, size_t *lengthPtr) + unsigned short *TclGetUnicodeFromObj(Tcl_Obj *objPtr, size_t *lengthPtr) } declare 653 { unsigned char *TclGetByteArrayFromObj(Tcl_Obj *objPtr, size_t *numBytesPtr) diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index b178085..11b383f 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -619,7 +619,7 @@ Tcl_RegsubObjCmd( */ if (wstring < wend) { - resultPtr = Tcl_NewUnicodeObj(wstring, 0); + resultPtr = TclNewUnicodeObj(wstring, 0); Tcl_IncrRefCount(resultPtr); for (; wstring < wend; wstring++) { Tcl_AppendUnicodeToObj(resultPtr, wsubspec, wsublen); @@ -636,7 +636,7 @@ Tcl_RegsubObjCmd( (slen==1 || (strCmpFn(wstring, wsrc, (unsigned long) slen) == 0))) { if (numMatches == 0) { - resultPtr = Tcl_NewUnicodeObj(wstring, 0); + resultPtr = TclNewUnicodeObj(wstring, 0); Tcl_IncrRefCount(resultPtr); } if (p != wstring) { @@ -742,7 +742,7 @@ Tcl_RegsubObjCmd( break; } if (numMatches == 0) { - resultPtr = Tcl_NewUnicodeObj(wstring, 0); + resultPtr = TclNewUnicodeObj(wstring, 0); Tcl_IncrRefCount(resultPtr); if (offset > 0) { /* @@ -785,7 +785,7 @@ Tcl_RegsubObjCmd( subStart = info.matches[idx].start; subEnd = info.matches[idx].end; if ((subStart >= 0) && (subEnd >= 0)) { - args[idx + numParts] = Tcl_NewUnicodeObj( + args[idx + numParts] = TclNewUnicodeObj( wstring + offset + subStart, subEnd - subStart); } else { TclNewObj(args[idx + numParts]); @@ -2076,7 +2076,7 @@ StringMapCmd( * Force result to be Unicode */ - resultPtr = Tcl_NewUnicodeObj(ustring1, 0); + resultPtr = TclNewUnicodeObj(ustring1, 0); if (mapElemc == 2) { /* diff --git a/generic/tclDecls.h b/generic/tclDecls.h index 0a336f1..4217e9c 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -1142,18 +1142,18 @@ EXTERN int Tcl_RegExpExecObj(Tcl_Interp *interp, EXTERN void Tcl_RegExpGetInfo(Tcl_RegExp regexp, Tcl_RegExpInfo *infoPtr); /* 378 */ -EXTERN Tcl_Obj * Tcl_NewUnicodeObj(const Tcl_UniChar *unicode, +EXTERN Tcl_Obj * Tcl_NewUnicodeObj(const unsigned char *unicode, int numChars); /* 379 */ EXTERN void Tcl_SetUnicodeObj(Tcl_Obj *objPtr, - const Tcl_UniChar *unicode, int numChars); + const unsigned short *unicode, int numChars); /* 380 */ EXTERN int Tcl_GetCharLength(Tcl_Obj *objPtr); /* 381 */ EXTERN int Tcl_GetUniChar(Tcl_Obj *objPtr, int index); /* 382 */ TCL_DEPRECATED("No longer in use, changed to macro") -Tcl_UniChar * Tcl_GetUnicode(Tcl_Obj *objPtr); +unsigned short * Tcl_GetUnicode(Tcl_Obj *objPtr); /* 383 */ EXTERN Tcl_Obj * Tcl_GetRange(Tcl_Obj *objPtr, int first, int last); /* 384 */ @@ -1930,7 +1930,7 @@ EXTERN unsigned char * Tcl_GetBytesFromObj(Tcl_Interp *interp, EXTERN char * TclGetStringFromObj(Tcl_Obj *objPtr, size_t *lengthPtr); /* 652 */ -EXTERN Tcl_UniChar * TclGetUnicodeFromObj(Tcl_Obj *objPtr, +EXTERN unsigned short * TclGetUnicodeFromObj(Tcl_Obj *objPtr, size_t *lengthPtr); /* 653 */ EXTERN unsigned char * TclGetByteArrayFromObj(Tcl_Obj *objPtr, @@ -2370,11 +2370,11 @@ typedef struct TclStubs { int (*tcl_UniCharIsPunct) (int ch); /* 375 */ int (*tcl_RegExpExecObj) (Tcl_Interp *interp, Tcl_RegExp regexp, Tcl_Obj *textObj, int offset, int nmatches, int flags); /* 376 */ void (*tcl_RegExpGetInfo) (Tcl_RegExp regexp, Tcl_RegExpInfo *infoPtr); /* 377 */ - Tcl_Obj * (*tcl_NewUnicodeObj) (const Tcl_UniChar *unicode, int numChars); /* 378 */ - void (*tcl_SetUnicodeObj) (Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int numChars); /* 379 */ + Tcl_Obj * (*tcl_NewUnicodeObj) (const unsigned char *unicode, int numChars); /* 378 */ + void (*tcl_SetUnicodeObj) (Tcl_Obj *objPtr, const unsigned short *unicode, int numChars); /* 379 */ int (*tcl_GetCharLength) (Tcl_Obj *objPtr); /* 380 */ int (*tcl_GetUniChar) (Tcl_Obj *objPtr, int index); /* 381 */ - TCL_DEPRECATED_API("No longer in use, changed to macro") Tcl_UniChar * (*tcl_GetUnicode) (Tcl_Obj *objPtr); /* 382 */ + TCL_DEPRECATED_API("No longer in use, changed to macro") unsigned short * (*tcl_GetUnicode) (Tcl_Obj *objPtr); /* 382 */ Tcl_Obj * (*tcl_GetRange) (Tcl_Obj *objPtr, int first, int last); /* 383 */ TCL_DEPRECATED_API("Use Tcl_AppendStringsToObj") void (*tcl_AppendUnicodeToObj) (Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int length); /* 384 */ int (*tcl_RegExpMatchObj) (Tcl_Interp *interp, Tcl_Obj *textObj, Tcl_Obj *patternObj); /* 385 */ @@ -2644,7 +2644,7 @@ typedef struct TclStubs { unsigned char * (*tclGetBytesFromObj) (Tcl_Interp *interp, Tcl_Obj *objPtr, int *numBytesPtr); /* 649 */ unsigned char * (*tcl_GetBytesFromObj) (Tcl_Interp *interp, Tcl_Obj *objPtr, size_t *numBytesPtr); /* 650 */ char * (*tclGetStringFromObj) (Tcl_Obj *objPtr, size_t *lengthPtr); /* 651 */ - Tcl_UniChar * (*tclGetUnicodeFromObj) (Tcl_Obj *objPtr, size_t *lengthPtr); /* 652 */ + unsigned short * (*tclGetUnicodeFromObj) (Tcl_Obj *objPtr, size_t *lengthPtr); /* 652 */ unsigned char * (*tclGetByteArrayFromObj) (Tcl_Obj *objPtr, size_t *numBytesPtr); /* 653 */ int (*tcl_UtfCharComplete) (const char *src, int length); /* 654 */ const char * (*tcl_UtfNext) (const char *src); /* 655 */ diff --git a/generic/tclExecute.c b/generic/tclExecute.c index 0279218..7cc002f 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -5514,7 +5514,7 @@ TEBCresume( } ustring3 = Tcl_GetUnicodeFromObj(value3Ptr, &length3); - objResultPtr = Tcl_NewUnicodeObj(ustring1, 0); + objResultPtr = TclNewUnicodeObj(ustring1, 0); p = ustring1; end = ustring1 + length; for (; ustring1 < end; ustring1++) { diff --git a/generic/tclInt.h b/generic/tclInt.h index a0f9622..ed607cd 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3319,10 +3319,8 @@ MODULE_SCOPE void TclErrorStackResetIf(Tcl_Interp *interp, MODULE_SCOPE int TclZipfs_Init(Tcl_Interp *interp); -MODULE_SCOPE int *TclGetUnicode(Tcl_Obj *); MODULE_SCOPE int *TclGetUnicodeFromObj_(Tcl_Obj *, int *); MODULE_SCOPE Tcl_Obj *TclNewUnicodeObj(const int *, int); -MODULE_SCOPE void TclSetUnicodeObj(Tcl_Obj *, const int *, int); MODULE_SCOPE void TclAppendUnicodeToObj(Tcl_Obj *, const int *, int); MODULE_SCOPE int TclUniCharNcasecmp(const int *, const int *, unsigned long); MODULE_SCOPE int TclUniCharCaseMatch(const int *, const int *, int); diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 4ab595f..972eef7 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -374,8 +374,8 @@ Tcl_DbNewStringObj( */ Tcl_Obj * -Tcl_NewUnicodeObj( - const Tcl_UniChar *unicode, /* The unicode string used to initialize the +TclNewUnicodeObj( + const int *unicode, /* The unicode string used to initialize the * new object. */ int numChars) /* Number of characters in the unicode * string. */ @@ -387,6 +387,22 @@ Tcl_NewUnicodeObj( return objPtr; } +Tcl_Obj * +Tcl_NewUnicodeObj( + const unsigned char *unicode, /* The unicode string used to initialize the + * new object. */ + int numChars) /* Number of characters in the unicode + * string. */ +{ + Tcl_Obj *objPtr; + (void)unicode; + (void)numChars; + + TclNewObj(objPtr); + /* TODO JN */ + return objPtr; +} + /* *---------------------------------------------------------------------- * @@ -612,12 +628,12 @@ Tcl_GetUniChar( #undef Tcl_GetUnicodeFromObj #ifndef TCL_NO_DEPRECATED #undef Tcl_GetUnicode -Tcl_UniChar * +unsigned short * Tcl_GetUnicode( Tcl_Obj *objPtr) /* The object to find the unicode string * for. */ { - return Tcl_GetUnicodeFromObj(objPtr, (int *)NULL); + return TclGetUnicodeFromObj(objPtr, NULL); } #endif /* TCL_NO_DEPRECATED */ @@ -663,7 +679,7 @@ Tcl_GetUnicodeFromObj( } return stringPtr->unicode; } -Tcl_UniChar * +unsigned short * TclGetUnicodeFromObj( Tcl_Obj *objPtr, /* The object to find the unicode string * for. */ @@ -671,24 +687,10 @@ TclGetUnicodeFromObj( * rep's unichar length should be stored. If * NULL, no length is stored. */ { - String *stringPtr; - - SetStringFromAny(NULL, objPtr); - stringPtr = GET_STRING(objPtr); - - if (stringPtr->hasUnicode == 0) { - FillUnicodeRep(objPtr); - stringPtr = GET_STRING(objPtr); - } - - if (lengthPtr != NULL) { -#if TCL_MAJOR_VERSION > 8 - *lengthPtr = stringPtr->numChars; -#else - *lengthPtr = ((size_t)(unsigned)(stringPtr->numChars + 1)) - 1; -#endif - } - return stringPtr->unicode; + (void)objPtr; + (void)lengthPtr; + /* TODO JN */ + return NULL; } /* @@ -797,7 +799,7 @@ Tcl_GetRange( ++last; } #endif - return Tcl_NewUnicodeObj(stringPtr->unicode + first, last - first + 1); + return TclNewUnicodeObj(stringPtr->unicode + first, last - first + 1); } /* @@ -1092,16 +1094,16 @@ Tcl_AttemptSetObjLength( void Tcl_SetUnicodeObj( Tcl_Obj *objPtr, /* The object to set the string of. */ - const Tcl_UniChar *unicode, /* The unicode string used to initialize the + const unsigned short *unicode, /* The unicode string used to initialize the * object. */ int numChars) /* Number of characters in the unicode * string. */ { - if (Tcl_IsShared(objPtr)) { - Tcl_Panic("%s called with shared object", "Tcl_SetUnicodeObj"); - } - TclFreeInternalRep(objPtr); - SetUnicodeObj(objPtr, unicode, numChars); + (void)objPtr; + (void)unicode; + (void)numChars; + + /* TODO JN */ } static int @@ -1228,7 +1230,7 @@ Tcl_AppendLimitedToObj( /* If appended string starts with a continuation byte or a lower surrogate, * force objPtr to unicode representation. See [7f1162a867] */ if (bytes && ISCONTINUATION(bytes)) { - Tcl_GetUnicode(objPtr); + Tcl_GetUnicodeFromObj(objPtr, NULL); stringPtr = GET_STRING(objPtr); } if (stringPtr->hasUnicode && stringPtr->numChars > 0) { @@ -1432,7 +1434,7 @@ Tcl_AppendObjToObj( * force objPtr to unicode representation. See [7f1162a867] * This fixes append-3.4, append-3.7 and utf-1.18 testcases. */ if (ISCONTINUATION(TclGetString(appendObjPtr))) { - Tcl_GetUnicode(objPtr); + Tcl_GetUnicodeFromObj(objPtr, NULL); stringPtr = GET_STRING(objPtr); } /* @@ -2976,7 +2978,7 @@ TclStringRepeat( */ if (!inPlace || Tcl_IsShared(objPtr)) { - objResultPtr = Tcl_NewUnicodeObj(Tcl_GetUnicode(objPtr), length); + objResultPtr = TclNewUnicodeObj(Tcl_GetUnicodeFromObj(objPtr, NULL), length); } else { TclInvalidateStringRep(objPtr); objResultPtr = objPtr; @@ -2997,7 +2999,7 @@ TclStringRepeat( Tcl_AppendObjToObj(objResultPtr, objResultPtr); done *= 2; } - Tcl_AppendUnicodeToObj(objResultPtr, Tcl_GetUnicode(objResultPtr), + Tcl_AppendUnicodeToObj(objResultPtr, Tcl_GetUnicodeFromObj(objResultPtr, NULL), (count - done) * length); } else { /* @@ -3318,12 +3320,12 @@ TclStringCat( } return NULL; } - dst = Tcl_GetUnicode(objResultPtr) + start; + dst = Tcl_GetUnicodeFromObj(objResultPtr, NULL) + start; } else { Tcl_UniChar ch = 0; /* Ugly interface! No scheme to init array size. */ - objResultPtr = Tcl_NewUnicodeObj(&ch, 0); /* PANIC? */ + objResultPtr = TclNewUnicodeObj(&ch, 0); /* PANIC? */ if (0 == Tcl_AttemptSetObjLength(objResultPtr, length)) { Tcl_DecrRefCount(objResultPtr); if (interp) { @@ -3335,7 +3337,7 @@ TclStringCat( } return NULL; } - dst = Tcl_GetUnicode(objResultPtr); + dst = Tcl_GetUnicodeFromObj(objResultPtr, NULL); } while (objc--) { Tcl_Obj *objPtr = *objv++; @@ -3479,8 +3481,8 @@ TclStringCmp( s2 = value2Ptr->bytes; memCmpFn = memcmp; } else { - s1 = (char *) Tcl_GetUnicode(value1Ptr); - s2 = (char *) Tcl_GetUnicode(value2Ptr); + s1 = (char *) Tcl_GetUnicodeFromObj(value1Ptr, NULL); + s2 = (char *) Tcl_GetUnicodeFromObj(value2Ptr, NULL); if ( #if defined(WORDS_BIGENDIAN) && (TCL_UTF_MAX > 3) 1 @@ -3854,7 +3856,7 @@ TclStringReverse( stringPtr = GET_STRING(objPtr); if (stringPtr->hasUnicode) { - Tcl_UniChar *from = Tcl_GetUnicode(objPtr); + Tcl_UniChar *from = Tcl_GetUnicodeFromObj(objPtr, NULL); stringPtr = GET_STRING(objPtr); Tcl_UniChar *src = from + stringPtr->numChars; Tcl_UniChar *to; @@ -3865,9 +3867,9 @@ TclStringReverse( * Tcl_SetObjLength into growing the unicode rep buffer. */ - objPtr = Tcl_NewUnicodeObj(&ch, 1); + objPtr = TclNewUnicodeObj(&ch, 1); Tcl_SetObjLength(objPtr, stringPtr->numChars); - to = Tcl_GetUnicode(objPtr); + to = Tcl_GetUnicodeFromObj(objPtr, NULL); stringPtr = GET_STRING(objPtr); while (--src >= from) { #if TCL_UTF_MAX < 4 @@ -4101,7 +4103,7 @@ TclStringReplace( /* TODO: Is there an in-place option worth pursuing here? */ - result = Tcl_NewUnicodeObj(ustring, first); + result = TclNewUnicodeObj(ustring, first); if (insertPtr) { Tcl_AppendObjToObj(result, insertPtr); } diff --git a/generic/tclTestObj.c b/generic/tclTestObj.c index a235002..9814cfe 100644 --- a/generic/tclTestObj.c +++ b/generic/tclTestObj.c @@ -13,7 +13,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. */ - +#undef BUILD_tcl #ifndef USE_TCL_STUBS # define USE_TCL_STUBS #endif @@ -1153,7 +1153,7 @@ TeststringobjCmd( int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ { - Tcl_UniChar *unicode; + unsigned short *unicode; size_t varIndex; int size, option, i; Tcl_WideInt length; -- cgit v0.12 From d05ac0b98bbc039639b7784e313620b8d3757ddf Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sat, 12 Mar 2022 23:21:12 +0000 Subject: Start defining "utf32string" type --- generic/tcl.decls | 4 +- generic/tclCmdMZ.c | 62 +++++++++++----------- generic/tclDecls.h | 8 +-- generic/tclExecute.c | 18 +++---- generic/tclRegexp.c | 2 +- generic/tclStringObj.c | 139 ++++++++++++++++++++++++++++++++++--------------- generic/tclTestObj.c | 1 + generic/tclUtil.c | 4 +- tests/string.test | 2 +- 9 files changed, 149 insertions(+), 91 deletions(-) diff --git a/generic/tcl.decls b/generic/tcl.decls index 9c83e81..6dbb457 100644 --- a/generic/tcl.decls +++ b/generic/tcl.decls @@ -1357,7 +1357,7 @@ declare 383 { Tcl_Obj *Tcl_GetRange(Tcl_Obj *objPtr, int first, int last) } declare 384 {deprecated {Use Tcl_AppendStringsToObj}} { - void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr, const Tcl_UniChar *unicode, + void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr, const unsigned short *unicode, int length) } declare 385 { @@ -1541,7 +1541,7 @@ declare 433 { # introduced in 8.4a3 declare 434 { - Tcl_UniChar *Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr, int *lengthPtr) + unsigned short *Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr, int *lengthPtr) } # TIP#15 (math function introspection) dkf diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 11b383f..db4002a 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -606,9 +606,9 @@ Tcl_RegsubObjCmd( nocase = (cflags & TCL_REG_NOCASE); strCmpFn = nocase ? TclUniCharNcasecmp : TclUniCharNcmp; - wsrc = Tcl_GetUnicodeFromObj(objv[0], &slen); - wstring = Tcl_GetUnicodeFromObj(objv[1], &wlen); - wsubspec = Tcl_GetUnicodeFromObj(objv[2], &wsublen); + wsrc = TclGetUnicodeFromObj_(objv[0], &slen); + wstring = TclGetUnicodeFromObj_(objv[1], &wlen); + wsubspec = TclGetUnicodeFromObj_(objv[2], &wsublen); wend = wstring + wlen - (slen ? slen - 1 : 0); result = TCL_OK; @@ -622,8 +622,8 @@ Tcl_RegsubObjCmd( resultPtr = TclNewUnicodeObj(wstring, 0); Tcl_IncrRefCount(resultPtr); for (; wstring < wend; wstring++) { - Tcl_AppendUnicodeToObj(resultPtr, wsubspec, wsublen); - Tcl_AppendUnicodeToObj(resultPtr, wstring, 1); + TclAppendUnicodeToObj(resultPtr, wsubspec, wsublen); + TclAppendUnicodeToObj(resultPtr, wstring, 1); numMatches++; } wlen = 0; @@ -640,14 +640,14 @@ Tcl_RegsubObjCmd( Tcl_IncrRefCount(resultPtr); } if (p != wstring) { - Tcl_AppendUnicodeToObj(resultPtr, p, wstring - p); + TclAppendUnicodeToObj(resultPtr, p, wstring - p); p = wstring + slen; } else { p += slen; } wstring = p - 1; - Tcl_AppendUnicodeToObj(resultPtr, wsubspec, wsublen); + TclAppendUnicodeToObj(resultPtr, wsubspec, wsublen); numMatches++; } } @@ -699,14 +699,14 @@ Tcl_RegsubObjCmd( } else { objPtr = objv[1]; } - wstring = Tcl_GetUnicodeFromObj(objPtr, &wlen); + wstring = TclGetUnicodeFromObj_(objPtr, &wlen); if (objv[2] == objv[0]) { subPtr = Tcl_DuplicateObj(objv[2]); } else { subPtr = objv[2]; } if (!command) { - wsubspec = Tcl_GetUnicodeFromObj(subPtr, &wsublen); + wsubspec = TclGetUnicodeFromObj_(subPtr, &wsublen); } result = TCL_OK; @@ -750,7 +750,7 @@ Tcl_RegsubObjCmd( * specified. */ - Tcl_AppendUnicodeToObj(resultPtr, wstring, offset); + TclAppendUnicodeToObj(resultPtr, wstring, offset); } } numMatches++; @@ -763,7 +763,7 @@ Tcl_RegsubObjCmd( Tcl_RegExpGetInfo(regExpr, &info); start = info.matches[0].start; end = info.matches[0].end; - Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, start); + TclAppendUnicodeToObj(resultPtr, wstring + offset, start); /* * In command-prefix mode, the substitutions are added as quoted @@ -826,7 +826,7 @@ Tcl_RegsubObjCmd( * the user code. */ - wstring = Tcl_GetUnicodeFromObj(objPtr, &wlen); + wstring = TclGetUnicodeFromObj_(objPtr, &wlen); offset += end; if (end == 0 || start == end) { @@ -838,7 +838,7 @@ Tcl_RegsubObjCmd( */ if (offset < wlen) { - Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, 1); + TclAppendUnicodeToObj(resultPtr, wstring + offset, 1); } offset++; } @@ -867,7 +867,7 @@ Tcl_RegsubObjCmd( idx = ch - '0'; } else if ((ch == '\\') || (ch == '&')) { *wsrc = ch; - Tcl_AppendUnicodeToObj(resultPtr, wfirstChar, + TclAppendUnicodeToObj(resultPtr, wfirstChar, wsrc - wfirstChar + 1); *wsrc = '\\'; wfirstChar = wsrc + 2; @@ -881,7 +881,7 @@ Tcl_RegsubObjCmd( } if (wfirstChar != wsrc) { - Tcl_AppendUnicodeToObj(resultPtr, wfirstChar, + TclAppendUnicodeToObj(resultPtr, wfirstChar, wsrc - wfirstChar); } @@ -889,7 +889,7 @@ Tcl_RegsubObjCmd( subStart = info.matches[idx].start; subEnd = info.matches[idx].end; if ((subStart >= 0) && (subEnd >= 0)) { - Tcl_AppendUnicodeToObj(resultPtr, + TclAppendUnicodeToObj(resultPtr, wstring + offset + subStart, subEnd - subStart); } } @@ -901,7 +901,7 @@ Tcl_RegsubObjCmd( } if (wfirstChar != wsrc) { - Tcl_AppendUnicodeToObj(resultPtr, wfirstChar, wsrc - wfirstChar); + TclAppendUnicodeToObj(resultPtr, wfirstChar, wsrc - wfirstChar); } if (end == 0) { @@ -911,7 +911,7 @@ Tcl_RegsubObjCmd( */ if (offset < wlen) { - Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, 1); + TclAppendUnicodeToObj(resultPtr, wstring + offset, 1); } offset++; } else { @@ -923,7 +923,7 @@ Tcl_RegsubObjCmd( */ if (offset < wlen) { - Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, 1); + TclAppendUnicodeToObj(resultPtr, wstring + offset, 1); } offset++; } @@ -948,7 +948,7 @@ Tcl_RegsubObjCmd( resultPtr = objv[1]; Tcl_IncrRefCount(resultPtr); } else if (offset < wlen) { - Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, wlen - offset); + TclAppendUnicodeToObj(resultPtr, wstring + offset, wlen - offset); } if (objc == 4) { if (Tcl_ObjSetVar2(interp, objv[3], NULL, resultPtr, @@ -2060,7 +2060,7 @@ StringMapCmd( } else { sourceObj = objv[objc-1]; } - ustring1 = Tcl_GetUnicodeFromObj(sourceObj, &length1); + ustring1 = TclGetUnicodeFromObj_(sourceObj, &length1); if (length1 == 0) { /* * Empty input string, just stop now. @@ -2089,7 +2089,7 @@ StringMapCmd( int mapLen, u2lc; Tcl_UniChar *mapString; - ustring2 = Tcl_GetUnicodeFromObj(mapElemv[0], &length2); + ustring2 = TclGetUnicodeFromObj_(mapElemv[0], &length2); p = ustring1; if ((length2 > length1) || (length2 == 0)) { /* @@ -2098,7 +2098,7 @@ StringMapCmd( ustring1 = end; } else { - mapString = Tcl_GetUnicodeFromObj(mapElemv[1], &mapLen); + mapString = TclGetUnicodeFromObj_(mapElemv[1], &mapLen); u2lc = (nocase ? Tcl_UniCharToLower(*ustring2) : 0); for (; ustring1 < end; ustring1++) { if (((*ustring1 == *ustring2) || @@ -2106,14 +2106,14 @@ StringMapCmd( (length2==1 || strCmpFn(ustring1, ustring2, (unsigned long) length2) == 0)) { if (p != ustring1) { - Tcl_AppendUnicodeToObj(resultPtr, p, ustring1-p); + TclAppendUnicodeToObj(resultPtr, p, ustring1-p); p = ustring1 + length2; } else { p += length2; } ustring1 = p - 1; - Tcl_AppendUnicodeToObj(resultPtr, mapString, mapLen); + TclAppendUnicodeToObj(resultPtr, mapString, mapLen); } } } @@ -2134,7 +2134,7 @@ StringMapCmd( u2lc = (int *)TclStackAlloc(interp, mapElemc * sizeof(int)); } for (index = 0; index < mapElemc; index++) { - mapStrings[index] = Tcl_GetUnicodeFromObj(mapElemv[index], + mapStrings[index] = TclGetUnicodeFromObj_(mapElemv[index], mapLens+index); if (nocase && ((index % 2) == 0)) { u2lc[index/2] = Tcl_UniCharToLower(*mapStrings[index]); @@ -2158,7 +2158,7 @@ StringMapCmd( * Put the skipped chars onto the result first. */ - Tcl_AppendUnicodeToObj(resultPtr, p, ustring1-p); + TclAppendUnicodeToObj(resultPtr, p, ustring1-p); p = ustring1 + length2; } else { p += length2; @@ -2174,7 +2174,7 @@ StringMapCmd( * Append the map value to the unicode string. */ - Tcl_AppendUnicodeToObj(resultPtr, + TclAppendUnicodeToObj(resultPtr, mapStrings[index+1], mapLens[index+1]); break; } @@ -2191,7 +2191,7 @@ StringMapCmd( * Put the rest of the unmapped chars onto result. */ - Tcl_AppendUnicodeToObj(resultPtr, p, ustring1 - p); + TclAppendUnicodeToObj(resultPtr, p, ustring1 - p); } Tcl_SetObjResult(interp, resultPtr); done: @@ -2506,7 +2506,7 @@ StringStartCmd( return TCL_ERROR; } - string = Tcl_GetUnicodeFromObj(objv[1], &length); + string = TclGetUnicodeFromObj_(objv[1], &length); if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK) { return TCL_ERROR; } @@ -2576,7 +2576,7 @@ StringEndCmd( return TCL_ERROR; } - string = Tcl_GetUnicodeFromObj(objv[1], &length); + string = TclGetUnicodeFromObj_(objv[1], &length); if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK) { return TCL_ERROR; } diff --git a/generic/tclDecls.h b/generic/tclDecls.h index 4217e9c..bf15862 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -1159,7 +1159,7 @@ EXTERN Tcl_Obj * Tcl_GetRange(Tcl_Obj *objPtr, int first, int last); /* 384 */ TCL_DEPRECATED("Use Tcl_AppendStringsToObj") void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr, - const Tcl_UniChar *unicode, int length); + const unsigned short *unicode, int length); /* 385 */ EXTERN int Tcl_RegExpMatchObj(Tcl_Interp *interp, Tcl_Obj *textObj, Tcl_Obj *patternObj); @@ -1304,7 +1304,7 @@ EXTERN int Tcl_AttemptSetObjLength(Tcl_Obj *objPtr, int length); /* 433 */ EXTERN Tcl_ThreadId Tcl_GetChannelThread(Tcl_Channel channel); /* 434 */ -EXTERN Tcl_UniChar * Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr, +EXTERN unsigned short * Tcl_GetUnicodeFromObj(Tcl_Obj *objPtr, int *lengthPtr); /* 435 */ TCL_DEPRECATED("") @@ -2376,7 +2376,7 @@ typedef struct TclStubs { int (*tcl_GetUniChar) (Tcl_Obj *objPtr, int index); /* 381 */ TCL_DEPRECATED_API("No longer in use, changed to macro") unsigned short * (*tcl_GetUnicode) (Tcl_Obj *objPtr); /* 382 */ Tcl_Obj * (*tcl_GetRange) (Tcl_Obj *objPtr, int first, int last); /* 383 */ - TCL_DEPRECATED_API("Use Tcl_AppendStringsToObj") void (*tcl_AppendUnicodeToObj) (Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int length); /* 384 */ + TCL_DEPRECATED_API("Use Tcl_AppendStringsToObj") void (*tcl_AppendUnicodeToObj) (Tcl_Obj *objPtr, const unsigned short *unicode, int length); /* 384 */ int (*tcl_RegExpMatchObj) (Tcl_Interp *interp, Tcl_Obj *textObj, Tcl_Obj *patternObj); /* 385 */ void (*tcl_SetNotifier) (const Tcl_NotifierProcs *notifierProcPtr); /* 386 */ Tcl_Mutex * (*tcl_GetAllocMutex) (void); /* 387 */ @@ -2426,7 +2426,7 @@ typedef struct TclStubs { char * (*tcl_AttemptDbCkrealloc) (char *ptr, unsigned int size, const char *file, int line); /* 431 */ int (*tcl_AttemptSetObjLength) (Tcl_Obj *objPtr, int length); /* 432 */ Tcl_ThreadId (*tcl_GetChannelThread) (Tcl_Channel channel); /* 433 */ - Tcl_UniChar * (*tcl_GetUnicodeFromObj) (Tcl_Obj *objPtr, int *lengthPtr); /* 434 */ + unsigned short * (*tcl_GetUnicodeFromObj) (Tcl_Obj *objPtr, int *lengthPtr); /* 434 */ TCL_DEPRECATED_API("") int (*tcl_GetMathFuncInfo) (Tcl_Interp *interp, const char *name, int *numArgsPtr, Tcl_ValueType **argTypesPtr, Tcl_MathProc **procPtr, ClientData *clientDataPtr); /* 435 */ TCL_DEPRECATED_API("") Tcl_Obj * (*tcl_ListMathFuncs) (Tcl_Interp *interp, const char *pattern); /* 436 */ Tcl_Obj * (*tcl_SubstObj) (Tcl_Interp *interp, Tcl_Obj *objPtr, int flags); /* 437 */ diff --git a/generic/tclExecute.c b/generic/tclExecute.c index 7cc002f..7a1025d 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -5495,12 +5495,12 @@ TEBCresume( objResultPtr = value3Ptr; goto doneStringMap; } - ustring1 = Tcl_GetUnicodeFromObj(valuePtr, &length); + ustring1 = TclGetUnicodeFromObj_(valuePtr, &length); if (length == 0) { objResultPtr = valuePtr; goto doneStringMap; } - ustring2 = Tcl_GetUnicodeFromObj(value2Ptr, &length2); + ustring2 = TclGetUnicodeFromObj_(value2Ptr, &length2); if (length2 > length || length2 == 0) { objResultPtr = valuePtr; goto doneStringMap; @@ -5512,7 +5512,7 @@ TEBCresume( } goto doneStringMap; } - ustring3 = Tcl_GetUnicodeFromObj(value3Ptr, &length3); + ustring3 = TclGetUnicodeFromObj_(value3Ptr, &length3); objResultPtr = TclNewUnicodeObj(ustring1, 0); p = ustring1; @@ -5524,14 +5524,14 @@ TEBCresume( memcmp(ustring1, ustring2, sizeof(Tcl_UniChar) * length2) == 0)) { if (p != ustring1) { - Tcl_AppendUnicodeToObj(objResultPtr, p, ustring1-p); + TclAppendUnicodeToObj(objResultPtr, p, ustring1-p); p = ustring1 + length2; } else { p += length2; } ustring1 = p - 1; - Tcl_AppendUnicodeToObj(objResultPtr, ustring3, length3); + TclAppendUnicodeToObj(objResultPtr, ustring3, length3); } } if (p != ustring1) { @@ -5539,7 +5539,7 @@ TEBCresume( * Put the rest of the unmapped chars onto result. */ - Tcl_AppendUnicodeToObj(objResultPtr, p, ustring1 - p); + TclAppendUnicodeToObj(objResultPtr, p, ustring1 - p); } doneStringMap: TRACE_WITH_OBJ(("%.20s %.20s %.20s => ", @@ -5565,7 +5565,7 @@ TEBCresume( valuePtr = OBJ_AT_TOS; TRACE(("%s \"%.30s\" => ", tclStringClassTable[opnd].name, O2S(valuePtr))); - ustring1 = Tcl_GetUnicodeFromObj(valuePtr, &length); + ustring1 = TclGetUnicodeFromObj_(valuePtr, &length); match = 1; if (length > 0) { int ch; @@ -5596,8 +5596,8 @@ TEBCresume( || TclHasInternalRep(value2Ptr, &tclStringType)) { Tcl_UniChar *ustring1, *ustring2; - ustring1 = Tcl_GetUnicodeFromObj(valuePtr, &length); - ustring2 = Tcl_GetUnicodeFromObj(value2Ptr, &length2); + ustring1 = TclGetUnicodeFromObj_(valuePtr, &length); + ustring2 = TclGetUnicodeFromObj_(value2Ptr, &length2); match = TclUniCharMatch(ustring1, length, ustring2, length2, nocase); } else if (TclIsPureByteArray(valuePtr) && !nocase) { diff --git a/generic/tclRegexp.c b/generic/tclRegexp.c index 8e588ac..bb8a6ad 100644 --- a/generic/tclRegexp.c +++ b/generic/tclRegexp.c @@ -482,7 +482,7 @@ Tcl_RegExpExecObj( regexpPtr->string = NULL; regexpPtr->objPtr = textObj; - udata = Tcl_GetUnicodeFromObj(textObj, &length); + udata = TclGetUnicodeFromObj_(textObj, &length); if (offset > length) { offset = length; diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 972eef7..a723586 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -80,14 +80,41 @@ static void UpdateStringOfString(Tcl_Obj *objPtr); * functions that can be invoked by generic object code. */ -const Tcl_ObjType tclStringType = { - "string", /* name */ +static const Tcl_ObjType utf32StringType = { + "utf32string", /* name */ FreeStringInternalRep, /* freeIntRepPro */ DupStringInternalRep, /* dupIntRepProc */ UpdateStringOfString, /* updateStringProc */ SetStringFromAny /* setFromAnyProc */ }; +typedef struct { + int numChars; /* The number of chars in the string. */ + int allocated; /* The amount of space actually allocated for + * the UTF-16 string (minus 1 byte for the + * termination char). */ + int maxChars; /* Max number of chars that can fit in the + * space allocated for the UTF-16 array. */ + int hasUnicode; /* Boolean determining whether the string has + * a UTF-16 representation. Always 1 */ + unsigned short unicode[TCLFLEXARRAY]; /* The array of Unicode chars. The actual size + * of this field depends on the 'maxChars' + * field above. */ +} UTF16String; + +const Tcl_ObjType tclStringType = { + "string", /* name */ + FreeStringInternalRep, /* freeIntRepPro */ +#if 0 + /* TODO JN */ + DupUTF16StringInternalRep, /* dupIntRepProc */ + UpdateUTF16StringOfString, /* updateStringProc */ + SetUTF16StringFromAny /* setFromAnyProc */ +#endif + NULL, NULL, NULL +}; + + /* * TCL STRING GROWTH ALGORITHM * @@ -656,8 +683,8 @@ Tcl_GetUnicode( *---------------------------------------------------------------------- */ -Tcl_UniChar * -Tcl_GetUnicodeFromObj( +int * +TclGetUnicodeFromObj_( Tcl_Obj *objPtr, /* The object to find the unicode string * for. */ int *lengthPtr) /* If non-NULL, the location where the string @@ -679,6 +706,22 @@ Tcl_GetUnicodeFromObj( } return stringPtr->unicode; } + +unsigned short * +Tcl_GetUnicodeFromObj( + Tcl_Obj *objPtr, /* The object to find the unicode string + * for. */ + int *lengthPtr) /* If non-NULL, the location where the string + * rep's unichar length should be stored. If + * NULL, no length is stored. */ +{ + (void)objPtr; + (void)lengthPtr; + + /* TODO JN */ + return NULL; +} + unsigned short * TclGetUnicodeFromObj( Tcl_Obj *objPtr, /* The object to find the unicode string @@ -1142,7 +1185,7 @@ SetUnicodeObj( stringCheckLimits(numChars); stringPtr = stringAlloc(numChars); SET_STRING(objPtr, stringPtr); - objPtr->typePtr = &tclStringType; + objPtr->typePtr = &utf32StringType; stringPtr->maxChars = numChars; memcpy(stringPtr->unicode, unicode, numChars * sizeof(Tcl_UniChar)); @@ -1230,7 +1273,7 @@ Tcl_AppendLimitedToObj( /* If appended string starts with a continuation byte or a lower surrogate, * force objPtr to unicode representation. See [7f1162a867] */ if (bytes && ISCONTINUATION(bytes)) { - Tcl_GetUnicodeFromObj(objPtr, NULL); + TclGetUnicodeFromObj_(objPtr, NULL); stringPtr = GET_STRING(objPtr); } if (stringPtr->hasUnicode && stringPtr->numChars > 0) { @@ -1298,9 +1341,9 @@ Tcl_AppendToObj( */ void -Tcl_AppendUnicodeToObj( +TclAppendUnicodeToObj( Tcl_Obj *objPtr, /* Points to the object to append to. */ - const Tcl_UniChar *unicode, /* The unicode string to append to the + const int *unicode, /* The unicode string to append to the * object. */ int length) /* Number of chars in "unicode". */ { @@ -1330,6 +1373,20 @@ Tcl_AppendUnicodeToObj( } } +void +Tcl_AppendUnicodeToObj( + Tcl_Obj *objPtr, /* Points to the object to append to. */ + const unsigned short *unicode, /* The unicode string to append to the + * object. */ + int length) /* Number of chars in "unicode". */ +{ + (void)objPtr; + (void)unicode; + (void)length; + + /* TODO JN */ +} + /* *---------------------------------------------------------------------- * @@ -1434,7 +1491,7 @@ Tcl_AppendObjToObj( * force objPtr to unicode representation. See [7f1162a867] * This fixes append-3.4, append-3.7 and utf-1.18 testcases. */ if (ISCONTINUATION(TclGetString(appendObjPtr))) { - Tcl_GetUnicodeFromObj(objPtr, NULL); + TclGetUnicodeFromObj_(objPtr, NULL); stringPtr = GET_STRING(objPtr); } /* @@ -1447,9 +1504,9 @@ Tcl_AppendObjToObj( * If appendObjPtr is not of the "String" type, don't convert it. */ - if (TclHasInternalRep(appendObjPtr, &tclStringType)) { + if (TclHasInternalRep(appendObjPtr, &utf32StringType)) { Tcl_UniChar *unicode = - Tcl_GetUnicodeFromObj(appendObjPtr, &numChars); + TclGetUnicodeFromObj_(appendObjPtr, &numChars); AppendUnicodeToUnicodeRep(objPtr, unicode, numChars); } else { @@ -1468,7 +1525,7 @@ Tcl_AppendObjToObj( bytes = TclGetStringFromObj(appendObjPtr, &length); numChars = stringPtr->numChars; - if ((numChars >= 0) && TclHasInternalRep(appendObjPtr, &tclStringType)) { + if ((numChars >= 0) && TclHasInternalRep(appendObjPtr, &utf32StringType)) { String *appendStringPtr = GET_STRING(appendObjPtr); appendNumChars = appendStringPtr->numChars; @@ -2877,7 +2934,7 @@ TclGetStringStorage( { String *stringPtr; - if (!TclHasInternalRep(objPtr, &tclStringType) || objPtr->bytes == NULL) { + if (!TclHasInternalRep(objPtr, &utf32StringType) || objPtr->bytes == NULL) { return TclGetStringFromObj(objPtr, (int *)sizePtr); } @@ -2925,7 +2982,7 @@ TclStringRepeat( */ if (!binary) { - if (TclHasInternalRep(objPtr, &tclStringType)) { + if (TclHasInternalRep(objPtr, &utf32StringType)) { String *stringPtr = GET_STRING(objPtr); if (stringPtr->hasUnicode) { unichar = 1; @@ -2938,7 +2995,7 @@ TclStringRepeat( Tcl_GetByteArrayFromObj(objPtr, &length); } else if (unichar) { /* Result will be pure Tcl_UniChar array. Pre-size it. */ - Tcl_GetUnicodeFromObj(objPtr, &length); + TclGetUnicodeFromObj_(objPtr, &length); } else { /* Result will be concat of string reps. Pre-size it. */ Tcl_GetStringFromObj(objPtr, &length); @@ -2978,7 +3035,7 @@ TclStringRepeat( */ if (!inPlace || Tcl_IsShared(objPtr)) { - objResultPtr = TclNewUnicodeObj(Tcl_GetUnicodeFromObj(objPtr, NULL), length); + objResultPtr = TclNewUnicodeObj(TclGetUnicodeFromObj_(objPtr, NULL), length); } else { TclInvalidateStringRep(objPtr); objResultPtr = objPtr; @@ -2999,7 +3056,7 @@ TclStringRepeat( Tcl_AppendObjToObj(objResultPtr, objResultPtr); done *= 2; } - Tcl_AppendUnicodeToObj(objResultPtr, Tcl_GetUnicodeFromObj(objResultPtr, NULL), + TclAppendUnicodeToObj(objResultPtr, TclGetUnicodeFromObj_(objResultPtr, NULL), (count - done) * length); } else { /* @@ -3096,7 +3153,7 @@ TclStringCat( binary = 0; if (ov > objv+1 && ISCONTINUATION(TclGetString(objPtr))) { forceUniChar = 1; - } else if ((objPtr->typePtr) && (objPtr->typePtr != &tclStringType)) { + } else if ((objPtr->typePtr) && (objPtr->typePtr != &utf32StringType)) { /* Prevent shimmer of non-string types. */ allowUniChar = 0; } @@ -3104,7 +3161,7 @@ TclStringCat( } else { /* assert (objPtr->typePtr != NULL) -- stork! */ binary = 0; - if (TclHasInternalRep(objPtr, &tclStringType)) { + if (TclHasInternalRep(objPtr, &utf32StringType)) { /* Have a pure Unicode value; ask to preserve it */ requestUniChar = 1; } else { @@ -3158,7 +3215,7 @@ TclStringCat( if ((objPtr->bytes == NULL) || (objPtr->length)) { int numChars; - Tcl_GetUnicodeFromObj(objPtr, &numChars); /* PANIC? */ + TclGetUnicodeFromObj_(objPtr, &numChars); /* PANIC? */ if (numChars) { last = objc - oc; if (length == 0) { @@ -3308,7 +3365,7 @@ TclStringCat( objResultPtr = *objv++; objc--; /* Ugly interface! Force resize of the unicode array. */ - Tcl_GetUnicodeFromObj(objResultPtr, &start); + TclGetUnicodeFromObj_(objResultPtr, &start); Tcl_InvalidateStringRep(objResultPtr); if (0 == Tcl_AttemptSetObjLength(objResultPtr, length)) { if (interp) { @@ -3320,7 +3377,7 @@ TclStringCat( } return NULL; } - dst = Tcl_GetUnicodeFromObj(objResultPtr, NULL) + start; + dst = TclGetUnicodeFromObj_(objResultPtr, NULL) + start; } else { Tcl_UniChar ch = 0; @@ -3337,14 +3394,14 @@ TclStringCat( } return NULL; } - dst = Tcl_GetUnicodeFromObj(objResultPtr, NULL); + dst = TclGetUnicodeFromObj_(objResultPtr, NULL); } while (objc--) { Tcl_Obj *objPtr = *objv++; if ((objPtr->bytes == NULL) || (objPtr->length)) { int more; - Tcl_UniChar *src = Tcl_GetUnicodeFromObj(objPtr, &more); + Tcl_UniChar *src = TclGetUnicodeFromObj_(objPtr, &more); memcpy(dst, src, more * sizeof(Tcl_UniChar)); dst += more; } @@ -3457,8 +3514,8 @@ TclStringCmp( s1 = (char *) Tcl_GetByteArrayFromObj(value1Ptr, &s1len); s2 = (char *) Tcl_GetByteArrayFromObj(value2Ptr, &s2len); memCmpFn = memcmp; - } else if (TclHasInternalRep(value1Ptr, &tclStringType) - && TclHasInternalRep(value2Ptr, &tclStringType)) { + } else if (TclHasInternalRep(value1Ptr, &utf32StringType) + && TclHasInternalRep(value2Ptr, &utf32StringType)) { /* * Do a unicode-specific comparison if both of the args are of * String type. If the char length == byte length, we can do a @@ -3467,8 +3524,8 @@ TclStringCmp( */ if (nocase) { - s1 = (char *) Tcl_GetUnicodeFromObj(value1Ptr, &s1len); - s2 = (char *) Tcl_GetUnicodeFromObj(value2Ptr, &s2len); + s1 = (char *) TclGetUnicodeFromObj_(value1Ptr, &s1len); + s2 = (char *) TclGetUnicodeFromObj_(value2Ptr, &s2len); memCmpFn = (memCmpFn_t)(void *)TclUniCharNcasecmp; } else { s1len = Tcl_GetCharLength(value1Ptr); @@ -3481,8 +3538,8 @@ TclStringCmp( s2 = value2Ptr->bytes; memCmpFn = memcmp; } else { - s1 = (char *) Tcl_GetUnicodeFromObj(value1Ptr, NULL); - s2 = (char *) Tcl_GetUnicodeFromObj(value2Ptr, NULL); + s1 = (char *) TclGetUnicodeFromObj_(value1Ptr, NULL); + s2 = (char *) TclGetUnicodeFromObj_(value2Ptr, NULL); if ( #if defined(WORDS_BIGENDIAN) && (TCL_UTF_MAX > 3) 1 @@ -3680,8 +3737,8 @@ TclStringFirst( * do only the well-defined Tcl_UniChar array search. */ - un = Tcl_GetUnicodeFromObj(needle, &ln); - uh = Tcl_GetUnicodeFromObj(haystack, &lh); + un = TclGetUnicodeFromObj_(needle, &ln); + uh = TclGetUnicodeFromObj_(haystack, &lh); if ((lh < ln) || (start > lh - ln)) { /* Don't start the loop if there cannot be a valid answer */ goto firstEnd; @@ -3763,8 +3820,8 @@ TclStringLast( goto lastEnd; } - uh = Tcl_GetUnicodeFromObj(haystack, &lh); - un = Tcl_GetUnicodeFromObj(needle, &ln); + uh = TclGetUnicodeFromObj_(haystack, &lh); + un = TclGetUnicodeFromObj_(needle, &ln); if (last >= lh) { last = lh - 1; @@ -3856,7 +3913,7 @@ TclStringReverse( stringPtr = GET_STRING(objPtr); if (stringPtr->hasUnicode) { - Tcl_UniChar *from = Tcl_GetUnicodeFromObj(objPtr, NULL); + Tcl_UniChar *from = TclGetUnicodeFromObj_(objPtr, NULL); stringPtr = GET_STRING(objPtr); Tcl_UniChar *src = from + stringPtr->numChars; Tcl_UniChar *to; @@ -3869,7 +3926,7 @@ TclStringReverse( objPtr = TclNewUnicodeObj(&ch, 1); Tcl_SetObjLength(objPtr, stringPtr->numChars); - to = Tcl_GetUnicodeFromObj(objPtr, NULL); + to = TclGetUnicodeFromObj_(objPtr, NULL); stringPtr = GET_STRING(objPtr); while (--src >= from) { #if TCL_UTF_MAX < 4 @@ -4099,7 +4156,7 @@ TclStringReplace( /* The traditional implementation... */ { int numChars; - Tcl_UniChar *ustring = Tcl_GetUnicodeFromObj(objPtr, &numChars); + Tcl_UniChar *ustring = TclGetUnicodeFromObj_(objPtr, &numChars); /* TODO: Is there an in-place option worth pursuing here? */ @@ -4108,7 +4165,7 @@ TclStringReplace( Tcl_AppendObjToObj(result, insertPtr); } if (first + count < numChars) { - Tcl_AppendUnicodeToObj(result, ustring + first + count, + TclAppendUnicodeToObj(result, ustring + first + count, numChars - first - count); } @@ -4267,7 +4324,7 @@ DupStringInternalRep( copyStringPtr->allocated = copyPtr->bytes ? copyPtr->length : 0; SET_STRING(copyPtr, copyStringPtr); - copyPtr->typePtr = &tclStringType; + copyPtr->typePtr = &utf32StringType; } /* @@ -4292,7 +4349,7 @@ SetStringFromAny( TCL_UNUSED(Tcl_Interp *), Tcl_Obj *objPtr) /* The object to convert. */ { - if (!TclHasInternalRep(objPtr, &tclStringType)) { + if (!TclHasInternalRep(objPtr, &utf32StringType)) { String *stringPtr = stringAlloc(0); /* @@ -4312,7 +4369,7 @@ SetStringFromAny( stringPtr->maxChars = 0; stringPtr->hasUnicode = 0; SET_STRING(objPtr, stringPtr); - objPtr->typePtr = &tclStringType; + objPtr->typePtr = &utf32StringType; } return TCL_OK; } diff --git a/generic/tclTestObj.c b/generic/tclTestObj.c index 9814cfe..e99d4c1 100644 --- a/generic/tclTestObj.c +++ b/generic/tclTestObj.c @@ -1076,6 +1076,7 @@ TestobjCmd( #ifndef TCL_WIDE_INT_IS_LONG if (!strcmp(typeName, "wideInt")) typeName = "int"; #endif + if (!strcmp(typeName, "utf32string")) typeName = "string"; Tcl_SetObjResult(interp, Tcl_NewStringObj(typeName, -1)); } } else if (strcmp(subCmd, "refcount") == 0) { diff --git a/generic/tclUtil.c b/generic/tclUtil.c index 66d1009..9cc82cb 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -2594,8 +2594,8 @@ TclStringMatchObj( if (TclHasInternalRep(strObj, &tclStringType) || (strObj->typePtr == NULL)) { Tcl_UniChar *udata, *uptn; - udata = Tcl_GetUnicodeFromObj(strObj, &length); - uptn = Tcl_GetUnicodeFromObj(ptnObj, &plen); + udata = TclGetUnicodeFromObj_(strObj, &length); + uptn = TclGetUnicodeFromObj_(ptnObj, &plen); match = TclUniCharMatch(udata, length, uptn, plen, flags); } else if (TclIsPureByteArray(strObj) && TclIsPureByteArray(ptnObj) && !flags) { diff --git a/tests/string.test b/tests/string.test index 203d0c6..9cac73d 100644 --- a/tests/string.test +++ b/tests/string.test @@ -422,7 +422,7 @@ test string-4.16.$noComp {string first, normal string vs pure unicode string} -b # Representation checks are canaries run {list [representationpoke $s] [representationpoke $m] \ [string first $m $s]} -} -result {{string 1} {string 0} 2} +} -result {{utf32string 1} {utf32string 0} 2} test string-4.17.$noComp {string first, corner case} -body { run {string first a aaa 4294967295} } -result {-1} -- cgit v0.12 From 46ed0441c3f458d86557c1813efb3b34c389a0e3 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 14 Mar 2022 16:06:23 +0000 Subject: More progress --- .github/workflows/linux-build.yml | 2 +- .github/workflows/win-build.yml | 4 ++-- generic/tcl.decls | 2 +- generic/tclDecls.h | 4 ++-- generic/tclInt.h | 21 +++++++++++++----- generic/tclStringObj.c | 34 +++++++++++++++++++++-------- generic/tclTestObj.c | 3 ++- generic/tclUtf.c | 46 ++++++++++++++++++++++----------------- tests/stringObj.test | 44 ++++++++++++++++++------------------- win/makefile.vc | 4 ++-- win/rules.vc | 6 ++--- 11 files changed, 101 insertions(+), 69 deletions(-) diff --git a/.github/workflows/linux-build.yml b/.github/workflows/linux-build.yml index b410aab..cb93bd4 100644 --- a/.github/workflows/linux-build.yml +++ b/.github/workflows/linux-build.yml @@ -7,7 +7,7 @@ jobs: matrix: cfgopt: - "" - - "CFLAGS=-DTCL_UTF_MAX=4" + - "CFLAGS=-DTCL_UTF_MAX=3" - "CFLAGS=-DTCL_NO_DEPRECATED=1" - "--disable-shared" - "--enable-symbols" diff --git a/.github/workflows/win-build.yml b/.github/workflows/win-build.yml index a8019ee..547d27e 100644 --- a/.github/workflows/win-build.yml +++ b/.github/workflows/win-build.yml @@ -13,7 +13,7 @@ jobs: matrix: cfgopt: - "" - - "OPTS=utfmax" + - "OPTS=utf16" - "CHECKS=nodep" - "OPTS=static" - "OPTS=symbols" @@ -52,7 +52,7 @@ jobs: matrix: cfgopt: - "" - - "CFLAGS=-DTCL_UTF_MAX=4" + - "CFLAGS=-DTCL_UTF_MAX=3" - "CFLAGS=-DTCL_NO_DEPRECATED=1" - "--disable-shared" - "--enable-symbols" diff --git a/generic/tcl.decls b/generic/tcl.decls index 6dbb457..f5b2e78 100644 --- a/generic/tcl.decls +++ b/generic/tcl.decls @@ -1338,7 +1338,7 @@ declare 377 { void Tcl_RegExpGetInfo(Tcl_RegExp regexp, Tcl_RegExpInfo *infoPtr) } declare 378 { - Tcl_Obj *Tcl_NewUnicodeObj(const unsigned char *unicode, int numChars) + Tcl_Obj *Tcl_NewUnicodeObj(const unsigned short *unicode, int numChars) } declare 379 { void Tcl_SetUnicodeObj(Tcl_Obj *objPtr, const unsigned short *unicode, diff --git a/generic/tclDecls.h b/generic/tclDecls.h index bf15862..1952641 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -1142,7 +1142,7 @@ EXTERN int Tcl_RegExpExecObj(Tcl_Interp *interp, EXTERN void Tcl_RegExpGetInfo(Tcl_RegExp regexp, Tcl_RegExpInfo *infoPtr); /* 378 */ -EXTERN Tcl_Obj * Tcl_NewUnicodeObj(const unsigned char *unicode, +EXTERN Tcl_Obj * Tcl_NewUnicodeObj(const unsigned short *unicode, int numChars); /* 379 */ EXTERN void Tcl_SetUnicodeObj(Tcl_Obj *objPtr, @@ -2370,7 +2370,7 @@ typedef struct TclStubs { int (*tcl_UniCharIsPunct) (int ch); /* 375 */ int (*tcl_RegExpExecObj) (Tcl_Interp *interp, Tcl_RegExp regexp, Tcl_Obj *textObj, int offset, int nmatches, int flags); /* 376 */ void (*tcl_RegExpGetInfo) (Tcl_RegExp regexp, Tcl_RegExpInfo *infoPtr); /* 377 */ - Tcl_Obj * (*tcl_NewUnicodeObj) (const unsigned char *unicode, int numChars); /* 378 */ + Tcl_Obj * (*tcl_NewUnicodeObj) (const unsigned short *unicode, int numChars); /* 378 */ void (*tcl_SetUnicodeObj) (Tcl_Obj *objPtr, const unsigned short *unicode, int numChars); /* 379 */ int (*tcl_GetCharLength) (Tcl_Obj *objPtr); /* 380 */ int (*tcl_GetUniChar) (Tcl_Obj *objPtr, int index); /* 381 */ diff --git a/generic/tclInt.h b/generic/tclInt.h index ed607cd..2a04aca 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3319,12 +3319,21 @@ MODULE_SCOPE void TclErrorStackResetIf(Tcl_Interp *interp, MODULE_SCOPE int TclZipfs_Init(Tcl_Interp *interp); -MODULE_SCOPE int *TclGetUnicodeFromObj_(Tcl_Obj *, int *); -MODULE_SCOPE Tcl_Obj *TclNewUnicodeObj(const int *, int); -MODULE_SCOPE void TclAppendUnicodeToObj(Tcl_Obj *, const int *, int); -MODULE_SCOPE int TclUniCharNcasecmp(const int *, const int *, unsigned long); -MODULE_SCOPE int TclUniCharCaseMatch(const int *, const int *, int); -MODULE_SCOPE int TclUniCharNcmp(const int *, const int *, unsigned long); +#if TCL_UTF_MAX > 3 + MODULE_SCOPE int *TclGetUnicodeFromObj_(Tcl_Obj *, int *); + MODULE_SCOPE Tcl_Obj *TclNewUnicodeObj(const int *, int); + MODULE_SCOPE void TclAppendUnicodeToObj(Tcl_Obj *, const int *, int); + MODULE_SCOPE int TclUniCharNcasecmp(const int *, const int *, unsigned long); + MODULE_SCOPE int TclUniCharCaseMatch(const int *, const int *, int); + MODULE_SCOPE int TclUniCharNcmp(const int *, const int *, unsigned long); +#else +# define TclGetUnicodeFromObj_ Tcl_GetUnicodeFromObj +# define TclNewUnicodeObj Tcl_NewUnicodeObj +# define TclAppendUnicodeToObj Tcl_AppendUnicodeToObj +# define TclUniCharNcasecmp Tcl_UniCharNcasecmp +# define TclUniCharCaseMatch Tcl_UniCharCaseMatch +# define TclUniCharNcmp Tcl_UniCharNcmp +#endif /* diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 8730331..eb5103d 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -55,8 +55,6 @@ static void AppendUtfToUtfRep(Tcl_Obj *objPtr, const char *bytes, int numBytes); static void DupStringInternalRep(Tcl_Obj *objPtr, Tcl_Obj *copyPtr); -static void DupUTF16StringInternalRep(Tcl_Obj *objPtr, - Tcl_Obj *copyPtr); static int ExtendStringRepWithUnicode(Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int numChars); static void ExtendUnicodeRepWithString(Tcl_Obj *objPtr, @@ -67,12 +65,16 @@ static void FreeStringInternalRep(Tcl_Obj *objPtr); static void GrowStringBuffer(Tcl_Obj *objPtr, int needed, int flag); static void GrowUnicodeBuffer(Tcl_Obj *objPtr, int needed); static int SetStringFromAny(Tcl_Interp *interp, Tcl_Obj *objPtr); -static int SetUTF16StringFromAny(Tcl_Interp *interp, Tcl_Obj *objPtr); static void SetUnicodeObj(Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int numChars); static int UnicodeLength(const Tcl_UniChar *unicode); static void UpdateStringOfString(Tcl_Obj *objPtr); +#if TCL_UTF_MAX < 4 +static void DupUTF16StringInternalRep(Tcl_Obj *objPtr, + Tcl_Obj *copyPtr); +static int SetUTF16StringFromAny(Tcl_Interp *interp, Tcl_Obj *objPtr); static void UpdateStringOfUTF16String(Tcl_Obj *objPtr); +#endif #define ISCONTINUATION(bytes) (\ ((((bytes)[0] & 0xC0) == 0x80) || (((bytes)[0] == '\xED') \ @@ -84,6 +86,12 @@ static void UpdateStringOfUTF16String(Tcl_Obj *objPtr); * functions that can be invoked by generic object code. */ +#if TCL_UTF_MAX > 3 + +#define utf32StringType tclStringType + +#else + static const Tcl_ObjType utf32StringType = { "utf32string", /* name */ FreeStringInternalRep, /* freeIntRepPro */ @@ -125,7 +133,7 @@ DupUTF16StringInternalRep( size_t size = offsetof(UTF16String, unicode) + (((srcStringPtr->numChars) + 1U) * sizeof(unsigned short)); UTF16String *copyStringPtr = (UTF16String *)ckalloc(size); memcpy(copyStringPtr, srcStringPtr, size); - copyStringPtr->allocated = srcStringPtr->numChars; + copyStringPtr->allocated = srcStringPtr->numChars + 1; copyStringPtr->maxChars = srcStringPtr->numChars; copyPtr->internalRep.twoPtrValue.ptr1 = copyStringPtr; @@ -156,7 +164,7 @@ SetUTF16StringFromAny( */ stringPtr->numChars = 0; - stringPtr->allocated = objPtr->length; + stringPtr->allocated = objPtr->length + 1; stringPtr->maxChars = objPtr->length; stringPtr->hasUnicode = 1; objPtr->internalRep.twoPtrValue.ptr1 = stringPtr; @@ -172,6 +180,8 @@ UpdateStringOfUTF16String( (void)objPtr; } +#endif + /* * TCL STRING GROWTH ALGORITHM * @@ -459,7 +469,7 @@ Tcl_DbNewStringObj( Tcl_Obj * TclNewUnicodeObj( - const int *unicode, /* The unicode string used to initialize the + const Tcl_UniChar *unicode, /* The unicode string used to initialize the * new object. */ int numChars) /* Number of characters in the unicode * string. */ @@ -471,9 +481,10 @@ TclNewUnicodeObj( return objPtr; } +#if TCL_UTF_MAX > 3 Tcl_Obj * Tcl_NewUnicodeObj( - const unsigned char *unicode, /* The unicode string used to initialize the + const unsigned short *unicode, /* The unicode string used to initialize the * new object. */ int numChars) /* Number of characters in the unicode * string. */ @@ -486,6 +497,7 @@ Tcl_NewUnicodeObj( /* TODO JN */ return objPtr; } +#endif /* *---------------------------------------------------------------------- @@ -740,7 +752,7 @@ Tcl_GetUnicode( *---------------------------------------------------------------------- */ -int * +Tcl_UniChar * TclGetUnicodeFromObj_( Tcl_Obj *objPtr, /* The object to find the unicode string * for. */ @@ -764,6 +776,7 @@ TclGetUnicodeFromObj_( return stringPtr->unicode; } +#if TCL_UTF_MAX > 3 unsigned short * Tcl_GetUnicodeFromObj( Tcl_Obj *objPtr, /* The object to find the unicode string @@ -778,6 +791,7 @@ Tcl_GetUnicodeFromObj( /* TODO JN */ return NULL; } +#endif unsigned short * TclGetUnicodeFromObj( @@ -1400,7 +1414,7 @@ Tcl_AppendToObj( void TclAppendUnicodeToObj( Tcl_Obj *objPtr, /* Points to the object to append to. */ - const int *unicode, /* The unicode string to append to the + const Tcl_UniChar *unicode, /* The unicode string to append to the * object. */ int length) /* Number of chars in "unicode". */ { @@ -1430,6 +1444,7 @@ TclAppendUnicodeToObj( } } +#if TCL_UTF_MAX > 3 void Tcl_AppendUnicodeToObj( Tcl_Obj *objPtr, /* Points to the object to append to. */ @@ -1443,6 +1458,7 @@ Tcl_AppendUnicodeToObj( /* TODO JN */ } +#endif /* *---------------------------------------------------------------------- diff --git a/generic/tclTestObj.c b/generic/tclTestObj.c index 9814cfe..9884a9a 100644 --- a/generic/tclTestObj.c +++ b/generic/tclTestObj.c @@ -1073,8 +1073,9 @@ TestobjCmd( Tcl_SetObjResult(interp, Tcl_NewStringObj("none", -1)); } else { typeName = objv[2]->typePtr->name; + if (!strcmp(typeName, "utf32string")) typeName = "string"; #ifndef TCL_WIDE_INT_IS_LONG - if (!strcmp(typeName, "wideInt")) typeName = "int"; + else if (!strcmp(typeName, "wideInt")) typeName = "int"; #endif Tcl_SetObjResult(interp, Tcl_NewStringObj(typeName, -1)); } diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 68a0e32..02f4358 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -1850,8 +1850,8 @@ Tcl_UniCharLen( int TclUniCharNcmp( - const int *ucs, /* Unicode string to compare to uct. */ - const int *uct, /* Unicode string ucs is compared to. */ + const Tcl_UniChar *ucs, /* Unicode string to compare to uct. */ + const Tcl_UniChar *uct, /* Unicode string ucs is compared to. */ unsigned long numChars) /* Number of unichars to compare. */ { #if defined(WORDS_BIGENDIAN) && (TCL_UTF_MAX > 3) @@ -1875,6 +1875,7 @@ TclUniCharNcmp( #endif /* WORDS_BIGENDIAN */ } +#if TCL_UTF_MAX > 3 int Tcl_UniCharNcmp( const unsigned short *ucs, /* Unicode string to compare to uct. */ @@ -1907,6 +1908,7 @@ Tcl_UniCharNcmp( return 0; #endif /* WORDS_BIGENDIAN */ } +#endif /* *---------------------------------------------------------------------- * @@ -1926,23 +1928,17 @@ Tcl_UniCharNcmp( */ int -Tcl_UniCharNcasecmp( - const unsigned short *ucs, /* Unicode string to compare to uct. */ - const unsigned short *uct, /* Unicode string ucs is compared to. */ +TclUniCharNcasecmp( + const Tcl_UniChar *ucs, /* Unicode string to compare to uct. */ + const Tcl_UniChar *uct, /* Unicode string ucs is compared to. */ unsigned long numChars) /* Number of unichars to compare. */ { for ( ; numChars != 0; numChars--, ucs++, uct++) { if (*ucs != *uct) { - unsigned short lcs = Tcl_UniCharToLower(*ucs); - unsigned short lct = Tcl_UniCharToLower(*uct); + int lcs = Tcl_UniCharToLower(*ucs); + int lct = Tcl_UniCharToLower(*uct); if (lcs != lct) { - /* special case for handling upper surrogates */ - if (((lcs & 0xFC00) == 0xD800) && ((lct & 0xFC00) != 0xD800)) { - return 1; - } else if (((lct & 0xFC00) == 0xD800)) { - return -1; - } return (lcs - lct); } } @@ -1950,24 +1946,32 @@ Tcl_UniCharNcasecmp( return 0; } +#if TCL_UTF_MAX > 3 int -TclUniCharNcasecmp( - const int *ucs, /* Unicode string to compare to uct. */ - const int *uct, /* Unicode string ucs is compared to. */ +Tcl_UniCharNcasecmp( + const unsigned short *ucs, /* Unicode string to compare to uct. */ + const unsigned short *uct, /* Unicode string ucs is compared to. */ unsigned long numChars) /* Number of unichars to compare. */ { for ( ; numChars != 0; numChars--, ucs++, uct++) { if (*ucs != *uct) { - int lcs = Tcl_UniCharToLower(*ucs); - int lct = Tcl_UniCharToLower(*uct); + unsigned short lcs = Tcl_UniCharToLower(*ucs); + unsigned short lct = Tcl_UniCharToLower(*uct); if (lcs != lct) { + /* special case for handling upper surrogates */ + if (((lcs & 0xFC00) == 0xD800) && ((lct & 0xFC00) != 0xD800)) { + return 1; + } else if (((lct & 0xFC00) == 0xD800)) { + return -1; + } return (lcs - lct); } } } return 0; } +#endif /* @@ -2333,8 +2337,8 @@ Tcl_UniCharIsWordChar( int TclUniCharCaseMatch( - const int *uniStr, /* Unicode String. */ - const int *uniPattern, + const Tcl_UniChar *uniStr, /* Unicode String. */ + const Tcl_UniChar *uniPattern, /* Pattern, which may contain special * characters. */ int nocase) /* 0 for case sensitive, 1 for insensitive */ @@ -2498,6 +2502,7 @@ TclUniCharCaseMatch( } } +#if TCL_UTF_MAX > 3 int Tcl_UniCharCaseMatch( const unsigned short *uniStr, /* Unicode String. */ @@ -2664,6 +2669,7 @@ Tcl_UniCharCaseMatch( uniPattern++; } } +#endif /* diff --git a/tests/stringObj.test b/tests/stringObj.test index a2bdf95..abe02b2 100644 --- a/tests/stringObj.test +++ b/tests/stringObj.test @@ -190,12 +190,12 @@ test stringObj-7.3 {SetStringFromAny called with non-string obj} testobj { set x 2345 list [incr x] [testobj objtype $x] [string index $x end] \ [testobj objtype $x] -} {2346 int 6 utf32string} +} {2346 int 6 string} test stringObj-7.4 {SetStringFromAny called with string obj} testobj { set x "abcdef" list [string length $x] [testobj objtype $x] \ [string length $x] [testobj objtype $x] -} {6 utf32string 6 utf32string} +} {6 string 6 string} test stringObj-8.1 {DupStringInternalRep procedure} testobj { testobj freeallvars @@ -213,28 +213,28 @@ test stringObj-8.2 {DupUnicodeInternalRep, mixed width chars} testobj { set y $x list [testobj objtype $x] [testobj objtype $y] [append x "\xAE\xBF\xEF"] \ [set y] [testobj objtype $x] [testobj objtype $y] -} "utf32string utf32string abc\xEF\xBF\xAEghi\xAE\xBF\xEF abc\xEF\xBF\xAEghi utf32string utf32string" +} "string string abc\xEF\xBF\xAEghi\xAE\xBF\xEF abc\xEF\xBF\xAEghi string string" test stringObj-8.3 {DupUnicodeInternalRep, mixed width chars} testobj { set x abc\xEF\xBF\xAEghi set y $x string length $x list [testobj objtype $x] [testobj objtype $y] [append x "\xAE\xBF\xEF"] \ [set y] [testobj objtype $x] [testobj objtype $y] -} "utf32string utf32string abc\xEF\xBF\xAEghi\xAE\xBF\xEF abc\xEF\xBF\xAEghi utf32string utf32string" +} "string string abc\xEF\xBF\xAEghi\xAE\xBF\xEF abc\xEF\xBF\xAEghi string string" test stringObj-8.4 {DupUnicodeInternalRep, all byte-size chars} testobj { set x abcdefghi string length $x set y $x list [testobj objtype $x] [testobj objtype $y] [append x jkl] \ [set y] [testobj objtype $x] [testobj objtype $y] -} {utf32string utf32string abcdefghijkl abcdefghi utf32string utf32string} +} {string string abcdefghijkl abcdefghi string string} test stringObj-8.5 {DupUnicodeInternalRep, all byte-size chars} testobj { set x abcdefghi set y $x string length $x list [testobj objtype $x] [testobj objtype $y] [append x jkl] \ [set y] [testobj objtype $x] [testobj objtype $y] -} {utf32string utf32string abcdefghijkl abcdefghi utf32string utf32string} +} {string string abcdefghijkl abcdefghi string string} test stringObj-9.1 {TclAppendObjToObj, mixed src & dest} {testobj testdstring} { set x abc\xEF\xBF\xAEghi @@ -244,15 +244,15 @@ test stringObj-9.1 {TclAppendObjToObj, mixed src & dest} {testobj testdstring} { string length $x list [testobj objtype $x] [testobj objtype $y] [append x $y] \ [set y] [testobj objtype $x] [testobj objtype $y] -} "utf32string none abc\xEF\xBF\xAEghi\xAE\xBF\xEF \xAE\xBF\xEF utf32string none" +} "string none abc\xEF\xBF\xAEghi\xAE\xBF\xEF \xAE\xBF\xEF string none" test stringObj-9.2 {TclAppendObjToObj, mixed src & dest} testobj { set x abc\xEF\xBF\xAEghi string length $x list [testobj objtype $x] [append x $x] [testobj objtype $x] \ [append x $x] [testobj objtype $x] -} "utf32string abc\xEF\xBF\xAEghiabc\xEF\xBF\xAEghi utf32string\ +} "string abc\xEF\xBF\xAEghiabc\xEF\xBF\xAEghi string\ abc\xEF\xBF\xAEghiabc\xEF\xBF\xAEghiabc\xEF\xBF\xAEghiabc\xEF\xBF\xAEghi\ -utf32string" +string" test stringObj-9.3 {TclAppendObjToObj, mixed src & 1-byte dest} {testobj testdstring} { set x abcdefghi testdstring free @@ -261,7 +261,7 @@ test stringObj-9.3 {TclAppendObjToObj, mixed src & 1-byte dest} {testobj testdst string length $x list [testobj objtype $x] [testobj objtype $y] [append x $y] \ [set y] [testobj objtype $x] [testobj objtype $y] -} "utf32string none abcdefghi\xAE\xBF\xEF \xAE\xBF\xEF utf32string none" +} "string none abcdefghi\xAE\xBF\xEF \xAE\xBF\xEF string none" test stringObj-9.4 {TclAppendObjToObj, 1-byte src & dest} {testobj testdstring} { set x abcdefghi testdstring free @@ -270,14 +270,14 @@ test stringObj-9.4 {TclAppendObjToObj, 1-byte src & dest} {testobj testdstring} string length $x list [testobj objtype $x] [testobj objtype $y] [append x $y] \ [set y] [testobj objtype $x] [testobj objtype $y] -} {utf32string none abcdefghijkl jkl utf32string none} +} {string none abcdefghijkl jkl string none} test stringObj-9.5 {TclAppendObjToObj, 1-byte src & dest} testobj { set x abcdefghi string length $x list [testobj objtype $x] [append x $x] [testobj objtype $x] \ [append x $x] [testobj objtype $x] -} {utf32string abcdefghiabcdefghi utf32string abcdefghiabcdefghiabcdefghiabcdefghi\ -utf32string} +} {string abcdefghiabcdefghi string abcdefghiabcdefghiabcdefghiabcdefghi\ +string} test stringObj-9.6 {TclAppendObjToObj, 1-byte src & mixed dest} {testobj testdstring} { set x abc\xEF\xBF\xAEghi testdstring free @@ -286,33 +286,33 @@ test stringObj-9.6 {TclAppendObjToObj, 1-byte src & mixed dest} {testobj testdst string length $x list [testobj objtype $x] [testobj objtype $y] [append x $y] \ [set y] [testobj objtype $x] [testobj objtype $y] -} "utf32string none abc\xEF\xBF\xAEghijkl jkl utf32string none" +} "string none abc\xEF\xBF\xAEghijkl jkl string none" test stringObj-9.7 {TclAppendObjToObj, integer src & dest} testobj { set x [expr {4 * 5}] set y [expr {4 + 5}] list [testobj objtype $x] [testobj objtype $y] [append x $y] \ [testobj objtype $x] [append x $y] [testobj objtype $x] \ [testobj objtype $y] -} {int int 209 utf32string 2099 utf32string int} +} {int int 209 string 2099 string int} test stringObj-9.8 {TclAppendObjToObj, integer src & dest} testobj { set x [expr {4 * 5}] list [testobj objtype $x] [append x $x] [testobj objtype $x] \ [append x $x] [testobj objtype $x] -} {int 2020 utf32string 20202020 utf32string} +} {int 2020 string 20202020 string} test stringObj-9.9 {TclAppendObjToObj, integer src & 1-byte dest} testobj { set x abcdefghi set y [expr {4 + 5}] string length $x list [testobj objtype $x] [testobj objtype $y] [append x $y] \ [set y] [testobj objtype $x] [testobj objtype $y] -} {utf32string int abcdefghi9 9 utf32string int} +} {string int abcdefghi9 9 string int} test stringObj-9.10 {TclAppendObjToObj, integer src & mixed dest} testobj { set x abc\xEF\xBF\xAEghi set y [expr {4 + 5}] string length $x list [testobj objtype $x] [testobj objtype $y] [append x $y] \ [set y] [testobj objtype $x] [testobj objtype $y] -} "utf32string int abc\xEF\xBF\xAEghi9 9 utf32string int" +} "string int abc\xEF\xBF\xAEghi9 9 string int" test stringObj-9.11 {TclAppendObjToObj, mixed src & 1-byte dest index check} testobj { # bug 2678, in <=8.2.0, the second obj (the one to append) in # Tcl_AppendObjToObj was not correctly checked to see if it was all one @@ -336,20 +336,20 @@ test stringObj-10.1 {Tcl_GetRange with all byte-size chars} {testobj testdstring set x [testdstring get] list [testobj objtype $x] [set y [string range $x 1 end-1]] \ [testobj objtype $x] [testobj objtype $y] -} [list none bcde utf32string utf32string] +} [list none bcde string string] test stringObj-10.2 {Tcl_GetRange with some mixed width chars} {testobj testdstring} { testdstring free testdstring append "abcïïdef" -1 set x [testdstring get] list [testobj objtype $x] [set y [string range $x 1 end-1]] \ [testobj objtype $x] [testobj objtype $y] -} [list none "bcïïde" utf32string utf32string] +} [list none "bcïïde" string string] test stringObj-10.3 {Tcl_GetRange with some mixed width chars} testobj { set x "abcïïdef" string length $x list [testobj objtype $x] [set y [string range $x 1 end-1]] \ [testobj objtype $x] [testobj objtype $y] -} [list utf32string "bcïïde" utf32string utf32string] +} [list string "bcïïde" string string] test stringObj-10.4 {Tcl_GetRange with some mixed width chars} testobj { set a "ïa¿b®cï¿d®" set result [list] @@ -368,7 +368,7 @@ test stringObj-11.1 {UpdateStringOfString} testobj { set x 2345 list [string index $x end] [testobj objtype $x] [incr x] \ [testobj objtype $x] -} {5 utf32string 2346 int} +} {5 string 2346 int} test stringObj-12.1 {Tcl_GetUniChar with byte-size chars} testobj { set x "abcdefghi" diff --git a/win/makefile.vc b/win/makefile.vc index 1ef64f2..6ff6118 100644 --- a/win/makefile.vc +++ b/win/makefile.vc @@ -52,7 +52,7 @@ # turn on the 64-bit compiler, if your SDK has it. # # Basic macros and options usable on the commandline (see rules.vc for more info): -# OPTS=msvcrt,noembed,nothreads,pdbs,profile,static,symbols,thrdalloc,time64bit,unchecked,utfmax,none +# OPTS=msvcrt,noembed,nothreads,pdbs,profile,static,symbols,thrdalloc,time64bit,unchecked,utf16,none # Sets special options for the core. The default is for none. # Any combination of the above may be used (comma separated). # 'none' will over-ride everything to nothing. @@ -80,7 +80,7 @@ # unchecked = Allows a symbols build to not use the debug # enabled runtime (msvcrt.dll not msvcrtd.dll # or libcmt.lib not libcmtd.lib). -# utfmax = Forces a build using UTF-32 representation internally. +# utf16 = Forces a build using UTF-16 representation internally. # # STATS=compdbg,memdbg,none # Sets optional memory and bytecode compiler debugging code added diff --git a/win/rules.vc b/win/rules.vc index 372d70a..713e7f9 100644 --- a/win/rules.vc +++ b/win/rules.vc @@ -884,9 +884,9 @@ USE_THREAD_ALLOC= 0 _USE_64BIT_TIME_T = 1 !endif -!if [nmakehlp -f $(OPTS) "utfmax"] -!message *** Force allowing 4-byte UTF-8 sequences internally -TCL_UTF_MAX = 4 +!if [nmakehlp -f $(OPTS) "utf16"] +!message *** Force UTF-16 internally +TCL_UTF_MAX = 3 !endif !endif -- cgit v0.12 From 4dd5be298811da7ad60f1cd93d3ff09e4baf34d0 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 15 Mar 2022 16:17:42 +0000 Subject: update rules.vc --- win/rules.vc | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/win/rules.vc b/win/rules.vc index 713e7f9..6d9bbf8 100644 --- a/win/rules.vc +++ b/win/rules.vc @@ -816,8 +816,7 @@ DOTSEPARATED=$(DOTSEPARATED:b=.) # configuration (ignored for Tcl itself) # _USE_64BIT_TIME_T - forces a build using 64-bit time_t for 32-bit build # (CRT library should support this, not needed for Tcl 9.x) -# TCL_UTF_MAX=4 - forces a build allowing 4-byte UTF-8 sequences internally. -# (Not needed for Tcl 9.x) +# TCL_UTF_MAX=3 - forces a build using UTF-16 internally (not recommended). # Further, LINKERFLAGS are modified based on above. # Default values for all the above @@ -1423,13 +1422,13 @@ OPTDEFINES = $(OPTDEFINES) /DNO_STRTOI64=1 !if "$(_USE_64BIT_TIME_T)" == "1" OPTDEFINES = $(OPTDEFINES) /D_USE_64BIT_TIME_T=1 !endif -!if "$(TCL_UTF_MAX)" == "4" -OPTDEFINES = $(OPTDEFINES) /DTCL_UTF_MAX=4 -!endif # _ATL_XP_TARGETING - Newer SDK's need this to build for XP COMPILERFLAGS = /D_ATL_XP_TARGETING !endif +!if "$(TCL_UTF_MAX)" == "3" +OPTDEFINES = $(OPTDEFINES) /DTCL_UTF_MAX=3 +!endif # Like the TEA system only set this non empty for non-Tk extensions # Note: some extensions use PACKAGE_NAME and others use PACKAGE_TCLNAME -- cgit v0.12 From 459642686539ca9da0448167dbbf7fef7435f864 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 16 Mar 2022 22:21:39 +0000 Subject: Handle Tcl_GetCharLength --- generic/tclCmdMZ.c | 22 ++++++++--------- generic/tclCompCmdsSZ.c | 2 +- generic/tclExecute.c | 10 ++++---- generic/tclIO.c | 2 +- generic/tclInt.h | 2 ++ generic/tclProc.c | 2 +- generic/tclStringObj.c | 64 ++++++++++++++++++++++++++++++++++++++++++------- 7 files changed, 76 insertions(+), 28 deletions(-) diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index db4002a..c2b4eb3 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -256,7 +256,7 @@ Tcl_RegexpObjCmd( */ objPtr = objv[1]; - stringLength = Tcl_GetCharLength(objPtr); + stringLength = TclGetCharLength(objPtr); if (startIndex) { TclGetIntForIndexM(interp, startIndex, stringLength, &offset); @@ -581,7 +581,7 @@ Tcl_RegsubObjCmd( objv += idx; if (startIndex) { - int stringLength = Tcl_GetCharLength(objv[1]); + int stringLength = TclGetCharLength(objv[1]); TclGetIntForIndexM(interp, startIndex, stringLength, &offset); Tcl_DecrRefCount(startIndex); @@ -1316,7 +1316,7 @@ StringFirstCmd( } if (objc == 4) { - int size = Tcl_GetCharLength(objv[2]); + int size = TclGetCharLength(objv[2]); if (TCL_OK != TclGetIntForIndexM(interp, objv[3], size - 1, &start)) { return TCL_ERROR; @@ -1360,7 +1360,7 @@ StringLastCmd( } if (objc == 4) { - int size = Tcl_GetCharLength(objv[2]); + int size = TclGetCharLength(objv[2]); if (TCL_OK != TclGetIntForIndexM(interp, objv[3], size - 1, &last)) { return TCL_ERROR; @@ -1406,7 +1406,7 @@ StringIndexCmd( * Get the char length to calculate what 'end' means. */ - length = Tcl_GetCharLength(objv[1]); + length = TclGetCharLength(objv[1]); if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK) { return TCL_ERROR; } @@ -1474,7 +1474,7 @@ StringInsertCmd( return TCL_ERROR; } - length = Tcl_GetCharLength(objv[1]); + length = TclGetCharLength(objv[1]); if (TclGetIntForIndexM(interp, objv[2], length, &index) != TCL_OK) { return TCL_ERROR; } @@ -1669,7 +1669,7 @@ StringIsCmd( p++; } TclNewStringObj(tmpStr, string1, p-string1); - failat = Tcl_GetCharLength(tmpStr); + failat = TclGetCharLength(tmpStr); TclDecrRefCount(tmpStr); break; } @@ -1849,7 +1849,7 @@ StringIsCmd( p++; } TclNewStringObj(tmpStr, string1, p-string1); - failat = Tcl_GetCharLength(tmpStr); + failat = TclGetCharLength(tmpStr); TclDecrRefCount(tmpStr); break; } @@ -2293,7 +2293,7 @@ StringRangeCmd( * 'end' refers to the last character, not one past it. */ - length = Tcl_GetCharLength(objv[1]) - 1; + length = TclGetCharLength(objv[1]) - 1; if (TclGetIntForIndexM(interp, objv[2], length, &first) != TCL_OK || TclGetIntForIndexM(interp, objv[3], length, &last) != TCL_OK) { @@ -2394,7 +2394,7 @@ StringRplcCmd( return TCL_ERROR; } - length = Tcl_GetCharLength(objv[1]); + length = TclGetCharLength(objv[1]); end = length - 1; if (TclGetIntForIndexM(interp, objv[2], end, &first) != TCL_OK || @@ -2880,7 +2880,7 @@ StringLenCmd( return TCL_ERROR; } - Tcl_SetObjResult(interp, Tcl_NewWideIntObj(Tcl_GetCharLength(objv[1]))); + Tcl_SetObjResult(interp, Tcl_NewWideIntObj(TclGetCharLength(objv[1]))); return TCL_OK; } diff --git a/generic/tclCompCmdsSZ.c b/generic/tclCompCmdsSZ.c index aa2d13e..62909eb 100644 --- a/generic/tclCompCmdsSZ.c +++ b/generic/tclCompCmdsSZ.c @@ -890,7 +890,7 @@ TclCompileStringLenCmd( */ char buf[TCL_INTEGER_SPACE]; - int len = Tcl_GetCharLength(objPtr); + int len = TclGetCharLength(objPtr); len = sprintf(buf, "%d", len); PushLiteral(envPtr, buf, len); diff --git a/generic/tclExecute.c b/generic/tclExecute.c index 67df5f4..f09f75c 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -5244,7 +5244,7 @@ TEBCresume( case INST_STR_LEN: valuePtr = OBJ_AT_TOS; - length = Tcl_GetCharLength(valuePtr); + length = TclGetCharLength(valuePtr); TclNewIntObj(objResultPtr, length); TRACE(("\"%.20s\" => %d\n", O2S(valuePtr), length)); NEXT_INST_F(1, 1, 1); @@ -5310,7 +5310,7 @@ TEBCresume( * Get char length to calulate what 'end' means. */ - length = Tcl_GetCharLength(valuePtr); + length = TclGetCharLength(valuePtr); DECACHE_STACK_INFO(); if (TclGetIntForIndexM(interp, value2Ptr, length-1, &index)!=TCL_OK) { CACHE_STACK_INFO(); @@ -5353,7 +5353,7 @@ TEBCresume( case INST_STR_RANGE: TRACE(("\"%.20s\" %.20s %.20s =>", O2S(OBJ_AT_DEPTH(2)), O2S(OBJ_UNDER_TOS), O2S(OBJ_AT_TOS))); - length = Tcl_GetCharLength(OBJ_AT_DEPTH(2)) - 1; + length = TclGetCharLength(OBJ_AT_DEPTH(2)) - 1; DECACHE_STACK_INFO(); if (TclGetIntForIndexM(interp, OBJ_UNDER_TOS, length, @@ -5382,7 +5382,7 @@ TEBCresume( valuePtr = OBJ_AT_TOS; fromIdx = TclGetInt4AtPtr(pc+1); toIdx = TclGetInt4AtPtr(pc+5); - length = Tcl_GetCharLength(valuePtr); + length = TclGetCharLength(valuePtr); TRACE(("\"%.20s\" %d %d => ", O2S(valuePtr), fromIdx, toIdx)); /* Every range of an empty value is an empty value */ @@ -5428,7 +5428,7 @@ TEBCresume( case INST_STR_REPLACE: value3Ptr = POP_OBJECT(); valuePtr = OBJ_AT_DEPTH(2); - endIdx = Tcl_GetCharLength(valuePtr) - 1; + endIdx = TclGetCharLength(valuePtr) - 1; TRACE(("\"%.20s\" %s %s \"%.20s\" => ", O2S(valuePtr), O2S(OBJ_UNDER_TOS), O2S(OBJ_AT_TOS), O2S(value3Ptr))); DECACHE_STACK_INFO(); diff --git a/generic/tclIO.c b/generic/tclIO.c index 9d88948..f41e481 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -3556,7 +3556,7 @@ Tcl_Close( result = flushcode; } if ((result != 0) && (result != TCL_ERROR) && (interp != NULL) - && 0 == Tcl_GetCharLength(Tcl_GetObjResult(interp))) { + && 0 == TclGetCharLength(Tcl_GetObjResult(interp))) { Tcl_SetErrno(result); Tcl_SetObjResult(interp, Tcl_NewStringObj(Tcl_PosixError(interp), -1)); diff --git a/generic/tclInt.h b/generic/tclInt.h index 75f8858..2d828e8 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3322,6 +3322,7 @@ MODULE_SCOPE int TclZipfs_Init(Tcl_Interp *interp); #if TCL_UTF_MAX > 3 MODULE_SCOPE int *TclGetUnicodeFromObj_(Tcl_Obj *, int *); MODULE_SCOPE Tcl_Obj *TclNewUnicodeObj(const int *, int); + MODULE_SCOPE int TclGetCharLength(Tcl_Obj *); MODULE_SCOPE void TclAppendUnicodeToObj(Tcl_Obj *, const int *, int); MODULE_SCOPE int TclUniCharNcasecmp(const int *, const int *, unsigned long); MODULE_SCOPE int TclUniCharCaseMatch(const int *, const int *, int); @@ -3329,6 +3330,7 @@ MODULE_SCOPE int TclZipfs_Init(Tcl_Interp *interp); #else # define TclGetUnicodeFromObj_ Tcl_GetUnicodeFromObj # define TclNewUnicodeObj Tcl_NewUnicodeObj +# define TclGetCharLength Tcl_GetCharLength # define TclAppendUnicodeToObj Tcl_AppendUnicodeToObj # define TclUniCharNcasecmp Tcl_UniCharNcasecmp # define TclUniCharCaseMatch Tcl_UniCharCaseMatch diff --git a/generic/tclProc.c b/generic/tclProc.c index 45d1afd..75687f0 100644 --- a/generic/tclProc.c +++ b/generic/tclProc.c @@ -529,7 +529,7 @@ TclCreateProc( "FORMALARGUMENTFORMAT", NULL); goto procError; } - if ((fieldCount == 0) || (Tcl_GetCharLength(fieldValues[0]) == 0)) { + if ((fieldCount == 0) || (TclGetCharLength(fieldValues[0]) == 0)) { Tcl_SetObjResult(interp, Tcl_NewStringObj( "argument with no name", -1)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "PROC", diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 1c24716..2dc79eb 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -590,7 +590,7 @@ Tcl_NewUnicodeObj( */ int -Tcl_GetCharLength( +TclGetCharLength( Tcl_Obj *objPtr) /* The String object to get the num chars * of. */ { @@ -643,6 +643,52 @@ Tcl_GetCharLength( return numChars; } +#if TCL_UTF_MAX > 3 +int +Tcl_GetCharLength( + Tcl_Obj *objPtr) /* The String object to get the num chars + * of. */ +{ + String *stringPtr; + int numChars; + + /* + * Quick, no-shimmer return for short string reps. + */ + + if ((objPtr->bytes) && (objPtr->length < 2)) { + /* 0 bytes -> 0 chars; 1 byte -> 1 char */ + return objPtr->length; + } + + /* + * Optimize the case where we're really dealing with a bytearray object; + * we don't need to convert to a string to perform the get-length operation. + * + * Starting in Tcl 8.7, we check for a "pure" bytearray, because the + * machinery behind that test is using a proper bytearray ObjType. We + * could also compute length of an improper bytearray without shimmering + * but there's no value in that. We *want* to shimmer an improper bytearray + * because improper bytearrays have worthless internal reps. + */ + + if (TclIsPureByteArray(objPtr)) { + int length; + + (void) Tcl_GetByteArrayFromObj(objPtr, &length); + return length; + } + + /* + * OK, need to work with the object as a string. + */ + + SetUTF16StringFromAny(NULL, objPtr); + stringPtr = GET_STRING(objPtr); + return stringPtr->numChars; +} +#endif + /* *---------------------------------------------------------------------- * @@ -2336,7 +2382,7 @@ Tcl_AppendFormatToObj( goto errorMsg; case 's': if (gotPrecision) { - numChars = Tcl_GetCharLength(segment); + numChars = TclGetCharLength(segment); if (precision < numChars) { if (precision < 1) { TclNewObj(segment); @@ -2521,7 +2567,7 @@ Tcl_AppendFormatToObj( gotZero = 0; } if (gotZero) { - length += Tcl_GetCharLength(segment); + length += TclGetCharLength(segment); if (length < width) { segmentLimit -= width - length; } @@ -2652,7 +2698,7 @@ Tcl_AppendFormatToObj( gotZero = 0; } if (gotZero) { - length += Tcl_GetCharLength(segment); + length += TclGetCharLength(segment); if (length < width) { segmentLimit -= width - length; } @@ -2763,7 +2809,7 @@ Tcl_AppendFormatToObj( } if (width>0 && numChars<0) { - numChars = Tcl_GetCharLength(segment); + numChars = TclGetCharLength(segment); } if (!gotMinus && width>0) { if (numChars < width) { @@ -3720,8 +3766,8 @@ TclStringCmp( s2 = (char *) TclGetUnicodeFromObj_(value2Ptr, &s2len); memCmpFn = (memCmpFn_t)(void *)TclUniCharNcasecmp; } else { - s1len = Tcl_GetCharLength(value1Ptr); - s2len = Tcl_GetCharLength(value2Ptr); + s1len = TclGetCharLength(value1Ptr); + s2len = TclGetCharLength(value2Ptr); if ((s1len == value1Ptr->length) && (value1Ptr->bytes != NULL) && (s2len == value2Ptr->length) @@ -3866,7 +3912,7 @@ TclStringFirst( Tcl_Obj *haystack, int start) { - int lh, ln = Tcl_GetCharLength(needle); + int lh, ln = TclGetCharLength(needle); Tcl_Obj *result; int value = -1; Tcl_UniChar *checkStr, *endStr, *uh, *un; @@ -3973,7 +4019,7 @@ TclStringLast( Tcl_Obj *haystack, int last) { - int lh, ln = Tcl_GetCharLength(needle); + int lh, ln = TclGetCharLength(needle); Tcl_Obj *result; int value = -1; Tcl_UniChar *checkStr, *uh, *un; -- cgit v0.12 From 17098d70c767e79c2deb65981af371cb209cd159 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 16 Mar 2022 23:08:28 +0000 Subject: Handle Tcl_UtfAtIndex --- generic/tclBinary.c | 4 ++-- generic/tclCmdMZ.c | 12 ++++++------ generic/tclEncoding.c | 2 +- generic/tclIO.c | 2 +- generic/tclInt.h | 6 ++++-- generic/tclRegexp.c | 4 ++-- generic/tclUtf.c | 38 +++++++++++++++++++++++++++++++++----- 7 files changed, 49 insertions(+), 19 deletions(-) diff --git a/generic/tclBinary.c b/generic/tclBinary.c index 4717b05..bc17232 100644 --- a/generic/tclBinary.c +++ b/generic/tclBinary.c @@ -434,7 +434,7 @@ TclGetBytesFromObj( irPtr = TclFetchInternalRep(objPtr, &tclByteArrayType); baPtr = GET_BYTEARRAY(irPtr); - nonbyte = Tcl_UtfAtIndex(Tcl_GetString(objPtr), baPtr->bad); + nonbyte = TclUtfAtIndex(Tcl_GetString(objPtr), baPtr->bad); TclUtfToUCS4(nonbyte, &ucs4); Tcl_SetObjResult(interp, Tcl_ObjPrintf( @@ -473,7 +473,7 @@ Tcl_GetBytesFromObj( irPtr = TclFetchInternalRep(objPtr, &tclByteArrayType); baPtr = GET_BYTEARRAY(irPtr); - nonbyte = Tcl_UtfAtIndex(Tcl_GetString(objPtr), baPtr->bad); + nonbyte = TclUtfAtIndex(Tcl_GetString(objPtr), baPtr->bad); TclUtfToUCS4(nonbyte, &ucs4); Tcl_SetObjResult(interp, Tcl_ObjPrintf( diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index c2b4eb3..bd5745b 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -2954,8 +2954,8 @@ StringLowerCmd( } string1 = TclGetStringFromObj(objv[1], &length1); - start = Tcl_UtfAtIndex(string1, first); - end = Tcl_UtfAtIndex(start, last - first + 1); + start = TclUtfAtIndex(string1, first); + end = TclUtfAtIndex(start, last - first + 1); resultPtr = Tcl_NewStringObj(string1, end - string1); string2 = TclGetString(resultPtr) + (start - string1); @@ -3039,8 +3039,8 @@ StringUpperCmd( } string1 = TclGetStringFromObj(objv[1], &length1); - start = Tcl_UtfAtIndex(string1, first); - end = Tcl_UtfAtIndex(start, last - first + 1); + start = TclUtfAtIndex(string1, first); + end = TclUtfAtIndex(start, last - first + 1); resultPtr = Tcl_NewStringObj(string1, end - string1); string2 = TclGetString(resultPtr) + (start - string1); @@ -3124,8 +3124,8 @@ StringTitleCmd( } string1 = TclGetStringFromObj(objv[1], &length1); - start = Tcl_UtfAtIndex(string1, first); - end = Tcl_UtfAtIndex(start, last - first + 1); + start = TclUtfAtIndex(string1, first); + end = TclUtfAtIndex(start, last - first + 1); resultPtr = Tcl_NewStringObj(string1, end - string1); string2 = TclGetString(resultPtr) + (start - string1); diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 4630a02..6890d3a 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -1294,7 +1294,7 @@ Tcl_ExternalToUtf( if (*dstCharsPtr <= maxChars) { break; } - dstLen = Tcl_UtfAtIndex(dst, maxChars) - dst + (TCL_UTF_MAX - 1); + dstLen = TclUtfAtIndex(dst, maxChars) - dst + (TCL_UTF_MAX - 1); *statePtr = savedState; } while (1); if (!noTerminate) { diff --git a/generic/tclIO.c b/generic/tclIO.c index f41e481..59aa50f 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -6379,7 +6379,7 @@ ReadChars( * bytes demanded by the Tcl_ExternalToUtf() call! */ - dstLimit = Tcl_UtfAtIndex(dst, charsToRead) - dst + (TCL_UTF_MAX - 1); + dstLimit = TclUtfAtIndex(dst, charsToRead) - dst + (TCL_UTF_MAX - 1); statePtr->flags = savedFlags; statePtr->inputEncodingFlags = savedIEFlags; statePtr->inputEncodingState = savedState; diff --git a/generic/tclInt.h b/generic/tclInt.h index 2d828e8..0705c1d 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3327,6 +3327,7 @@ MODULE_SCOPE int TclZipfs_Init(Tcl_Interp *interp); MODULE_SCOPE int TclUniCharNcasecmp(const int *, const int *, unsigned long); MODULE_SCOPE int TclUniCharCaseMatch(const int *, const int *, int); MODULE_SCOPE int TclUniCharNcmp(const int *, const int *, unsigned long); + MODULE_SCOPE const char *TclUtfAtIndex(const char *, int); #else # define TclGetUnicodeFromObj_ Tcl_GetUnicodeFromObj # define TclNewUnicodeObj Tcl_NewUnicodeObj @@ -3335,6 +3336,7 @@ MODULE_SCOPE int TclZipfs_Init(Tcl_Interp *interp); # define TclUniCharNcasecmp Tcl_UniCharNcasecmp # define TclUniCharCaseMatch Tcl_UniCharCaseMatch # define TclUniCharNcmp Tcl_UniCharNcmp +# define TclUtfAtIndex Tcl_UtfAtIndex #endif @@ -4741,8 +4743,8 @@ MODULE_SCOPE const TclFileAttrProcs tclpFileAttrProcs[]; : Tcl_UtfToUniChar(str, chPtr)) #else #define TclUtfToUniChar(str, chPtr) \ - ((((unsigned char) *(str)) < 0x80) ? \ - ((*(chPtr) = (unsigned char) *(str)), 1) \ + (((UCHAR(*(str))) < 0x80) ? \ + ((*(chPtr) = UCHAR(*(str))), 1) \ : Tcl_UtfToChar16(str, chPtr)) #endif diff --git a/generic/tclRegexp.c b/generic/tclRegexp.c index bb8a6ad..ff7c72c 100644 --- a/generic/tclRegexp.c +++ b/generic/tclRegexp.c @@ -271,8 +271,8 @@ Tcl_RegExpRange( } else { string = regexpPtr->string; } - *startPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_so); - *endPtr = Tcl_UtfAtIndex(string, regexpPtr->matches[index].rm_eo); + *startPtr = TclUtfAtIndex(string, regexpPtr->matches[index].rm_so); + *endPtr = TclUtfAtIndex(string, regexpPtr->matches[index].rm_eo); } } diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 02f4358..c47ee97 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -1166,27 +1166,55 @@ Tcl_UniCharAtIndex( *--------------------------------------------------------------------------- */ +#if TCL_UTF_MAX < 4 +# undef Tcl_UtfToUniChar +# define Tcl_UtfToUniChar Tcl_UtfToChar16 +#endif + const char * -Tcl_UtfAtIndex( +TclUtfAtIndex( const char *src, /* The UTF-8 string. */ int index) /* The position of the desired character. */ { - Tcl_UniChar ch = 0; + Tcl_UniChar ch = 0; int len = 0; while (index-- > 0) { - len = TclUtfToUniChar(src, &ch); + len = (Tcl_UtfToUniChar)(src, &ch); src += len; } #if TCL_UTF_MAX < 4 if ((ch >= 0xD800) && (len < 3)) { /* Index points at character following high Surrogate */ - src += TclUtfToUniChar(src, &ch); + src += (Tcl_UtfToUniChar)(src, &ch); } #endif return src; } +#if TCL_UTF_MAX > 3 +const char * +Tcl_UtfAtIndex( + const char *src, /* The UTF-8 string. */ + int index) /* The position of the desired character. */ +{ + unsigned short ch = 0; + int len = 0; + + while (index-- > 0) { + len = Tcl_UtfToChar16(src, &ch); + src += len; + } + if ((ch >= 0xD800) && (len < 3)) { + /* Index points at character following high Surrogate */ + src += Tcl_UtfToChar16(src, &ch); + } + return src; +} + + +#endif + /* *--------------------------------------------------------------------------- * @@ -2896,7 +2924,7 @@ TclUtfToUCS4( int *ucs4Ptr) /* Filled with the UCS4 codepoint represented * by the UTF-8 string. */ { - /* Make use of the #undef Tcl_UtfToUniChar above, which already handles UCS4. */ +# undef Tcl_UtfToUniChar return Tcl_UtfToUniChar(src, ucs4Ptr); } -- cgit v0.12 From 7d893da1b984ded235163f3ec8018195d9058f2a Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 22 Mar 2022 11:25:13 +0000 Subject: More progress --- generic/tclInt.h | 4 ++++ generic/tclStringObj.c | 17 +++++------------ generic/tclStubInit.c | 2 ++ generic/tclUtf.c | 1 + tests/string.test | 2 +- tests/utf.test | 2 +- 6 files changed, 14 insertions(+), 14 deletions(-) diff --git a/generic/tclInt.h b/generic/tclInt.h index 0705c1d..538bca3 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3328,6 +3328,10 @@ MODULE_SCOPE int TclZipfs_Init(Tcl_Interp *interp); MODULE_SCOPE int TclUniCharCaseMatch(const int *, const int *, int); MODULE_SCOPE int TclUniCharNcmp(const int *, const int *, unsigned long); MODULE_SCOPE const char *TclUtfAtIndex(const char *, int); +# undef Tcl_GetCharLength +# define Tcl_GetCharLength TclGetCharLength +# undef Tcl_UtfAtIndex +# define Tcl_UtfAtIndex TclUtfAtIndex #else # define TclGetUnicodeFromObj_ Tcl_GetUnicodeFromObj # define TclNewUnicodeObj Tcl_NewUnicodeObj diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 2dc79eb..6417e1b 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -644,12 +644,12 @@ TclGetCharLength( } #if TCL_UTF_MAX > 3 +#undef Tcl_GetCharLength int Tcl_GetCharLength( Tcl_Obj *objPtr) /* The String object to get the num chars * of. */ { - String *stringPtr; int numChars; /* @@ -673,19 +673,12 @@ Tcl_GetCharLength( */ if (TclIsPureByteArray(objPtr)) { - int length; - (void) Tcl_GetByteArrayFromObj(objPtr, &length); - return length; + (void) Tcl_GetByteArrayFromObj(objPtr, &numChars); + } else { + numChars = Tcl_NumUtfChars(Tcl_GetString(objPtr), -1); } - - /* - * OK, need to work with the object as a string. - */ - - SetUTF16StringFromAny(NULL, objPtr); - stringPtr = GET_STRING(objPtr); - return stringPtr->numChars; + return numChars; } #endif diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c index f9430cb..e3ebb8b 100644 --- a/generic/tclStubInit.c +++ b/generic/tclStubInit.c @@ -76,6 +76,8 @@ #undef Tcl_MacOSXOpenBundleResources #undef TclWinConvertWSAError #undef TclWinConvertError +#undef Tcl_GetCharLength +#undef Tcl_UtfAtIndex #if defined(_WIN32) || defined(__CYGWIN__) #define TclWinConvertWSAError (void (*)(DWORD))(void *)Tcl_WinConvertError #define TclWinConvertError (void (*)(DWORD))(void *)Tcl_WinConvertError diff --git a/generic/tclUtf.c b/generic/tclUtf.c index c47ee97..4dd1e09 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -1193,6 +1193,7 @@ TclUtfAtIndex( } #if TCL_UTF_MAX > 3 +#undef Tcl_UtfAtIndex const char * Tcl_UtfAtIndex( const char *src, /* The UTF-8 string. */ diff --git a/tests/string.test b/tests/string.test index 9cac73d..203d0c6 100644 --- a/tests/string.test +++ b/tests/string.test @@ -422,7 +422,7 @@ test string-4.16.$noComp {string first, normal string vs pure unicode string} -b # Representation checks are canaries run {list [representationpoke $s] [representationpoke $m] \ [string first $m $s]} -} -result {{utf32string 1} {utf32string 0} 2} +} -result {{string 1} {string 0} 2} test string-4.17.$noComp {string first, corner case} -body { run {string first a aaa 4294967295} } -result {-1} diff --git a/tests/utf.test b/tests/utf.test index 477216c..c79492e 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -1230,7 +1230,7 @@ test utf-19.1 {TclUniCharLen} -body { test utf-20.1 {TclUniCharNcmp} ucs4 { string compare [string range [format %c 0xFFFF] 0 0] [string range [format %c 0x10000] 0 0] } -1 -test utf-20.2 {[4c591fa487] TclUniCharNcmp/TclUtfNcmp} { +test utf-20.2 {[4c591fa487] TclUniCharNcmp/TclUtfNcmp} ucs4 { set one [format %c 0xFFFF] set two [format %c 0x10000] set first [string compare $one $two] -- cgit v0.12 From b5e6f95ea90be59cb281c089806f0e446e1272bc Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 22 Mar 2022 15:38:25 +0000 Subject: Feature-complete --- generic/tcl.decls | 9 ++++++++ generic/tclDecls.h | 15 +++++++++++++ generic/tclInt.h | 12 +++++++---- generic/tclStringObj.c | 8 +++---- generic/tclStubInit.c | 3 +++ generic/tclUtf.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++- tests/string.test | 2 +- 7 files changed, 96 insertions(+), 10 deletions(-) diff --git a/generic/tcl.decls b/generic/tcl.decls index f5b2e78..fa844e0 100644 --- a/generic/tcl.decls +++ b/generic/tcl.decls @@ -2446,6 +2446,15 @@ declare 660 { declare 668 { int Tcl_UniCharLen(const int *uniStr) } +declare 669 { + int TclNumUtfChars(const char *src, int length) +} +declare 670 { + int TclGetCharLength(Tcl_Obj *objPtr) +} +declare 671 { + const char *TclUtfAtIndex(const char *src, int index) +} # ----- BASELINE -- FOR -- 8.7.0 ----- # diff --git a/generic/tclDecls.h b/generic/tclDecls.h index 1952641..9f5e798 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -1957,6 +1957,12 @@ EXTERN int Tcl_AsyncMarkFromSignal(Tcl_AsyncHandler async, /* Slot 667 is reserved */ /* 668 */ EXTERN int Tcl_UniCharLen(const int *uniStr); +/* 669 */ +EXTERN int TclNumUtfChars(const char *src, int length); +/* 670 */ +EXTERN int TclGetCharLength(Tcl_Obj *objPtr); +/* 671 */ +EXTERN const char * TclUtfAtIndex(const char *src, int index); typedef struct { const struct TclPlatStubs *tclPlatStubs; @@ -2661,6 +2667,9 @@ typedef struct TclStubs { void (*reserved666)(void); void (*reserved667)(void); int (*tcl_UniCharLen) (const int *uniStr); /* 668 */ + int (*tclNumUtfChars) (const char *src, int length); /* 669 */ + int (*tclGetCharLength) (Tcl_Obj *objPtr); /* 670 */ + const char * (*tclUtfAtIndex) (const char *src, int index); /* 671 */ } TclStubs; extern const TclStubs *tclStubsPtr; @@ -4020,6 +4029,12 @@ extern const TclStubs *tclStubsPtr; /* Slot 667 is reserved */ #define Tcl_UniCharLen \ (tclStubsPtr->tcl_UniCharLen) /* 668 */ +#define TclNumUtfChars \ + (tclStubsPtr->tclNumUtfChars) /* 669 */ +#define TclGetCharLength \ + (tclStubsPtr->tclGetCharLength) /* 670 */ +#define TclUtfAtIndex \ + (tclStubsPtr->tclUtfAtIndex) /* 671 */ #endif /* defined(USE_TCL_STUBS) */ diff --git a/generic/tclInt.h b/generic/tclInt.h index 538bca3..73d6386 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3322,12 +3322,12 @@ MODULE_SCOPE int TclZipfs_Init(Tcl_Interp *interp); #if TCL_UTF_MAX > 3 MODULE_SCOPE int *TclGetUnicodeFromObj_(Tcl_Obj *, int *); MODULE_SCOPE Tcl_Obj *TclNewUnicodeObj(const int *, int); - MODULE_SCOPE int TclGetCharLength(Tcl_Obj *); MODULE_SCOPE void TclAppendUnicodeToObj(Tcl_Obj *, const int *, int); MODULE_SCOPE int TclUniCharNcasecmp(const int *, const int *, unsigned long); MODULE_SCOPE int TclUniCharCaseMatch(const int *, const int *, int); MODULE_SCOPE int TclUniCharNcmp(const int *, const int *, unsigned long); - MODULE_SCOPE const char *TclUtfAtIndex(const char *, int); +# undef Tcl_NumUtfChars +# define Tcl_NumUtfChars TclNumUtfChars # undef Tcl_GetCharLength # define Tcl_GetCharLength TclGetCharLength # undef Tcl_UtfAtIndex @@ -3335,11 +3335,15 @@ MODULE_SCOPE int TclZipfs_Init(Tcl_Interp *interp); #else # define TclGetUnicodeFromObj_ Tcl_GetUnicodeFromObj # define TclNewUnicodeObj Tcl_NewUnicodeObj -# define TclGetCharLength Tcl_GetCharLength # define TclAppendUnicodeToObj Tcl_AppendUnicodeToObj # define TclUniCharNcasecmp Tcl_UniCharNcasecmp # define TclUniCharCaseMatch Tcl_UniCharCaseMatch # define TclUniCharNcmp Tcl_UniCharNcmp +# undef TclNumUtfChars +# define TclNumUtfChars Tcl_NumUtfChars +# undef TclGetCharLength +# define TclGetCharLength Tcl_GetCharLength +# undef TclUtfAtIndex # define TclUtfAtIndex Tcl_UtfAtIndex #endif @@ -4764,7 +4768,7 @@ MODULE_SCOPE const TclFileAttrProcs tclpFileAttrProcs[]; *---------------------------------------------------------------- */ -#define TclNumUtfChars(numChars, bytes, numBytes) \ +#define TclNumUtfChars_UNUSED(numChars, bytes, numBytes) \ do { \ int _count, _i = (numBytes); \ unsigned char *_str = (unsigned char *) (bytes); \ diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 6417e1b..2b11877 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -637,7 +637,7 @@ TclGetCharLength( */ if (numChars == -1) { - TclNumUtfChars(numChars, objPtr->bytes, objPtr->length); + numChars = Tcl_NumUtfChars(objPtr->bytes, objPtr->length); stringPtr->numChars = numChars; } return numChars; @@ -782,7 +782,7 @@ Tcl_GetUniChar( */ if (stringPtr->numChars == -1) { - TclNumUtfChars(stringPtr->numChars, objPtr->bytes, objPtr->length); + stringPtr->numChars = Tcl_NumUtfChars(objPtr->bytes, objPtr->length); } if (stringPtr->numChars == objPtr->length) { return (unsigned char) objPtr->bytes[index]; @@ -991,7 +991,7 @@ Tcl_GetRange( */ if (stringPtr->numChars == -1) { - TclNumUtfChars(stringPtr->numChars, objPtr->bytes, objPtr->length); + stringPtr->numChars = Tcl_NumUtfChars(objPtr->bytes, objPtr->length); } if (stringPtr->numChars == objPtr->length) { if (last < 0 || last >= stringPtr->numChars) { @@ -4447,7 +4447,7 @@ ExtendUnicodeRepWithString( numOrigChars = stringPtr->numChars; } if (numAppendChars == -1) { - TclNumUtfChars(numAppendChars, bytes, numBytes); + numAppendChars = Tcl_NumUtfChars(bytes, numBytes); } needed = numOrigChars + numAppendChars; uniCharStringCheckLimits(needed); diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c index e3ebb8b..09163c3 100644 --- a/generic/tclStubInit.c +++ b/generic/tclStubInit.c @@ -1937,6 +1937,9 @@ const TclStubs tclStubs = { 0, /* 666 */ 0, /* 667 */ Tcl_UniCharLen, /* 668 */ + TclNumUtfChars, /* 669 */ + TclGetCharLength, /* 670 */ + TclUtfAtIndex, /* 671 */ }; /* !END!: Do not edit above this line. */ diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 4dd1e09..eda317f 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -799,7 +799,7 @@ Tcl_UtfCharComplete( */ int -Tcl_NumUtfChars( +TclNumUtfChars( const char *src, /* The UTF-8 string to measure. */ int length) /* The length of the string in bytes, or -1 * for strlen(string). */ @@ -850,6 +850,61 @@ Tcl_NumUtfChars( return i; } +#if TCL_UTF_MAX > 3 +#undef Tcl_NumUtfChars +int +Tcl_NumUtfChars( + const char *src, /* The UTF-8 string to measure. */ + int length) /* The length of the string in bytes, or -1 + * for strlen(string). */ +{ + unsigned short ch = 0; + int i = 0; + + if (length < 0) { + /* string is NUL-terminated, so TclUtfToUniChar calls are safe. */ + while ((*src != '\0') && (i < INT_MAX)) { + src += Tcl_UtfToChar16(src, &ch); + i++; + } + } else { + /* Will return value between 0 and length. No overflow checks. */ + + /* Pointer to the end of string. Never read endPtr[0] */ + const char *endPtr = src + length; + /* Pointer to last byte where optimization still can be used */ + const char *optPtr = endPtr - 4; + + /* + * Optimize away the call in this loop. Justified because... + * when (src <= optPtr), (endPtr - src) >= (endPtr - optPtr) + * By initialization above (endPtr - optPtr) = TCL_UTF_MAX + * So (endPtr - src) >= TCL_UTF_MAX, and passing that to + * Tcl_UtfCharComplete we know will cause return of 1. + */ + while (src <= optPtr + /* && Tcl_UtfCharComplete(src, endPtr - src) */ ) { + src += Tcl_UtfToChar16(src, &ch); + i++; + } + /* Loop over the remaining string where call must happen */ + while (src < endPtr) { + if (Tcl_UtfCharComplete(src, endPtr - src)) { + src += Tcl_UtfToChar16(src, &ch); + } else { + /* + * src points to incomplete UTF-8 sequence + * Treat first byte as character and count it + */ + src++; + } + i++; + } + } + return i; +} +#endif + /* *--------------------------------------------------------------------------- * diff --git a/tests/string.test b/tests/string.test index 203d0c6..6863c23 100644 --- a/tests/string.test +++ b/tests/string.test @@ -422,7 +422,7 @@ test string-4.16.$noComp {string first, normal string vs pure unicode string} -b # Representation checks are canaries run {list [representationpoke $s] [representationpoke $m] \ [string first $m $s]} -} -result {{string 1} {string 0} 2} +} -match glob -result {{*string 1} {*string 0} 2} test string-4.17.$noComp {string first, corner case} -body { run {string first a aaa 4294967295} } -result {-1} -- cgit v0.12 From d4c27c94668a30f48edee251104255b27230107e Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 22 Mar 2022 16:12:20 +0000 Subject: ucs4 -> utf32 --- tests/stringObj.test | 17 +++++++++-------- tests/utf.test | 46 +++++++++++++++++++++++----------------------- 2 files changed, 32 insertions(+), 31 deletions(-) diff --git a/tests/stringObj.test b/tests/stringObj.test index bae61ab..c11bf7f 100644 --- a/tests/stringObj.test +++ b/tests/stringObj.test @@ -25,7 +25,8 @@ testConstraint testobj [llength [info commands testobj]] testConstraint testbytestring [llength [info commands testbytestring]] testConstraint testdstring [llength [info commands testdstring]] testConstraint tip389 [expr {[string length \U010000] == 2}] - +testConstraint utf32 [expr {[string length [format %c 0x10000]] == 1}] + test stringObj-1.1 {string type registration} testobj { set t [testobj types] set first [string first "string" $t] @@ -57,7 +58,7 @@ test stringObj-3.2 {Tcl_SetStringObj, existing non-"empty string" object} testob lappend result [testobj refcount 1] } {{} 512 foo string 2} -test stringObj-4.1 {Tcl_SetObjLength procedure, string gets shorter} {testobj fullutf} { +test stringObj-4.1 {Tcl_SetObjLength procedure, string gets shorter} {testobj utf32} { testobj freeallvars teststringobj set 1 test teststringobj setlength 1 3 @@ -70,7 +71,7 @@ test stringObj-4.2 {Tcl_SetObjLength procedure, string gets longer} testobj { teststringobj setlength 1 10 list [teststringobj length 1] [teststringobj length2 1] } {10 10} -test stringObj-4.3 {Tcl_SetObjLength procedure, string gets longer} {testobj fullutf} { +test stringObj-4.3 {Tcl_SetObjLength procedure, string gets longer} {testobj utf32} { testobj freeallvars teststringobj set 1 abcdef teststringobj append 1 xyzq -1 @@ -97,7 +98,7 @@ test stringObj-5.2 {Tcl_AppendToObj procedure, length calculation} testobj { teststringobj append 1 123 -1 teststringobj get 1 } {x y bbCC123} -test stringObj-5.3 {Tcl_AppendToObj procedure, reallocating space} {testobj fullutf} { +test stringObj-5.3 {Tcl_AppendToObj procedure, reallocating space} {testobj utf32} { testobj freeallvars teststringobj set 1 xyz teststringobj setlength 1 15 @@ -135,7 +136,7 @@ test stringObj-6.4 {Tcl_AppendStringsToObj procedure, counting space} testobj { teststringobj appendstrings 1 { 123 } abcdefg list [teststringobj length 1] [teststringobj get 1] } {15 {abc 123 abcdefg}} -test stringObj-6.5 {Tcl_AppendStringsToObj procedure, don't double space if initial string empty} {testobj fullutf} { +test stringObj-6.5 {Tcl_AppendStringsToObj procedure, don't double space if initial string empty} {testobj utf32} { testobj freeallvars testobj newobj 1 teststringobj appendstrings 1 123 abcdefg @@ -150,7 +151,7 @@ test stringObj-6.6 {Tcl_AppendStringsToObj procedure, space reallocation} testob list [teststringobj length 1] [teststringobj length2 1] \ [teststringobj get 1] } {10 10 ab34567890} -test stringObj-6.7 {Tcl_AppendStringsToObj procedure, space reallocation} {testobj fullutf} { +test stringObj-6.7 {Tcl_AppendStringsToObj procedure, space reallocation} {testobj utf32} { testobj freeallvars teststringobj set 1 abc teststringobj setlength 1 10 @@ -172,7 +173,7 @@ test stringObj-6.9 {Tcl_AppendStringToObj, pure unicode} testobj { teststringobj get 1 } adcfoobarsoom -test stringObj-7.1 {SetStringFromAny procedure} {testobj fullutf} { +test stringObj-7.1 {SetStringFromAny procedure} {testobj utf32} { testobj freeallvars teststringobj set2 1 [list a b] teststringobj append 1 x -1 @@ -197,7 +198,7 @@ test stringObj-7.4 {SetStringFromAny called with string obj} testobj { [string length $x] [testobj objtype $x] } {6 string 6 string} -test stringObj-8.1 {DupStringInternalRep procedure} {testobj fullutf} { +test stringObj-8.1 {DupStringInternalRep procedure} {testobj utf32} { testobj freeallvars teststringobj set 1 {} teststringobj append 1 abcde -1 diff --git a/tests/utf.test b/tests/utf.test index c79492e..c0d64e2 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -19,7 +19,7 @@ catch [list package require -exact tcl::test [info patchlevel]] testConstraint ucs2 [expr {[format %c 0x010000] eq "\uFFFD"}] testConstraint fullutf [expr {[format %c 0x010000] ne "\uFFFD"}] testConstraint utf16 [expr {[string length [format %c 0x10000]] == 2}] -testConstraint ucs4 [expr {[testConstraint fullutf] +testConstraint utf32 [expr {[testConstraint fullutf] && [string length [format %c 0x10000]] == 1}] testConstraint Uesc [expr {"\U0041" eq "A"}] @@ -131,7 +131,7 @@ test utf-2.8.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {ucs2 testb test utf-2.8.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} utf16 { string length 𐀀 } 2 -test utf-2.8.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} ucs4 { +test utf-2.8.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} utf32 { string length 𐀀 } 1 test utf-2.9.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {ucs2 testbytestring} { @@ -140,7 +140,7 @@ test utf-2.9.0 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} {ucs2 testb test utf-2.9.1 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} utf16 { string length \U10FFFF } 2 -test utf-2.9.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} ucs4 { +test utf-2.9.2 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} utf32 { string length \U10FFFF } 1 test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} testbytestring { @@ -194,7 +194,7 @@ test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} {testnumutfc test utf-4.12.0 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring ucs2} { testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end } 2 -test utf-4.12.1 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring ucs4} { +test utf-4.12.1 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring utf32} { testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end } 1 test utf-4.13 {Tcl_NumUtfChars: end of string} {testnumutfchars testbytestring} { @@ -878,7 +878,7 @@ test utf-8.4 {Tcl_UniCharAtIndex: index > 0} { test utf-8.5.0 {Tcl_UniCharAtIndex: high surrogate} ucs2 { string index \uD842 0 } \uD842 -test utf-8.5.1 {Tcl_UniCharAtIndex: high surrogate} ucs4 { +test utf-8.5.1 {Tcl_UniCharAtIndex: high surrogate} utf32 { string index \uD842 0 } \uD842 test utf-8.5.2 {Tcl_UniCharAtIndex: high surrogate} utf16 { @@ -890,7 +890,7 @@ test utf-8.6 {Tcl_UniCharAtIndex: low surrogate} { test utf-8.7.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { string index \uD83D\uDE00G 0 } \uD83D -test utf-8.7.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { +test utf-8.7.1 {Tcl_UniCharAtIndex: Emoji} utf32 { string index 😀G 0 } 😀 test utf-8.7.2 {Tcl_UniCharAtIndex: Emoji} utf16 { @@ -899,7 +899,7 @@ test utf-8.7.2 {Tcl_UniCharAtIndex: Emoji} utf16 { test utf-8.8.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { string index \uD83D\uDE00G 1 } \uDE00 -test utf-8.8.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { +test utf-8.8.1 {Tcl_UniCharAtIndex: Emoji} utf32 { string index 😀G 1 } G test utf-8.8.2 {Tcl_UniCharAtIndex: Emoji} utf16 { @@ -908,7 +908,7 @@ test utf-8.8.2 {Tcl_UniCharAtIndex: Emoji} utf16 { test utf-8.9.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { string index \uD83D\uDE00G 2 } G -test utf-8.9.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { +test utf-8.9.1 {Tcl_UniCharAtIndex: Emoji} utf32 { string index 😀G 2 } {} test utf-8.9.2 {Tcl_UniCharAtIndex: Emoji} utf16 { @@ -917,7 +917,7 @@ test utf-8.9.2 {Tcl_UniCharAtIndex: Emoji} utf16 { test utf-8.10.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { string index 😀G 0 } \uFFFD -test utf-8.10.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { +test utf-8.10.1 {Tcl_UniCharAtIndex: Emoji} utf32 { string index 😀G 0 } 😀 test utf-8.10.2 {Tcl_UniCharAtIndex: Emoji} utf16 { @@ -926,7 +926,7 @@ test utf-8.10.2 {Tcl_UniCharAtIndex: Emoji} utf16 { test utf-8.11.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { string index 😀G 1 } G -test utf-8.11.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { +test utf-8.11.1 {Tcl_UniCharAtIndex: Emoji} utf32 { string index 😀G 1 } G test utf-8.11.2 {Tcl_UniCharAtIndex: Emoji} utf16 { @@ -935,7 +935,7 @@ test utf-8.11.2 {Tcl_UniCharAtIndex: Emoji} utf16 { test utf-8.12.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { string index 😀G 2 } {} -test utf-8.12.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { +test utf-8.12.1 {Tcl_UniCharAtIndex: Emoji} utf32 { string index 😀G 2 } {} test utf-8.12.2 {Tcl_UniCharAtIndex: Emoji} utf16 { @@ -951,7 +951,7 @@ test utf-9.2 {Tcl_UtfAtIndex: index > 0} { test utf-9.3.0 {Tcl_UtfAtIndex: index = 0, Emoji} ucs2 { string range \uD83D\uDE00G 0 0 } \uD83D -test utf-9.3.1 {Tcl_UtfAtIndex: index = 0, Emoji} ucs4 { +test utf-9.3.1 {Tcl_UtfAtIndex: index = 0, Emoji} utf32 { string range 😀G 0 0 } 😀 test utf-9.3.2 {Tcl_UtfAtIndex: index = 0, Emoji} utf16 { @@ -960,7 +960,7 @@ test utf-9.3.2 {Tcl_UtfAtIndex: index = 0, Emoji} utf16 { test utf-9.4.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 { string range \uD83D\uDE00G 1 1 } \uDE00 -test utf-9.4.1 {Tcl_UtfAtIndex: index > 0, Emoji} ucs4 { +test utf-9.4.1 {Tcl_UtfAtIndex: index > 0, Emoji} utf32 { string range 😀G 1 1 } G test utf-9.4.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 { @@ -969,7 +969,7 @@ test utf-9.4.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 { test utf-9.5.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 { string range \uD83D\uDE00G 2 2 } G -test utf-9.5.1 {Tcl_UtfAtIndex: index > 0, Emoji} ucs4 { +test utf-9.5.1 {Tcl_UtfAtIndex: index > 0, Emoji} utf32 { string range 😀G 2 2 } {} test utf-9.5.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 { @@ -978,7 +978,7 @@ test utf-9.5.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 { test utf-9.6.0 {Tcl_UtfAtIndex: index = 0, Emoji} ucs2 { string range 😀G 0 0 } \uFFFD -test utf-9.6.1 {Tcl_UtfAtIndex: index = 0, Emoji} ucs4 { +test utf-9.6.1 {Tcl_UtfAtIndex: index = 0, Emoji} utf32 { string range 😀G 0 0 } 😀 test utf-9.6.2 {Tcl_UtfAtIndex: index = 0, Emoji} utf16 { @@ -987,7 +987,7 @@ test utf-9.6.2 {Tcl_UtfAtIndex: index = 0, Emoji} utf16 { test utf-9.7.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 { string range 😀G 1 1 } G -test utf-9.7.1 {Tcl_UtfAtIndex: index > 0, Emoji} ucs4 { +test utf-9.7.1 {Tcl_UtfAtIndex: index > 0, Emoji} utf32 { string range 😀G 1 1 } G test utf-9.7.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 { @@ -996,7 +996,7 @@ test utf-9.7.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 { test utf-9.8.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 { string range 😀G 2 2 } {} -test utf-9.8.1 {Tcl_UtfAtIndex: index > 0, Emoji} ucs4 { +test utf-9.8.1 {Tcl_UtfAtIndex: index > 0, Emoji} utf32 { string range 😀G 2 2 } {} test utf-9.8.2 {Tcl_UtfAtIndex: index > 0, Emoji} utf16 { @@ -1227,10 +1227,10 @@ test utf-19.1 {TclUniCharLen} -body { unset -nocomplain foo } -result {1 4} -test utf-20.1 {TclUniCharNcmp} ucs4 { +test utf-20.1 {TclUniCharNcmp} utf32 { string compare [string range [format %c 0xFFFF] 0 0] [string range [format %c 0x10000] 0 0] } -1 -test utf-20.2 {[4c591fa487] TclUniCharNcmp/TclUtfNcmp} ucs4 { +test utf-20.2 {[4c591fa487] TclUniCharNcmp/TclUtfNcmp} utf32 { set one [format %c 0xFFFF] set two [format %c 0x10000] set first [string compare $one $two] @@ -1357,10 +1357,10 @@ UniCharCaseCmpTest < a b UniCharCaseCmpTest > b a UniCharCaseCmpTest > B a UniCharCaseCmpTest > aBcB abca -UniCharCaseCmpTest < \uFFFF [format %c 0x10000] ucs4 -UniCharCaseCmpTest < \uFFFF \U10000 ucs4 -UniCharCaseCmpTest > [format %c 0x10000] \uFFFF ucs4 -UniCharCaseCmpTest > \U10000 \uFFFF ucs4 +UniCharCaseCmpTest < \uFFFF [format %c 0x10000] utf32 +UniCharCaseCmpTest < \uFFFF \U10000 utf32 +UniCharCaseCmpTest > [format %c 0x10000] \uFFFF utf32 +UniCharCaseCmpTest > \U10000 \uFFFF utf32 test utf-26.1 {Tcl_UniCharDString} -setup { -- cgit v0.12 From 1f3d3d7671748e4eb36cc0846e3e953bdb29c227 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 23 Mar 2022 13:31:43 +0000 Subject: Fix crash in (compabitility) "string" objType --- generic/tclStringObj.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index d43c507..9655479 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -208,13 +208,14 @@ SetUTF16StringFromAny( Tcl_DStringInit(&ds); unsigned short *utf16string = Tcl_UtfToChar16DString(objPtr->bytes, objPtr->length, &ds); - size_t size = Tcl_DStringLength(&ds); - String *stringPtr = (String *)ckalloc((offsetof(String, unicode) + 2U) + size); + int size = Tcl_DStringLength(&ds); + String *stringPtr = (String *)ckalloc((offsetof(String, unicode) + sizeof(unsigned short)) + size); + memcpy(stringPtr->unicode, utf16string, size); - stringPtr->unicode[size] = 0; Tcl_DStringFree(&ds); - size /= sizeof(unsigned short); + stringPtr->unicode[size] = 0; + stringPtr->numChars = size; stringPtr->allocated = size; stringPtr->maxChars = size; @@ -222,7 +223,7 @@ SetUTF16StringFromAny( objPtr->internalRep.twoPtrValue.ptr1 = stringPtr; objPtr->typePtr = &tclStringType; } - return TCL_OK; + return TCL_OK; } static void @@ -237,10 +238,10 @@ UpdateStringOfUTF16String( char *bytes = (char *)ckalloc(Tcl_DStringLength(&ds) + 1U); memcpy(bytes, string, Tcl_DStringLength(&ds)); + bytes[Tcl_DStringLength(&ds)] = 0; Tcl_DStringFree(&ds); objPtr->bytes = bytes; objPtr->length = Tcl_DStringLength(&ds); - printf("UpdateStringOfUTF16String %d %d\n", stringPtr->unicode[0], stringPtr->unicode[1]); } #endif -- cgit v0.12 From 369bc9b1594ea304387a97eef5658a5998699534 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 23 Mar 2022 14:32:08 +0000 Subject: Fix Tcl_UniCharAtIndex() for UTF-16 compabitility layer --- generic/tclUtf.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index eda317f..cfd9915 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -1182,22 +1182,20 @@ Tcl_UniCharAtIndex( const char *src, /* The UTF-8 string to dereference. */ int index) /* The position of the desired character. */ { - Tcl_UniChar ch = 0; + unsigned short ch = 0; int i = 0; if (index < 0) { return -1; } while (index-- > 0) { - i = TclUtfToUniChar(src, &ch); + i = Tcl_UtfToChar16(src, &ch); src += i; } -#if TCL_UTF_MAX < 4 if ((ch >= 0xD800) && (i < 3)) { /* Index points at character following high Surrogate */ return -1; } -#endif TclUtfToUCS4(src, &i); return i; } -- cgit v0.12 From af426f828e2e3391d7b4ed399040300821a156e9 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 23 Mar 2022 16:19:54 +0000 Subject: Put back TclNumUtfChars() macro as TclNumUtfCharsM() --- generic/tclInt.h | 4 ++-- generic/tclStringObj.c | 26 +++++++++++++------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/generic/tclInt.h b/generic/tclInt.h index 73d6386..3aa2626 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -4768,14 +4768,14 @@ MODULE_SCOPE const TclFileAttrProcs tclpFileAttrProcs[]; *---------------------------------------------------------------- */ -#define TclNumUtfChars_UNUSED(numChars, bytes, numBytes) \ +#define TclNumUtfCharsM(numChars, bytes, numBytes) \ do { \ int _count, _i = (numBytes); \ unsigned char *_str = (unsigned char *) (bytes); \ while (_i && (*_str < 0xC0)) { _i--; _str++; } \ _count = (numBytes) - _i; \ if (_i) { \ - _count += Tcl_NumUtfChars((bytes) + _count, _i); \ + _count += TclNumUtfChars((bytes) + _count, _i); \ } \ (numChars) = _count; \ } while (0); diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 9655479..784ed44 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -55,14 +55,14 @@ static void AppendUtfToUtfRep(Tcl_Obj *objPtr, const char *bytes, int numBytes); static void DupStringInternalRep(Tcl_Obj *objPtr, Tcl_Obj *copyPtr); -static int ExtendUniCharStringRepWithUniCharString(Tcl_Obj *objPtr, +static int ExtendStringRepWithUnicode(Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int numChars); static void ExtendUnicodeRepWithString(Tcl_Obj *objPtr, const char *bytes, int numBytes, int numAppendChars); static void FillUnicodeRep(Tcl_Obj *objPtr); static void FreeStringInternalRep(Tcl_Obj *objPtr); -static void GrowUniCharStringBuffer(Tcl_Obj *objPtr, int needed, int flag); +static void GrowStringBuffer(Tcl_Obj *objPtr, int needed, int flag); static void GrowUnicodeBuffer(Tcl_Obj *objPtr, int needed); static int SetStringFromAny(Tcl_Interp *interp, Tcl_Obj *objPtr); static void SetUnicodeObj(Tcl_Obj *objPtr, @@ -284,7 +284,7 @@ UpdateStringOfUTF16String( #endif static void -GrowUniCharStringBuffer( +GrowStringBuffer( Tcl_Obj *objPtr, int needed, int flag) @@ -638,7 +638,7 @@ TclGetCharLength( */ if (numChars == -1) { - numChars = Tcl_NumUtfChars(objPtr->bytes, objPtr->length); + TclNumUtfCharsM(numChars, objPtr->bytes, objPtr->length); stringPtr->numChars = numChars; } return numChars; @@ -783,7 +783,7 @@ Tcl_GetUniChar( */ if (stringPtr->numChars == -1) { - stringPtr->numChars = Tcl_NumUtfChars(objPtr->bytes, objPtr->length); + TclNumUtfCharsM(stringPtr->numChars, objPtr->bytes, objPtr->length); } if (stringPtr->numChars == objPtr->length) { return (unsigned char) objPtr->bytes[index]; @@ -928,7 +928,7 @@ TclGetUnicodeFromObj( } return stringPtr->unicode; } - + /* *---------------------------------------------------------------------- * @@ -992,7 +992,7 @@ Tcl_GetRange( */ if (stringPtr->numChars == -1) { - stringPtr->numChars = Tcl_NumUtfChars(objPtr->bytes, objPtr->length); + TclNumUtfCharsM(stringPtr->numChars, objPtr->bytes, objPtr->length); } if (stringPtr->numChars == objPtr->length) { if (last < 0 || last >= stringPtr->numChars) { @@ -1884,7 +1884,7 @@ AppendUnicodeToUtfRep( { UniCharString *stringPtr = GET_UNICHAR_STRING(objPtr); - numChars = ExtendUniCharStringRepWithUniCharString(objPtr, unicode, numChars); + numChars = ExtendStringRepWithUnicode(objPtr, unicode, numChars); if (stringPtr->numChars != -1) { stringPtr->numChars += numChars; @@ -1992,7 +1992,7 @@ AppendUtfToUtfRep( * would make test stringObj-8.1 fail. */ - GrowUniCharStringBuffer(objPtr, newLength, 0); + GrowStringBuffer(objPtr, newLength, 0); /* * Relocate bytes if needed; see above. @@ -4448,7 +4448,7 @@ ExtendUnicodeRepWithString( numOrigChars = stringPtr->numChars; } if (numAppendChars == -1) { - numAppendChars = Tcl_NumUtfChars(bytes, numBytes); + TclNumUtfCharsM(numAppendChars, bytes, numBytes); } needed = numOrigChars + numAppendChars; uniCharStringCheckLimits(needed); @@ -4643,13 +4643,13 @@ UpdateStringOfString( if (stringPtr->numChars == 0) { TclInitStringRep(objPtr, NULL, 0); } else { - (void) ExtendUniCharStringRepWithUniCharString(objPtr, stringPtr->unicode, + (void) ExtendStringRepWithUnicode(objPtr, stringPtr->unicode, stringPtr->numChars); } } static int -ExtendUniCharStringRepWithUniCharString( +ExtendStringRepWithUnicode( Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int numChars) @@ -4696,7 +4696,7 @@ ExtendUniCharStringRepWithUniCharString( */ if (size > stringPtr->allocated) { - GrowUniCharStringBuffer(objPtr, size, 1); + GrowStringBuffer(objPtr, size, 1); } copyBytes: -- cgit v0.12 From 66cbd6ac71e01082f9e8b0e088cd829defdf9886 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 23 Mar 2022 16:33:41 +0000 Subject: Fix for UpdateStringOfUTF16String() --- generic/tclStringObj.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 784ed44..17c7067 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -239,9 +239,9 @@ UpdateStringOfUTF16String( char *bytes = (char *)ckalloc(Tcl_DStringLength(&ds) + 1U); memcpy(bytes, string, Tcl_DStringLength(&ds)); bytes[Tcl_DStringLength(&ds)] = 0; - Tcl_DStringFree(&ds); objPtr->bytes = bytes; objPtr->length = Tcl_DStringLength(&ds); + Tcl_DStringFree(&ds); } #endif -- cgit v0.12 From 4e3426067cddf752f0c15e8a2ce29f5f9052341f Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 24 Mar 2022 10:48:44 +0000 Subject: When compiled with TCL_NO_DEPRECATED, remove the UTF16 compatibility layer. So, we make sure that it is never used internally for the Core. This means that extensions using the compatibility layer won't work any more in this mode; extensions should be compiled using TCL_UTF_MAX=4 then they work again. --- generic/tclExecute.c | 4 ++-- generic/tclInt.h | 2 ++ generic/tclObj.c | 2 ++ generic/tclStringObj.c | 45 ++++++++++++++++++++++++++------------------- generic/tclStubInit.c | 9 +++++++++ generic/tclTestObj.c | 24 ++++++++++++++++-------- generic/tclUtf.c | 4 ++-- generic/tclUtil.c | 2 +- tests/stringObj.test | 26 +++++++++++++------------- 9 files changed, 73 insertions(+), 45 deletions(-) diff --git a/generic/tclExecute.c b/generic/tclExecute.c index f09f75c..0456146 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -5592,8 +5592,8 @@ TEBCresume( * both. */ - if (TclHasInternalRep(valuePtr, &tclStringType) - || TclHasInternalRep(value2Ptr, &tclStringType)) { + if (TclHasInternalRep(valuePtr, &tclUniCharStringType) + || TclHasInternalRep(value2Ptr, &tclUniCharStringType)) { Tcl_UniChar *ustring1, *ustring2; ustring1 = TclGetUnicodeFromObj_(valuePtr, &length); diff --git a/generic/tclInt.h b/generic/tclInt.h index 3aa2626..6804996 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -2768,6 +2768,7 @@ MODULE_SCOPE const Tcl_ObjType tclListType; MODULE_SCOPE const Tcl_ObjType tclDictType; MODULE_SCOPE const Tcl_ObjType tclProcBodyType; MODULE_SCOPE const Tcl_ObjType tclStringType; +MODULE_SCOPE const Tcl_ObjType tclUniCharStringType; MODULE_SCOPE const Tcl_ObjType tclEnsembleCmdType; MODULE_SCOPE const Tcl_ObjType tclRegexpType; MODULE_SCOPE Tcl_ObjType tclCmdNameType; @@ -3333,6 +3334,7 @@ MODULE_SCOPE int TclZipfs_Init(Tcl_Interp *interp); # undef Tcl_UtfAtIndex # define Tcl_UtfAtIndex TclUtfAtIndex #else +# define tclUniCharStringType tclStringType # define TclGetUnicodeFromObj_ Tcl_GetUnicodeFromObj # define TclNewUnicodeObj Tcl_NewUnicodeObj # define TclAppendUnicodeToObj Tcl_AppendUnicodeToObj diff --git a/generic/tclObj.c b/generic/tclObj.c index a06b8fd..7596880 100644 --- a/generic/tclObj.c +++ b/generic/tclObj.c @@ -387,7 +387,9 @@ TclInitObjSubsystem(void) Tcl_RegisterObjType(&tclByteArrayType); Tcl_RegisterObjType(&tclDoubleType); +#if (TCL_UTF_MAX) > 3 && !defined(TCL_NO_DEPRECATED) Tcl_RegisterObjType(&tclStringType); +#endif Tcl_RegisterObjType(&tclListType); Tcl_RegisterObjType(&tclDictType); Tcl_RegisterObjType(&tclByteCodeType); diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 17c7067..627fadc 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -69,7 +69,7 @@ static void SetUnicodeObj(Tcl_Obj *objPtr, const Tcl_UniChar *unicode, int numChars); static int UnicodeLength(const Tcl_UniChar *unicode); static void UpdateStringOfString(Tcl_Obj *objPtr); -#if TCL_UTF_MAX > 3 +#if (TCL_UTF_MAX) > 3 && !defined(TCL_NO_DEPRECATED) static void DupUTF16StringInternalRep(Tcl_Obj *objPtr, Tcl_Obj *copyPtr); static int SetUTF16StringFromAny(Tcl_Interp *interp, Tcl_Obj *objPtr); @@ -88,7 +88,7 @@ static void UpdateStringOfUTF16String(Tcl_Obj *objPtr); #if TCL_UTF_MAX < 4 -#define uniCharStringType tclStringType +#define tclUniCharStringType tclStringType #define GET_UNICHAR_STRING GET_STRING #define UniCharString String #define UNICHAR_STRING_MAXCHARS STRING_MAXCHARS @@ -110,6 +110,8 @@ const Tcl_ObjType tclStringType = { #else +#define tclStringType xxx +#ifndef TCL_NO_DEPRECATED const Tcl_ObjType tclStringType = { "string", /* name */ FreeStringInternalRep, /* freeIntRepPro */ @@ -117,8 +119,9 @@ const Tcl_ObjType tclStringType = { UpdateStringOfUTF16String, /* updateStringProc */ SetUTF16StringFromAny /* setFromAnyProc */ }; +#endif -static const Tcl_ObjType uniCharStringType = { +const Tcl_ObjType tclUniCharStringType = { "utf32string", /* name */ FreeStringInternalRep, /* freeIntRepPro */ DupStringInternalRep, /* dupIntRepProc */ @@ -170,6 +173,7 @@ typedef struct { ((objPtr)->internalRep.twoPtrValue.ptr1 = (void *) (stringPtr)) +#ifndef TCL_NO_DEPRECATED static void DupUTF16StringInternalRep( Tcl_Obj *srcPtr, /* Object with internal rep to copy. Must have @@ -243,6 +247,7 @@ UpdateStringOfUTF16String( objPtr->length = Tcl_DStringLength(&ds); Tcl_DStringFree(&ds); } +#endif #endif @@ -545,7 +550,7 @@ TclNewUnicodeObj( return objPtr; } -#if TCL_UTF_MAX > 3 +#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED) Tcl_Obj * Tcl_NewUnicodeObj( const unsigned short *unicode, /* The unicode string used to initialize the @@ -644,7 +649,7 @@ TclGetCharLength( return numChars; } -#if TCL_UTF_MAX > 3 +#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED) #undef Tcl_GetCharLength int Tcl_GetCharLength( @@ -889,7 +894,7 @@ TclGetUnicodeFromObj_( return stringPtr->unicode; } -#if TCL_UTF_MAX > 3 +#if TCL_UTF_MAX > 3 && !defined(TCL_NO_DEPRECATED) unsigned short * Tcl_GetUnicodeFromObj( Tcl_Obj *objPtr, /* The object to find the unicode string @@ -1327,6 +1332,7 @@ Tcl_AttemptSetObjLength( *--------------------------------------------------------------------------- */ +#if !defined(TCL_NO_DEPRECATED) void Tcl_SetUnicodeObj( Tcl_Obj *objPtr, /* The object to set the string of. */ @@ -1366,6 +1372,7 @@ Tcl_SetUnicodeObj( TclInvalidateStringRep(objPtr); stringPtr->allocated = numChars; } +#endif static int UnicodeLength( @@ -1403,7 +1410,7 @@ SetUnicodeObj( uniCharStringCheckLimits(numChars); stringPtr = uniCharStringAlloc(numChars); SET_UNICHAR_STRING(objPtr, stringPtr); - objPtr->typePtr = &uniCharStringType; + objPtr->typePtr = &tclUniCharStringType; stringPtr->maxChars = numChars; memcpy(stringPtr->unicode, unicode, numChars * sizeof(Tcl_UniChar)); @@ -1591,7 +1598,7 @@ TclAppendUnicodeToObj( } } -#if TCL_UTF_MAX > 3 +#if TCL_UTF_MAX > 3 && !defined(TCL_NO_DEPRECATED) void Tcl_AppendUnicodeToObj( Tcl_Obj *objPtr, /* Points to the object to append to. */ @@ -1736,7 +1743,7 @@ Tcl_AppendObjToObj( * If appendObjPtr is not of the "String" type, don't convert it. */ - if (TclHasInternalRep(appendObjPtr, &uniCharStringType)) { + if (TclHasInternalRep(appendObjPtr, &tclUniCharStringType)) { Tcl_UniChar *unicode = TclGetUnicodeFromObj_(appendObjPtr, &numChars); @@ -1757,7 +1764,7 @@ Tcl_AppendObjToObj( bytes = TclGetStringFromObj(appendObjPtr, &length); numChars = stringPtr->numChars; - if ((numChars >= 0) && TclHasInternalRep(appendObjPtr, &uniCharStringType)) { + if ((numChars >= 0) && TclHasInternalRep(appendObjPtr, &tclUniCharStringType)) { UniCharString *appendStringPtr = GET_UNICHAR_STRING(appendObjPtr); appendNumChars = appendStringPtr->numChars; @@ -3166,7 +3173,7 @@ TclGetStringStorage( { UniCharString *stringPtr; - if (!TclHasInternalRep(objPtr, &uniCharStringType) || objPtr->bytes == NULL) { + if (!TclHasInternalRep(objPtr, &tclUniCharStringType) || objPtr->bytes == NULL) { return TclGetStringFromObj(objPtr, (int *)sizePtr); } @@ -3214,7 +3221,7 @@ TclStringRepeat( */ if (!binary) { - if (TclHasInternalRep(objPtr, &uniCharStringType)) { + if (TclHasInternalRep(objPtr, &tclUniCharStringType)) { UniCharString *stringPtr = GET_UNICHAR_STRING(objPtr); if (stringPtr->hasUnicode) { unichar = 1; @@ -3385,7 +3392,7 @@ TclStringCat( binary = 0; if (ov > objv+1 && ISCONTINUATION(TclGetString(objPtr))) { forceUniChar = 1; - } else if ((objPtr->typePtr) && (objPtr->typePtr != &uniCharStringType)) { + } else if ((objPtr->typePtr) && (objPtr->typePtr != &tclUniCharStringType)) { /* Prevent shimmer of non-string types. */ allowUniChar = 0; } @@ -3393,7 +3400,7 @@ TclStringCat( } else { /* assert (objPtr->typePtr != NULL) -- stork! */ binary = 0; - if (TclHasInternalRep(objPtr, &uniCharStringType)) { + if (TclHasInternalRep(objPtr, &tclUniCharStringType)) { /* Have a pure Unicode value; ask to preserve it */ requestUniChar = 1; } else { @@ -3746,8 +3753,8 @@ TclStringCmp( s1 = (char *) Tcl_GetByteArrayFromObj(value1Ptr, &s1len); s2 = (char *) Tcl_GetByteArrayFromObj(value2Ptr, &s2len); memCmpFn = memcmp; - } else if (TclHasInternalRep(value1Ptr, &uniCharStringType) - && TclHasInternalRep(value2Ptr, &uniCharStringType)) { + } else if (TclHasInternalRep(value1Ptr, &tclUniCharStringType) + && TclHasInternalRep(value2Ptr, &tclUniCharStringType)) { /* * Do a unicode-specific comparison if both of the args are of * String type. If the char length == byte length, we can do a @@ -4556,7 +4563,7 @@ DupStringInternalRep( copyStringPtr->allocated = copyPtr->bytes ? copyPtr->length : 0; SET_UNICHAR_STRING(copyPtr, copyStringPtr); - copyPtr->typePtr = &uniCharStringType; + copyPtr->typePtr = &tclUniCharStringType; } /* @@ -4581,7 +4588,7 @@ SetStringFromAny( TCL_UNUSED(Tcl_Interp *), Tcl_Obj *objPtr) /* The object to convert. */ { - if (!TclHasInternalRep(objPtr, &uniCharStringType)) { + if (!TclHasInternalRep(objPtr, &tclUniCharStringType)) { UniCharString *stringPtr = uniCharStringAlloc(0); /* @@ -4601,7 +4608,7 @@ SetStringFromAny( stringPtr->maxChars = 0; stringPtr->hasUnicode = 0; SET_UNICHAR_STRING(objPtr, stringPtr); - objPtr->typePtr = &uniCharStringType; + objPtr->typePtr = &tclUniCharStringType; } return TCL_OK; } diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c index 9757ad2..7c8c219 100644 --- a/generic/tclStubInit.c +++ b/generic/tclStubInit.c @@ -87,6 +87,15 @@ #define TclUtfNext UtfNext #define TclUtfPrev UtfPrev +#if TCL_UTF_MAX > 3 && defined(TCL_NO_DEPRECATED) +#define Tcl_GetUnicodeFromObj 0 +#define Tcl_AppendUnicodeToObj 0 +#define Tcl_NewUnicodeObj 0 +#define Tcl_SetUnicodeObj 0 +#define Tcl_UtfAtIndex 0 +#define Tcl_GetCharLength 0 +#endif + static int TclUtfCharComplete(const char *src, int length) { if ((unsigned)((unsigned char)*(src) - 0xF0) < 5) { return length < 3; diff --git a/generic/tclTestObj.c b/generic/tclTestObj.c index 9884a9a..223eb98 100644 --- a/generic/tclTestObj.c +++ b/generic/tclTestObj.c @@ -1264,10 +1264,14 @@ TeststringobjCmd( goto wrongNumArgs; } if (varPtr[varIndex] != NULL) { - Tcl_ConvertToType(NULL, varPtr[varIndex], - Tcl_GetObjType("string")); - strPtr = (String *)varPtr[varIndex]->internalRep.twoPtrValue.ptr1; - length = (int) strPtr->allocated; + const Tcl_ObjType *objType = Tcl_GetObjType("string"); + if (objType != NULL) { + Tcl_ConvertToType(NULL, varPtr[varIndex], objType); + strPtr = (String *)varPtr[varIndex]->internalRep.twoPtrValue.ptr1; + length = (int) strPtr->allocated; + } else { + length = -1; + } } else { length = -1; } @@ -1318,10 +1322,14 @@ TeststringobjCmd( goto wrongNumArgs; } if (varPtr[varIndex] != NULL) { - Tcl_ConvertToType(NULL, varPtr[varIndex], - Tcl_GetObjType("string")); - strPtr = (String *)varPtr[varIndex]->internalRep.twoPtrValue.ptr1; - length = strPtr->maxChars; + const Tcl_ObjType *objType = Tcl_GetObjType("string"); + if (objType != NULL) { + Tcl_ConvertToType(NULL, varPtr[varIndex],objType); + strPtr = (String *)varPtr[varIndex]->internalRep.twoPtrValue.ptr1; + length = strPtr->maxChars; + } else { + length = -1; + } } else { length = -1; } diff --git a/generic/tclUtf.c b/generic/tclUtf.c index cfd9915..2a335d7 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -850,7 +850,7 @@ TclNumUtfChars( return i; } -#if TCL_UTF_MAX > 3 +#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED) #undef Tcl_NumUtfChars int Tcl_NumUtfChars( @@ -1245,7 +1245,7 @@ TclUtfAtIndex( return src; } -#if TCL_UTF_MAX > 3 +#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED) #undef Tcl_UtfAtIndex const char * Tcl_UtfAtIndex( diff --git a/generic/tclUtil.c b/generic/tclUtil.c index 9cc82cb..3537ecc 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -2591,7 +2591,7 @@ TclStringMatchObj( trivial = nocase ? 0 : TclMatchIsTrivial(TclGetString(ptnObj)); */ - if (TclHasInternalRep(strObj, &tclStringType) || (strObj->typePtr == NULL)) { + if (TclHasInternalRep(strObj, &tclUniCharStringType) || (strObj->typePtr == NULL)) { Tcl_UniChar *udata, *uptn; udata = TclGetUnicodeFromObj_(strObj, &length); diff --git a/tests/stringObj.test b/tests/stringObj.test index c11bf7f..0aa9a47 100644 --- a/tests/stringObj.test +++ b/tests/stringObj.test @@ -27,7 +27,7 @@ testConstraint testdstring [llength [info commands testdstring]] testConstraint tip389 [expr {[string length \U010000] == 2}] testConstraint utf32 [expr {[string length [format %c 0x10000]] == 1}] -test stringObj-1.1 {string type registration} testobj { +test stringObj-1.1 {string type registration} {testobj deprecated} { set t [testobj types] set first [string first "string" $t] set result [expr {$first >= 0}] @@ -58,27 +58,27 @@ test stringObj-3.2 {Tcl_SetStringObj, existing non-"empty string" object} testob lappend result [testobj refcount 1] } {{} 512 foo string 2} -test stringObj-4.1 {Tcl_SetObjLength procedure, string gets shorter} {testobj utf32} { +test stringObj-4.1 {Tcl_SetObjLength procedure, string gets shorter} {testobj utf32 deprecated} { testobj freeallvars teststringobj set 1 test teststringobj setlength 1 3 list [teststringobj length 1] [teststringobj length2 1] \ [teststringobj get 1] } {3 3 tes} -test stringObj-4.2 {Tcl_SetObjLength procedure, string gets longer} testobj { +test stringObj-4.2 {Tcl_SetObjLength procedure, string gets longer} {testobj deprecated} { testobj freeallvars teststringobj set 1 abcdef teststringobj setlength 1 10 list [teststringobj length 1] [teststringobj length2 1] } {10 10} -test stringObj-4.3 {Tcl_SetObjLength procedure, string gets longer} {testobj utf32} { +test stringObj-4.3 {Tcl_SetObjLength procedure, string gets longer} {testobj utf32 deprecated} { testobj freeallvars teststringobj set 1 abcdef teststringobj append 1 xyzq -1 list [teststringobj length 1] [teststringobj length2 1] \ [teststringobj get 1] } {10 10 abcdefxyzq} -test stringObj-4.4 {Tcl_SetObjLength procedure, "expty string", length 0} testobj { +test stringObj-4.4 {Tcl_SetObjLength procedure, "expty string", length 0} {testobj deprecated} { testobj freeallvars testobj newobj 1 teststringobj setlength 1 0 @@ -98,7 +98,7 @@ test stringObj-5.2 {Tcl_AppendToObj procedure, length calculation} testobj { teststringobj append 1 123 -1 teststringobj get 1 } {x y bbCC123} -test stringObj-5.3 {Tcl_AppendToObj procedure, reallocating space} {testobj utf32} { +test stringObj-5.3 {Tcl_AppendToObj procedure, reallocating space} {testobj utf32 deprecated} { testobj freeallvars teststringobj set 1 xyz teststringobj setlength 1 15 @@ -136,13 +136,13 @@ test stringObj-6.4 {Tcl_AppendStringsToObj procedure, counting space} testobj { teststringobj appendstrings 1 { 123 } abcdefg list [teststringobj length 1] [teststringobj get 1] } {15 {abc 123 abcdefg}} -test stringObj-6.5 {Tcl_AppendStringsToObj procedure, don't double space if initial string empty} {testobj utf32} { +test stringObj-6.5 {Tcl_AppendStringsToObj procedure, don't double space if initial string empty} {testobj utf32 deprecated} { testobj freeallvars testobj newobj 1 teststringobj appendstrings 1 123 abcdefg list [teststringobj length 1] [teststringobj length2 1] [teststringobj get 1] } {10 10 123abcdefg} -test stringObj-6.6 {Tcl_AppendStringsToObj procedure, space reallocation} testobj { +test stringObj-6.6 {Tcl_AppendStringsToObj procedure, space reallocation} {testobj deprecated} { testobj freeallvars teststringobj set 1 abc teststringobj setlength 1 10 @@ -151,7 +151,7 @@ test stringObj-6.6 {Tcl_AppendStringsToObj procedure, space reallocation} testob list [teststringobj length 1] [teststringobj length2 1] \ [teststringobj get 1] } {10 10 ab34567890} -test stringObj-6.7 {Tcl_AppendStringsToObj procedure, space reallocation} {testobj utf32} { +test stringObj-6.7 {Tcl_AppendStringsToObj procedure, space reallocation} {testobj utf32 deprecated} { testobj freeallvars teststringobj set 1 abc teststringobj setlength 1 10 @@ -160,7 +160,7 @@ test stringObj-6.7 {Tcl_AppendStringsToObj procedure, space reallocation} {testo list [teststringobj length 1] [teststringobj length2 1] \ [teststringobj get 1] } {11 11 ab34567890x} -test stringObj-6.8 {Tcl_AppendStringsToObj procedure, object totally empty} testobj { +test stringObj-6.8 {Tcl_AppendStringsToObj procedure, object totally empty} {testobj deprecated} { testobj freeallvars testobj newobj 1 teststringobj appendstrings 1 {} @@ -173,14 +173,14 @@ test stringObj-6.9 {Tcl_AppendStringToObj, pure unicode} testobj { teststringobj get 1 } adcfoobarsoom -test stringObj-7.1 {SetStringFromAny procedure} {testobj utf32} { +test stringObj-7.1 {SetStringFromAny procedure} {testobj utf32 deprecated} { testobj freeallvars teststringobj set2 1 [list a b] teststringobj append 1 x -1 list [teststringobj length 1] [teststringobj length2 1] \ [teststringobj get 1] } {4 4 {a bx}} -test stringObj-7.2 {SetStringFromAny procedure, null object} testobj { +test stringObj-7.2 {SetStringFromAny procedure, null object} {testobj deprecated} { testobj freeallvars testobj newobj 1 teststringobj appendstrings 1 {} @@ -198,7 +198,7 @@ test stringObj-7.4 {SetStringFromAny called with string obj} testobj { [string length $x] [testobj objtype $x] } {6 string 6 string} -test stringObj-8.1 {DupStringInternalRep procedure} {testobj utf32} { +test stringObj-8.1 {DupStringInternalRep procedure} {testobj utf32 deprecated} { testobj freeallvars teststringobj set 1 {} teststringobj append 1 abcde -1 -- cgit v0.12 From 0febd13af2d2275d3b63ef191462fc4fea18db99 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 24 Mar 2022 11:34:39 +0000 Subject: Implement PANIC when the UTF16 compatibility layer is used in combination with -DTCL_NO_DEPRECATED --- generic/tclStubInit.c | 28 ++++++++++++++++++---------- generic/tclUtf.c | 6 +++--- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c index 7c8c219..131053a 100644 --- a/generic/tclStubInit.c +++ b/generic/tclStubInit.c @@ -83,19 +83,27 @@ #define TclWinConvertError (void (*)(DWORD))(void *)Tcl_WinConvertError #endif -#define TclUtfCharComplete UtfCharComplete -#define TclUtfNext UtfNext -#define TclUtfPrev UtfPrev #if TCL_UTF_MAX > 3 && defined(TCL_NO_DEPRECATED) -#define Tcl_GetUnicodeFromObj 0 -#define Tcl_AppendUnicodeToObj 0 -#define Tcl_NewUnicodeObj 0 -#define Tcl_SetUnicodeObj 0 -#define Tcl_UtfAtIndex 0 -#define Tcl_GetCharLength 0 +static void uniCodePanic(void) { + Tcl_Panic("Tcl is compiled without the the UTF16 compatibility layer (-DTCL_NO_DEPRECATED)"); +} +# define Tcl_GetUnicode (unsigned short *(*)(Tcl_Obj *))(void *)uniCodePanic +# define Tcl_GetUnicodeFromObj (unsigned short *(*)(Tcl_Obj *, int *))(void *)uniCodePanic +# define Tcl_NewUnicodeObj (Tcl_Obj *(*)(const unsigned short *, int))(void *)uniCodePanic +# define Tcl_SetUnicodeObj (void(*)(Tcl_Obj *, const unsigned short *, int))(void *)uniCodePanic +# define Tcl_AppendUnicodeToObj (void(*)(Tcl_Obj *, const unsigned short *, int))(void *)uniCodePanic +# define Tcl_UtfAtIndex (const char *(*)(const char *, int))(void *)uniCodePanic +# define Tcl_GetCharLength (int(*)(Tcl_Obj *))(void *)uniCodePanic +# define Tcl_UniCharNcmp (int(*)(const unsigned short *, const unsigned short *, unsigned long))(void *)uniCodePanic +# define Tcl_UniCharNcasecmp (int(*)(const unsigned short *, const unsigned short *, unsigned long))(void *)uniCodePanic +# define Tcl_UniCharCaseMatch (int(*)(const unsigned short *, const unsigned short *, int))(void *)uniCodePanic #endif +#define TclUtfCharComplete UtfCharComplete +#define TclUtfNext UtfNext +#define TclUtfPrev UtfPrev + static int TclUtfCharComplete(const char *src, int length) { if ((unsigned)((unsigned char)*(src) - 0xF0) < 5) { return length < 3; @@ -679,8 +687,8 @@ static int utfNcasecmp(const char *s1, const char *s2, unsigned int n){ # define Tcl_SetExitProc 0 # define Tcl_SetPanicProc 0 # define Tcl_FindExecutable 0 -# define Tcl_GetUnicode 0 #if TCL_UTF_MAX < 4 +# define Tcl_GetUnicode 0 # define Tcl_AppendUnicodeToObj 0 # define Tcl_UniCharCaseMatch 0 # define Tcl_UniCharNcasecmp 0 diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 2a335d7..82adf65 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -1957,7 +1957,7 @@ TclUniCharNcmp( #endif /* WORDS_BIGENDIAN */ } -#if TCL_UTF_MAX > 3 +#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED) int Tcl_UniCharNcmp( const unsigned short *ucs, /* Unicode string to compare to uct. */ @@ -2028,7 +2028,7 @@ TclUniCharNcasecmp( return 0; } -#if TCL_UTF_MAX > 3 +#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED) int Tcl_UniCharNcasecmp( const unsigned short *ucs, /* Unicode string to compare to uct. */ @@ -2584,7 +2584,7 @@ TclUniCharCaseMatch( } } -#if TCL_UTF_MAX > 3 +#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED) int Tcl_UniCharCaseMatch( const unsigned short *uniStr, /* Unicode String. */ -- cgit v0.12 From bab10ccf5c152dec04a0eed4fac248b749af4fd9 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 24 Mar 2022 12:43:05 +0000 Subject: Add TclGetRange() to the compatibility set --- generic/tcl.decls | 3 +++ generic/tclDecls.h | 5 ++++ generic/tclInt.h | 6 +++-- generic/tclStringObj.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++- generic/tclStubInit.c | 3 +++ 5 files changed, 77 insertions(+), 3 deletions(-) diff --git a/generic/tcl.decls b/generic/tcl.decls index cb6e282..f6a4c05 100644 --- a/generic/tcl.decls +++ b/generic/tcl.decls @@ -2463,6 +2463,9 @@ declare 670 { declare 671 { const char *TclUtfAtIndex(const char *src, int index) } +declare 672 { + Tcl_Obj *TclGetRange(Tcl_Obj *objPtr, int first, int last) +} # ----- BASELINE -- FOR -- 8.7.0 ----- # diff --git a/generic/tclDecls.h b/generic/tclDecls.h index 06ee797..fdf8673 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -1969,6 +1969,8 @@ EXTERN int TclNumUtfChars(const char *src, int length); EXTERN int TclGetCharLength(Tcl_Obj *objPtr); /* 671 */ EXTERN const char * TclUtfAtIndex(const char *src, int index); +/* 672 */ +EXTERN Tcl_Obj * TclGetRange(Tcl_Obj *objPtr, int first, int last); typedef struct { const struct TclPlatStubs *tclPlatStubs; @@ -2676,6 +2678,7 @@ typedef struct TclStubs { int (*tclNumUtfChars) (const char *src, int length); /* 669 */ int (*tclGetCharLength) (Tcl_Obj *objPtr); /* 670 */ const char * (*tclUtfAtIndex) (const char *src, int index); /* 671 */ + Tcl_Obj * (*tclGetRange) (Tcl_Obj *objPtr, int first, int last); /* 672 */ } TclStubs; extern const TclStubs *tclStubsPtr; @@ -4043,6 +4046,8 @@ extern const TclStubs *tclStubsPtr; (tclStubsPtr->tclGetCharLength) /* 670 */ #define TclUtfAtIndex \ (tclStubsPtr->tclUtfAtIndex) /* 671 */ +#define TclGetRange \ + (tclStubsPtr->tclGetRange) /* 672 */ #endif /* defined(USE_TCL_STUBS) */ diff --git a/generic/tclInt.h b/generic/tclInt.h index 6804996..7486d60 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3333,6 +3333,8 @@ MODULE_SCOPE int TclZipfs_Init(Tcl_Interp *interp); # define Tcl_GetCharLength TclGetCharLength # undef Tcl_UtfAtIndex # define Tcl_UtfAtIndex TclUtfAtIndex +# undef Tcl_GetRange +# define Tcl_GetRange TclGetRange #else # define tclUniCharStringType tclStringType # define TclGetUnicodeFromObj_ Tcl_GetUnicodeFromObj @@ -3345,8 +3347,8 @@ MODULE_SCOPE int TclZipfs_Init(Tcl_Interp *interp); # define TclNumUtfChars Tcl_NumUtfChars # undef TclGetCharLength # define TclGetCharLength Tcl_GetCharLength -# undef TclUtfAtIndex -# define TclUtfAtIndex Tcl_UtfAtIndex +# undef TclGetRange +# define TclGetRange Tcl_GetRange #endif diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 627fadc..332d1d2 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -110,7 +110,6 @@ const Tcl_ObjType tclStringType = { #else -#define tclStringType xxx #ifndef TCL_NO_DEPRECATED const Tcl_ObjType tclStringType = { "string", /* name */ @@ -952,6 +951,8 @@ TclGetUnicodeFromObj( *---------------------------------------------------------------------- */ +#if TCL_UTF_MAX > 3 && !defined(TCL_NO_DEPRECATED) +#undef Tcl_GetRange Tcl_Obj * Tcl_GetRange( Tcl_Obj *objPtr, /* The Tcl object to find the range of. */ @@ -959,6 +960,66 @@ Tcl_GetRange( int last) /* Last index of the range. */ { Tcl_Obj *newObjPtr; /* The Tcl object to find the range of. */ + String *stringPtr; + int length; + + if (first < 0) { + first = 0; + } + + /* + * Optimize the case where we're really dealing with a bytearray object + * we don't need to convert to a string to perform the substring operation. + */ + + if (TclIsPureByteArray(objPtr)) { + unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length); + + if (last < 0 || last >= length) { + last = length - 1; + } + if (last < first) { + TclNewObj(newObjPtr); + return newObjPtr; + } + return Tcl_NewByteArrayObj(bytes + first, last - first + 1); + } + + /* + * OK, need to work with the object as a utf16 string. + */ + + SetUTF16StringFromAny(NULL, objPtr); + stringPtr = GET_STRING(objPtr); + + if (last < 0 || last >= stringPtr->numChars) { + last = stringPtr->numChars - 1; + } + if (last < first) { + TclNewObj(newObjPtr); + return newObjPtr; + } + /* See: bug [11ae2be95dac9417] */ + if ((first > 0) && ((stringPtr->unicode[first] & 0xFC00) == 0xDC00) + && ((stringPtr->unicode[first-1] & 0xFC00) == 0xD800)) { + ++first; + } + if ((last + 1 < stringPtr->numChars) + && ((stringPtr->unicode[last+1] & 0xFC00) == 0xDC00) + && ((stringPtr->unicode[last] & 0xFC00) == 0xD800)) { + ++last; + } + return Tcl_NewUnicodeObj(stringPtr->unicode + first, last - first + 1); +} +#endif + +Tcl_Obj * +TclGetRange( + Tcl_Obj *objPtr, /* The Tcl object to find the range of. */ + int first, /* First index of the range. */ + int last) /* Last index of the range. */ +{ + Tcl_Obj *newObjPtr; /* The Tcl object to find the range of. */ UniCharString *stringPtr; int length; diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c index 131053a..2046938 100644 --- a/generic/tclStubInit.c +++ b/generic/tclStubInit.c @@ -48,6 +48,7 @@ #undef Tcl_UniCharCaseMatch #undef Tcl_UniCharLen #undef Tcl_UniCharNcmp +#undef Tcl_GetRange #undef Tcl_DumpActiveMemory #undef Tcl_ValidateAllMemory #undef Tcl_FindHashEntry @@ -98,6 +99,7 @@ static void uniCodePanic(void) { # define Tcl_UniCharNcmp (int(*)(const unsigned short *, const unsigned short *, unsigned long))(void *)uniCodePanic # define Tcl_UniCharNcasecmp (int(*)(const unsigned short *, const unsigned short *, unsigned long))(void *)uniCodePanic # define Tcl_UniCharCaseMatch (int(*)(const unsigned short *, const unsigned short *, int))(void *)uniCodePanic +# define Tcl_GetRange (Tcl_Obj *(*)(Tcl_Obj *, int, int))(void *)uniCodePanic #endif #define TclUtfCharComplete UtfCharComplete @@ -1957,6 +1959,7 @@ const TclStubs tclStubs = { TclNumUtfChars, /* 669 */ TclGetCharLength, /* 670 */ TclUtfAtIndex, /* 671 */ + TclGetRange, /* 672 */ }; /* !END!: Do not edit above this line. */ -- cgit v0.12 From 53f15cc67318ebe942c270c60268b0396688befc Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 24 Mar 2022 13:18:33 +0000 Subject: Fix internal usage of Tcl_GetRange/Tcl_UtfAtIndex --- generic/tclCmdMZ.c | 6 +++--- generic/tclExecute.c | 4 ++-- generic/tclInt.h | 2 ++ generic/tclStringObj.c | 2 +- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index bd5745b..1beb732 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -395,7 +395,7 @@ Tcl_RegexpObjCmd( newPtr = Tcl_NewListObj(2, objs); } else { if ((i <= info.nsubs) && (info.matches[i].end > 0)) { - newPtr = Tcl_GetRange(objPtr, + newPtr = TclGetRange(objPtr, offset + info.matches[i].start, offset + info.matches[i].end - 1); } else { @@ -2301,7 +2301,7 @@ StringRangeCmd( } if (last >= 0) { - Tcl_SetObjResult(interp, Tcl_GetRange(objv[1], first, last)); + Tcl_SetObjResult(interp, TclGetRange(objv[1], first, last)); } return TCL_OK; } @@ -3790,7 +3790,7 @@ TclNRSwitchObjCmd( if (matchVarObj != NULL) { Tcl_Obj *substringObj; - substringObj = Tcl_GetRange(stringObj, + substringObj = TclGetRange(stringObj, info.matches[j].start, info.matches[j].end-1); /* diff --git a/generic/tclExecute.c b/generic/tclExecute.c index 0456146..965d821 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -5373,7 +5373,7 @@ TEBCresume( if (toIdx < 0) { TclNewObj(objResultPtr); } else { - objResultPtr = Tcl_GetRange(OBJ_AT_DEPTH(2), fromIdx, toIdx); + objResultPtr = TclGetRange(OBJ_AT_DEPTH(2), fromIdx, toIdx); } TRACE_APPEND(("\"%.30s\"\n", O2S(objResultPtr))); NEXT_INST_V(1, 3, 1); @@ -5414,7 +5414,7 @@ TEBCresume( if (toIdx < 0) { TclNewObj(objResultPtr); } else { - objResultPtr = Tcl_GetRange(valuePtr, fromIdx, toIdx); + objResultPtr = TclGetRange(valuePtr, fromIdx, toIdx); } } TRACE_APPEND(("%.30s\n", O2S(objResultPtr))); diff --git a/generic/tclInt.h b/generic/tclInt.h index 7486d60..b6cf3b4 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3347,6 +3347,8 @@ MODULE_SCOPE int TclZipfs_Init(Tcl_Interp *interp); # define TclNumUtfChars Tcl_NumUtfChars # undef TclGetCharLength # define TclGetCharLength Tcl_GetCharLength +# undef TclUtfAtIndex +# define TclUtfAtIndex Tcl_UtfAtIndex # undef TclGetRange # define TclGetRange Tcl_GetRange #endif diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 332d1d2..6032dbb 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -2449,7 +2449,7 @@ Tcl_AppendFormatToObj( if (precision < 1) { TclNewObj(segment); } else { - segment = Tcl_GetRange(segment, 0, precision - 1); + segment = TclGetRange(segment, 0, precision - 1); } numChars = precision; Tcl_IncrRefCount(segment); -- cgit v0.12 From 4576277c9931b6267da5083fb250652d077bb819 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 24 Mar 2022 13:58:06 +0000 Subject: Add TclGetUniChar() to the compatibility set --- generic/tcl.decls | 3 +++ generic/tclCmdMZ.c | 4 ++-- generic/tclDecls.h | 5 +++++ generic/tclExecute.c | 2 +- generic/tclInt.h | 4 ++++ generic/tclStringObj.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++ generic/tclStubInit.c | 7 ++++++ 7 files changed, 80 insertions(+), 3 deletions(-) diff --git a/generic/tcl.decls b/generic/tcl.decls index f6a4c05..3b5c8a9 100644 --- a/generic/tcl.decls +++ b/generic/tcl.decls @@ -2466,6 +2466,9 @@ declare 671 { declare 672 { Tcl_Obj *TclGetRange(Tcl_Obj *objPtr, int first, int last) } +declare 673 { + int TclGetUniChar(Tcl_Obj *objPtr, int index) +} # ----- BASELINE -- FOR -- 8.7.0 ----- # diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 1beb732..29a73cf 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -310,7 +310,7 @@ Tcl_RegexpObjCmd( eflags = 0; } else if (offset > stringLength) { eflags = TCL_REG_NOTBOL; - } else if (Tcl_GetUniChar(objPtr, offset-1) == '\n') { + } else if (TclGetUniChar(objPtr, offset-1) == '\n') { eflags = 0; } else { eflags = TCL_REG_NOTBOL; @@ -1412,7 +1412,7 @@ StringIndexCmd( } if ((index >= 0) && (index < length)) { - int ch = Tcl_GetUniChar(objv[1], index); + int ch = TclGetUniChar(objv[1], index); if (ch == -1) { return TCL_OK; diff --git a/generic/tclDecls.h b/generic/tclDecls.h index fdf8673..ac90006 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -1971,6 +1971,8 @@ EXTERN int TclGetCharLength(Tcl_Obj *objPtr); EXTERN const char * TclUtfAtIndex(const char *src, int index); /* 672 */ EXTERN Tcl_Obj * TclGetRange(Tcl_Obj *objPtr, int first, int last); +/* 673 */ +EXTERN int TclGetUniChar(Tcl_Obj *objPtr, int index); typedef struct { const struct TclPlatStubs *tclPlatStubs; @@ -2679,6 +2681,7 @@ typedef struct TclStubs { int (*tclGetCharLength) (Tcl_Obj *objPtr); /* 670 */ const char * (*tclUtfAtIndex) (const char *src, int index); /* 671 */ Tcl_Obj * (*tclGetRange) (Tcl_Obj *objPtr, int first, int last); /* 672 */ + int (*tclGetUniChar) (Tcl_Obj *objPtr, int index); /* 673 */ } TclStubs; extern const TclStubs *tclStubsPtr; @@ -4048,6 +4051,8 @@ extern const TclStubs *tclStubsPtr; (tclStubsPtr->tclUtfAtIndex) /* 671 */ #define TclGetRange \ (tclStubsPtr->tclGetRange) /* 672 */ +#define TclGetUniChar \ + (tclStubsPtr->tclGetUniChar) /* 673 */ #endif /* defined(USE_TCL_STUBS) */ diff --git a/generic/tclExecute.c b/generic/tclExecute.c index 965d821..1f72d63 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -5329,7 +5329,7 @@ TEBCresume( valuePtr->bytes+index, 1); } else { char buf[4] = ""; - int ch = Tcl_GetUniChar(valuePtr, index); + int ch = TclGetUniChar(valuePtr, index); /* * This could be: Tcl_NewUnicodeObj((const Tcl_UniChar *)&ch, 1) diff --git a/generic/tclInt.h b/generic/tclInt.h index b6cf3b4..cc13c61 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3335,6 +3335,8 @@ MODULE_SCOPE int TclZipfs_Init(Tcl_Interp *interp); # define Tcl_UtfAtIndex TclUtfAtIndex # undef Tcl_GetRange # define Tcl_GetRange TclGetRange +# undef Tcl_GetUniChar +# define Tcl_GetUniChar TclGetUniChar #else # define tclUniCharStringType tclStringType # define TclGetUnicodeFromObj_ Tcl_GetUnicodeFromObj @@ -3351,6 +3353,8 @@ MODULE_SCOPE int TclZipfs_Init(Tcl_Interp *interp); # define TclUtfAtIndex Tcl_UtfAtIndex # undef TclGetRange # define TclGetRange Tcl_GetRange +# undef TclGetUniChar +# define TclGetUniChar Tcl_GetUniChar #endif diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 6032dbb..e8777cd 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -747,12 +747,70 @@ TclCheckEmptyString( *---------------------------------------------------------------------- */ +#if (TCL_UTF_MAX > 3) && !defined(TCL_NO_DEPRECATED) +#undef Tcl_GetUniChar int Tcl_GetUniChar( Tcl_Obj *objPtr, /* The object to get the Unicode charater * from. */ int index) /* Get the index'th Unicode character. */ { + String *stringPtr; + int ch, length; + + if (index < 0) { + return -1; + } + + /* + * Optimize the case where we're really dealing with a bytearray object + * we don't need to convert to a string to perform the indexing operation. + */ + + if (TclIsPureByteArray(objPtr)) { + unsigned char *bytes = Tcl_GetByteArrayFromObj(objPtr, &length); + if (index >= length) { + return -1; + } + + return (int) bytes[index]; + } + + /* + * OK, need to work with the object as a string. + */ + + SetUTF16StringFromAny(NULL, objPtr); + stringPtr = GET_STRING(objPtr); + + if (index >= stringPtr->numChars) { + return -1; + } + ch = stringPtr->unicode[index]; + /* See: bug [11ae2be95dac9417] */ + if ((ch & 0xF800) == 0xD800) { + if (ch & 0x400) { + if ((index > 0) + && ((stringPtr->unicode[index-1] & 0xFC00) == 0xD800)) { + ch = -1; /* low surrogate preceded by high surrogate */ + } + } else if ((++index < stringPtr->numChars) + && ((stringPtr->unicode[index] & 0xFC00) == 0xDC00)) { + /* high surrogate followed by low surrogate */ + ch = (((ch & 0x3FF) << 10) | + (stringPtr->unicode[index] & 0x3FF)) + 0x10000; + } + } + return ch; +} +#endif + +int +TclGetUniChar( + Tcl_Obj *objPtr, /* The object to get the Unicode charater + * from. */ + int index) /* Get the index'th Unicode character. */ +{ UniCharString *stringPtr; int ch, length; diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c index 2046938..fc78f74 100644 --- a/generic/tclStubInit.c +++ b/generic/tclStubInit.c @@ -49,6 +49,7 @@ #undef Tcl_UniCharLen #undef Tcl_UniCharNcmp #undef Tcl_GetRange +#undef Tcl_GetUniChar #undef Tcl_DumpActiveMemory #undef Tcl_ValidateAllMemory #undef Tcl_FindHashEntry @@ -79,6 +80,10 @@ #undef TclWinConvertError #undef Tcl_GetCharLength #undef Tcl_UtfAtIndex +#undef TclNumUtfChars +#undef TclGetCharLength +#undef TclUtfAtIndex +#undef TclGetUniChar #if defined(_WIN32) || defined(__CYGWIN__) #define TclWinConvertWSAError (void (*)(DWORD))(void *)Tcl_WinConvertError #define TclWinConvertError (void (*)(DWORD))(void *)Tcl_WinConvertError @@ -100,6 +105,7 @@ static void uniCodePanic(void) { # define Tcl_UniCharNcasecmp (int(*)(const unsigned short *, const unsigned short *, unsigned long))(void *)uniCodePanic # define Tcl_UniCharCaseMatch (int(*)(const unsigned short *, const unsigned short *, int))(void *)uniCodePanic # define Tcl_GetRange (Tcl_Obj *(*)(Tcl_Obj *, int, int))(void *)uniCodePanic +# define Tcl_GetUniChar (int(*)(Tcl_Obj *, int))(void *)uniCodePanic #endif #define TclUtfCharComplete UtfCharComplete @@ -1960,6 +1966,7 @@ const TclStubs tclStubs = { TclGetCharLength, /* 670 */ TclUtfAtIndex, /* 671 */ TclGetRange, /* 672 */ + TclGetUniChar, /* 673 */ }; /* !END!: Do not edit above this line. */ -- cgit v0.12 From 5438d045182b2bc669f663a22728d7306c459112 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 24 Mar 2022 23:29:50 +0000 Subject: bugfix: Handle NULL characters in Tcl_GetCharLength() --- generic/tclStringObj.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index e8777cd..988c6e5 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -681,7 +681,8 @@ Tcl_GetCharLength( (void) Tcl_GetByteArrayFromObj(objPtr, &numChars); } else { - numChars = Tcl_NumUtfChars(Tcl_GetString(objPtr), -1); + Tcl_GetString(objPtr); + numChars = Tcl_NumUtfChars(objPtr->bytes, objPtr->length); } return numChars; } -- cgit v0.12 From 29b5c15de58bfb8f522ebfcde30d4ded5712d4a2 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 25 Mar 2022 12:14:41 +0000 Subject: Unbreak build with-DTCL_UTF_MAX=3 --- generic/tclStubInit.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c index fc78f74..7d04481 100644 --- a/generic/tclStubInit.c +++ b/generic/tclStubInit.c @@ -80,10 +80,7 @@ #undef TclWinConvertError #undef Tcl_GetCharLength #undef Tcl_UtfAtIndex -#undef TclNumUtfChars -#undef TclGetCharLength -#undef TclUtfAtIndex -#undef TclGetUniChar + #if defined(_WIN32) || defined(__CYGWIN__) #define TclWinConvertWSAError (void (*)(DWORD))(void *)Tcl_WinConvertError #define TclWinConvertError (void (*)(DWORD))(void *)Tcl_WinConvertError -- cgit v0.12 From d4140ec31b18515b482006a2d9cac7e2d281f8db Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sat, 26 Mar 2022 23:02:29 +0000 Subject: Don't run obj-1.1 test with -DTCL_NO_DEPRECATED --- tests/obj.test | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/obj.test b/tests/obj.test index 4fa8d3a..7563422 100644 --- a/tests/obj.test +++ b/tests/obj.test @@ -19,11 +19,13 @@ if {"::tcltest" ni [namespace children]} { ::tcltest::loadTestedCommands catch [list package require -exact tcl::test [info patchlevel]] +package require tcltests + testConstraint testobj [llength [info commands testobj]] testConstraint longIs32bit [expr {$tcl_platform(wordSize) == 4}] testConstraint wideIs64bit [expr {wide(0x8000000000000000) < 0}] -test obj-1.1 {Tcl_AppendAllObjTypes, and InitTypeTable, Tcl_RegisterObjType} testobj { +test obj-1.1 {Tcl_AppendAllObjTypes, and InitTypeTable, Tcl_RegisterObjType} {testobj deprecated} { set r 1 foreach {t} { bytearray -- cgit v0.12 From ec52ceb63b1563cfdeb057731661d0ae92332765 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 29 Mar 2022 21:14:54 +0000 Subject: Undo deprecation for Tcl_AppendUnicodeToObj (as described in the TIP) --- generic/tcl.decls | 2 +- generic/tclDecls.h | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/generic/tcl.decls b/generic/tcl.decls index 3b5c8a9..f0718c1 100644 --- a/generic/tcl.decls +++ b/generic/tcl.decls @@ -1356,7 +1356,7 @@ declare 382 {deprecated {No longer in use, changed to macro}} { declare 383 { Tcl_Obj *Tcl_GetRange(Tcl_Obj *objPtr, int first, int last) } -declare 384 {deprecated {Use Tcl_AppendStringsToObj}} { +declare 384 { void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr, const unsigned short *unicode, int length) } diff --git a/generic/tclDecls.h b/generic/tclDecls.h index ac90006..020c4bb 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -1157,8 +1157,7 @@ unsigned short * Tcl_GetUnicode(Tcl_Obj *objPtr); /* 383 */ EXTERN Tcl_Obj * Tcl_GetRange(Tcl_Obj *objPtr, int first, int last); /* 384 */ -TCL_DEPRECATED("Use Tcl_AppendStringsToObj") -void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr, +EXTERN void Tcl_AppendUnicodeToObj(Tcl_Obj *objPtr, const unsigned short *unicode, int length); /* 385 */ EXTERN int Tcl_RegExpMatchObj(Tcl_Interp *interp, @@ -2392,7 +2391,7 @@ typedef struct TclStubs { int (*tcl_GetUniChar) (Tcl_Obj *objPtr, int index); /* 381 */ TCL_DEPRECATED_API("No longer in use, changed to macro") unsigned short * (*tcl_GetUnicode) (Tcl_Obj *objPtr); /* 382 */ Tcl_Obj * (*tcl_GetRange) (Tcl_Obj *objPtr, int first, int last); /* 383 */ - TCL_DEPRECATED_API("Use Tcl_AppendStringsToObj") void (*tcl_AppendUnicodeToObj) (Tcl_Obj *objPtr, const unsigned short *unicode, int length); /* 384 */ + void (*tcl_AppendUnicodeToObj) (Tcl_Obj *objPtr, const unsigned short *unicode, int length); /* 384 */ int (*tcl_RegExpMatchObj) (Tcl_Interp *interp, Tcl_Obj *textObj, Tcl_Obj *patternObj); /* 385 */ void (*tcl_SetNotifier) (const Tcl_NotifierProcs *notifierProcPtr); /* 386 */ Tcl_Mutex * (*tcl_GetAllocMutex) (void); /* 387 */ -- cgit v0.12 From 572557152a0233310ad8df9c1f877cecd65e3406 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 30 Mar 2022 11:56:16 +0000 Subject: Change UTF-16 version of Tcl_GetRange() not to use the tclStringObj type any more --- generic/tclStringObj.c | 55 +++++++++++++++++++------------------------------- generic/tclTest.c | 3 ++- 2 files changed, 23 insertions(+), 35 deletions(-) diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 988c6e5..63b833f 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -180,12 +180,12 @@ DupUTF16StringInternalRep( Tcl_Obj *copyPtr) /* Object with internal rep to set. Must not * currently have an internal rep.*/ { - String *srcStringPtr = ((String *) (srcPtr)->internalRep.twoPtrValue.ptr1); + String *srcStringPtr = GET_STRING(srcPtr); size_t size = offsetof(String, unicode) + (((srcStringPtr->allocated) + 1U) * sizeof(unsigned short)); String *copyStringPtr = (String *)ckalloc(size); memcpy(copyStringPtr, srcStringPtr, size); - copyPtr->internalRep.twoPtrValue.ptr1 = copyStringPtr; + SET_STRING(copyPtr, copyStringPtr); copyPtr->typePtr = &tclStringType; } @@ -223,7 +223,7 @@ SetUTF16StringFromAny( stringPtr->allocated = size; stringPtr->maxChars = size; stringPtr->hasUnicode = 1; - objPtr->internalRep.twoPtrValue.ptr1 = stringPtr; + SET_STRING(objPtr, stringPtr); objPtr->typePtr = &tclStringType; } return TCL_OK; @@ -234,7 +234,7 @@ UpdateStringOfUTF16String( Tcl_Obj *objPtr) /* Object with string rep to update. */ { Tcl_DString ds; - String *stringPtr = ((String *) (objPtr)->internalRep.twoPtrValue.ptr1); + String *stringPtr = GET_STRING(objPtr); Tcl_DStringInit(&ds); const char *string = Tcl_Char16ToUtfDString(stringPtr->unicode, stringPtr->numChars, &ds); @@ -560,18 +560,19 @@ Tcl_NewUnicodeObj( Tcl_Obj *objPtr; TclNewObj(objPtr); - TclFreeInternalRep(objPtr); + TclInvalidateStringRep(objPtr); - String *stringPtr = (String *)ckalloc((offsetof(String, unicode) + 2U) + numChars * sizeof(unsigned short)); - memcpy(stringPtr->unicode, unicode, numChars); - stringPtr->unicode[numChars] = 0; + String *stringPtr = (String *)ckalloc((offsetof(String, unicode) + + sizeof(unsigned short)) + numChars * sizeof(unsigned short)); + memcpy(stringPtr->unicode, unicode, numChars); + stringPtr->unicode[numChars] = 0; - stringPtr->numChars = numChars; - stringPtr->allocated = numChars; - stringPtr->maxChars = numChars; - stringPtr->hasUnicode = 1; - objPtr->internalRep.twoPtrValue.ptr1 = stringPtr; - objPtr->typePtr = &tclStringType; + stringPtr->numChars = numChars; + stringPtr->allocated = numChars; + stringPtr->maxChars = numChars; + stringPtr->hasUnicode = 1; + SET_STRING(objPtr, stringPtr); + objPtr->typePtr = &tclStringType; return objPtr; } @@ -1019,7 +1020,6 @@ Tcl_GetRange( int last) /* Last index of the range. */ { Tcl_Obj *newObjPtr; /* The Tcl object to find the range of. */ - String *stringPtr; int length; if (first < 0) { @@ -1044,31 +1044,18 @@ Tcl_GetRange( return Tcl_NewByteArrayObj(bytes + first, last - first + 1); } - /* - * OK, need to work with the object as a utf16 string. - */ - - SetUTF16StringFromAny(NULL, objPtr); - stringPtr = GET_STRING(objPtr); + int numChars = Tcl_NumUtfChars(objPtr->bytes, objPtr->length); - if (last < 0 || last >= stringPtr->numChars) { - last = stringPtr->numChars - 1; + if (last >= numChars) { + last = numChars - 1; } if (last < first) { TclNewObj(newObjPtr); return newObjPtr; } - /* See: bug [11ae2be95dac9417] */ - if ((first > 0) && ((stringPtr->unicode[first] & 0xFC00) == 0xDC00) - && ((stringPtr->unicode[first-1] & 0xFC00) == 0xD800)) { - ++first; - } - if ((last + 1 < stringPtr->numChars) - && ((stringPtr->unicode[last+1] & 0xFC00) == 0xDC00) - && ((stringPtr->unicode[last] & 0xFC00) == 0xD800)) { - ++last; - } - return Tcl_NewUnicodeObj(stringPtr->unicode + first, last - first + 1); + const char *begin = Tcl_UtfAtIndex(objPtr->bytes, first); + const char *end = Tcl_UtfAtIndex(objPtr->bytes, last + 1); + return Tcl_NewStringObj(begin, end - begin); } #endif diff --git a/generic/tclTest.c b/generic/tclTest.c index 30681d9..cbd1f70 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -16,6 +16,7 @@ */ #undef STATIC_BUILD +#undef BUILD_tcl #ifndef USE_TCL_STUBS # define USE_TCL_STUBS #endif @@ -6971,7 +6972,7 @@ TestUtfNextCmd( int objc, Tcl_Obj *const objv[]) { - size_t numBytes; + int numBytes; char *bytes; const char *result, *first; char buffer[32]; -- cgit v0.12 From 4aa2998438dbe02e6c704ad6484602ada5ead2df Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 29 Apr 2022 10:01:44 +0000 Subject: Since tclDecls.h uses 'size_t', make sure to include . Since ISO-C has both 'size_t' and 'ptrdiff_t', use those to define INT2PTR and friends (including UINT2PTR) --- generic/tclDate.c | 1 - generic/tclDecls.h | 2 ++ generic/tclHash.c | 64 +++++++++++++++++++++++++----------------------------- generic/tclInt.h | 26 +++++----------------- 4 files changed, 38 insertions(+), 55 deletions(-) diff --git a/generic/tclDate.c b/generic/tclDate.c index 7688f2c..adc7fb9 100644 --- a/generic/tclDate.c +++ b/generic/tclDate.c @@ -352,7 +352,6 @@ typedef short yytype_int16; # elif defined size_t # define YYSIZE_T size_t # elif ! defined YYSIZE_T -# include /* INFRINGES ON USER NAME SPACE */ # define YYSIZE_T size_t # else # define YYSIZE_T unsigned diff --git a/generic/tclDecls.h b/generic/tclDecls.h index 624903e..897fda4 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -12,6 +12,8 @@ #ifndef _TCLDECLS #define _TCLDECLS +#include /* for size_t */ + #undef TCL_STORAGE_CLASS #ifdef BUILD_tcl # define TCL_STORAGE_CLASS DLLEXPORT diff --git a/generic/tclHash.c b/generic/tclHash.c index 7538821..606d26b 100644 --- a/generic/tclHash.c +++ b/generic/tclHash.c @@ -273,8 +273,7 @@ CreateHashEntry( { Tcl_HashEntry *hPtr; const Tcl_HashKeyType *typePtr; - unsigned int hash; - int index; + TCL_HASH_TYPE hash, index; if (tablePtr->keyType == TCL_STRING_KEYS) { typePtr = &tclStringHashKeyType; @@ -350,11 +349,11 @@ CreateHashEntry( } else { hPtr = (Tcl_HashEntry *)ckalloc(sizeof(Tcl_HashEntry)); hPtr->key.oneWordValue = (char *) key; - hPtr->clientData = 0; + Tcl_SetHashValue(hPtr, NULL); } hPtr->tablePtr = tablePtr; - hPtr->hash = INT2PTR(hash); + hPtr->hash = UINT2PTR(hash); hPtr->nextPtr = tablePtr->buckets[index]; tablePtr->buckets[index] = hPtr; tablePtr->numEntries++; @@ -396,7 +395,7 @@ Tcl_DeleteHashEntry( const Tcl_HashKeyType *typePtr; Tcl_HashTable *tablePtr; Tcl_HashEntry **bucketPtr; - int index; + TCL_HASH_TYPE index; tablePtr = entryPtr->tablePtr; @@ -413,7 +412,7 @@ Tcl_DeleteHashEntry( if (typePtr->hashKeyProc == NULL || typePtr->flags & TCL_HASH_KEY_RANDOMIZE_HASH) { - index = RANDOM_INDEX(tablePtr, PTR2INT(entryPtr->hash)); + index = RANDOM_INDEX(tablePtr, PTR2UINT(entryPtr->hash)); } else { index = PTR2UINT(entryPtr->hash) & tablePtr->mask; } @@ -614,7 +613,8 @@ Tcl_HashStats( Tcl_HashTable *tablePtr) /* Table for which to produce stats. */ { #define NUM_COUNTERS 10 - int count[NUM_COUNTERS], overflow, i, j; + int i; + TCL_HASH_TYPE count[NUM_COUNTERS], overflow, j; double average, tmp; Tcl_HashEntry *hPtr; char *result, *p; @@ -649,15 +649,15 @@ Tcl_HashStats( */ result = (char *)ckalloc((NUM_COUNTERS * 60) + 300); - sprintf(result, "%d entries in table, %d buckets\n", + sprintf(result, "%u entries in table, %u buckets\n", tablePtr->numEntries, tablePtr->numBuckets); p = result + strlen(result); for (i = 0; i < NUM_COUNTERS; i++) { - sprintf(p, "number of buckets with %d entries: %d\n", + sprintf(p, "number of buckets with %u entries: %u\n", i, count[i]); p += strlen(p); } - sprintf(p, "number of buckets with %d or more entries: %d\n", + sprintf(p, "number of buckets with %u or more entries: %u\n", NUM_COUNTERS, overflow); p += strlen(p); sprintf(p, "average search distance for entry: %.1f", average); @@ -683,17 +683,14 @@ Tcl_HashStats( static Tcl_HashEntry * AllocArrayEntry( Tcl_HashTable *tablePtr, /* Hash table. */ - void *keyPtr) /* Key to store in the hash table entry. */ + void *keyPtr) /* Key to store in the hash table entry. */ { int *array = (int *) keyPtr; int *iPtr1, *iPtr2; Tcl_HashEntry *hPtr; - int count; - unsigned int size; - - count = tablePtr->keyType; + int count = tablePtr->keyType; + TCL_HASH_TYPE size = sizeof(Tcl_HashEntry) + (count*sizeof(int)) - sizeof(hPtr->key); - size = sizeof(Tcl_HashEntry) + (count*sizeof(int)) - sizeof(hPtr->key); if (size < sizeof(Tcl_HashEntry)) { size = sizeof(Tcl_HashEntry); } @@ -703,7 +700,7 @@ AllocArrayEntry( count > 0; count--, iPtr1++, iPtr2++) { *iPtr2 = *iPtr1; } - hPtr->clientData = 0; + Tcl_SetHashValue(hPtr, NULL); return hPtr; } @@ -727,11 +724,11 @@ AllocArrayEntry( static int CompareArrayKeys( - void *keyPtr, /* New key to compare. */ + void *keyPtr, /* New key to compare. */ Tcl_HashEntry *hPtr) /* Existing key to compare. */ { - const int *iPtr1 = (const int *) keyPtr; - const int *iPtr2 = (const int *) hPtr->key.words; + const int *iPtr1 = (const int *)keyPtr; + const int *iPtr2 = hPtr->key.words; Tcl_HashTable *tablePtr = hPtr->tablePtr; int count; @@ -767,7 +764,7 @@ CompareArrayKeys( static TCL_HASH_TYPE HashArrayKey( Tcl_HashTable *tablePtr, /* Hash table. */ - void *keyPtr) /* Key from which to compute hash value. */ + void *keyPtr) /* Key from which to compute hash value. */ { const int *array = (const int *) keyPtr; TCL_HASH_TYPE result; @@ -799,7 +796,7 @@ HashArrayKey( static Tcl_HashEntry * AllocStringEntry( TCL_UNUSED(Tcl_HashTable *), - void *keyPtr) /* Key to store in the hash table entry. */ + void *keyPtr) /* Key to store in the hash table entry. */ { const char *string = (const char *) keyPtr; Tcl_HashEntry *hPtr; @@ -812,7 +809,7 @@ AllocStringEntry( hPtr = (Tcl_HashEntry *)ckalloc(offsetof(Tcl_HashEntry, key) + allocsize); memset(hPtr, 0, sizeof(Tcl_HashEntry) + allocsize - sizeof(hPtr->key)); memcpy(hPtr->key.string, string, size); - hPtr->clientData = 0; + Tcl_SetHashValue(hPtr, NULL); return hPtr; } @@ -835,13 +832,10 @@ AllocStringEntry( static int CompareStringKeys( - void *keyPtr, /* New key to compare. */ + void *keyPtr, /* New key to compare. */ Tcl_HashEntry *hPtr) /* Existing key to compare. */ { - const char *p1 = (const char *) keyPtr; - const char *p2 = (const char *) hPtr->key.string; - - return !strcmp(p1, p2); + return !strcmp((char *)keyPtr, hPtr->key.string); } /* @@ -864,7 +858,7 @@ CompareStringKeys( static TCL_HASH_TYPE HashStringKey( TCL_UNUSED(Tcl_HashTable *), - void *keyPtr) /* Key from which to compute hash value. */ + void *keyPtr) /* Key from which to compute hash value. */ { const char *string = (const char *)keyPtr; TCL_HASH_TYPE result; @@ -985,14 +979,14 @@ static void RebuildTable( Tcl_HashTable *tablePtr) /* Table to enlarge. */ { - int count, index, oldSize = tablePtr->numBuckets; + TCL_HASH_TYPE count, index, oldSize = tablePtr->numBuckets; Tcl_HashEntry **oldBuckets = tablePtr->buckets; Tcl_HashEntry **oldChainPtr, **newChainPtr; Tcl_HashEntry *hPtr; const Tcl_HashKeyType *typePtr; /* Avoid outgrowing capability of the memory allocators */ - if (oldSize > (int)(UINT_MAX / (4 * sizeof(Tcl_HashEntry *)))) { + if (oldSize > UINT_MAX / (4 * sizeof(Tcl_HashEntry *))) { tablePtr->rebuildSize = INT_MAX; return; } @@ -1015,7 +1009,7 @@ RebuildTable( tablePtr->numBuckets *= 4; if (typePtr->flags & TCL_HASH_KEY_SYSTEM_HASH) { - tablePtr->buckets = (Tcl_HashEntry **) TclpSysAlloc( + tablePtr->buckets = (Tcl_HashEntry **)TclpSysAlloc( tablePtr->numBuckets * sizeof(Tcl_HashEntry *), 0); } else { tablePtr->buckets = @@ -1026,7 +1020,9 @@ RebuildTable( *newChainPtr = NULL; } tablePtr->rebuildSize *= 4; - tablePtr->downShift -= 2; + if (tablePtr->downShift > 1) { + tablePtr->downShift -= 2; + } tablePtr->mask = (tablePtr->mask << 2) + 3; /* @@ -1038,7 +1034,7 @@ RebuildTable( *oldChainPtr = hPtr->nextPtr; if (typePtr->hashKeyProc == NULL || typePtr->flags & TCL_HASH_KEY_RANDOMIZE_HASH) { - index = RANDOM_INDEX(tablePtr, PTR2INT(hPtr->hash)); + index = RANDOM_INDEX(tablePtr, PTR2UINT(hPtr->hash)); } else { index = PTR2UINT(hPtr->hash) & tablePtr->mask; } diff --git a/generic/tclInt.h b/generic/tclInt.h index 61cc3b3..6a56c10 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -89,11 +89,6 @@ #else #include #endif -#if !defined(STDC_HEADERS) && !defined(__STDC__) && !defined(__C99__FUNC__) \ - && !defined(__cplusplus) && !defined(_MSC_VER) && !defined(__ICC) -typedef int ptrdiff_t; -#endif -#include #include /* @@ -129,25 +124,16 @@ typedef int ptrdiff_t; */ #if !defined(INT2PTR) -# if defined(HAVE_INTPTR_T) || defined(intptr_t) -# define INT2PTR(p) ((void *)(intptr_t)(p)) -# else -# define INT2PTR(p) ((void *)(p)) -# endif +# define INT2PTR(p) ((void *)(ptrdiff_t)(p)) #endif #if !defined(PTR2INT) -# if defined(HAVE_INTPTR_T) || defined(intptr_t) -# define PTR2INT(p) ((intptr_t)(p)) -# else -# define PTR2INT(p) ((long)(p)) -# endif +# define PTR2INT(p) ((ptrdiff_t)(p)) +#endif +#if !defined(UINT2PTR) +# define UINT2PTR(p) ((void *)(size_t)(p)) #endif #if !defined(PTR2UINT) -# if defined(HAVE_UINTPTR_T) || defined(uintptr_t) -# define PTR2UINT(p) ((uintptr_t)(p)) -# else -# define PTR2UINT(p) ((unsigned long)(p)) -# endif +# define PTR2UINT(p) ((size_t)(p)) #endif #if defined(_WIN32) && defined(_MSC_VER) -- cgit v0.12 From 3631e889be9e340e2639829b6c0eed3470d07a4d Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 6 May 2022 15:12:48 +0000 Subject: A number of fixes from zlib's 'develop' branch. --- compat/zlib/configure | 3 +++ compat/zlib/contrib/infback9/inftree9.c | 2 +- compat/zlib/contrib/minizip/iowin32.c | 5 +++++ compat/zlib/crc32.c | 8 ++++---- compat/zlib/deflate.h | 4 ++-- compat/zlib/inftrees.c | 2 +- compat/zlib/trees.c | 6 +++--- 7 files changed, 19 insertions(+), 11 deletions(-) diff --git a/compat/zlib/configure b/compat/zlib/configure index 52ff4a0..3fa3e86 100755 --- a/compat/zlib/configure +++ b/compat/zlib/configure @@ -174,7 +174,10 @@ if test -z "$CC"; then else cc=${CROSS_PREFIX}cc fi +else + cc=${CC} fi + cflags=${CFLAGS-"-O3"} # to force the asm version use: CFLAGS="-O3 -DASMV" ./configure case "$cc" in diff --git a/compat/zlib/contrib/infback9/inftree9.c b/compat/zlib/contrib/infback9/inftree9.c index 0550606..2175bde 100644 --- a/compat/zlib/contrib/infback9/inftree9.c +++ b/compat/zlib/contrib/infback9/inftree9.c @@ -64,7 +64,7 @@ unsigned short FAR *work; static const unsigned short lext[31] = { /* Length codes 257..285 extra */ 128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129, 130, 130, 130, 130, 131, 131, 131, 131, 132, 132, 132, 132, - 133, 133, 133, 133, 144, 199, 202}; + 133, 133, 133, 133, 144, 76, 202}; static const unsigned short dbase[32] = { /* Distance codes 0..31 base */ 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, 2049, 3073, diff --git a/compat/zlib/contrib/minizip/iowin32.c b/compat/zlib/contrib/minizip/iowin32.c index 274f39e..7df5251 100644 --- a/compat/zlib/contrib/minizip/iowin32.c +++ b/compat/zlib/contrib/minizip/iowin32.c @@ -28,6 +28,11 @@ // see Include/shared/winapifamily.h in the Windows Kit #if defined(WINAPI_FAMILY_PARTITION) && (!(defined(IOWIN32_USING_WINRT_API))) + +#if !defined(WINAPI_FAMILY_ONE_PARTITION) +#define WINAPI_FAMILY_ONE_PARTITION(PartitionSet, Partition) ((WINAPI_FAMILY & PartitionSet) == Partition) +#endif + #if WINAPI_FAMILY_ONE_PARTITION(WINAPI_FAMILY, WINAPI_PARTITION_APP) #define IOWIN32_USING_WINRT_API 1 #endif diff --git a/compat/zlib/crc32.c b/compat/zlib/crc32.c index a1bdce5..451887b 100644 --- a/compat/zlib/crc32.c +++ b/compat/zlib/crc32.c @@ -630,7 +630,7 @@ unsigned long ZEXPORT crc32_z(crc, buf, len) #endif /* DYNAMIC_CRC_TABLE */ /* Pre-condition the CRC */ - crc ^= 0xffffffff; + crc = (~crc) & 0xffffffff; /* Compute the CRC up to a word boundary. */ while (len && ((z_size_t)buf & 7) != 0) { @@ -749,7 +749,7 @@ unsigned long ZEXPORT crc32_z(crc, buf, len) #endif /* DYNAMIC_CRC_TABLE */ /* Pre-condition the CRC */ - crc ^= 0xffffffff; + crc = (~crc) & 0xffffffff; #ifdef W @@ -1077,7 +1077,7 @@ uLong ZEXPORT crc32_combine64(crc1, crc2, len2) #ifdef DYNAMIC_CRC_TABLE once(&made, make_crc_table); #endif /* DYNAMIC_CRC_TABLE */ - return multmodp(x2nmodp(len2, 3), crc1) ^ crc2; + return multmodp(x2nmodp(len2, 3), crc1) ^ (crc2 & 0xffffffff); } /* ========================================================================= */ @@ -1112,5 +1112,5 @@ uLong crc32_combine_op(crc1, crc2, op) uLong crc2; uLong op; { - return multmodp(op, crc1) ^ crc2; + return multmodp(op, crc1) ^ (crc2 & 0xffffffff); } diff --git a/compat/zlib/deflate.h b/compat/zlib/deflate.h index 17c2261..1a06cd5 100644 --- a/compat/zlib/deflate.h +++ b/compat/zlib/deflate.h @@ -329,8 +329,8 @@ void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf, # define _tr_tally_dist(s, distance, length, flush) \ { uch len = (uch)(length); \ ush dist = (ush)(distance); \ - s->sym_buf[s->sym_next++] = dist; \ - s->sym_buf[s->sym_next++] = dist >> 8; \ + s->sym_buf[s->sym_next++] = (uch)dist; \ + s->sym_buf[s->sym_next++] = (uch)(dist >> 8); \ s->sym_buf[s->sym_next++] = len; \ dist--; \ s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \ diff --git a/compat/zlib/inftrees.c b/compat/zlib/inftrees.c index 09462a7..a2f386c 100644 --- a/compat/zlib/inftrees.c +++ b/compat/zlib/inftrees.c @@ -62,7 +62,7 @@ unsigned short FAR *work; 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; static const unsigned short lext[31] = { /* Length codes 257..285 extra */ 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, - 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 199, 202}; + 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 76, 202}; static const unsigned short dbase[32] = { /* Distance codes 0..29 base */ 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, diff --git a/compat/zlib/trees.c b/compat/zlib/trees.c index f73fd99..8b438cc 100644 --- a/compat/zlib/trees.c +++ b/compat/zlib/trees.c @@ -1017,9 +1017,9 @@ int ZLIB_INTERNAL _tr_tally (s, dist, lc) unsigned dist; /* distance of matched string */ unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */ { - s->sym_buf[s->sym_next++] = dist; - s->sym_buf[s->sym_next++] = dist >> 8; - s->sym_buf[s->sym_next++] = lc; + s->sym_buf[s->sym_next++] = (uch)dist; + s->sym_buf[s->sym_next++] = (uch)(dist >> 8); + s->sym_buf[s->sym_next++] = (uch)lc; if (dist == 0) { /* lc is the unmatched char */ s->dyn_ltree[lc].Freq++; -- cgit v0.12 From 8a2c8e2ec7abae84c15d9888e7c0eeba4397b6ee Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 9 May 2022 07:15:13 +0000 Subject: rebuild zlib1.dll 1.2.12.1 for i686 and x86_64 --- compat/zlib/win32/zlib1.dll | Bin 122880 -> 122880 bytes compat/zlib/win64/zlib1.dll | Bin 135168 -> 134144 bytes 2 files changed, 0 insertions(+), 0 deletions(-) diff --git a/compat/zlib/win32/zlib1.dll b/compat/zlib/win32/zlib1.dll index 6c45f4d..e87de8c 100755 Binary files a/compat/zlib/win32/zlib1.dll and b/compat/zlib/win32/zlib1.dll differ diff --git a/compat/zlib/win64/zlib1.dll b/compat/zlib/win64/zlib1.dll index a5c6da6..9e38c08 100755 Binary files a/compat/zlib/win64/zlib1.dll and b/compat/zlib/win64/zlib1.dll differ -- cgit v0.12 From d166e4d7a11a0f526ddd5667210777e260609595 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 9 May 2022 19:22:51 +0000 Subject: rebuild zlib1.dll 1.2.12.1 for aarch64 (win64-arm) --- compat/zlib/win64-arm/zlib1.dll | Bin 95232 -> 95232 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/compat/zlib/win64-arm/zlib1.dll b/compat/zlib/win64-arm/zlib1.dll index 161b60f..9025467 100755 Binary files a/compat/zlib/win64-arm/zlib1.dll and b/compat/zlib/win64-arm/zlib1.dll differ -- cgit v0.12 -- cgit v0.12 From 3bdcb898d3c643d00352fcebe7fe7a3b79d2e9cc Mon Sep 17 00:00:00 2001 From: kjnash Date: Tue, 10 May 2022 07:48:31 +0000 Subject: Fix the bug. Standardise and document protocol upgrades. --- doc/http.n | 34 ++++++++++++++++++++++++++++ library/http/http.tcl | 57 ++++++++++++++++++++++++++++++++++++++++++++--- library/http/pkgIndex.tcl | 2 +- unix/Makefile.in | 4 ++-- win/Makefile.in | 4 ++-- 5 files changed, 93 insertions(+), 8 deletions(-) diff --git a/doc/http.n b/doc/http.n index 181b48b..a451f80 100644 --- a/doc/http.n +++ b/doc/http.n @@ -786,6 +786,40 @@ Subsequent GET and HEAD requests in a failed pipeline will also be retried. that the retry is appropriate\fR - specifically, the application must know that if the failed POST successfully modified the state of the server, a repeat POST would have no adverse effect. +.SH "PROTOCOL UPGRADES" +.PP +The HTTP/1.1 \fBConnection\fR and \fBUpgrade\fR client headers inform the server +that the client wishes to change the protocol used over the existing connection +(RFC 7230). This mechanism can be used to request a WebSocket (RFC 6455), a +higher version of the HTTP protocol (HTTP 2 or 3), or TLS encryption. If the +server accepts the upgrade request, its response code will be 101. +.PP +To request a protocol upgrade when calling \fBhttp::geturl\fR, the \fB-headers\fR +option must supply appropriate values for \fBConnection\fR and \fBUpgrade\fR, and +the \fB-command\fR option must supply a command that implements the requested +protocol and can also handle the server response if the server refuses the +protocol upgrade. For upgrade requests \fBhttp::geturl\fR ignores the value of +option \fB-keepalive\fR, and always uses the value \fB0\fR so that the upgrade +request is not made over a connection that is intended for multiple HTTP requests. +.PP +The Tcllib library \fBwebsocket\fR implements WebSockets, and makes the necessary +calls to commands in the \fBhttp\fR package. +.PP +There is currently no native Tcl client library for HTTP/2 or HTTP/3. +.PP +The \fBUpgrade\fR mechanism is not used to request TLS in web browsers, because +\fBhttp\fR and \fBhttps\fR are served over different ports. It is used by +protocols such as Internet Printing Protocol (IPP) that are built on top of +\fBhttp(s)\fR and use the same TCP port number for both secure and insecure +traffic. +.PP +In browsers, opportunistic encryption is instead implemented by the +\fBUpgrade-Insecure-Requests\fR client header. If a secure service is available, +the server response code is a 307 redirect, and the response header +\fBLocation\fR specifies the target URL. The browser must call \fBhttp::geturl\fR +again in order to fetch this URL. +See https://w3c.github.io/webappsec-upgrade-insecure-requests/ +.PP .SH EXAMPLE .PP This example creates a procedure to copy a URL to a file while printing a diff --git a/library/http/http.tcl b/library/http/http.tcl index 326d9d8..92d3a5a 100644 --- a/library/http/http.tcl +++ b/library/http/http.tcl @@ -11,7 +11,7 @@ package require Tcl 8.6- # Keep this in sync with pkgIndex.tcl and with the install directories in # Makefiles -package provide http 2.9.6 +package provide http 2.9.7 namespace eval http { # Allow resourcing to not clobber existing data @@ -255,10 +255,49 @@ proc http::Finish {token {errormsg ""} {skipCB 0}} { if {[info commands ${token}EventCoroutine] ne {}} { rename ${token}EventCoroutine {} } + + # Is this an upgrade request/response? + set upgradeResponse 0 + if { [info exists state(upgradeRequest)] + && [info exists state(http)] + && $state(upgradeRequest) + && ([ncode $token] eq {101}) + } { + # An upgrade must be requested by the client. + # If 101 response, test server response headers for an upgrade. + set connectionHd {} + set upgradeHd {} + if {[dict exists $state(meta) connection]} { + set connectionHd [string tolower [dict get $state(meta) connection]] + } + if {[dict exists $state(meta) upgrade]} { + set upgradeHd [string tolower [dict get $state(meta) upgrade]] + } + if {($connectionHd eq {upgrade}) && ($upgradeHd ne {})} { + set upgradeResponse 1 + } + } + if { ($state(status) eq "timeout") || ($state(status) eq "error") || ($state(status) eq "eof") - || ([info exists state(-keepalive)] && !$state(-keepalive)) + } { + set closeQueue 1 + set connId $state(socketinfo) + set sock $state(sock) + CloseSocket $state(sock) $token + } elseif {$upgradeResponse} { + # Special handling for an upgrade request/response. + # - geturl ensures that this is not a "persistent" socket used for + # multiple HTTP requests, so a call to KeepSocket is not needed. + # - Leave socket open, so a call to CloseSocket is not needed either. + # - Remove fileevent bindings. The caller will set its own bindings. + # - THE CALLER MUST PROCESS THE UPGRADED SOCKET IN THE CALLBACK COMMAND + # PASSED TO http::geturl AS -command callback. + catch {fileevent $state(sock) readable {}} + catch {fileevent $state(sock) writable {}} + } elseif { + ([info exists state(-keepalive)] && !$state(-keepalive)) || ([info exists state(connection)] && ($state(connection) eq "close")) } { set closeQueue 1 @@ -946,6 +985,13 @@ proc http::geturl {url args} { # c11a51c482] set state(accept-types) $http(-accept) + set state(upgradeRequest) [expr { + [dict exists $state(-headers) Upgrade] + && [dict exists $state(-headers) Connection] + && ([dict get $state(-headers) Connection] eq {Upgrade}) + && ([dict get $state(-headers) Upgrade] ne {}) + }] + if {$isQuery || $isQueryChannel} { # It's a POST. # A client wishing to send a non-idempotent request SHOULD wait to send @@ -961,8 +1007,13 @@ proc http::geturl {url args} { # There is a small risk of a race against server timeout. set state(-pipeline) 0 } + } elseif {$state(upgradeRequest)} { + # It's an upgrade request. Method must be GET (untested). + # Force -keepalive to 0 so the connection is not made over a persistent + # socket, i.e. one used for multiple HTTP requests. + set state(-keepalive) 0 } else { - # It's a GET or HEAD. + # It's a non-upgrade GET or HEAD. set state(-pipeline) $http(-pipeline) } diff --git a/library/http/pkgIndex.tcl b/library/http/pkgIndex.tcl index 7249547..e12cf84 100644 --- a/library/http/pkgIndex.tcl +++ b/library/http/pkgIndex.tcl @@ -1,2 +1,2 @@ if {![package vsatisfies [package provide Tcl] 8.6-]} {return} -package ifneeded http 2.9.6 [list tclPkgSetup $dir http 2.9.6 {{http.tcl source {::http::config ::http::formatQuery ::http::geturl ::http::reset ::http::wait ::http::register ::http::unregister ::http::mapReply}}}] +package ifneeded http 2.9.7 [list tclPkgSetup $dir http 2.9.7 {{http.tcl source {::http::config ::http::formatQuery ::http::geturl ::http::reset ::http::wait ::http::register ::http::unregister ::http::mapReply}}}] diff --git a/unix/Makefile.in b/unix/Makefile.in index 2acaf8a..de7f25b 100644 --- a/unix/Makefile.in +++ b/unix/Makefile.in @@ -950,9 +950,9 @@ install-libraries: libraries do \ $(INSTALL_DATA) $$i "$(SCRIPT_INSTALL_DIR)/http1.0"; \ done - @echo "Installing package http 2.9.6 as a Tcl Module"; + @echo "Installing package http 2.9.7 as a Tcl Module"; @$(INSTALL_DATA) $(TOP_DIR)/library/http/http.tcl \ - "$(MODULE_INSTALL_DIR)/8.6/http-2.9.6.tm" + "$(MODULE_INSTALL_DIR)/8.6/http-2.9.7.tm" @echo "Installing package opt0.4 files to $(SCRIPT_INSTALL_DIR)/opt0.4/" @for i in $(TOP_DIR)/library/opt/*.tcl; do \ $(INSTALL_DATA) $$i "$(SCRIPT_INSTALL_DIR)/opt0.4"; \ diff --git a/win/Makefile.in b/win/Makefile.in index 99f04c8..37d579b 100644 --- a/win/Makefile.in +++ b/win/Makefile.in @@ -735,8 +735,8 @@ install-libraries: libraries install-tzdata install-msgs do \ $(COPY) "$$j" "$(SCRIPT_INSTALL_DIR)/http1.0"; \ done; - @echo "Installing package http 2.9.6 as a Tcl Module"; - @$(COPY) $(ROOT_DIR)/library/http/http.tcl "$(MODULE_INSTALL_DIR)/8.6/http-2.9.6.tm"; + @echo "Installing package http 2.9.7 as a Tcl Module"; + @$(COPY) $(ROOT_DIR)/library/http/http.tcl "$(MODULE_INSTALL_DIR)/8.6/http-2.9.7.tm"; @echo "Installing library opt0.4 directory"; @for j in $(ROOT_DIR)/library/opt/*.tcl; \ do \ -- cgit v0.12 From 0a5154ce87f664c62309803df4d7b8ab14b4721d Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 10 May 2022 16:38:22 +0000 Subject: Now we're on it, keep better track with state(host) and state(path). Backported from http 2.10 --- library/http/http.tcl | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/library/http/http.tcl b/library/http/http.tcl index 92d3a5a..e754264 100644 --- a/library/http/http.tcl +++ b/library/http/http.tcl @@ -931,8 +931,12 @@ proc http::geturl {url args} { } return -code error "Illegal characters in URL path" } + if {![regexp {^[^?#]+} $srvurl state(path)]} { + set state(path) / + } } else { set srvurl / + set state(path) / } if {$proto eq ""} { set proto http @@ -1409,12 +1413,16 @@ proc http::Connected {token proto phost srvurl} { puts $sock "$how $srvurl HTTP/$state(-protocol)" if {[dict exists $state(-headers) Host]} { # Allow Host spoofing. [Bug 928154] - puts $sock "Host: [dict get $state(-headers) Host]" + set hostHdr [dict get $state(-headers) Host] + regexp {^[^:]+} $hostHdr state(host) + puts $sock "Host: $hostHdr" } elseif {$port == $defport} { # Don't add port in this case, to handle broken servers. [Bug # #504508] + set state(host) $host puts $sock "Host: $host" } else { + set state(host) $host puts $sock "Host: $host:$port" } puts $sock "User-Agent: $http(-useragent)" @@ -3175,7 +3183,7 @@ proc http::BlockingGets {sock} { while 1 { set count [gets $sock line] set eof [eof $sock] - if {$count > -1 || $eof} { + if {$count >= 0 || $eof} { return $line } else { yield -- cgit v0.12 -- cgit v0.12 From e95aa99b53ecc065995e046417ae4379a4b0c160 Mon Sep 17 00:00:00 2001 From: kjnash Date: Wed, 11 May 2022 17:21:37 +0000 Subject: Revise http(n) to fix #15e4bd83c2 --- doc/http.n | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/http.n b/doc/http.n index a451f80..ed698f2 100644 --- a/doc/http.n +++ b/doc/http.n @@ -318,7 +318,8 @@ otherwise complain about HTTP/1.1. .TP \fB\-query\fR \fIquery\fR . -This flag causes \fB::http::geturl\fR to do a POST request that passes the +This flag (if the value is non-empty) causes \fB::http::geturl\fR to do a +POST request that passes the \fIquery\fR as payload verbatim to the server. The content format (and encoding) of \fIquery\fR is announced by the header field \fBcontent-type\fR set by the option \fB-type\fR. -- cgit v0.12 From f9d3eb28e94e4e1a235e70e3ca01c25e5aeeaed0 Mon Sep 17 00:00:00 2001 From: kjnash Date: Wed, 11 May 2022 18:13:32 +0000 Subject: Revise http(n) to fix #1414262 --- doc/http.n | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/doc/http.n b/doc/http.n index ed698f2..7bcf21c 100644 --- a/doc/http.n +++ b/doc/http.n @@ -135,6 +135,13 @@ the proxy server and proxy port. Otherwise the filter should return an empty list. The default filter returns the values of the \fB\-proxyhost\fR and \fB\-proxyport\fR settings if they are non-empty. +.RS +.PP +The \fB::http::geturl\fR command runs the \fB-proxyfilter\fR callback inside +a \fBcatch\fR command. Therefore an error in the callback command does +not call the \fBbgerror\fR handler. See the \fBERRORS\fR section for +details. +.RE .TP \fB\-repost\fR \fIboolean\fR . @@ -228,6 +235,11 @@ proc httpCallback {token} { # Access state as a Tcl array } .CE +.PP +The \fB::http::geturl\fR command runs the \fB-command\fR callback inside +a \fBcatch\fR command. Therefore an error in the callback command does +not call the \fBbgerror\fR handler. See the \fBERRORS\fR section for +details. .RE .TP \fB\-handler\fR \fIcallback\fR @@ -257,6 +269,11 @@ proc httpHandlerCallback {socket token} { The \fBhttp::geturl\fR code for the \fB-handler\fR option is not compatible with either compression or chunked transfer-encoding. If \fB-handler\fR is specified, then to work around these issues \fBhttp::geturl\fR will reduce the HTTP protocol to 1.0, and override the \fB-zip\fR option (i.e. it will not send the header "\fBAccept-Encoding: gzip,deflate,compress\fR"). .PP If options \fB-handler\fR and \fB-channel\fR are used together, the handler is responsible for copying the data from the HTTP socket to the specified channel. The name of the channel is available to the handler as element \fB-channel\fR of the token array. +.PP +The \fB::http::geturl\fR command runs the \fB-handler\fR callback inside +a \fBcatch\fR command. Therefore an error in the callback command does +not call the \fBbgerror\fR handler. See the \fBERRORS\fR section for +details. .RE .TP \fB\-headers\fR \fIkeyvaluelist\fR @@ -523,6 +540,14 @@ to know the result of the asynchronous HTTP request, it can call \fB::http::wait\fR and then check status and error, just as the callback does. .PP +The \fB::http::geturl\fR command runs the \fB-command\fR, \fB-handler\fR, +and \fB-proxyfilter\fR callbacks inside a \fBcatch\fR command. Therefore +an error in the callback command does not call the \fBbgerror\fR handler. +When debugging one of these +callbacks, it may be convenient to report errors by using a +\fBcatch\fR command within the callback command itself, e.g. to write +an error message to stdout. +.PP In any case, you must still call \fB::http::cleanup\fR to delete the state array when you are done. .PP -- cgit v0.12 From cdea365a569c1ba3e22c816c475847b4bd3f5085 Mon Sep 17 00:00:00 2001 From: kjnash Date: Wed, 11 May 2022 18:24:00 +0000 Subject: Revise http(n) to fix #443195 --- doc/http.n | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/http.n b/doc/http.n index 7bcf21c..c4a8688 100644 --- a/doc/http.n +++ b/doc/http.n @@ -49,6 +49,11 @@ http \- Client-side implementation of the HTTP/1.1 protocol \fB::http::registerError \fIport\fR ?\fImessage\fR? .sp \fB::http::unregister \fIproto\fR +.SH "EXPORTED COMMANDS" +.PP +Namespace \fBhttp\fR exports the commands \fBconfig\fR, \fBformatQuery\fR, \fBgeturl\fR, \fBquoteString\fR, \fBregister\fR, \fBregisterError\fR, \fBreset\fR, \fBunregister\fR, and \fBwait\fR. +.PP +It does not export the commands \fBcleanup\fR, \fBcode\fR, \fBdata\fR, \fBerror\fR, \fBmeta\fR, \fBncode\fR, \fBsize\fR, or \fBstatus\fR. .BE .SH DESCRIPTION .PP -- cgit v0.12 From a96ca020dfbf40d8a61669620d17c5a1e455a99a Mon Sep 17 00:00:00 2001 From: kjnash Date: Wed, 11 May 2022 18:25:55 +0000 Subject: Revise library/http/http.tcl to fix shift_jis typo and #6898f9cb71 --- library/http/http.tcl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/http/http.tcl b/library/http/http.tcl index e754264..838fbee 100644 --- a/library/http/http.tcl +++ b/library/http/http.tcl @@ -3479,7 +3479,7 @@ proc http::CharsetToEncoding {charset} { set encoding "iso8859-$num" } elseif {[regexp {iso-?2022-(jp|kr)} $charset -> ext]} { set encoding "iso2022-$ext" - } elseif {[regexp {shift[-_]?js} $charset]} { + } elseif {[regexp {shift[-_]?jis} $charset]} { set encoding "shiftjis" } elseif {[regexp {(?:windows|cp)-?([0-9]+)} $charset -> num]} { set encoding "cp$num" -- cgit v0.12 From b9ef9f9cefccc2ef851149acbd14c280dd70e622 Mon Sep 17 00:00:00 2001 From: kjnash Date: Wed, 11 May 2022 18:48:20 +0000 Subject: Revise http(n) to fix #14e3c258d9 --- doc/http.n | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/doc/http.n b/doc/http.n index c4a8688..47ed057 100644 --- a/doc/http.n +++ b/doc/http.n @@ -341,12 +341,20 @@ otherwise complain about HTTP/1.1. \fB\-query\fR \fIquery\fR . This flag (if the value is non-empty) causes \fB::http::geturl\fR to do a -POST request that passes the -\fIquery\fR as payload verbatim to the server. -The content format (and encoding) of \fIquery\fR is announced by the header -field \fBcontent-type\fR set by the option \fB-type\fR. -\fIquery\fR is an x-url-encoding formatted query, if used for html forms. -The \fB::http::formatQuery\fR procedure can be used to do the formatting. +POST request that passes the string +\fIquery\fR verbatim to the server as the request payload. +The content format (and encoding) of \fIquery\fR is announced by the request +header \fBContent-Type\fR which is set by the option \fB-type\fR. Any value +of \fB-type\fR is permitted, and it is the responsibility of the caller to +supply \fIquery\fR in the correct format. +.RS +.PP +If \fB-type\fR is not specified, it defaults to +\fIapplication/x-www-form-urlencoded\fR, which requires \fIquery\fR to be an +x-url-encoding formatted query-string (this \fB-type\fR and query format are +used in a POST submitted from an html form). The \fB::http::formatQuery\fR +procedure can be used to do the formatting. +.RE .TP \fB\-queryblocksize\fR \fIsize\fR . -- cgit v0.12 From 355490c37c7a596e846ffeb26c4d82f2dfed9d37 Mon Sep 17 00:00:00 2001 From: kjnash Date: Wed, 11 May 2022 19:05:33 +0000 Subject: Revise http(n) to fix #6afa947d1a --- doc/http.n | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/doc/http.n b/doc/http.n index 47ed057..b5197a9 100644 --- a/doc/http.n +++ b/doc/http.n @@ -306,8 +306,16 @@ multiple requests. Default is 0. \fB\-method\fR \fItype\fR . Force the HTTP request method to \fItype\fR. \fB::http::geturl\fR will -auto-select GET, POST or HEAD based on other options, but this option -enables choices like PUT and DELETE for webdav support. +auto-select GET, POST or HEAD based on other options, but this option overrides +that selection and enables choices like PUT and DELETE for WebDAV support. +.RS +.PP +It is the caller's responsibility to ensure that the headers and request body +(if any) conform to the requirements of the request method. For example, if +using \fB\-method\fR \fIPOST\fR to send a POST with an empty request body, the +caller must also supply the option +.QW "-headers {Content-Length 0}" . +.RE .TP \fB\-myaddr\fR \fIaddress\fR . -- cgit v0.12 From 6d44e5c0f77c03a6daaf47db88c752018d980055 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 11 May 2022 19:06:56 +0000 Subject: Fix [6898f9cb71]: shiftjis is presumably misspelled in http 2.7.7. Also add more testcases for http::CharsetToEncoding, revealing one more bug --- library/http/http.tcl | 5 ++++- tests/http.test | 21 +++++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/library/http/http.tcl b/library/http/http.tcl index e754264..ae0a538 100644 --- a/library/http/http.tcl +++ b/library/http/http.tcl @@ -3479,7 +3479,7 @@ proc http::CharsetToEncoding {charset} { set encoding "iso8859-$num" } elseif {[regexp {iso-?2022-(jp|kr)} $charset -> ext]} { set encoding "iso2022-$ext" - } elseif {[regexp {shift[-_]?js} $charset]} { + } elseif {[regexp {shift[-_]?jis} $charset]} { set encoding "shiftjis" } elseif {[regexp {(?:windows|cp)-?([0-9]+)} $charset -> num]} { set encoding "cp$num" @@ -3491,6 +3491,9 @@ proc http::CharsetToEncoding {charset} { 1 - 2 - 3 { set encoding "iso8859-$num" } + default { + set encoding "binary" + } } } else { # other charset, like euc-xx, utf-8,... may directly map to encoding diff --git a/tests/http.test b/tests/http.test index c45a45a..40113dc 100644 --- a/tests/http.test +++ b/tests/http.test @@ -119,6 +119,27 @@ test http-1.6 {http::config} -setup { test http-2.1 {http::reset} { catch {http::reset http#1} } 0 +test http-2.2 {http::CharsetToEncoding} { + http::CharsetToEncoding iso-8859-11 +} iso8859-11 +test http-2.3 {http::CharsetToEncoding} { + http::CharsetToEncoding iso-2022-kr +} iso2022-kr +test http-2.4 {http::CharsetToEncoding} { + http::CharsetToEncoding shift-jis +} shiftjis +test http-2.5 {http::CharsetToEncoding} { + http::CharsetToEncoding windows-437 +} cp437 +test http-2.6 {http::CharsetToEncoding} { + http::CharsetToEncoding latin5 +} iso8859-9 +test http-2.7 {http::CharsetToEncoding} { + http::CharsetToEncoding latin1 +} iso8859-1 +test http-2.8 {http::CharsetToEncoding} { + http::CharsetToEncoding latin4 +} binary test http-3.1 {http::geturl} -returnCodes error -body { http::geturl -bogus flag -- cgit v0.12 From b68a248552b95b41779deac3bb98023718502216 Mon Sep 17 00:00:00 2001 From: kjnash Date: Wed, 11 May 2022 20:00:48 +0000 Subject: In http(n), escape - to \- in all command options except example code. --- doc/http.n | 62 +++++++++++++++++++++++++++++++------------------------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/doc/http.n b/doc/http.n index b5197a9..89af32f 100644 --- a/doc/http.n +++ b/doc/http.n @@ -84,7 +84,7 @@ must be active. In Tk applications this is always true. For pure-Tcl applications, the caller can use \fB::http::wait\fR after calling \fB::http::geturl\fR to start the event loop. .PP -\fBNote:\fR The event queue is even used without the \fB-command\fR option. +\fBNote:\fR The event queue is even used without the \fB\-command\fR option. As a side effect, arbitrary commands may be processed while \fBhttp::geturl\fR is running. .SH COMMANDS .TP @@ -116,7 +116,7 @@ is 1. \fB\-postfresh\fR \fIboolean\fR . Specifies whether requests that use the \fBPOST\fR method will always use a -fresh socket, overriding the \fB-keepalive\fR option of +fresh socket, overriding the \fB\-keepalive\fR option of command \fBhttp::geturl\fR. See the \fBPERSISTENT SOCKETS\fR section for details. The default is 0. .TP @@ -142,7 +142,7 @@ an empty list. The default filter returns the values of the non-empty. .RS .PP -The \fB::http::geturl\fR command runs the \fB-proxyfilter\fR callback inside +The \fB::http::geturl\fR command runs the \fB\-proxyfilter\fR callback inside a \fBcatch\fR command. Therefore an error in the callback command does not call the \fBbgerror\fR handler. See the \fBERRORS\fR section for details. @@ -187,7 +187,7 @@ If the value is boolean \fBtrue\fR, then by default requests will send a header .QW "\fBAccept-Encoding: gzip,deflate,compress\fR" . If the value is boolean \fBfalse\fR, then by default this header will not be sent. In either case the default can be overridden for an individual request by -supplying a custom \fBAccept-Encoding\fR header in the \fB-headers\fR option +supplying a custom \fBAccept-Encoding\fR header in the \fB\-headers\fR option of \fBhttp::geturl\fR. The default is 1. .RE .TP @@ -241,7 +241,7 @@ proc httpCallback {token} { } .CE .PP -The \fB::http::geturl\fR command runs the \fB-command\fR callback inside +The \fB::http::geturl\fR command runs the \fB\-command\fR callback inside a \fBcatch\fR command. Therefore an error in the callback command does not call the \fBbgerror\fR handler. See the \fBERRORS\fR section for details. @@ -271,11 +271,11 @@ proc httpHandlerCallback {socket token} { } .CE .PP -The \fBhttp::geturl\fR code for the \fB-handler\fR option is not compatible with either compression or chunked transfer-encoding. If \fB-handler\fR is specified, then to work around these issues \fBhttp::geturl\fR will reduce the HTTP protocol to 1.0, and override the \fB-zip\fR option (i.e. it will not send the header "\fBAccept-Encoding: gzip,deflate,compress\fR"). +The \fBhttp::geturl\fR code for the \fB\-handler\fR option is not compatible with either compression or chunked transfer-encoding. If \fB\-handler\fR is specified, then to work around these issues \fBhttp::geturl\fR will reduce the HTTP protocol to 1.0, and override the \fB\-zip\fR option (i.e. it will not send the header "\fBAccept-Encoding: gzip,deflate,compress\fR"). .PP -If options \fB-handler\fR and \fB-channel\fR are used together, the handler is responsible for copying the data from the HTTP socket to the specified channel. The name of the channel is available to the handler as element \fB-channel\fR of the token array. +If options \fB\-handler\fR and \fB\-channel\fR are used together, the handler is responsible for copying the data from the HTTP socket to the specified channel. The name of the channel is available to the handler as element \fB\-channel\fR of the token array. .PP -The \fB::http::geturl\fR command runs the \fB-handler\fR callback inside +The \fB::http::geturl\fR command runs the \fB\-handler\fR callback inside a \fBcatch\fR command. Therefore an error in the callback command does not call the \fBbgerror\fR handler. See the \fBERRORS\fR section for details. @@ -314,7 +314,7 @@ It is the caller's responsibility to ensure that the headers and request body (if any) conform to the requirements of the request method. For example, if using \fB\-method\fR \fIPOST\fR to send a POST with an empty request body, the caller must also supply the option -.QW "-headers {Content-Length 0}" . +.QW "\-headers {Content-Length 0}" . .RE .TP \fB\-myaddr\fR \fIaddress\fR @@ -352,14 +352,14 @@ This flag (if the value is non-empty) causes \fB::http::geturl\fR to do a POST request that passes the string \fIquery\fR verbatim to the server as the request payload. The content format (and encoding) of \fIquery\fR is announced by the request -header \fBContent-Type\fR which is set by the option \fB-type\fR. Any value -of \fB-type\fR is permitted, and it is the responsibility of the caller to +header \fBContent-Type\fR which is set by the option \fB\-type\fR. Any value +of \fB\-type\fR is permitted, and it is the responsibility of the caller to supply \fIquery\fR in the correct format. .RS .PP -If \fB-type\fR is not specified, it defaults to +If \fB\-type\fR is not specified, it defaults to \fIapplication/x-www-form-urlencoded\fR, which requires \fIquery\fR to be an -x-url-encoding formatted query-string (this \fB-type\fR and query format are +x-url-encoding formatted query-string (this \fB\-type\fR and query format are used in a POST submitted from an html form). The \fB::http::formatQuery\fR procedure can be used to do the formatting. .RE @@ -561,8 +561,8 @@ to know the result of the asynchronous HTTP request, it can call \fB::http::wait\fR and then check status and error, just as the callback does. .PP -The \fB::http::geturl\fR command runs the \fB-command\fR, \fB-handler\fR, -and \fB-proxyfilter\fR callbacks inside a \fBcatch\fR command. Therefore +The \fB::http::geturl\fR command runs the \fB\-command\fR, \fB\-handler\fR, +and \fB\-proxyfilter\fR callbacks inside a \fBcatch\fR command. Therefore an error in the callback command does not call the \fBbgerror\fR handler. When debugging one of these callbacks, it may be convenient to report errors by using a @@ -751,9 +751,9 @@ whether the server was modified by the failed POST request, before sending the same request again. .PP A HTTP request will use a persistent socket if the call to -\fBhttp::geturl\fR has the option \fB-keepalive true\fR. It will use +\fBhttp::geturl\fR has the option \fB\-keepalive true\fR. It will use pipelining where permitted if the \fBhttp::config\fR option -\fB-pipeline\fR is boolean \fBtrue\fR (its default value). +\fB\-pipeline\fR is boolean \fBtrue\fR (its default value). .PP The http package maintains no more than one persistent connection to each server (i.e. each value of @@ -775,7 +775,7 @@ In accordance with RFC 7230, \fBhttp::geturl\fR does not pipeline requests that use the POST method. If a POST uses a persistent connection and is not the first request on that connection, \fBhttp::geturl\fR waits until it has received the response for the previous -request; or (if \fBhttp::config\fR option \fB-postfresh\fR is boolean \fBtrue\fR) it +request; or (if \fBhttp::config\fR option \fB\-postfresh\fR is boolean \fBtrue\fR) it uses a new connection for each POST. .PP If the server is processing a number of pipelined requests, and sends a @@ -796,7 +796,7 @@ GET requests, \fBhttp::geturl\fR opens another connection and retransmits the failed request. However, if the request was a POST, RFC 7230 forbids automatic retry by default, suggesting either user confirmation, or confirmation by user-agent software that has semantic understanding of -the application. The \fBhttp::config\fR option \fB-repost\fR allows for +the application. The \fBhttp::config\fR option \fB\-repost\fR allows for either possibility. .PP Asynchronous close events can occur only in a short interval of time. The @@ -804,16 +804,16 @@ Asynchronous close events can occur only in a short interval of time. The server. Upon detection, the connection is also closed at the client end, and subsequent requests will use a fresh connection. .PP -If the \fBhttp::geturl\fR command is called with option \fB-keepalive true\fR, +If the \fBhttp::geturl\fR command is called with option \fB\-keepalive true\fR, then it will both try to use an existing persistent connection (if one is available), and it will send the server a .QW "\fBConnection: keep-alive\fR" request header asking to keep the connection open for future requests. .PP -The \fBhttp::config\fR options \fB-pipeline\fR, \fB-postfresh\fR, and -\fB-repost\fR relate to persistent connections. +The \fBhttp::config\fR options \fB\-pipeline\fR, \fB\-postfresh\fR, and +\fB\-repost\fR relate to persistent connections. .PP -Option \fB-pipeline\fR, if boolean \fBtrue\fR, will pipeline GET and HEAD requests +Option \fB\-pipeline\fR, if boolean \fBtrue\fR, will pipeline GET and HEAD requests made over a persistent connection. POST requests will not be pipelined - if the POST is not the first transaction on the connection, its request will not @@ -821,15 +821,15 @@ be sent until the previous response has finished. GET and HEAD requests made after a POST will not be sent until the POST response has been delivered, and will not be sent if the POST fails. .PP -Option \fB-postfresh\fR, if boolean \fBtrue\fR, will override the \fBhttp::geturl\fR option -\fB-keepalive\fR, and always open a fresh connection for a POST request. +Option \fB\-postfresh\fR, if boolean \fBtrue\fR, will override the \fBhttp::geturl\fR option +\fB\-keepalive\fR, and always open a fresh connection for a POST request. .PP -Option \fB-repost\fR, if \fBtrue\fR, permits automatic retry of a POST request +Option \fB\-repost\fR, if \fBtrue\fR, permits automatic retry of a POST request that fails because it uses a persistent connection that the server has half-closed (an .QW "asynchronous close event" ). Subsequent GET and HEAD requests in a failed pipeline will also be retried. -\fIThe -repost option should be used only if the application understands +\fIThe \-repost option should be used only if the application understands that the retry is appropriate\fR - specifically, the application must know that if the failed POST successfully modified the state of the server, a repeat POST would have no adverse effect. @@ -841,12 +841,12 @@ that the client wishes to change the protocol used over the existing connection higher version of the HTTP protocol (HTTP 2 or 3), or TLS encryption. If the server accepts the upgrade request, its response code will be 101. .PP -To request a protocol upgrade when calling \fBhttp::geturl\fR, the \fB-headers\fR +To request a protocol upgrade when calling \fBhttp::geturl\fR, the \fB\-headers\fR option must supply appropriate values for \fBConnection\fR and \fBUpgrade\fR, and -the \fB-command\fR option must supply a command that implements the requested +the \fB\-command\fR option must supply a command that implements the requested protocol and can also handle the server response if the server refuses the protocol upgrade. For upgrade requests \fBhttp::geturl\fR ignores the value of -option \fB-keepalive\fR, and always uses the value \fB0\fR so that the upgrade +option \fB\-keepalive\fR, and always uses the value \fB0\fR so that the upgrade request is not made over a connection that is intended for multiple HTTP requests. .PP The Tcllib library \fBwebsocket\fR implements WebSockets, and makes the necessary @@ -902,7 +902,7 @@ proc httpcopy { url file {chunk 4096} } { return $token } proc httpCopyProgress {args} { - puts -nonewline stderr . + puts \-nonewline stderr . flush stderr } .CE -- cgit v0.12 From 511688085318c6f4a765786b9ea88a8f77861d6c Mon Sep 17 00:00:00 2001 From: kjnash Date: Wed, 11 May 2022 20:08:57 +0000 Subject: In http(n), repaginate long lines. --- doc/http.n | 51 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 19 deletions(-) diff --git a/doc/http.n b/doc/http.n index 89af32f..9617934 100644 --- a/doc/http.n +++ b/doc/http.n @@ -51,9 +51,12 @@ http \- Client-side implementation of the HTTP/1.1 protocol \fB::http::unregister \fIproto\fR .SH "EXPORTED COMMANDS" .PP -Namespace \fBhttp\fR exports the commands \fBconfig\fR, \fBformatQuery\fR, \fBgeturl\fR, \fBquoteString\fR, \fBregister\fR, \fBregisterError\fR, \fBreset\fR, \fBunregister\fR, and \fBwait\fR. +Namespace \fBhttp\fR exports the commands \fBconfig\fR, \fBformatQuery\fR, +\fBgeturl\fR, \fBquoteString\fR, \fBregister\fR, \fBregisterError\fR, +\fBreset\fR, \fBunregister\fR, and \fBwait\fR. .PP -It does not export the commands \fBcleanup\fR, \fBcode\fR, \fBdata\fR, \fBerror\fR, \fBmeta\fR, \fBncode\fR, \fBsize\fR, or \fBstatus\fR. +It does not export the commands \fBcleanup\fR, \fBcode\fR, \fBdata\fR, +\fBerror\fR, \fBmeta\fR, \fBncode\fR, \fBsize\fR, or \fBstatus\fR. .BE .SH DESCRIPTION .PP @@ -85,7 +88,8 @@ applications, the caller can use \fB::http::wait\fR after calling \fB::http::geturl\fR to start the event loop. .PP \fBNote:\fR The event queue is even used without the \fB\-command\fR option. -As a side effect, arbitrary commands may be processed while \fBhttp::geturl\fR is running. +As a side effect, arbitrary commands may be processed while \fBhttp::geturl\fR +is running. .SH COMMANDS .TP \fB::http::config\fR ?\fIoptions\fR? @@ -117,8 +121,8 @@ is 1. . Specifies whether requests that use the \fBPOST\fR method will always use a fresh socket, overriding the \fB\-keepalive\fR option of -command \fBhttp::geturl\fR. See the \fBPERSISTENT SOCKETS\fR section for details. -The default is 0. +command \fBhttp::geturl\fR. See the \fBPERSISTENT SOCKETS\fR section for +details. The default is 0. .TP \fB\-proxyhost\fR \fIhostname\fR . @@ -185,8 +189,8 @@ numbers of \fBhttp\fR and \fBTcl\fR. . If the value is boolean \fBtrue\fR, then by default requests will send a header .QW "\fBAccept-Encoding: gzip,deflate,compress\fR" . -If the value is boolean \fBfalse\fR, then by default this header will not be sent. -In either case the default can be overridden for an individual request by +If the value is boolean \fBfalse\fR, then by default this header will not be +sent. In either case the default can be overridden for an individual request by supplying a custom \fBAccept-Encoding\fR header in the \fB\-headers\fR option of \fBhttp::geturl\fR. The default is 1. .RE @@ -271,9 +275,16 @@ proc httpHandlerCallback {socket token} { } .CE .PP -The \fBhttp::geturl\fR code for the \fB\-handler\fR option is not compatible with either compression or chunked transfer-encoding. If \fB\-handler\fR is specified, then to work around these issues \fBhttp::geturl\fR will reduce the HTTP protocol to 1.0, and override the \fB\-zip\fR option (i.e. it will not send the header "\fBAccept-Encoding: gzip,deflate,compress\fR"). +The \fBhttp::geturl\fR code for the \fB\-handler\fR option is not compatible +with either compression or chunked transfer-encoding. If \fB\-handler\fR is +specified, then to work around these issues \fBhttp::geturl\fR will reduce the +HTTP protocol to 1.0, and override the \fB\-zip\fR option (i.e. it will not +send the header "\fBAccept-Encoding: gzip,deflate,compress\fR"). .PP -If options \fB\-handler\fR and \fB\-channel\fR are used together, the handler is responsible for copying the data from the HTTP socket to the specified channel. The name of the channel is available to the handler as element \fB\-channel\fR of the token array. +If options \fB\-handler\fR and \fB\-channel\fR are used together, the handler +is responsible for copying the data from the HTTP socket to the specified +channel. The name of the channel is available to the handler as element +\fB\-channel\fR of the token array. .PP The \fB::http::geturl\fR command runs the \fB\-handler\fR callback inside a \fBcatch\fR command. Therefore an error in the callback command does @@ -639,7 +650,8 @@ if the HTTP response is text. \fBbody\fR . The contents of the URL. This will be empty if the \fB\-channel\fR -option has been specified. This value is returned by the \fB::http::data\fR command. +option has been specified. This value is returned by the \fB::http::data\fR +command. .TP \fBcharset\fR . @@ -775,8 +787,8 @@ In accordance with RFC 7230, \fBhttp::geturl\fR does not pipeline requests that use the POST method. If a POST uses a persistent connection and is not the first request on that connection, \fBhttp::geturl\fR waits until it has received the response for the previous -request; or (if \fBhttp::config\fR option \fB\-postfresh\fR is boolean \fBtrue\fR) it -uses a new connection for each POST. +request; or (if \fBhttp::config\fR option \fB\-postfresh\fR is boolean +\fBtrue\fR) it uses a new connection for each POST. .PP If the server is processing a number of pipelined requests, and sends a response header @@ -813,16 +825,17 @@ request header asking to keep the connection open for future requests. The \fBhttp::config\fR options \fB\-pipeline\fR, \fB\-postfresh\fR, and \fB\-repost\fR relate to persistent connections. .PP -Option \fB\-pipeline\fR, if boolean \fBtrue\fR, will pipeline GET and HEAD requests -made -over a persistent connection. POST requests will not be pipelined - if the +Option \fB\-pipeline\fR, if boolean \fBtrue\fR, will pipeline GET and HEAD +requests made over a persistent connection. POST requests will not be +pipelined - if the POST is not the first transaction on the connection, its request will not be sent until the previous response has finished. GET and HEAD requests made after a POST will not be sent until the POST response has been delivered, and will not be sent if the POST fails. .PP -Option \fB\-postfresh\fR, if boolean \fBtrue\fR, will override the \fBhttp::geturl\fR option -\fB\-keepalive\fR, and always open a fresh connection for a POST request. +Option \fB\-postfresh\fR, if boolean \fBtrue\fR, will override the +\fBhttp::geturl\fR option \fB\-keepalive\fR, and always open a fresh connection +for a POST request. .PP Option \fB\-repost\fR, if \fBtrue\fR, permits automatic retry of a POST request that fails because it uses a persistent connection that the server has @@ -831,8 +844,8 @@ half-closed (an Subsequent GET and HEAD requests in a failed pipeline will also be retried. \fIThe \-repost option should be used only if the application understands that the retry is appropriate\fR - specifically, the application must know -that if the failed POST successfully modified the state of the server, a repeat POST -would have no adverse effect. +that if the failed POST successfully modified the state of the server, a repeat +POST would have no adverse effect. .SH "PROTOCOL UPGRADES" .PP The HTTP/1.1 \fBConnection\fR and \fBUpgrade\fR client headers inform the server -- cgit v0.12 From 6c00e4db82fb7bd7fda66af0d9652ae2b950cc1f Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 12 May 2022 12:36:30 +0000 Subject: Remove "testsize time_t" command, it isn't being used --- win/tclWinTest.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/win/tclWinTest.c b/win/tclWinTest.c index 8525718..357bbc5 100644 --- a/win/tclWinTest.c +++ b/win/tclWinTest.c @@ -323,18 +323,14 @@ TestSizeCmd( if (objc != 2) { goto syntax; } - if (strcmp(Tcl_GetString(objv[1]), "time_t") == 0) { - Tcl_SetObjResult(interp, Tcl_NewWideIntObj(sizeof(time_t))); - return TCL_OK; - } if (strcmp(Tcl_GetString(objv[1]), "st_mtime") == 0) { - Tcl_StatBuf *statPtr; - Tcl_SetObjResult(interp, Tcl_NewWideIntObj(sizeof(statPtr->st_mtime))); - return TCL_OK; + Tcl_StatBuf *statPtr; + Tcl_SetObjResult(interp, Tcl_NewWideIntObj(sizeof(statPtr->st_mtime))); + return TCL_OK; } syntax: - Tcl_WrongNumArgs(interp, 1, objv, "time_t|st_mtime"); + Tcl_WrongNumArgs(interp, 1, objv, "st_mtime"); return TCL_ERROR; } -- cgit v0.12 From a7d78a8fb4183478700856b8ba6471d5157e8a5a Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 13 May 2022 10:11:39 +0000 Subject: Remove [https://core.tcl-lang.org/tcl/info/1fe8ca8d3eb87b65|Horrible Hack] to keep the old error message. Instead depend on [https://core.tcl-lang.org/tips/doc/trunk/tip/601.md|TIP #601] to let "encoding convertto" generate an exception. --- doc/http.n | 7 +------ library/http/http.tcl | 14 ++------------ tests/http.test | 7 ++++--- 3 files changed, 7 insertions(+), 21 deletions(-) diff --git a/doc/http.n b/doc/http.n index 2b2d09d..84c75b0 100644 --- a/doc/http.n +++ b/doc/http.n @@ -161,12 +161,7 @@ default is 0. . The \fIencoding\fR used for creating the x-url-encoded URLs with \fB::http::formatQuery\fR and \fB::http::quoteString\fR. -The default is \fButf-8\fR, as specified by RFC -2718. Prior to http 2.5 this was unspecified, and that behavior can be -returned by specifying the empty string (\fB{}\fR), although -\fIiso8859-1\fR is recommended to restore similar behavior but without the -\fB::http::formatQuery\fR or \fB::http::quoteString\fR -throwing an error processing non-latin-1 characters. +The default is \fButf-8\fR, as specified by RFC 2718. .TP \fB\-useragent\fR \fIstring\fR . diff --git a/library/http/http.tcl b/library/http/http.tcl index 187d203..87003e4 100644 --- a/library/http/http.tcl +++ b/library/http/http.tcl @@ -3539,18 +3539,8 @@ proc http::mapReply {string} { # a pre-computed map and [string map] to do the conversion (much faster # than [regsub]/[subst]). [Bug 1020491] - if {$http(-urlencoding) ne ""} { - set string [encoding convertto $http(-urlencoding) $string] - return [string map $formMap $string] - } - set converted [string map $formMap $string] - if {[string match "*\[\u0100-\uffff\]*" $converted]} { - regexp "\[\u0100-\uffff\]" $converted badChar - # Return this error message for maximum compatibility... :^/ - return -code error \ - "can't read \"formMap($badChar)\": no such element in array" - } - return $converted + set string [encoding convertto $http(-urlencoding) $string] + return [string map $formMap $string] } interp alias {} http::quoteString {} http::mapReply diff --git a/tests/http.test b/tests/http.test index d59d588..e3baf58 100644 --- a/tests/http.test +++ b/tests/http.test @@ -677,17 +677,18 @@ test http-7.2 {http::mapReply} { test http-7.3 {http::formatQuery} -setup { set enc [http::config -urlencoding] } -returnCodes error -body { - # this would be reverting to http <=2.4 behavior + # -urlencoding "" no longer supported. Use "iso8859-1". http::config -urlencoding "" http::mapReply "∈" } -cleanup { http::config -urlencoding $enc -} -result "can't read \"formMap(∈)\": no such element in array" +} -result {unknown encoding ""} test http-7.4 {http::formatQuery} -constraints deprecated -setup { set enc [http::config -urlencoding] } -body { # this would be reverting to http <=2.4 behavior w/o errors - # (unknown chars become '?') + # with Tcl 8.x (unknown chars become '?'), generating a + # proper exception with Tcl 9.0 http::config -urlencoding "iso8859-1" http::mapReply "∈" } -cleanup { -- cgit v0.12 From ded51e0c9087a9fe17500b24f7f5fb1ec2171aa2 Mon Sep 17 00:00:00 2001 From: kjnash Date: Fri, 13 May 2022 22:49:06 +0000 Subject: Correction to http(n) - cannot use Upgrade header to upgrade to http/3 --- doc/http.n | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/http.n b/doc/http.n index 9617934..c3ce165 100644 --- a/doc/http.n +++ b/doc/http.n @@ -851,7 +851,7 @@ POST would have no adverse effect. The HTTP/1.1 \fBConnection\fR and \fBUpgrade\fR client headers inform the server that the client wishes to change the protocol used over the existing connection (RFC 7230). This mechanism can be used to request a WebSocket (RFC 6455), a -higher version of the HTTP protocol (HTTP 2 or 3), or TLS encryption. If the +higher version of the HTTP protocol (HTTP 2), or TLS encryption. If the server accepts the upgrade request, its response code will be 101. .PP To request a protocol upgrade when calling \fBhttp::geturl\fR, the \fB\-headers\fR @@ -865,7 +865,7 @@ request is not made over a connection that is intended for multiple HTTP request The Tcllib library \fBwebsocket\fR implements WebSockets, and makes the necessary calls to commands in the \fBhttp\fR package. .PP -There is currently no native Tcl client library for HTTP/2 or HTTP/3. +There is currently no native Tcl client library for HTTP/2. .PP The \fBUpgrade\fR mechanism is not used to request TLS in web browsers, because \fBhttp\fR and \fBhttps\fR are served over different ports. It is used by -- cgit v0.12 From 720797e76cbd290e097b9094da83c44cadfac5bf Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 15 May 2022 20:19:30 +0000 Subject: Change (unsupported) "utfmax" option in rules.vc to it's reverse "utf16". No effect in Tcl 8.6. Not recommended (but working!) in Tcl 8.7 and 9.0 --- .travis.yml | 18 ------------------ win/makefile.vc | 4 ++-- win/rules.vc | 15 +++++++-------- 3 files changed, 9 insertions(+), 28 deletions(-) diff --git a/.travis.yml b/.travis.yml index eb1ec0c..061fe2d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -197,15 +197,6 @@ jobs: script: - cmd.exe //C vcvarsall.bat x64 '&&' nmake '-f' makefile.vc all tcltest - cmd.exe //C vcvarsall.bat x64 '&&' nmake '-f' makefile.vc test - - name: "Windows/MSVC/Shared: UTF_MAX=4" - os: windows - compiler: cl - env: *vcenv - before_install: *vcpreinst - install: [] - script: - - cmd.exe //C vcvarsall.bat x64 '&&' nmake 'OPTS=utfmax' '-f' makefile.vc all tcltest - - cmd.exe //C vcvarsall.bat x64 '&&' nmake 'OPTS=utfmax' '-f' makefile.vc test - name: "Windows/MSVC/Static" os: windows compiler: cl @@ -252,15 +243,6 @@ jobs: script: - cmd.exe //C vcvarsall.bat x86 '&&' nmake '-f' makefile.vc all tcltest - cmd.exe //C vcvarsall.bat x86 '&&' nmake '-f' makefile.vc test - - name: "Windows/MSVC-x86/Shared: UTF_MAX=4" - os: windows - compiler: cl - env: *vcenv - before_install: *vcpreinst - install: [] - script: - - cmd.exe //C vcvarsall.bat x86 '&&' nmake 'OPTS=utfmax' '-f' makefile.vc all tcltest - - cmd.exe //C vcvarsall.bat x86 '&&' nmake 'OPTS=utfmax' '-f' makefile.vc test - name: "Windows/MSVC-x86/Static" os: windows compiler: cl diff --git a/win/makefile.vc b/win/makefile.vc index 395a9ca..011546a 100644 --- a/win/makefile.vc +++ b/win/makefile.vc @@ -52,7 +52,7 @@ # turn on the 64-bit compiler, if your SDK has it. # # Basic macros and options usable on the commandline (see rules.vc for more info): -# OPTS=msvcrt,nothreads,pdbs,profile,static,staticpkg,symbols,thrdalloc,time64bit,unchecked,utfmax,none +# OPTS=msvcrt,nothreads,pdbs,profile,static,staticpkg,symbols,thrdalloc,time64bit,unchecked,utf16,none # Sets special options for the core. The default is for none. # Any combination of the above may be used (comma separated). # 'none' will over-ride everything to nothing. @@ -77,7 +77,7 @@ # unchecked = Allows a symbols build to not use the debug # enabled runtime (msvcrt.dll not msvcrtd.dll # or libcmt.lib not libcmtd.lib). -# utfmax = Forces a build using UTF-32 representation internally. +# utf16 = Forces a build using UTF-16 representation internally. # # STATS=compdbg,memdbg,none # Sets optional memory and bytecode compiler debugging code added diff --git a/win/rules.vc b/win/rules.vc index a571899..3107756 100644 --- a/win/rules.vc +++ b/win/rules.vc @@ -816,8 +816,7 @@ DOTSEPARATED=$(DOTSEPARATED:b=.) # configuration (ignored for Tcl itself) # _USE_64BIT_TIME_T - forces a build using 64-bit time_t for 32-bit build # (CRT library should support this, not needed for Tcl 9.x) -# TCL_UTF_MAX=4 - forces a build allowing 4-byte UTF-8 sequences internally. -# (Not needed for Tcl 9.x) +# TCL_UTF_MAX=3 - forces a build using UTF-16 internally (not recommended). # Further, LINKERFLAGS are modified based on above. # Default values for all the above @@ -884,9 +883,9 @@ USE_THREAD_ALLOC= 0 _USE_64BIT_TIME_T = 1 !endif -!if [nmakehlp -f $(OPTS) "utfmax"] -!message *** Force allowing 4-byte UTF-8 sequences internally -TCL_UTF_MAX = 4 +!if [nmakehlp -f $(OPTS) "utf16"] +!message *** Force UTF-16 internally +TCL_UTF_MAX = 3 !endif !endif @@ -1423,13 +1422,13 @@ OPTDEFINES = $(OPTDEFINES) /DNO_STRTOI64=1 !if "$(_USE_64BIT_TIME_T)" == "1" OPTDEFINES = $(OPTDEFINES) /D_USE_64BIT_TIME_T=1 !endif -!if "$(TCL_UTF_MAX)" == "4" -OPTDEFINES = $(OPTDEFINES) /DTCL_UTF_MAX=4 -!endif # _ATL_XP_TARGETING - Newer SDK's need this to build for XP COMPILERFLAGS = /D_ATL_XP_TARGETING !endif +!if "$(TCL_UTF_MAX)" == "3" +OPTDEFINES = $(OPTDEFINES) /DTCL_UTF_MAX=3 +!endif # Like the TEA system only set this non empty for non-Tk extensions # Note: some extensions use PACKAGE_NAME and others use PACKAGE_TCLNAME -- cgit v0.12