summaryrefslogtreecommitdiffstats
path: root/library/msgcat/msgcat.tcl
blob: 77e70a52148a1d322c834b8a3909580a273d242a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
# msgcat.tcl --
#
#	This file defines various procedures which implement a
#	message catalog facility for Tcl programs.  It should be
#	loaded with the command "package require msgcat".
#
# Copyright (c) 1998-2000 by Ajuba Solutions.
# Copyright (c) 1998 by Mark Harrison.
#
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.

package require Tcl 8.5-
# When the version number changes, be sure to update the pkgIndex.tcl file,
# and the installation directory in the Makefiles.
package provide msgcat 1.5.1

namespace eval msgcat {
    namespace export mc mcload mclocale mcmax mcmset mcpreferences mcset \
	    mcunknown mcflset mcflmset

    # Records the current locale as passed to mclocale
    variable Locale ""

    # Records the list of locales to search
    variable Loclist {}

    # Records the locale of the currently sourced message catalogue file
    variable FileLocale

    # Records the mapping between source strings and translated strings.  The
    # dict key is of the form "<locale> <namespace> <src>", where locale and
    # namespace should be themselves dict values and the value is
    # the translated string.
    variable Msgs [dict create]

    # Map of language codes used in Windows registry to those of ISO-639
    if {[info sharedlibextension] eq ".dll"} {
	variable WinRegToISO639 [dict create  {*}{
	    01 ar 0401 ar_SA 0801 ar_IQ 0c01 ar_EG 1001 ar_LY 1401 ar_DZ
		  1801 ar_MA 1c01 ar_TN 2001 ar_OM 2401 ar_YE 2801 ar_SY
		  2c01 ar_JO 3001 ar_LB 3401 ar_KW 3801 ar_AE 3c01 ar_BH
		  4001 ar_QA
	    02 bg 0402 bg_BG
	    03 ca 0403 ca_ES
	    04 zh 0404 zh_TW 0804 zh_CN 0c04 zh_HK 1004 zh_SG 1404 zh_MO
	    05 cs 0405 cs_CZ
	    06 da 0406 da_DK
	    07 de 0407 de_DE 0807 de_CH 0c07 de_AT 1007 de_LU 1407 de_LI
	    08 el 0408 el_GR
	    09 en 0409 en_US 0809 en_GB 0c09 en_AU 1009 en_CA 1409 en_NZ
		  1809 en_IE 1c09 en_ZA 2009 en_JM 2409 en_GD 2809 en_BZ
		  2c09 en_TT 3009 en_ZW 3409 en_PH
	    0a es 040a es_ES 080a es_MX 0c0a es_ES@modern 100a es_GT 140a es_CR
		  180a es_PA 1c0a es_DO 200a es_VE 240a es_CO 280a es_PE
		  2c0a es_AR 300a es_EC 340a es_CL 380a es_UY 3c0a es_PY
		  400a es_BO 440a es_SV 480a es_HN 4c0a es_NI 500a es_PR
	    0b fi 040b fi_FI
	    0c fr 040c fr_FR 080c fr_BE 0c0c fr_CA 100c fr_CH 140c fr_LU
		  180c fr_MC
	    0d he 040d he_IL
	    0e hu 040e hu_HU
	    0f is 040f is_IS
	    10 it 0410 it_IT 0810 it_CH
	    11 ja 0411 ja_JP
	    12 ko 0412 ko_KR
	    13 nl 0413 nl_NL 0813 nl_BE
	    14 no 0414 no_NO 0814 nn_NO
	    15 pl 0415 pl_PL
	    16 pt 0416 pt_BR 0816 pt_PT
	    17 rm 0417 rm_CH
	    18 ro 0418 ro_RO 0818 ro_MO
	    19 ru 0819 ru_MO
	    1a hr 041a hr_HR 081a sr_YU 0c1a sr_YU@cyrillic
	    1b sk 041b sk_SK
	    1c sq 041c sq_AL
	    1d sv 041d sv_SE 081d sv_FI
	    1e th 041e th_TH
	    1f tr 041f tr_TR
	    20 ur 0420 ur_PK 0820 ur_IN
	    21 id 0421 id_ID
	    22 uk 0422 uk_UA
	    23 be 0423 be_BY
	    24 sl 0424 sl_SI
	    25 et 0425 et_EE
	    26 lv 0426 lv_LV
	    27 lt 0427 lt_LT
	    28 tg 0428 tg_TJ
	    29 fa 0429 fa_IR
	    2a vi 042a vi_VN
	    2b hy 042b hy_AM
	    2c az 042c az_AZ@latin 082c az_AZ@cyrillic
	    2d eu
	    2e wen 042e wen_DE
	    2f mk 042f mk_MK
	    30 bnt 0430 bnt_TZ
	    31 ts 0431 ts_ZA
	    32 tn
	    33 ven 0433 ven_ZA
	    34 xh 0434 xh_ZA
	    35 zu 0435 zu_ZA
	    36 af 0436 af_ZA
	    37 ka 0437 ka_GE
	    38 fo 0438 fo_FO
	    39 hi 0439 hi_IN
	    3a mt 043a mt_MT
	    3b se 043b se_NO
	    043c gd_UK 083c ga_IE
	    3d yi 043d yi_IL
	    3e ms 043e ms_MY 083e ms_BN
	    3f kk 043f kk_KZ
	    40 ky 0440 ky_KG
	    41 sw 0441 sw_KE
	    42 tk 0442 tk_TM
	    43 uz 0443 uz_UZ@latin 0843 uz_UZ@cyrillic
	    44 tt 0444 tt_RU
	    45 bn 0445 bn_IN
	    46 pa 0446 pa_IN
	    47 gu 0447 gu_IN
	    48 or 0448 or_IN
	    49 ta
	    4a te 044a te_IN
	    4b kn 044b kn_IN
	    4c ml 044c ml_IN
	    4d as 044d as_IN
	    4e mr 044e mr_IN
	    4f sa 044f sa_IN
	    50 mn
	    51 bo 0451 bo_CN
	    52 cy 0452 cy_GB
	    53 km 0453 km_KH
	    54 lo 0454 lo_LA
	    55 my 0455 my_MM
	    56 gl 0456 gl_ES
	    57 kok 0457 kok_IN
	    58 mni 0458 mni_IN
	    59 sd
	    5a syr 045a syr_TR
	    5b si 045b si_LK
	    5c chr 045c chr_US
	    5d iu 045d iu_CA
	    5e am 045e am_ET
	    5f ber 045f ber_MA
	    60 ks 0460 ks_PK 0860 ks_IN
	    61 ne 0461 ne_NP 0861 ne_IN
	    62 fy 0462 fy_NL
	    63 ps
	    64 tl 0464 tl_PH
	    65 div 0465 div_MV
	    66 bin 0466 bin_NG
	    67 ful 0467 ful_NG
	    68 ha 0468 ha_NG
	    69 nic 0469 nic_NG
	    6a yo 046a yo_NG
	    70 ibo 0470 ibo_NG
	    71 kau 0471 kau_NG
	    72 om 0472 om_ET
	    73 ti 0473 ti_ET
	    74 gn 0474 gn_PY
	    75 cpe 0475 cpe_US
	    76 la 0476 la_VA
	    77 so 0477 so_SO
	    78 sit 0478 sit_CN
	    79 pap 0479 pap_AN
	}]
    }
}

# msgcat::mc --
#
#	Find the translation for the given string based on the current
#	locale setting. Check the local namespace first, then look in each
#	parent namespace until the source is found.  If additional args are
#	specified, use the format command to work them into the traslated
#	string.
#
# Arguments:
#	src	The string to translate.
#	args	Args to pass to the format command
#
# Results:
#	Returns the translated string.  Propagates errors thrown by the
#	format command.

proc msgcat::mc {src args} {
    # Check for the src in each namespace starting from the local and
    # ending in the global.

    variable Msgs
    variable Loclist
    variable Locale

    set ns [uplevel 1 [list ::namespace current]]

    while {$ns != ""} {
	foreach loc $Loclist {
	    if {[dict exists $Msgs $loc $ns $src]} {
		if {[llength $args] == 0} {
		    return [dict get $Msgs $loc $ns $src]
		} else {
		    return [format [dict get $Msgs $loc $ns $src] {*}$args]
		}
	    }
	}
	set ns [namespace parent $ns]
    }
    # we have not found the translation
    return [uplevel 1 [list [namespace origin mcunknown] \
	    $Locale $src {*}$args]]
}

# msgcat::mclocale --
#
#	Query or set the current locale.
#
# Arguments:
#	newLocale	(Optional) The new locale string. Locale strings
#			should be composed of one or more sublocale parts
#			separated by underscores (e.g. en_US).
#
# Results:
#	Returns the current locale.

proc msgcat::mclocale {args} {
    variable Loclist
    variable Locale
    set len [llength $args]

    if {$len > 1} {
	return -code error "wrong # args: should be\
		\"[lindex [info level 0] 0] ?newLocale?\""
    }

    if {$len == 1} {
	set newLocale [lindex $args 0]
	if {$newLocale ne [file tail $newLocale]} {
	    return -code error "invalid newLocale value \"$newLocale\":\
		    could be path to unsafe code."
	}
	set Locale [string tolower $newLocale]
	set Loclist {}
	set word ""
	foreach part [split $Locale _] {
	    set word [string trim "${word}_${part}" _]
	    if {$word ne [lindex $Loclist 0]} {
		set Loclist [linsert $Loclist 0 $word]
	    }
	}
	lappend Loclist {}
	set Locale [lindex $Loclist 0]
    }
    return $Locale
}

# msgcat::mcpreferences --
#
#	Fetch the list of locales used to look up strings, ordered from
#	most preferred to least preferred.
#
# Arguments:
#	None.
#
# Results:
#	Returns an ordered list of the locales preferred by the user.

proc msgcat::mcpreferences {} {
    variable Loclist
    return $Loclist
}

# msgcat::mcload --
#
#	Attempt to load message catalogs for each locale in the
#	preference list from the specified directory.
#
# Arguments:
#	langdir		The directory to search.
#
# Results:
#	Returns the number of message catalogs that were loaded.

proc msgcat::mcload {langdir} {
    variable FileLocale
    # Save the file locale if we are recursively called
    if {[info exists FileLocale]} {
	set nestedFileLocale $FileLocale
    }
    set x 0
    foreach p [mcpreferences] {
	if {$p eq {}} {
	    set p ROOT
	}
	set langfile [file join $langdir $p.msg]
	if {[file exists $langfile]} {
	    incr x
	    set FileLocale [string tolower [file tail [file rootname $langfile]]]
	    if {"root" eq $FileLocale} {
		set FileLocale ""
	    }
	    uplevel 1 [list ::source -encoding utf-8 $langfile]
	    unset FileLocale
	}
    }
    if {[info exists nestedFileLocale]} {
	set FileLocale $nestedFileLocale
    }
    return $x
}

# msgcat::mcset --
#
#	Set the translation for a given string in a specified locale.
#
# Arguments:
#	locale		The locale to use.
#	src		The source string.
#	dest		(Optional) The translated string.  If omitted,
#			the source string is used.
#
# Results:
#	Returns the new locale.

proc msgcat::mcset {locale src {dest ""}} {
    variable Msgs
    if {[llength [info level 0]] == 3} { ;# dest not specified
	set dest $src
    }

    set ns [uplevel 1 [list ::namespace current]]

    set locale [string tolower $locale]

    dict set Msgs $locale $ns $src $dest
    return $dest
}

# msgcat::mcflset --
#
#	Set the translation for a given string in the current file locale.
#
# Arguments:
#	src		The source string.
#	dest		(Optional) The translated string.  If omitted,
#			the source string is used.
#
# Results:
#	Returns the new locale.

proc msgcat::mcflset {src {dest ""}} {
    variable FileLocale
    variable Msgs

    if {![info exists FileLocale]} {
	return -code error \
	    "must only be used inside a message catalog loaded with ::msgcat::mcload"
    }
    if {[llength [info level 0]] == 2} { ;# dest not specified
	set dest $src
    }

    set ns [uplevel 1 [list ::namespace current]]
    dict set Msgs $FileLocale $ns $src $dest
    return $dest
}

# msgcat::mcmset --
#
#	Set the translation for multiple strings in a specified locale.
#
# Arguments:
#	locale		The locale to use.
#	pairs		One or more src/dest pairs (must be even length)
#
# Results:
#	Returns the number of pairs processed

proc msgcat::mcmset {locale pairs} {
    variable Msgs

    set length [llength $pairs]
    if {$length % 2} {
	return -code error "bad translation list:\
		 should be \"[lindex [info level 0] 0] locale {src dest ...}\""
    }

    set locale [string tolower $locale]
    set ns [uplevel 1 [list ::namespace current]]

    foreach {src dest} $pairs {
	dict set Msgs $locale $ns $src $dest
    }

    return [expr {$length / 2}]
}

# msgcat::mcflmset --
#
#	Set the translation for multiple strings in the mc file locale.
#
# Arguments:
#	pairs		One or more src/dest pairs (must be even length)
#
# Results:
#	Returns the number of pairs processed

proc msgcat::mcflmset {pairs} {
    variable FileLocale
    variable Msgs

    if {![info exists FileLocale]} {
	return -code error \
	    "must only be used inside a message catalog loaded with ::msgcat::mcload"
    }
    set length [llength $pairs]
    if {$length % 2} {
	return -code error "bad translation list:\
		should be \"[lindex [info level 0] 0] locale {src dest ...}\""
    }

    set ns [uplevel 1 [list ::namespace current]]
    foreach {src dest} $pairs {
	dict set Msgs $FileLocale $ns $src $dest
    }
    return [expr {$length / 2}]
}

# msgcat::mcunknown --
#
#	This routine is called by msgcat::mc if a translation cannot
#	be found for a string.  This routine is intended to be replaced
#	by an application specific routine for error reporting
#	purposes.  The default behavior is to return the source string.
#	If additional args are specified, the format command will be used
#	to work them into the traslated string.
#
# Arguments:
#	locale		The current locale.
#	src		The string to be translated.
#	args		Args to pass to the format command
#
# Results:
#	Returns the translated value.

proc msgcat::mcunknown {locale src args} {
    if {[llength $args]} {
	return [format $src {*}$args]
    } else {
	return $src
    }
}

# msgcat::mcmax --
#
#	Calculates the maximum length of the translated strings of the given
#	list.
#
# Arguments:
#	args	strings to translate.
#
# Results:
#	Returns the length of the longest translated string.

proc msgcat::mcmax {args} {
    set max 0
    foreach string $args {
	set translated [uplevel 1 [list [namespace origin mc] $string]]
	set len [string length $translated]
	if {$len>$max} {
	    set max $len
	}
    }
    return $max
}

# Convert the locale values stored in environment variables to a form
# suitable for passing to [mclocale]
proc msgcat::ConvertLocale {value} {
    # Assume $value is of form: $language[_$territory][.$codeset][@modifier]
    # Convert to form: $language[_$territory][_$modifier]
    #
    # Comment out expanded RE version -- bugs alleged
    # regexp -expanded {
    #	^		# Match all the way to the beginning
    #	([^_.@]*)	# Match "lanugage"; ends with _, ., or @
    #	(_([^.@]*))?	# Match (optional) "territory"; starts with _
    #	([.]([^@]*))?	# Match (optional) "codeset"; starts with .
    #	(@(.*))?	# Match (optional) "modifier"; starts with @
    #	$		# Match all the way to the end
    # } $value -> language _ territory _ codeset _ modifier
    if {![regexp {^([^_.@]+)(_([^.@]*))?([.]([^@]*))?(@(.*))?$} $value \
	    -> language _ territory _ codeset _ modifier]} {
	return -code error "invalid locale '$value': empty language part"
    }
    set ret $language
    if {[string length $territory]} {
	append ret _$territory
    }
    if {[string length $modifier]} {
	append ret _$modifier
    }
    return $ret
}

# Initialize the default locale
proc msgcat::Init {} {
    global env

    #
    # set default locale, try to get from environment
    #
    foreach varName {LC_ALL LC_MESSAGES LANG} {
	if {[info exists env($varName)] && ("" ne $env($varName))} {
	    if {![catch {
		mclocale [ConvertLocale $env($varName)]
	    }]} {
		return
	    }
	}
    }
    #
    # On Darwin, fallback to current CFLocale identifier if available.
    #
    if {[info exists ::tcl::mac::locale] && $::tcl::mac::locale ne ""} {
	if {![catch {
	    mclocale [ConvertLocale $::tcl::mac::locale]
	}]} {
	    return
	}
    }
    #
    # The rest of this routine is special processing for Windows or
    # Cygwin. All other platforms, get out now.
    #
    if {([info sharedlibextension] ne ".dll")
	    || [catch {package require registry}]} {
	mclocale C
	return
    }
    #
    # On Windows or Cygwin, try to set locale depending on registry
    # settings, or fall back on locale of "C".
    #

    # First check registry value LocalName present from Windows Vista
    # which contains the local string as RFC5646, composed of:
    # [a-z]{2,3} : language
    # -[a-z]{4}  : script (optional, translated by table Latn->latin)
    # -[a-z]{2}|[0-9]{3} : territory (optional, numerical region codes not used)
    # (-.*)* : variant, extension, private use (optional, not used)
    # Those are translated to local strings.
    # Examples: de-CH -> de_ch, sr-Latn-CS -> sr_cs@latin, es-419 -> es
    #
    set key {HKEY_CURRENT_USER\Control Panel\International}
    if {![catch {registry get $key LocaleName} localeName]
	    && [regexp {^([a-z]{2,3})(?:-([a-z]{4}))?(?:-([a-z]{2}))?(?:-.+)?$}\
	    [string tolower $localeName] match locale script territory]} {
	if {"" ne $territory} {
	    append locale _ $territory
	}
	set modifierDict [dict create latn latin cyrl cyrillic]
	if {[dict exists $modifierDict $script]} {
	    append locale @ [dict get $modifierDict $script]
	}
	if {![catch {mclocale [ConvertLocale $locale]}]} {
	    return
	}
    }

    # then check key locale which contains a numerical language ID
    if {[catch {
	set locale [registry get $key "locale"]
    }]} {
	mclocale C
	return
    }
    #
    # Keep trying to match against smaller and smaller suffixes
    # of the registry value, since the latter hexadigits appear
    # to determine general language and earlier hexadigits determine
    # more precise information, such as territory.  For example,
    #     0409 - English - United States
    #     0809 - English - United Kingdom
    # Add more translations to the WinRegToISO639 array above.
    #
    variable WinRegToISO639
    set locale [string tolower $locale]
    while {[string length $locale]} {
	if {![catch {
	    mclocale [ConvertLocale [dict get $WinRegToISO639 $locale]]
	}]} {
	    return
	}
	set locale [string range $locale 1 end]
    }
    #
    # No translation known.  Fall back on "C" locale
    #
    mclocale C
}
msgcat::Init