summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authordkf <donal.k.fellows@manchester.ac.uk>2024-01-25 17:42:32 (GMT)
committerdkf <donal.k.fellows@manchester.ac.uk>2024-01-25 17:42:32 (GMT)
commit104d50836d96335baa9b788de5e85dc2359dc323 (patch)
tree776a05c354fc8a592836e9db486a86c2e8ec4366
parent7533d9c78e1953dde3a47b494b0d3cbeb729e7a0 (diff)
downloadtcl-104d50836d96335baa9b788de5e85dc2359dc323.zip
tcl-104d50836d96335baa9b788de5e85dc2359dc323.tar.gz
tcl-104d50836d96335baa9b788de5e85dc2359dc323.tar.bz2
Add script I've used to find spelling errors in docs
-rw-r--r--tools/findDocWords.tcl38
1 files changed, 38 insertions, 0 deletions
diff --git a/tools/findDocWords.tcl b/tools/findDocWords.tcl
new file mode 100644
index 0000000..2b585d5
--- /dev/null
+++ b/tools/findDocWords.tcl
@@ -0,0 +1,38 @@
+# findDocWords.tcl --
+#
+#	Report words used in documentation files that do not appear in a
+#	dictionary file.
+#
+# Usage: tclsh findDocWords.tcl docDirectory dictionaryFile
+#
+#	docDirectory	Directory searched for files matching *.[13n].
+#	dictionaryFile	Text file with one known word per line.
+
+if {[llength $argv] < 2} {
+    puts stderr "usage: [info script] docDirectory dictionaryFile"
+    exit 1
+}
+lassign $argv dir dictionary
+
+# Load the dictionary into a dict keyed by the lower-cased word. The dict is
+# created up front so that an empty dictionary file still yields a valid
+# (empty) dict for the size report below and for later lookups.
+set realWord {}
+set f [open $dictionary]
+try {
+    # [gets] returns -1 only at end of file; an empty line yields 0, so
+    # testing ">= 0" keeps reading past blank lines instead of stopping at
+    # the first one.
+    while {[gets $f line] >= 0} {
+        if {$line eq ""} continue
+        dict set realWord [string tolower $line] yes
+    }
+} finally {
+    close $f
+}
+puts "loaded [dict size $realWord] words from dictionary"
+
+# Files to scan: those in $dir whose extension is 1, 3 or n.
+set files [glob -directory $dir {*.[13n]}]
+# Maps each unrecognised (lower-cased) word to the list of files it was
+# seen in; filled in by identifyWords.
+set found {}
+
+# identifyWords --
+#
+#	Split one token taken from a documentation file into candidate words
+#	and record every candidate that is not in the dictionary.
+#
+# Arguments:
+#	fragment	Token to examine; may contain backslash sequences.
+#	filename	Name recorded against each unknown word.
+#
+# Results:
+#	Returns the empty string.
+#
+# Side effects:
+#	Appends filename to the entry for each unknown word in the global
+#	dict "found"; reads the global dict "realWord".
+
+proc identifyWords {fragment filename} {
+    global realWord found
+
+    # Remove the \fB, \fR, \fI and \fP sequences (presumably roff font
+    # changes) and turn "\0" into "_" so it separates words like an
+    # underscore does.
+    set cleaned [string map {\\fB "" \\fR "" \\fI "" \\fP "" \\0 _} $fragment]
+
+    foreach piece [split $cleaned _] {
+        # Pure integers are not words. Without -strict this test is also
+        # true for the empty string.
+        if {[string is entier $piece]} {
+            continue
+        }
+        # Strip backslashes and digits from both ends.
+        set piece [string trim $piece "\\0123456789"]
+        if {$piece ne ""} {
+            # A leading lower-case run, plus each capitalised run, is one
+            # candidate word; this breaks up camelCase names.
+            foreach candidate [regexp -all -inline {^[a-z]+|[A-Z][a-z]*} $piece] {
+                set key [string tolower $candidate]
+                if {[dict exists $realWord $key]} continue
+                dict lappend found $key $filename
+            }
+        }
+    }
+}
+
+# Scan every selected file: each run of backslashes and word characters is
+# one token, which identifyWords checks against the dictionary.
+foreach manPage $files {
+    set chan [open $manPage]
+    set tokens [regexp -all -inline {[\\\w]+} [read $chan]]
+    foreach token $tokens {
+        identifyWords $token $manPage
+    }
+    close $chan
+}
+# Print the unknown words in sorted order, each with the first file it was
+# seen in, padded so the file names line up in a column.
+set unknownWords [lsort [dict keys $found]]
+# max needs at least one argument, so skip the report entirely when no
+# unknown word was found rather than failing with an error.
+if {[llength $unknownWords] > 0} {
+    set len [tcl::mathfunc::max {*}[lmap word $unknownWords {string length $word}]]
+    foreach word $unknownWords {
+        puts [format "%-${len}s: %s" $word [lindex [dict get $found $word] 0]]
+    }
+}