diff options
author | dkf <donal.k.fellows@manchester.ac.uk> | 2024-01-25 17:42:32 (GMT) |
---|---|---|
committer | dkf <donal.k.fellows@manchester.ac.uk> | 2024-01-25 17:42:32 (GMT) |
commit | 104d50836d96335baa9b788de5e85dc2359dc323 (patch) | |
tree | 776a05c354fc8a592836e9db486a86c2e8ec4366 /tools | |
parent | 7533d9c78e1953dde3a47b494b0d3cbeb729e7a0 (diff) | |
download | tcl-104d50836d96335baa9b788de5e85dc2359dc323.zip tcl-104d50836d96335baa9b788de5e85dc2359dc323.tar.gz tcl-104d50836d96335baa9b788de5e85dc2359dc323.tar.bz2 |
Add script I've used to find spelling errors in docs
Diffstat (limited to 'tools')
-rw-r--r-- | tools/findDocWords.tcl | 38 |
1 files changed, 38 insertions, 0 deletions
# tools/findDocWords.tcl --
#
#	Reports words that appear in documentation files but are missing from
#	a word list, as an aid to spotting spelling errors.
#
# Usage:
#	tclsh findDocWords.tcl docDirectory dictionaryFile
#
#	docDirectory	Directory scanned for files matching *.[13n].
#	dictionaryFile	Text file with one known word per line; matching is
#			case-insensitive.
#
# Output:
#	A "loaded N words" banner, then one line per unknown word (sorted),
#	each followed by the first file in which that word was seen.

if {[llength $argv] < 2} {
    puts stderr "usage: [file tail [info script]] docDirectory dictionaryFile"
    exit 1
}
lassign $argv dir dictionary

# Start from an empty dict so that an empty word list still yields a valid
# (zero-sized) dictionary rather than an unset variable.
set realWord {}
set f [open $dictionary]
try {
    # gets returns -1 only at end of file; 0 just means an empty line, which
    # must be skipped rather than treated as the end of the word list.
    while {[gets $f line] >= 0} {
        if {$line eq ""} continue
        dict set realWord [string tolower $line] yes
    }
} finally {
    close $f
}
puts "loaded [dict size $realWord] words from dictionary"

set files [glob -directory $dir {*.[13n]}]

# Maps each unknown (lower-cased) word to the list of files it was seen in.
set found {}

# identifyWords --
#
#	Breaks one token from a documentation file into candidate words and
#	records every candidate that is not in the dictionary.
#
# Arguments:
#	fragment	A run of word characters and backslashes taken from a file.
#	filename	Name of the file the fragment came from.
#
# Results:
#	None. Appends filename to the entry for each unknown word in the
#	global dict "found"; reads the global dict "realWord".

proc identifyWords {fragment filename} {
    global realWord found
    # Drop the \fB \fR \fI \fP sequences (presumably roff font changes) and
    # turn \0 into an underscore, then treat underscores as separators.
    set cleaned [string map {\\fB "" \\fR "" \\fI "" \\fP "" \\0 _} $fragment]
    foreach frag [split $cleaned _] {
        # Pure numbers (and empty pieces) are never words.
        if {[string is entier $frag]} continue
        set frag [string trim $frag "\\0123456789"]
        if {$frag eq ""} continue
        # Split camelCase: a leading lower-case run, then each capitalised run.
        foreach word [regexp -all -inline {^[a-z]+|[A-Z][a-z]*} $frag] {
            set word [string tolower $word]
            if {![dict exists $realWord $word]} {
                dict lappend found $word $filename
            }
        }
    }
}

foreach fn $files {
    set f [open $fn]
    try {
        foreach word [regexp -all -inline {[\\\w]+} [read $f]] {
            identifyWords $word $fn
        }
    } finally {
        # Release the channel even if reading or scanning fails.
        close $f
    }
}

# max needs at least one argument, so only build the report when something
# was actually found; a clean run prints nothing further.
if {[dict size $found] > 0} {
    set len [tcl::mathfunc::max {*}[lmap word [dict keys $found] {string length $word}]]
    foreach word [lsort [dict keys $found]] {
        puts [format "%-${len}s: %s" $word [lindex [dict get $found $word] 0]]
    }
}