summaryrefslogtreecommitdiffstats
path: root/tcllib/modules/fumagic/cfront.tcl
diff options
context:
space:
mode:
Diffstat (limited to 'tcllib/modules/fumagic/cfront.tcl')
-rw-r--r--tcllib/modules/fumagic/cfront.tcl396
1 files changed, 396 insertions, 0 deletions
diff --git a/tcllib/modules/fumagic/cfront.tcl b/tcllib/modules/fumagic/cfront.tcl
new file mode 100644
index 0000000..7d991b4
--- /dev/null
+++ b/tcllib/modules/fumagic/cfront.tcl
@@ -0,0 +1,396 @@
+# cfront.tcl --
+#
+# Generator frontend for compiler of magic(5) files into recognizers
+# based on the 'rtcore'. Parses magic(5) into a basic 'script'.
+#
+# Copyright (c) 2004-2005 Colin McCormack <coldstore@users.sourceforge.net>
+# Copyright (c) 2005 Andreas Kupries <andreas_kupries@users.sourceforge.net>
+#
+# See the file "license.terms" for information on usage and redistribution
+# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
+#
+# RCS: @(#) $Id: cfront.tcl,v 1.7 2008/03/22 01:10:32 andreas_kupries Exp $
+
+#####
+#
+# "mime type recognition in pure tcl"
+# http://wiki.tcl.tk/12526
+#
+# Tcl code harvested on: 10 Feb 2005, 04:06 GMT
+# Wiki page last updated: ???
+#
+#####
+
+# ### ### ### ######### ######### #########
+## Requirements
+
+package require Tcl 8.4
+
+# file to compile the magic file from magic(5) into a tcl program
+package require fileutil ; # File processing (input)
+package require fileutil::magic::cgen ; # Code generator.
+package require fileutil::magic::rt ; # Runtime (typemap)
+package require struct::list ; # lrepeat.
+
+package provide fileutil::magic::cfront 1.0
+
+# ### ### ### ######### ######### #########
+## Implementation
+
+namespace eval ::fileutil::magic::cfront {
+ # Configuration flag. (De)activate debugging output.
+ # This is done during initialization.
+ # Changes at runtime have no effect.
+
+ variable debug 0
+
+ # Constants
+
+ variable hashprotection [list "\#" "\\#" \" \\\" \{ \\\{ \} \\\}] ;#"
+ variable hashprotectionB [list "\#" "\\\#" \" \\\" \} \\\} ( \\( ) \\)] ;#"
+
+ # Make backend functionality accessible
+ namespace import ::fileutil::magic::cgen::*
+
+ namespace export compile procdef install
+}
+
+# parse an individual line
+proc ::fileutil::magic::cfront::parseline {line {maxlevel 10000}} {
+ # calculate the line's level
+ set unlevel [string trimleft $line >]
+ set level [expr {[string length $line] - [string length $unlevel]}]
+ if {$level > $maxlevel} {
+ return -code continue "Skip - too high a level"
+ }
+
+ # regexp parse line into (offset, type, value, command)
+ set parse [regexp -expanded -inline {^(\S+)\s+(\S+)\s*((\S|(\B\s))*)\s*(.*)$} $unlevel]
+ if {$parse == {}} {
+ error "Can't parse: '$unlevel'"
+ }
+
+ # unpack parsed line
+ set value ""
+ set command ""
+ foreach {junk offset type value junk1 junk2 command} $parse break
+
+ # handle trailing spaces
+ if {[string index $value end] eq "\\"} {
+ append value " "
+ }
+ if {[string index $command end] eq "\\"} {
+ append command " "
+ }
+
+ if {$value eq ""} {
+ # badly formatted line
+ return -code error "no value"
+ }
+
+ ::fileutil::magic::cfront::Debug {
+ puts "level:$level offset:$offset type:$type value:'$value' command:'$command'"
+ }
+
+ # return the line's fields
+ return [list $level $offset $type $value $command]
+}
+
+# process a magic file
+proc ::fileutil::magic::cfront::process {file {maxlevel 10000}} {
+ variable hashprotection
+ variable hashprotectionB
+ variable level ;# level of line
+ variable linenum ;# line number
+
+ set level 0
+ set script {}
+
+ set linenum 0
+ ::fileutil::foreachLine line $file {
+ incr linenum
+ set line [string trim $line " "]
+ if {[string index $line 0] eq "#"} {
+ continue ;# skip comments
+ } elseif {$line == ""} {
+ continue ;# skip blank lines
+ } else {
+ # parse line
+ if {[catch {parseline $line $maxlevel} parsed]} {
+ continue ;# skip erroring lines
+ }
+
+ # got a valid line
+ foreach {level offset type value message} $parsed break
+
+ # strip comparator out of value field,
+ # (they are combined)
+ set compare [string index $value 0]
+ switch -glob -- $value {
+ [<>]=* {
+ set compare [string range $value 0 1]
+ set value [string range $value 2 end]
+ }
+
+ <* - >* - &* - ^* {
+ set value [string range $value 1 end]
+ }
+
+ =* {
+ set compare "=="
+ set value [string range $value 1 end]
+ }
+
+ !* {
+ set compare "!="
+ set value [string range $value 1 end]
+ }
+
+ x {
+ # this is the 'don't care' match
+ # used for collecting values
+ set value ""
+ }
+
+ default {
+ # the default comparator is equals
+ set compare "=="
+ if {[string match {\\[<!>=]*} $value]} {
+ set value [string range $value 1 end]
+ }
+ }
+ }
+
+ # process type field
+ set qual ""
+ switch -glob -- $type {
+ pstring* - string* {
+ # String or Pascal string type
+
+ # extract string match qualifiers
+ foreach {type qual} [split $type /] break
+
+ # convert pstring to string + qualifier
+ if {$type eq "pstring"} {
+ append qual "p"
+ set type "string"
+ }
+
+ # protect hashes in output script value
+ set value [string map $hashprotection $value]
+
+ if {($value eq "\\0") && ($compare eq ">")} {
+ # record 'any string' match
+ set value ""
+ set compare x
+ } elseif {$compare eq "!="} {
+ # string doesn't allow !match
+ set value !$value
+ set compare "=="
+ }
+
+ if {$type ne "string"} {
+ # don't let any odd string types sneak in
+ puts stderr "Reject String: ${file}:$linenum $type - $line"
+ continue
+ }
+ }
+
+ regex {
+ # I am *not* going to handle regex
+ puts stderr "Reject Regex: ${file}:$linenum $type - $line"
+ continue
+ }
+
+ *byte* - *short* - *long* - *date* {
+ # Numeric types
+
+ # extract numeric match &qualifiers
+ set type [split $type &]
+ set qual [lindex $type 1]
+
+ if {$qual ne ""} {
+ # this is an &-qualifier
+ set qual &$qual
+ } else {
+ # extract -qualifier from type
+ set type [split $type -]
+ set qual [lindex $type 1]
+ if {$qual ne ""} {
+ set qual -$qual
+ }
+ }
+ set type [lindex $type 0]
+
+ # perform value adjustments
+ if {$compare ne "x"} {
+ # trim redundant Long value qualifier
+ set value [string trimright $value L]
+
+ if {[catch {set value [expr $value]} x]} {
+ upvar #0 errorInfo eo
+ # check that value is representable in tcl
+ puts stderr "Reject Value Error: ${file}:$linenum '$value' '$line' - $eo"
+ continue;
+ }
+
+ # coerce numeric value into hex
+ set value [format "0x%x" $value]
+ }
+ }
+
+ default {
+ # this is not a type we can handle
+ puts stderr "Reject Unknown Type: ${file}:$linenum $type - $line"
+ continue
+ }
+ }
+ }
+
+ # collect some summaries
+ ::fileutil::magic::cfront::Debug {
+ variable types
+ set types($type) $type
+ variable quals
+ set quals($qual) $qual
+ }
+
+ #puts $linenum level:$level offset:$offset type:$type
+ #puts qual:$qual compare:$compare value:'$value' message:'$message'
+
+ # protect hashes in output script message
+ set message [string map $hashprotectionB $message]
+
+ if {![string match "(*)" $offset]} {
+ catch {set offset [expr $offset]}
+ }
+
+ # record is the complete match command,
+ # encoded for tcl code generation
+ set record [list $linenum $type $qual $compare $offset $value $message]
+ if {$script == {}} {
+ # the original script has level 0,
+ # regardless of what the script says
+ set level 0
+ }
+
+ if {$level == 0} {
+ # add a new 0-level record
+ lappend script $record
+ } else {
+ # find the growing edge of the script
+ set depth [::struct::list repeat [expr $level] end]
+ while {[catch {
+ # get the insertion point
+ set insertion [eval [linsert $depth 0 lindex $script]]
+ # 8.5 # set insertion [lindex $script {*}$depth]
+ }]} {
+ # handle scripts which jump levels,
+ # reduce depth to current-depth+1
+ set depth [lreplace $depth end end]
+ }
+
+ # add the record at the insertion point
+ lappend insertion $record
+
+ # re-insert the record into its correct position
+ eval [linsert [linsert $depth 0 lset script] end $insertion]
+ # 8.5 # lset script {*}$depth $insertion
+ }
+ }
+ #puts "Script: $script"
+ return $script
+}
+
+# compile up magic files or directories of magic files into a single recognizer.
+proc ::fileutil::magic::cfront::compile {args} {
+ set tcl ""
+ set script {}
+ foreach arg $args {
+ if {[file type $arg] == "directory"} {
+ foreach file [glob [file join $arg *]] {
+ set script1 [process $file]
+ eval [linsert $script1 0 lappend script [list file $file]]
+ # 8.5 # lappend script [list file $file] {*}$script1
+
+ #append tcl "magic::file_start $file" \n
+ #append tcl [run $script1] \n
+ }
+ } else {
+ set file $arg
+ set script1 [process $file]
+ eval [linsert $script1 0 lappend script [list file $file]]
+ # 8.5 # lappend script [list file $file] {*}$script1
+
+ #append tcl "magic::file_start $file" \n
+ #append tcl [run $script1] \n
+ }
+ }
+
+ #puts stderr $script
+ ::fileutil::magic::cfront::Debug {puts "\# $args"}
+
+ set t [2tree $script]
+ set tcl [treegen $t root]
+ append tcl "\nreturn \{\}"
+
+ ::fileutil::magic::cfront::Debug {puts [treedump $t]}
+ #set tcl [run $script]
+
+ return $tcl
+}
+
+proc ::fileutil::magic::cfront::procdef {procname args} {
+
+ set pspace [namespace qualifiers $procname]
+
+ if {$pspace eq ""} {
+ return -code error "Cannot generate recognizer in the global namespace"
+ }
+
+ set script {}
+ lappend script "package require fileutil::magic::rt"
+ lappend script "namespace eval [list ${pspace}] \{"
+ lappend script " namespace import ::fileutil::magic::rt::*"
+ lappend script "\}"
+ lappend script ""
+ lappend script [list proc ${procname} {} \n[eval [linsert $args 0 compile]]\n]
+ return [join $script \n]
+}
+
+proc ::fileutil::magic::cfront::install {args} {
+ foreach arg $args {
+ set path [file tail $arg]
+ eval [procdef ::fileutil::magic::/${path}::run $arg]
+ }
+ return
+}
+
+# ### ### ### ######### ######### #########
+## Internal, debugging.
+
+if {!$::fileutil::magic::cfront::debug} {
+ # This procedure definition is optimized out of using code by the
+ # core bcc. It knows that neither argument checks are required,
+ # nor is anything done. So neither results, nor errors are
+ # possible, a true no-operation.
+ proc ::fileutil::magic::cfront::Debug {args} {}
+
+} else {
+ proc ::fileutil::magic::cfront::Debug {script} {
+ # Run the commands in the debug script. This usually generates
+ # some output. The uplevel is required to ensure the proper
+ # resolution of all variables found in the script.
+ uplevel 1 $script
+ return
+ }
+}
+
+#set script [magic::compile {} /usr/share/misc/file/magic]
+#puts "\# types:[array names magic::types]"
+#puts "\# quals:[array names magic::quals]"
+#puts "Script: $script"
+
+# ### ### ### ######### ######### #########
+## Ready for use.
+# EOF