diff options
Diffstat (limited to 'tcllib/modules/pt/include')
125 files changed, 3606 insertions, 0 deletions
diff --git a/tcllib/modules/pt/include/arch_core.dia b/tcllib/modules/pt/include/arch_core.dia new file mode 100644 index 0000000..ac1e034 --- /dev/null +++ b/tcllib/modules/pt/include/arch_core.dia @@ -0,0 +1,4 @@ +# -*- tcl -*- tcl.tk//DSL diagram//EN//1.0 +set mark 2 +source [file join [file dirname [file normalize [info script]]] architecture.dia] +return diff --git a/tcllib/modules/pt/include/arch_core.png b/tcllib/modules/pt/include/arch_core.png Binary files differnew file mode 100644 index 0000000..c65da4a --- /dev/null +++ b/tcllib/modules/pt/include/arch_core.png diff --git a/tcllib/modules/pt/include/arch_core_container.dia b/tcllib/modules/pt/include/arch_core_container.dia new file mode 100644 index 0000000..f693ca9 --- /dev/null +++ b/tcllib/modules/pt/include/arch_core_container.dia @@ -0,0 +1,4 @@ +# -*- tcl -*- tcl.tk//DSL diagram//EN//1.0 +set mark 6 +source [file join [file dirname [file normalize [info script]]] architecture.dia] +return diff --git a/tcllib/modules/pt/include/arch_core_container.png b/tcllib/modules/pt/include/arch_core_container.png Binary files differnew file mode 100644 index 0000000..f577233 --- /dev/null +++ b/tcllib/modules/pt/include/arch_core_container.png diff --git a/tcllib/modules/pt/include/arch_core_eplugins.dia b/tcllib/modules/pt/include/arch_core_eplugins.dia new file mode 100644 index 0000000..eac0c95 --- /dev/null +++ b/tcllib/modules/pt/include/arch_core_eplugins.dia @@ -0,0 +1,4 @@ +# -*- tcl -*- tcl.tk//DSL diagram//EN//1.0 +set mark 8 +source [file join [file dirname [file normalize [info script]]] architecture.dia] +return diff --git a/tcllib/modules/pt/include/arch_core_eplugins.png b/tcllib/modules/pt/include/arch_core_eplugins.png Binary files differnew file mode 100644 index 0000000..9089fb2 --- /dev/null +++ b/tcllib/modules/pt/include/arch_core_eplugins.png diff --git a/tcllib/modules/pt/include/arch_core_export.dia b/tcllib/modules/pt/include/arch_core_export.dia new file mode 100644 index 
0000000..78228c2 --- /dev/null +++ b/tcllib/modules/pt/include/arch_core_export.dia @@ -0,0 +1,4 @@ +# -*- tcl -*- tcl.tk//DSL diagram//EN//1.0 +set mark 7 +source [file join [file dirname [file normalize [info script]]] architecture.dia] +return diff --git a/tcllib/modules/pt/include/arch_core_export.png b/tcllib/modules/pt/include/arch_core_export.png Binary files differnew file mode 100644 index 0000000..293cb82 --- /dev/null +++ b/tcllib/modules/pt/include/arch_core_export.png diff --git a/tcllib/modules/pt/include/arch_core_import.dia b/tcllib/modules/pt/include/arch_core_import.dia new file mode 100644 index 0000000..76e6c49 --- /dev/null +++ b/tcllib/modules/pt/include/arch_core_import.dia @@ -0,0 +1,4 @@ +# -*- tcl -*- tcl.tk//DSL diagram//EN//1.0 +set mark 5 +source [file join [file dirname [file normalize [info script]]] architecture.dia] +return diff --git a/tcllib/modules/pt/include/arch_core_import.png b/tcllib/modules/pt/include/arch_core_import.png Binary files differnew file mode 100644 index 0000000..5749efb --- /dev/null +++ b/tcllib/modules/pt/include/arch_core_import.png diff --git a/tcllib/modules/pt/include/arch_core_iplugins.dia b/tcllib/modules/pt/include/arch_core_iplugins.dia new file mode 100644 index 0000000..4b5de6c --- /dev/null +++ b/tcllib/modules/pt/include/arch_core_iplugins.dia @@ -0,0 +1,4 @@ +# -*- tcl -*- tcl.tk//DSL diagram//EN//1.0 +set mark 4 +source [file join [file dirname [file normalize [info script]]] architecture.dia] +return diff --git a/tcllib/modules/pt/include/arch_core_iplugins.png b/tcllib/modules/pt/include/arch_core_iplugins.png Binary files differnew file mode 100644 index 0000000..079cce7 --- /dev/null +++ b/tcllib/modules/pt/include/arch_core_iplugins.png diff --git a/tcllib/modules/pt/include/arch_core_support.dia b/tcllib/modules/pt/include/arch_core_support.dia new file mode 100644 index 0000000..e24e068 --- /dev/null +++ b/tcllib/modules/pt/include/arch_core_support.dia @@ -0,0 +1,4 @@ +# -*- tcl -*- 
tcl.tk//DSL diagram//EN//1.0 +set mark 10 +source [file join [file dirname [file normalize [info script]]] architecture.dia] +return diff --git a/tcllib/modules/pt/include/arch_core_support.png b/tcllib/modules/pt/include/arch_core_support.png Binary files differnew file mode 100644 index 0000000..b33ec99 --- /dev/null +++ b/tcllib/modules/pt/include/arch_core_support.png diff --git a/tcllib/modules/pt/include/arch_core_transform.dia b/tcllib/modules/pt/include/arch_core_transform.dia new file mode 100644 index 0000000..6775bdd --- /dev/null +++ b/tcllib/modules/pt/include/arch_core_transform.dia @@ -0,0 +1,4 @@ +# -*- tcl -*- tcl.tk//DSL diagram//EN//1.0 +set mark 9 +source [file join [file dirname [file normalize [info script]]] architecture.dia] +return diff --git a/tcllib/modules/pt/include/arch_core_transform.png b/tcllib/modules/pt/include/arch_core_transform.png Binary files differnew file mode 100644 index 0000000..bc6358d --- /dev/null +++ b/tcllib/modules/pt/include/arch_core_transform.png diff --git a/tcllib/modules/pt/include/arch_support.dia b/tcllib/modules/pt/include/arch_support.dia new file mode 100644 index 0000000..1cb320e --- /dev/null +++ b/tcllib/modules/pt/include/arch_support.dia @@ -0,0 +1,4 @@ +# -*- tcl -*- tcl.tk//DSL diagram//EN//1.0 +set mark 3 +source [file join [file dirname [file normalize [info script]]] architecture.dia] +return diff --git a/tcllib/modules/pt/include/arch_support.png b/tcllib/modules/pt/include/arch_support.png Binary files differnew file mode 100644 index 0000000..072ce3a --- /dev/null +++ b/tcllib/modules/pt/include/arch_support.png diff --git a/tcllib/modules/pt/include/arch_user_app.dia b/tcllib/modules/pt/include/arch_user_app.dia new file mode 100644 index 0000000..22ea225 --- /dev/null +++ b/tcllib/modules/pt/include/arch_user_app.dia @@ -0,0 +1,4 @@ +# -*- tcl -*- tcl.tk//DSL diagram//EN//1.0 +set mark 0 +source [file join [file dirname [file normalize [info script]]] architecture.dia] +return diff --git 
a/tcllib/modules/pt/include/arch_user_app.png b/tcllib/modules/pt/include/arch_user_app.png Binary files differnew file mode 100644 index 0000000..66c3af9 --- /dev/null +++ b/tcllib/modules/pt/include/arch_user_app.png diff --git a/tcllib/modules/pt/include/arch_user_pkg.dia b/tcllib/modules/pt/include/arch_user_pkg.dia new file mode 100644 index 0000000..a9090b0 --- /dev/null +++ b/tcllib/modules/pt/include/arch_user_pkg.dia @@ -0,0 +1,4 @@ +# -*- tcl -*- tcl.tk//DSL diagram//EN//1.0 +set mark 1 +source [file join [file dirname [file normalize [info script]]] architecture.dia] +return diff --git a/tcllib/modules/pt/include/arch_user_pkg.png b/tcllib/modules/pt/include/arch_user_pkg.png Binary files differnew file mode 100644 index 0000000..bb89aac --- /dev/null +++ b/tcllib/modules/pt/include/arch_user_pkg.png diff --git a/tcllib/modules/pt/include/architecture.dia b/tcllib/modules/pt/include/architecture.dia new file mode 100644 index 0000000..399f14b --- /dev/null +++ b/tcllib/modules/pt/include/architecture.dia @@ -0,0 +1,53 @@ +# -*- tcl -*- tcl.tk//DSL diagram//EN//1.0 +# Parser Tools Architecture Diagram + +set counter 0 +if {![info exists mark]} { set mark -1 } + +proc xbox {args} { + variable mark + variable counter + + if {$mark == $counter} { + lappend args color red stroke 2 + } + incr counter + return [uplevel 1 [list box {*}$args]] +} + +proc area {label args} { + set E [xbox fillcolor lightyellow {*}$args] + group { + text text $label with nw at [last box nw] + } + return $E +} + +down +set boxwidth [90 mm] +set movelength [5 mm] + +set A [area Applications] +move +set U [area "User Packages"] +move +set C [area "Core Packages" height [90 mm]] +move +set S [area "Support Packages"] + +text at $A "pt" +text at $U "pt::pgen" +text at $S "<general>" +block { + set fillcolor white + set boxwidth [20 mm] + + xbox "json" "peg" height [25 mm] dotted ; up ; arrow stroke 4 + xbox "Import:" ; right ; arrow same <-> + xbox "Container" width [25 mm] ; arrow same 
+ xbox "Export:" ; down ; arrow <- stroke 4 + xbox "json" "peg" "cparam" "tclparam" height [25 mm] dotted ; left ; move + xbox "Execute" "Transform" height [25 mm] width [25 mm] ; up + arrow <-> stroke 4 ; down ; move from [last box s] + xbox "AST / PE / PEG Support" width [75 mm] +} at $C diff --git a/tcllib/modules/pt/include/architecture.png b/tcllib/modules/pt/include/architecture.png Binary files differnew file mode 100644 index 0000000..36b957b --- /dev/null +++ b/tcllib/modules/pt/include/architecture.png diff --git a/tcllib/modules/pt/include/channel_notes.inc b/tcllib/modules/pt/include/channel_notes.inc new file mode 100644 index 0000000..1ad32f3 --- /dev/null +++ b/tcllib/modules/pt/include/channel_notes.inc @@ -0,0 +1,14 @@ +[para] + +Note here that the Parser Tools are based on Tcl 8.5+. In other words, +the channel argument is not restricted to files, sockets, etc. We have +the full power of [term {reflected channels}] available. + +[para] + +It should also be noted that the parser pulls the characters from the +input stream as it needs them. If a parser created by this package has +to be operated in a push aka event-driven manner it will be necessary +to go to Tcl 8.6+ and use the [package coroutine::auto] to wrap it +into a coroutine where [cmd read] is properly changed for +push-operation. diff --git a/tcllib/modules/pt/include/concept.inc b/tcllib/modules/pt/include/concept.inc new file mode 100644 index 0000000..fa96d79 --- /dev/null +++ b/tcllib/modules/pt/include/concept.inc @@ -0,0 +1,22 @@ +[comment { + Description of the concepts used in parsing expression + grammars and how they relate to each other. This is useful + to understand the chosen serialization. +}] +[list_begin enumerated] +[enum] + +A [term {parsing expression grammar}] consists of a +[term {start parsing expression}] and a (possibly empty) list of +[term rules]. 
+ +[enum] +Each rule defines a nonterminal symbol of the grammar, with its name, +semantic mode, and sentential structure. The latter is provided by a +[term {parsing expression}]. + +[enum] +Each nonterminal symbol is specified at most once, with its name as +the identifying part. + +[list_end] diff --git a/tcllib/modules/pt/include/example/expr_ast.dia b/tcllib/modules/pt/include/example/expr_ast.dia new file mode 100644 index 0000000..21038e5 --- /dev/null +++ b/tcllib/modules/pt/include/example/expr_ast.dia @@ -0,0 +1,44 @@ +# -*- tcl -*- tcl.tk//DSL diagram//EN//1.0 +set boxwidth [3 cm] +proc achild {args} { arrow <- {*}$args} +proc char {pos x} { + variable movelength + move down [expr {0.5*$movelength}] + box "\"$x\"" textcolor blue + group { + text text $pos with nw at [last box nw] textcolor red + } +} +down +box "Expression 0 4" +achild +box "Factor 0 4" +group { + achild down left left left left ; down + box "Term 0 2" + achild + box "Number 0 2" + group { + achild down left left ; down + box "Digit 0 0" ; char 0 1 + } ; group { + achild down + box "Digit 1 1" ; char 1 2 + } ; group { + achild down right right ; down + box "Digit 2 2" ; char 2 0 + } +} ; group { + achild down down down down down + box "AddOp 3 3" ; char 3 + +} ; group { + achild down right right ; down + box "Term 4 4" + achild + box "Number 4 4" + achild down + box "Digit 4 4" ; char 4 5 +} +line stroke 2 dotted \ + from [0.25 between [6th box nw] [5th box sw ]] \ + to [0.25 between [last box ne] [2nd last box se]] diff --git a/tcllib/modules/pt/include/example/expr_ast.inc b/tcllib/modules/pt/include/example/expr_ast.inc new file mode 100644 index 0000000..1d12780 --- /dev/null +++ b/tcllib/modules/pt/include/example/expr_ast.inc @@ -0,0 +1,19 @@ +[example { +set ast {Expression 0 4 + {Factor 0 4 + {Term 0 2 + {Number 0 2 + {Digit 0 0} + {Digit 1 1} + {Digit 2 2} + } + } + {AddOp 3 3} + {Term 4 4 + {Number 4 4 + {Digit 4 4} + } + } + } +} +}] diff --git 
a/tcllib/modules/pt/include/example/expr_ast.pic b/tcllib/modules/pt/include/example/expr_ast.pic new file mode 100644 index 0000000..6d09f2c --- /dev/null +++ b/tcllib/modules/pt/include/example/expr_ast.pic @@ -0,0 +1,11 @@ +.nf + +- Digit 0 0 | 1 + | | + +- Term 0 2 --- Number 0 2 -+- Digit 1 1 | 2 + | | | + | +- Digit 2 2 | 0 + | | +Expression 0 4 --- Factor 0 4 -+----------------------------- AddOp 3 3 | + + | | + +- Term 4 4 --- Number 4 4 --- Digit 4 4 | 5 +.fi diff --git a/tcllib/modules/pt/include/example/expr_ast.png b/tcllib/modules/pt/include/example/expr_ast.png Binary files differnew file mode 100644 index 0000000..e33e8f1 --- /dev/null +++ b/tcllib/modules/pt/include/example/expr_ast.png diff --git a/tcllib/modules/pt/include/example/expr_ast.txt b/tcllib/modules/pt/include/example/expr_ast.txt new file mode 100644 index 0000000..f662f8c --- /dev/null +++ b/tcllib/modules/pt/include/example/expr_ast.txt @@ -0,0 +1,9 @@ + +- Digit 0 0 | 1 + | | + +- Term 0 2 --- Number 0 2 -+- Digit 1 1 | 2 + | | | + | +- Digit 2 2 | 0 + | | +Expression 0 4 --- Factor 0 4 -+----------------------------- AddOp 3 3 | + + | | + +- Term 4 4 --- Number 4 4 --- Digit 4 4 | 5 diff --git a/tcllib/modules/pt/include/example/expr_container.inc b/tcllib/modules/pt/include/example/expr_container.inc new file mode 100644 index 0000000..b568459 --- /dev/null +++ b/tcllib/modules/pt/include/example/expr_container.inc @@ -0,0 +1,33 @@ +[example { +snit::type a_pe_grammar { + constructor {} { + install myg using pt::peg::container ${selfns}::G + $myg start {n Expression} + $myg add AddOp Digit Expression Factor MulOp Number Sign Term + $myg modes { + AddOp value + Digit value + Expression value + Factor value + MulOp value + Number value + Sign value + Term value + } + $myg rules { + AddOp {/ {t -} {t +}} + Digit {/ {t 0} {t 1} {t 2} {t 3} {t 4} {t 5} {t 6} {t 7} {t 8} {t 9}} + Expression {/ {x {t \50} {n Expression} {t \51}} {x {n Factor} {* {x {n MulOp} {n Factor}}}}} + Factor {x {n 
Term} {* {x {n AddOp} {n Term}}}} + MulOp {/ {t *} {t /}} + Number {x {? {n Sign}} {+ {n Digit}}} + Sign {/ {t -} {t +}} + Term {n Number} + } + return + } + + component myg + delegate method * to myg +} +}] diff --git a/tcllib/modules/pt/include/example/expr_json.inc b/tcllib/modules/pt/include/example/expr_json.inc new file mode 100644 index 0000000..45b30f8 --- /dev/null +++ b/tcllib/modules/pt/include/example/expr_json.inc @@ -0,0 +1,41 @@ +[example { +{ + "pt::grammar::peg" : { + "rules" : { + "AddOp" : { + "is" : "\/ {t -} {t +}", + "mode" : "value" + }, + "Digit" : { + "is" : "\/ {t 0} {t 1} {t 2} {t 3} {t 4} {t 5} {t 6} {t 7} {t 8} {t 9}", + "mode" : "value" + }, + "Expression" : { + "is" : "\/ {x {t (} {n Expression} {t )}} {x {n Factor} {* {x {n MulOp} {n Factor}}}}", + "mode" : "value" + }, + "Factor" : { + "is" : "x {n Term} {* {x {n AddOp} {n Term}}}", + "mode" : "value" + }, + "MulOp" : { + "is" : "\/ {t *} {t \/}", + "mode" : "value" + }, + "Number" : { + "is" : "x {? {n Sign}} {+ {n Digit}}", + "mode" : "value" + }, + "Sign" : { + "is" : "\/ {t -} {t +}", + "mode" : "value" + }, + "Term" : { + "is" : "n Number", + "mode" : "value" + } + }, + "start" : "n Expression" + } +} +}] diff --git a/tcllib/modules/pt/include/example/expr_param.inc b/tcllib/modules/pt/include/example/expr_param.inc new file mode 100644 index 0000000..ecfbb3a --- /dev/null +++ b/tcllib/modules/pt/include/example/expr_param.inc @@ -0,0 +1,758 @@ +[example { +# -*- text -*- +# Parsing Expression Grammar 'TEMPLATE'. +# Generated for unknown, from file 'TEST' + +# +# Grammar Start Expression +# + +<<MAIN>>: + call sym_Expression + halt + +# +# value Symbol 'AddOp' +# + +sym_AddOp: +# / +# '-' +# '+' + + symbol_restore AddOp + found! jump found_7 + loc_push + + call choice_5 + + fail! value_clear + ok! value_leaf AddOp + symbol_save AddOp + error_nonterminal AddOp + loc_pop_discard + +found_7: + ok! 
ast_value_push + return + +choice_5: +# / +# '-' +# '+' + + error_clear + + loc_push + error_push + + input_next "t -" + ok! test_char "-" + + error_pop_merge + ok! jump oknoast_4 + + loc_pop_rewind + loc_push + error_push + + input_next "t +" + ok! test_char "+" + + error_pop_merge + ok! jump oknoast_4 + + loc_pop_rewind + status_fail + return + +oknoast_4: + loc_pop_discard + return +# +# value Symbol 'Digit' +# + +sym_Digit: +# / +# '0' +# '1' +# '2' +# '3' +# '4' +# '5' +# '6' +# '7' +# '8' +# '9' + + symbol_restore Digit + found! jump found_22 + loc_push + + call choice_20 + + fail! value_clear + ok! value_leaf Digit + symbol_save Digit + error_nonterminal Digit + loc_pop_discard + +found_22: + ok! ast_value_push + return + +choice_20: +# / +# '0' +# '1' +# '2' +# '3' +# '4' +# '5' +# '6' +# '7' +# '8' +# '9' + + error_clear + + loc_push + error_push + + input_next "t 0" + ok! test_char "0" + + error_pop_merge + ok! jump oknoast_19 + + loc_pop_rewind + loc_push + error_push + + input_next "t 1" + ok! test_char "1" + + error_pop_merge + ok! jump oknoast_19 + + loc_pop_rewind + loc_push + error_push + + input_next "t 2" + ok! test_char "2" + + error_pop_merge + ok! jump oknoast_19 + + loc_pop_rewind + loc_push + error_push + + input_next "t 3" + ok! test_char "3" + + error_pop_merge + ok! jump oknoast_19 + + loc_pop_rewind + loc_push + error_push + + input_next "t 4" + ok! test_char "4" + + error_pop_merge + ok! jump oknoast_19 + + loc_pop_rewind + loc_push + error_push + + input_next "t 5" + ok! test_char "5" + + error_pop_merge + ok! jump oknoast_19 + + loc_pop_rewind + loc_push + error_push + + input_next "t 6" + ok! test_char "6" + + error_pop_merge + ok! jump oknoast_19 + + loc_pop_rewind + loc_push + error_push + + input_next "t 7" + ok! test_char "7" + + error_pop_merge + ok! jump oknoast_19 + + loc_pop_rewind + loc_push + error_push + + input_next "t 8" + ok! test_char "8" + + error_pop_merge + ok! 
jump oknoast_19 + + loc_pop_rewind + loc_push + error_push + + input_next "t 9" + ok! test_char "9" + + error_pop_merge + ok! jump oknoast_19 + + loc_pop_rewind + status_fail + return + +oknoast_19: + loc_pop_discard + return +# +# value Symbol 'Expression' +# + +sym_Expression: +# / +# x +# '\(' +# (Expression) +# '\)' +# x +# (Factor) +# * +# x +# (MulOp) +# (Factor) + + symbol_restore Expression + found! jump found_46 + loc_push + ast_push + + call choice_44 + + fail! value_clear + ok! value_reduce Expression + symbol_save Expression + error_nonterminal Expression + ast_pop_rewind + loc_pop_discard + +found_46: + ok! ast_value_push + return + +choice_44: +# / +# x +# '\(' +# (Expression) +# '\)' +# x +# (Factor) +# * +# x +# (MulOp) +# (Factor) + + error_clear + + ast_push + loc_push + error_push + + call sequence_27 + + error_pop_merge + ok! jump ok_43 + + ast_pop_rewind + loc_pop_rewind + ast_push + loc_push + error_push + + call sequence_40 + + error_pop_merge + ok! jump ok_43 + + ast_pop_rewind + loc_pop_rewind + status_fail + return + +ok_43: + ast_pop_discard + loc_pop_discard + return + +sequence_27: +# x +# '\(' +# (Expression) +# '\)' + + loc_push + error_clear + + error_push + + input_next "t (" + ok! test_char "(" + + error_pop_merge + fail! jump failednoast_29 + ast_push + error_push + + call sym_Expression + + error_pop_merge + fail! jump failed_28 + error_push + + input_next "t )" + ok! test_char ")" + + error_pop_merge + fail! jump failed_28 + + ast_pop_discard + loc_pop_discard + return + +failed_28: + ast_pop_rewind + +failednoast_29: + loc_pop_rewind + return + +sequence_40: +# x +# (Factor) +# * +# x +# (MulOp) +# (Factor) + + ast_push + loc_push + error_clear + + error_push + + call sym_Factor + + error_pop_merge + fail! jump failed_41 + error_push + + call kleene_37 + + error_pop_merge + fail! 
jump failed_41 + + ast_pop_discard + loc_pop_discard + return + +failed_41: + ast_pop_rewind + loc_pop_rewind + return + +kleene_37: +# * +# x +# (MulOp) +# (Factor) + + loc_push + error_push + + call sequence_34 + + error_pop_merge + fail! jump failed_38 + loc_pop_discard + jump kleene_37 + +failed_38: + loc_pop_rewind + status_ok + return + +sequence_34: +# x +# (MulOp) +# (Factor) + + ast_push + loc_push + error_clear + + error_push + + call sym_MulOp + + error_pop_merge + fail! jump failed_35 + error_push + + call sym_Factor + + error_pop_merge + fail! jump failed_35 + + ast_pop_discard + loc_pop_discard + return + +failed_35: + ast_pop_rewind + loc_pop_rewind + return +# +# value Symbol 'Factor' +# + +sym_Factor: +# x +# (Term) +# * +# x +# (AddOp) +# (Term) + + symbol_restore Factor + found! jump found_60 + loc_push + ast_push + + call sequence_57 + + fail! value_clear + ok! value_reduce Factor + symbol_save Factor + error_nonterminal Factor + ast_pop_rewind + loc_pop_discard + +found_60: + ok! ast_value_push + return + +sequence_57: +# x +# (Term) +# * +# x +# (AddOp) +# (Term) + + ast_push + loc_push + error_clear + + error_push + + call sym_Term + + error_pop_merge + fail! jump failed_58 + error_push + + call kleene_54 + + error_pop_merge + fail! jump failed_58 + + ast_pop_discard + loc_pop_discard + return + +failed_58: + ast_pop_rewind + loc_pop_rewind + return + +kleene_54: +# * +# x +# (AddOp) +# (Term) + + loc_push + error_push + + call sequence_51 + + error_pop_merge + fail! jump failed_55 + loc_pop_discard + jump kleene_54 + +failed_55: + loc_pop_rewind + status_ok + return + +sequence_51: +# x +# (AddOp) +# (Term) + + ast_push + loc_push + error_clear + + error_push + + call sym_AddOp + + error_pop_merge + fail! jump failed_52 + error_push + + call sym_Term + + error_pop_merge + fail! 
jump failed_52 + + ast_pop_discard + loc_pop_discard + return + +failed_52: + ast_pop_rewind + loc_pop_rewind + return +# +# value Symbol 'MulOp' +# + +sym_MulOp: +# / +# '*' +# '/' + + symbol_restore MulOp + found! jump found_67 + loc_push + + call choice_65 + + fail! value_clear + ok! value_leaf MulOp + symbol_save MulOp + error_nonterminal MulOp + loc_pop_discard + +found_67: + ok! ast_value_push + return + +choice_65: +# / +# '*' +# '/' + + error_clear + + loc_push + error_push + + input_next "t *" + ok! test_char "*" + + error_pop_merge + ok! jump oknoast_64 + + loc_pop_rewind + loc_push + error_push + + input_next "t /" + ok! test_char "/" + + error_pop_merge + ok! jump oknoast_64 + + loc_pop_rewind + status_fail + return + +oknoast_64: + loc_pop_discard + return +# +# value Symbol 'Number' +# + +sym_Number: +# x +# ? +# (Sign) +# + +# (Digit) + + symbol_restore Number + found! jump found_80 + loc_push + ast_push + + call sequence_77 + + fail! value_clear + ok! value_reduce Number + symbol_save Number + error_nonterminal Number + ast_pop_rewind + loc_pop_discard + +found_80: + ok! ast_value_push + return + +sequence_77: +# x +# ? +# (Sign) +# + +# (Digit) + + ast_push + loc_push + error_clear + + error_push + + call optional_70 + + error_pop_merge + fail! jump failed_78 + error_push + + call poskleene_73 + + error_pop_merge + fail! jump failed_78 + + ast_pop_discard + loc_pop_discard + return + +failed_78: + ast_pop_rewind + loc_pop_rewind + return + +optional_70: +# ? +# (Sign) + + loc_push + error_push + + call sym_Sign + + error_pop_merge + fail! loc_pop_rewind + ok! loc_pop_discard + status_ok + return + +poskleene_73: +# + +# (Digit) + + loc_push + + call sym_Digit + + fail! jump failed_74 + +loop_75: + loc_pop_discard + loc_push + error_push + + call sym_Digit + + error_pop_merge + ok! jump loop_75 + status_ok + +failed_74: + loc_pop_rewind + return +# +# value Symbol 'Sign' +# + +sym_Sign: +# / +# '-' +# '+' + + symbol_restore Sign + found! 
jump found_86 + loc_push + + call choice_5 + + fail! value_clear + ok! value_leaf Sign + symbol_save Sign + error_nonterminal Sign + loc_pop_discard + +found_86: + ok! ast_value_push + return +# +# value Symbol 'Term' +# + +sym_Term: +# (Number) + + symbol_restore Term + found! jump found_89 + loc_push + ast_push + + call sym_Number + + fail! value_clear + ok! value_reduce Term + symbol_save Term + error_nonterminal Term + ast_pop_rewind + loc_pop_discard + +found_89: + ok! ast_value_push + return + +# +# +}] diff --git a/tcllib/modules/pt/include/example/expr_pe.inc b/tcllib/modules/pt/include/example/expr_pe.inc new file mode 100644 index 0000000..236819f --- /dev/null +++ b/tcllib/modules/pt/include/example/expr_pe.inc @@ -0,0 +1,3 @@ +[example { + Expression <- Term (AddOp Term)* +}] diff --git a/tcllib/modules/pt/include/example/expr_pe_serial.inc b/tcllib/modules/pt/include/example/expr_pe_serial.inc new file mode 100644 index 0000000..6c98abd --- /dev/null +++ b/tcllib/modules/pt/include/example/expr_pe_serial.inc @@ -0,0 +1,3 @@ +[example { + {x {n Term} {* {x {n AddOp} {n Term}}}} +}] diff --git a/tcllib/modules/pt/include/example/expr_peg.inc b/tcllib/modules/pt/include/example/expr_peg.inc new file mode 100644 index 0000000..0437c32 --- /dev/null +++ b/tcllib/modules/pt/include/example/expr_peg.inc @@ -0,0 +1,12 @@ +[example { +PEG calculator (Expression) + Digit <- '0'/'1'/'2'/'3'/'4'/'5'/'6'/'7'/'8'/'9' ; + Sign <- '-' / '+' ; + Number <- Sign? Digit+ ; + Expression <- Term (AddOp Term)* ; + MulOp <- '*' / '/' ; + Term <- Factor (MulOp Factor)* ; + AddOp <- '+'/'-' ; + Factor <- '(' Expression ')' / Number ; +END; +}] diff --git a/tcllib/modules/pt/include/example/expr_peg_compact.inc b/tcllib/modules/pt/include/example/expr_peg_compact.inc new file mode 100644 index 0000000..5c93557 --- /dev/null +++ b/tcllib/modules/pt/include/example/expr_peg_compact.inc @@ -0,0 +1,11 @@ +[example { +PEG calculator (Expression) + Sign <- [-+] ; + Number <- Sign? 
<ddigit>+ ; + Expression <- '(' Expression ')' / (Factor (MulOp Factor)*) ; + MulOp <- [*/] ; + Factor <- Term (AddOp Term)* ; + AddOp <- [-+] ; + Term <- Number ; +END; +}] diff --git a/tcllib/modules/pt/include/example/expr_ptgen.inc b/tcllib/modules/pt/include/example/expr_ptgen.inc new file mode 100644 index 0000000..2b7a910 --- /dev/null +++ b/tcllib/modules/pt/include/example/expr_ptgen.inc @@ -0,0 +1,49 @@ +[example { +> tclsh8.5 +% package require pt::pgen +% puts ====\n[pt::pgen peg { + PEG calculator (Expression) + Digit <- '0'/'1'/'2'/'3'/'4'/'5'/'6'/'7'/'8'/'9' ; + Sign <- '-' / '+' ; + Number <- Sign? Digit+ ; + Expression <- '(' Expression ')' / (Factor (MulOp Factor)*) ; + MulOp <- '*' / '/' ; + Factor <- Term (AddOp Term)* ; + AddOp <- '+'/'-' ; + Term <- Number ; + END; + } container -name basic_arithmetic] +==== +snit::type basic_arithmetic { + constructor {} { + install myg using pt::peg::container ${selfns}::G + $myg start {n Expression} + $myg add AddOp Digit Expression Factor MulOp Number Sign Term + $myg modes { + AddOp value + Digit value + Expression value + Factor value + MulOp value + Number value + Sign value + Term value + } + $myg rules { + AddOp {/ {t -} {t +}} + Digit {/ {t 0} {t 1} {t 2} {t 3} {t 4} {t 5} {t 6} {t 7} {t 8} {t 9}} + Expression {/ {x {t \50} {n Expression} {t \51}} {x {n Factor} {* {x {n MulOp} {n Factor}}}}} + Factor {x {n Term} {* {x {n AddOp} {n Term}}}} + MulOp {/ {t *} {t /}} + Number {x {? 
{n Sign}} {+ {n Digit}}} + Sign {/ {t -} {t +}} + Term {n Number} + } + return + } + + component myg + delegate method * to myg +} +% +}] diff --git a/tcllib/modules/pt/include/example/expr_ptgenb.inc b/tcllib/modules/pt/include/example/expr_ptgenb.inc new file mode 100644 index 0000000..f16d64d --- /dev/null +++ b/tcllib/modules/pt/include/example/expr_ptgenb.inc @@ -0,0 +1,11 @@ +[example { +package require Tcl 8.5 +package require fileutil +package require pt::pgen + +lassign $argv name +set grammar [fileutil::cat $name.peg] +set pclass [pt::pgen peg $grammar snit -class $name -file $name.peg -name $name] +fileutil::writeFile $name.tcl $pclass +exit 0 +}] diff --git a/tcllib/modules/pt/include/example/expr_serial.inc b/tcllib/modules/pt/include/example/expr_serial.inc new file mode 100644 index 0000000..91cac81 --- /dev/null +++ b/tcllib/modules/pt/include/example/expr_serial.inc @@ -0,0 +1,15 @@ +[example { +pt::grammar::peg { + rules { + AddOp {is {/ {t -} {t +}} mode value} + Digit {is {/ {t 0} {t 1} {t 2} {t 3} {t 4} {t 5} {t 6} {t 7} {t 8} {t 9}} mode value} + Expression {is {x {n Term} {* {x {n AddOp} {n Term}}}} mode value} + Factor {is {/ {x {t (} {n Expression} {t )}} {n Number}} mode value} + MulOp {is {/ {t *} {t /}} mode value} + Number {is {x {? 
{n Sign}} {+ {n Digit}}} mode value} + Sign {is {/ {t -} {t +}} mode value} + Term {is {x {n Factor} {* {x {n MulOp} {n Factor}}}} mode value} + } + start {n Expression} +} +}] diff --git a/tcllib/modules/pt/include/example/flow.dia b/tcllib/modules/pt/include/example/flow.dia new file mode 100644 index 0000000..b05fbf0 --- /dev/null +++ b/tcllib/modules/pt/include/example/flow.dia @@ -0,0 +1,5 @@ +# -*- tcl -*- tcl.tk//DSL diagram//EN//1.0 + +ellipse "Grammar" ; arrow ; box "PT generate" width [3 cm] ; arrow +diamond "parser code" height [2 cm] ; down ; arrow ; box "tclsh" ; right +arrow ; ellipse "AST" ; arrow <- from [last box w] left ; ellipse "Text" diff --git a/tcllib/modules/pt/include/example/flow.png b/tcllib/modules/pt/include/example/flow.png Binary files differnew file mode 100644 index 0000000..10a5448 --- /dev/null +++ b/tcllib/modules/pt/include/example/flow.png diff --git a/tcllib/modules/pt/include/example/full.inc b/tcllib/modules/pt/include/example/full.inc new file mode 100644 index 0000000..8c1698a --- /dev/null +++ b/tcllib/modules/pt/include/example/full.inc @@ -0,0 +1,54 @@ + +In this section we are working through a complete example, starting with a PEG +grammar and ending with running the parser generated from it over some +input, following the outline shown in the figure below: + +[para][image flow][para] + +Our grammar, assumed to be stored in the file [file calculator.peg] +is + +[include expr_peg.inc] + +From this we create a snit-based parser + +[include full_[vset MODE].inc] + +which leaves us with the parser package and class written to the file +[file calculator.tcl]. 
+ +Assuming that this package is then properly installed in a place where +Tcl can find it we can now use this class via a script like + +[include parser_use.inc] + +where the abstract syntax tree stored in the variable will look like + +[para][include expr_ast.inc][para] + +assuming that the input file and channel contained the text + +[example { 120+5 }] + +A more graphical representation of the tree would be + +[para][image expr_ast][para] + +Regardless, at this point it is the user's responsibility to work with +the tree to reach whatever goal she desires. I.e. analyze it, +transform it, etc. The package [package pt::ast] should be of help +here, providing commands to walk such AST structures in various ways. + +[para] + +One important thing to note is that the parsers used here return a +data structure representing the structure of the input per the grammar +underlying the parser. There are [emph no] callbacks during the +parsing process, i.e. no [term {parsing actions}], as most other +parsers will have. + +[para] + +Going back to the last snippet of code, the execution of the parser +for some input, note how the parser instance follows the specified +[term {Parser API}]. 
diff --git a/tcllib/modules/pt/include/example/full_app.inc b/tcllib/modules/pt/include/example/full_app.inc new file mode 100644 index 0000000..3f0fef6 --- /dev/null +++ b/tcllib/modules/pt/include/example/full_app.inc @@ -0,0 +1,5 @@ +via + +[example { +pt generate snit calculator.tcl -class calculator -name calculator peg calculator.peg +}] diff --git a/tcllib/modules/pt/include/example/full_pkg.inc b/tcllib/modules/pt/include/example/full_pkg.inc new file mode 100644 index 0000000..a0ad70b --- /dev/null +++ b/tcllib/modules/pt/include/example/full_pkg.inc @@ -0,0 +1,7 @@ +using the script [file gen] + +[include expr_ptgenb.inc] + +calling it like + +[example { tclsh8.5 gen calculator }] diff --git a/tcllib/modules/pt/include/example/parser_use.inc b/tcllib/modules/pt/include/example/parser_use.inc new file mode 100644 index 0000000..266740a --- /dev/null +++ b/tcllib/modules/pt/include/example/parser_use.inc @@ -0,0 +1,13 @@ +[example { + package require calculator + + lassign $argv input + set channel [open $input r] + + set parser [calculator] + set ast [$parser parse $channel] + $parser destroy + close $channel + + ... now process the returned abstract syntax tree ... +}] diff --git a/tcllib/modules/pt/include/export/config/container.inc b/tcllib/modules/pt/include/export/config/container.inc new file mode 100644 index 0000000..13b89b1 --- /dev/null +++ b/tcllib/modules/pt/include/export/config/container.inc @@ -0,0 +1,78 @@ + +[section Configuration] + +The CONTAINER export plugin recognizes the following configuration +variables and changes its behaviour as they specify. + +[list_begin arguments] +[arg_def enum mode] + +The value of this configuration variable controls which methods of +[package pt::peg] instances the plugin will use to specify the +grammar. 
There are two legal values + +[list_begin definitions] +[def [const bulk]] + +In this mode the methods [method start], [method add], [method modes], +and [method rules] are used to specify the grammar in a bulk manner, +i.e. as a set of nonterminal symbols, and two dictionaries mapping +from the symbols to their semantic modes and parsing expressions. + +[para] + +This mode is the default. + +[def [const incremental]] + +In this mode the methods [method start], [method add], [method mode], +and [method rule] are used to specify the grammar piecemeal, with each +nonterminal having its own block of defining commands. + +[list_end] + +[arg_def string template] + +If this configuration variable is set it is assumed to contain a +string into which to put the generated code and other configuration +data. The various locations are expected to be specified with the +following placeholders: + +[list_begin definitions] + +[def [const @user@]] +To be replaced with the value of the configuration variable [option user]. + +[def [const @format@]] +To be replaced with the constant [const CONTAINER]. + +[def [const @file@]] +To be replaced with the value of the configuration variable [option file]. + +[def [const @name@]] +To be replaced with the value of the configuration variable [option name]. + +[def [const @mode@]] +To be replaced with the value of the configuration variable [option mode]. + +[def [const @code@]] +To be replaced with the generated code. + +[list_end] + +[para] + +If this configuration variable is not set, or empty, then the plugin +falls back to a standard template, which is defined as "[const @code@]". + +[list_end] + +[emph Note] that this plugin may ignore the standard configuration +variables [var user], [var format], [var file], and their values, +depending on the chosen template. + +[para] + +The content of the standard configuration variable [var name], if set, +is used as name of the grammar in the output. 
Otherwise the plugin +falls back to the default name [const a_pe_grammar]. diff --git a/tcllib/modules/pt/include/export/config/cparam.inc b/tcllib/modules/pt/include/export/config/cparam.inc new file mode 100644 index 0000000..afe817f --- /dev/null +++ b/tcllib/modules/pt/include/export/config/cparam.inc @@ -0,0 +1,80 @@ +[section Configuration] + +The C/PARAM export plugin recognizes the following configuration +variables and changes its behaviour as they specify. + +[list_begin arguments] + +[arg_def string template] + +If this configuration variable is set it is assumed to contain a +string into which to put the generated code and other configuration +data. The various locations are expected to be specified with the +following placeholders: + +[list_begin definitions] + +[def [const @user@]] +To be replaced with the value of the configuration variable [option user]. + +[def [const @format@]] +To be replaced with the the constant [const C/PARAM]. + +[def [const @file@]] +To be replaced with the value of the configuration variable [option file]. + +[def [const @name@]] +To be replaced with the value of the configuration variable [option name]. + +[def [const @code@]] +To be replaced with the generated C code. + +[list_end] + +The following configuration variables are special, in that they will +occur within the generated code, and are replaced there as well. + +[list_begin definitions] + +[def [const @statedecl@]] +To be replaced with the value of the configuration variable [option state-decl]. + +[def [const @stateref@]] +To be replaced with the value of the configuration variable [option state-ref]. + +[def [const @strings@]] +To be replaced with the value of the configuration variable [option string-varname]. + +[def [const @self@]] +To be replaced with the value of the configuration variable [option self-command]. + +[def [const @def@]] +To be replaced with the value of the configuration variable [option fun-qualifier]. 
+ +[def [const @ns@]] +To be replaced with the value of the configuration variable [option namespace]. + +[def [const @main@]] +To be replaced with the value of the configuration variable [option main]. + +[def [const @prelude@]] +To be replaced with the value of the configuration variable [option prelude]. + +[list_end] + +[para] + +If this configuration variable is not set, or empty, then the plugin +falls back to a standard template, which is defined as "[const @code@]". + +[list_end] + +[emph Note] that this plugin may ignore the standard configuration +variables [var user], [var format], [var file], and their values, +depending on the chosen template. + +[para] + +The content of the standard configuration variable [var name], if set, +is used as name of the grammar in the output. Otherwise the plugin +falls back to the default name [const a_pe_grammar]. diff --git a/tcllib/modules/pt/include/export/config/json.inc b/tcllib/modules/pt/include/export/config/json.inc new file mode 100644 index 0000000..0b0053c --- /dev/null +++ b/tcllib/modules/pt/include/export/config/json.inc @@ -0,0 +1,36 @@ +[section Configuration] + +The JSON export plugin recognizes the following configuration +variables and changes its behaviour as they specify. + +[list_begin arguments] +[arg_def boolean indented] + +If this flag is set the plugin will break the generated JSON code +across lines and indent it according to its inner structure, with each +key of a dictionary on a separate line. + +[para] + +If this flag is not set (the default), the whole JSON object will be +written on a single line, with minimum spacing between all elements. + + +[arg_def boolean aligned] + +If this flag is set the generator ensures that the values for the keys +in a dictionary are vertically aligned with each other, for a nice +table effect. To make this work this also implies that [var indented] +is set. 
+ +[para] + +If this flag is not set (the default), the output is formatted as per +the value of [var indented], without trying to align the values for +dictionary keys. + +[list_end] + +[emph Note] that this plugin ignores the standard configuration +variables [var user], [var format], [var file], and [var name], and +their values. diff --git a/tcllib/modules/pt/include/export/config/param.inc b/tcllib/modules/pt/include/export/config/param.inc new file mode 100644 index 0000000..2c3d911 --- /dev/null +++ b/tcllib/modules/pt/include/export/config/param.inc @@ -0,0 +1,49 @@ +[section Configuration] + +The PARAM export plugin recognizes the following configuration +variables and changes its behaviour as they specify. + +[list_begin arguments] + +[arg_def string template] + +If this configuration variable is set it is assumed to contain a +string into which to put the generated text and other configuration +data. The various locations are expected to be specified with the +following placeholders: + +[list_begin definitions] + +[def [const @user@]] +To be replaced with the value of the configuration variable [option user]. + +[def [const @format@]] +To be replaced with the the constant [const PARAM]. + +[def [const @file@]] +To be replaced with the value of the configuration variable [option file]. + +[def [const @name@]] +To be replaced with the value of the configuration variable [option name]. + +[def [const @code@]] +To be replaced with the generated text. + +[list_end] + +[para] + +If this configuration variable is not set, or empty, then the plugin +falls back to a standard template, which is defined as "[const @code@]". + +[list_end] + +[emph Note] that this plugin may ignore the standard configuration +variables [var user], [var format], [var file], and their values, +depending on the chosen template. + +[para] + +The content of the standard configuration variable [var name], if set, +is used as name of the grammar in the output. 
Otherwise the plugin +falls back to the default name [const a_pe_grammar]. diff --git a/tcllib/modules/pt/include/export/config/peg.inc b/tcllib/modules/pt/include/export/config/peg.inc new file mode 100644 index 0000000..9f50e5f --- /dev/null +++ b/tcllib/modules/pt/include/export/config/peg.inc @@ -0,0 +1,49 @@ +[section Configuration] + +The PEG export plugin recognizes the following configuration +variables and changes its behaviour as they specify. + +[list_begin arguments] + +[arg_def string template] + +If this configuration variable is set it is assumed to contain a +string into which to put the generated text and other configuration +data. The various locations are expected to be specified with the +following placeholders: + +[list_begin definitions] + +[def [const @user@]] +To be replaced with the value of the configuration variable [option user]. + +[def [const @format@]] +To be replaced with the the constant [const PEG]. + +[def [const @file@]] +To be replaced with the value of the configuration variable [option file]. + +[def [const @name@]] +To be replaced with the value of the configuration variable [option name]. + +[def [const @code@]] +To be replaced with the generated text. + +[list_end] + +[para] + +If this configuration variable is not set, or empty, then the plugin +falls back to a standard template, which is defined as "[const @code@]". + +[list_end] + +[emph Note] that this plugin may ignore the standard configuration +variables [var user], [var format], [var file], and their values, +depending on the chosen template. + +[para] + +The content of the standard configuration variable [var name], if set, +is used as name of the grammar in the output. Otherwise the plugin +falls back to the default name [const a_pe_grammar]. 
diff --git a/tcllib/modules/pt/include/export/config/tclparam.inc b/tcllib/modules/pt/include/export/config/tclparam.inc new file mode 100644 index 0000000..cec07e8 --- /dev/null +++ b/tcllib/modules/pt/include/export/config/tclparam.inc @@ -0,0 +1,74 @@ +[section Configuration] + +The Tcl/PARAM export plugin recognizes the following configuration +variables and changes its behaviour as they specify. + +[list_begin arguments] + +[arg_def string template] + +If this configuration variable is set it is assumed to contain a +string into which to put the generated code and other configuration +data. The various locations are expected to be specified with the +following placeholders: + +[list_begin definitions] + +[def [const @user@]] +To be replaced with the value of the configuration variable [option user]. + +[def [const @format@]] +To be replaced with the the constant [const Tcl/PARAM]. + +[def [const @file@]] +To be replaced with the value of the configuration variable [option file]. + +[def [const @name@]] +To be replaced with the value of the configuration variable [option name]. + +[def [const @code@]] +To be replaced with the generated Tcl code. + +[list_end] + +The following configuration variables are special, in that they will +occur within the generated code, and are replaced there as well. + +[list_begin definitions] + +[def [const @runtime@]] +To be replaced with the value of the configuration variable [option runtime-command]. + +[def [const @self@]] +To be replaced with the value of the configuration variable [option self-command]. + +[def [const @def@]] +To be replaced with the value of the configuration variable [option proc-command]. + +[def [const @ns@]] +To be replaced with the value of the configuration variable [option namespace]. + +[def [const @main@]] +To be replaced with the value of the configuration variable [option main]. + +[def [const @prelude@]] +To be replaced with the value of the configuration variable [option prelude]. 
+ +[list_end] + +[para] + +If this configuration variable is not set, or empty, then the plugin +falls back to a standard template, which is defined as "[const @code@]". + +[list_end] + +[emph Note] that this plugin may ignore the standard configuration +variables [var user], [var format], [var file], and their values, +depending on the chosen template. + +[para] + +The content of the standard configuration variable [var name], if set, +is used as name of the grammar in the output. Otherwise the plugin +falls back to the default name [const a_pe_grammar]. diff --git a/tcllib/modules/pt/include/export/config/to_container.inc b/tcllib/modules/pt/include/export/config/to_container.inc new file mode 100644 index 0000000..d6a32a6 --- /dev/null +++ b/tcllib/modules/pt/include/export/config/to_container.inc @@ -0,0 +1,7 @@ + +[section "[vset TPREFIX]Options"] + +The converter to the CONTAINER format recognizes the following options +and changes its behaviour as they specify. + +[include ../../format/options_container.inc] diff --git a/tcllib/modules/pt/include/export/config/to_cparam.inc b/tcllib/modules/pt/include/export/config/to_cparam.inc new file mode 100644 index 0000000..7955732 --- /dev/null +++ b/tcllib/modules/pt/include/export/config/to_cparam.inc @@ -0,0 +1,25 @@ + +[section Options] + +The converter to C code recognizes the following configuration +variables and changes its behaviour as they specify. + +[include ../../format/options_cparam_rawc.inc] + +While the high parameterizability of this converter, as shown by the +multitude of options it supports, is an advantage to the advanced +user, allowing her to customize the output of the converter as needed, +a novice user will likely not see the forest for the trees. + +[para] + +To help these latter users an adjunct package is provided, containing +a canned configuration which will generate immediately useful full +parsers. 
It is + +[list_begin definitions] +[def [package pt::cparam::configuration::critcl]] + +Generated parsers are embedded into a [package Critcl]-based framework. + +[list_end] diff --git a/tcllib/modules/pt/include/export/config/to_json.inc b/tcllib/modules/pt/include/export/config/to_json.inc new file mode 100644 index 0000000..dce6a17 --- /dev/null +++ b/tcllib/modules/pt/include/export/config/to_json.inc @@ -0,0 +1,8 @@ + +[section "[vset TPREFIX]Options"] + +The converter to the JSON grammar exchange format recognizes the +following configuration variables and changes its behaviour as they +specify. + +[include ../../format/options_json.inc] diff --git a/tcllib/modules/pt/include/export/config/to_param.inc b/tcllib/modules/pt/include/export/config/to_param.inc new file mode 100644 index 0000000..091bed2 --- /dev/null +++ b/tcllib/modules/pt/include/export/config/to_param.inc @@ -0,0 +1,51 @@ + +[section "[vset TPREFIX]Options"] + +The converter to PARAM markup recognizes the following configuration +variables and changes its behaviour as they specify. + +[list_begin options] + +[opt_def -template string] + +The value of this configuration variable is a string into which to put +the generated text and the other configuration settings. The various +locations for user-data are expected to be specified with the +placeholders listed below. The default value is "[const @code@]". + +[list_begin definitions] + +[def [const @user@]] +To be replaced with the value of the configuration variable [option -user]. + +[def [const @format@]] +To be replaced with the the constant [const PARAM]. + +[def [const @file@]] +To be replaced with the value of the configuration variable [option -file]. + +[def [const @name@]] +To be replaced with the value of the configuration variable [option -name]. + +[def [const @code@]] +To be replaced with the generated text. 
+ +[list_end] + +[opt_def -name string] + +The value of this configuration variable is the name of the grammar +for which the conversion is run. The default value is [const a_pe_grammar]. + +[opt_def -user string] + +The value of this configuration variable is the name of the user for +which the conversion is run. The default value is [const unknown]. + +[opt_def -file string] + +The value of this configuration variable is the name of the file or +other entity from which the grammar came, for which the conversion is +run. The default value is [const unknown]. + +[list_end] diff --git a/tcllib/modules/pt/include/export/config/to_peg.inc b/tcllib/modules/pt/include/export/config/to_peg.inc new file mode 100644 index 0000000..d99609a --- /dev/null +++ b/tcllib/modules/pt/include/export/config/to_peg.inc @@ -0,0 +1,7 @@ + +[section "[vset TPREFIX]Options"] + +The converter to the PEG language recognizes the following options and +changes its behaviour as they specify. + +[include ../../format/options_peg.inc] diff --git a/tcllib/modules/pt/include/export/config/to_tclparam.inc b/tcllib/modules/pt/include/export/config/to_tclparam.inc new file mode 100644 index 0000000..b09c5b2 --- /dev/null +++ b/tcllib/modules/pt/include/export/config/to_tclparam.inc @@ -0,0 +1,156 @@ + +[section Options] + +The converter to Tcl/PARAM markup recognizes the following +configuration variables and changes its behaviour as they specify. + +[list_begin options] + +[opt_def -template string] + +The value of this configuration variable is a string into which to put +the generated text and the other configuration settings. The various +locations for user-data are expected to be specified with the +placeholders listed below. The default value is "[const @code@]". + +[list_begin definitions] + +[def [const @user@]] +To be replaced with the value of the configuration variable [option -user]. + +[def [const @format@]] +To be replaced with the the constant [const Tcl/PARAM]. 
+ +[def [const @file@]] +To be replaced with the value of the configuration variable [option -file]. + +[def [const @name@]] +To be replaced with the value of the configuration variable [option -name]. + +[def [const @code@]] +To be replaced with the generated Tcl code. + +[list_end] + +The following configuration variables are special, in that they will +occur within the generated code, and are replaced there as well. + +[list_begin definitions] + +[def [const @runtime@]] +To be replaced with the value of the configuration variable [option runtime-command]. + +[def [const @self@]] +To be replaced with the value of the configuration variable [option self-command]. + +[def [const @def@]] +To be replaced with the value of the configuration variable [option proc-command]. + +[def [const @ns@]] +To be replaced with the value of the configuration variable [option namespace]. + +[def [const @main@]] +To be replaced with the value of the configuration variable [option main]. + +[def [const @prelude@]] +To be replaced with the value of the configuration variable [option prelude]. + +[list_end] + +[opt_def -name string] + +The value of this configuration variable is the name of the grammar +for which the conversion is run. The default value is [const a_pe_grammar]. + +[opt_def -user string] + +The value of this configuration variable is the name of the user for +which the conversion is run. The default value is [const unknown]. + +[opt_def -file string] + +The value of this configuration variable is the name of the file or +other entity from which the grammar came, for which the conversion is +run. The default value is [const unknown]. + + +[opt_def -runtime-command string] + +A Tcl string representing the Tcl command or reference to it used to +call PARAM instruction from parser procedures, per the chosen +framework (template). + +The default value is the empty string. 
+ + +[opt_def -self-command string] + +A Tcl string representing the Tcl command or reference to it used to +call the parser procedures (methods ...) from another parser +procedure, per the chosen framework (template). + +The default value is the empty string. + + +[opt_def -proc-command string] + +The name of the Tcl command used to define procedures (methods ...), +per the chosen framework (template). +The default value is [const proc]. + + +[opt_def -namespace string] + +The name of the namespace the parser procedures (methods, ...) shall +reside in, including the trailing '::' needed to separate it from the +actual procedure name. + +The default value is [const ::]. + + +[opt_def -main string] + +The name of the main procedure (method, ...) to be called by the +chosen framework (template) to start parsing input. + +The default value is [const __main]. + + +[opt_def -prelude string] + +A snippet of code to be inserted at the head of each generated parsing +command. + +The default value is the empty string. + + +[opt_def -indent integer] + +The number of characters to indent each line of the generated code by. + +The default value is [const 0]. + +[list_end] + +While the high parameterizability of this converter, as shown by the +multitude of options it supports, is an advantage to the advanced +user, allowing her to customize the output of the converter as needed, +a novice user will likely not see the forest for the trees. + +[para] + +To help these latter users two adjunct packages are provided, each +containing a canned configuration which will generate immediately +useful full parsers. These are + +[list_begin definitions] +[def [package pt::tclparam::configuration::snit]] + +Generated parsers are classes based on the [package snit] package, +i.e. snit::type's. + +[def [package pt::tclparam::configuration::tcloo]] + +Generated parsers are classes based on the [package OO] package. 
+ +[list_end] diff --git a/tcllib/modules/pt/include/export/format/container.inc b/tcllib/modules/pt/include/export/format/container.inc new file mode 100644 index 0000000..5b2cc68 --- /dev/null +++ b/tcllib/modules/pt/include/export/format/container.inc @@ -0,0 +1,3 @@ +[require pt::peg] +[require text::write] +[require char] diff --git a/tcllib/modules/pt/include/export/format/cparam.inc b/tcllib/modules/pt/include/export/format/cparam.inc new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/tcllib/modules/pt/include/export/format/cparam.inc diff --git a/tcllib/modules/pt/include/export/format/json.inc b/tcllib/modules/pt/include/export/format/json.inc new file mode 100644 index 0000000..f868e55 --- /dev/null +++ b/tcllib/modules/pt/include/export/format/json.inc @@ -0,0 +1,2 @@ +[require pt::peg] +[require json::write] diff --git a/tcllib/modules/pt/include/export/format/null.inc b/tcllib/modules/pt/include/export/format/null.inc new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/tcllib/modules/pt/include/export/format/null.inc diff --git a/tcllib/modules/pt/include/export/format/param.inc b/tcllib/modules/pt/include/export/format/param.inc new file mode 100644 index 0000000..d9ff38b --- /dev/null +++ b/tcllib/modules/pt/include/export/format/param.inc @@ -0,0 +1,2 @@ +[require pt::peg] +[require pt::pe] diff --git a/tcllib/modules/pt/include/export/format/peg.inc b/tcllib/modules/pt/include/export/format/peg.inc new file mode 100644 index 0000000..0b7a89f --- /dev/null +++ b/tcllib/modules/pt/include/export/format/peg.inc @@ -0,0 +1,3 @@ +[require pt::peg] +[require pt::pe] +[require text::write] diff --git a/tcllib/modules/pt/include/export/format/tclparam.inc b/tcllib/modules/pt/include/export/format/tclparam.inc new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/tcllib/modules/pt/include/export/format/tclparam.inc diff --git a/tcllib/modules/pt/include/export/plugin.inc b/tcllib/modules/pt/include/export/plugin.inc new 
file mode 100644 index 0000000..a7676d7 --- /dev/null +++ b/tcllib/modules/pt/include/export/plugin.inc @@ -0,0 +1,71 @@ +[comment {-*- tcl -*- --- !doctools ---}] +[manpage_begin pt::peg::export::[vset PACKAGE] n [vset VERSION]] +[include ../module.inc] +[include ../keywords_export.inc] +[titledesc "PEG Export Plugin. Write [vset NAME] format"] +[require pt::peg::export::[vset PACKAGE] [opt [vset VERSION]]] +[require pt::peg::to::[vset PACKAGE]] +[description] +[include ../ref_intro.inc] + +This package implements the parsing expression grammar export plugin +for the generation of [vset NAME] markup. + +[para] + +It resides in the Export section of the Core Layer of Parser Tools and +is intended to be used by [package pt::peg::export], the export +manager, sitting between it and the corresponding core conversion +functionality provided by [package pt::peg::to::[vset PACKAGE]]. + +[para][image arch_core_eplugins][para] + +[para] + +While the direct use of this package with a regular interpreter is +possible, this is strongly disrecommended and requires a number of +contortions to provide the expected environment. + +The proper way to use this functionality depends on the situation: + +[list_begin enumerated] +[enum] + +In an untrusted environment the proper access is through the package +[package pt::peg::export] and the export manager objects it +provides. + +[enum] + +In a trusted environment however simply use the package +[package pt::peg::to::[vset PACKAGE]] and access the core +conversion functionality directly. + +[list_end] + + +[section API] + +The API provided by this package satisfies the specification of the +Plugin API found in the [manpage {Parser Tools Export API}] +specification. 
+ +[list_begin definitions] + +[call [cmd export] [arg serial] [arg configuration]] + +This command takes the canonical serialization of a parsing expression +grammar, as specified in section [sectref {PEG serialization format}], +and contained in [arg serial], the [arg configuration], a dictionary, +and generates [vset NAME] markup encoding the grammar. + +The created string is then returned as the result of the command. + +[list_end] + +[include config/[vset CONFIG].inc] +[include ../format/[vset PACKAGE].inc] +[include ../serial/pegrammar.inc] +[include ../serial/pexpression.inc] +[include ../feedback.inc] +[manpage_end] diff --git a/tcllib/modules/pt/include/export/to.inc b/tcllib/modules/pt/include/export/to.inc new file mode 100644 index 0000000..bdc4d54 --- /dev/null +++ b/tcllib/modules/pt/include/export/to.inc @@ -0,0 +1,75 @@ +[comment {-*- tcl -*- --- !doctools ---}] +[manpage_begin pt::peg::to::[vset PACKAGE] n [vset VERSION]] +[include ../module.inc] +[include ../keywords_convert.inc] +[titledesc "PEG Conversion. Write [vset NAME] format"] +[require pt::peg::to::[vset PACKAGE] [opt [vset VERSION]]] +[include format/[vset REQUIRE].inc] +[description] +[include ../ref_intro.inc] + +This package implements the converter from parsing expression grammars +to [vset NAME] markup. + +[para] + +It resides in the Export section of the Core Layer of Parser Tools, +and can be used either directly with the other packages of this layer, +or indirectly through the export manager provided by +[package pt::peg::export]. The latter is intended for use in untrusted +environments and done through the corresponding export plugin +[package pt::peg::export::[vset PACKAGE]] sitting between converter +and export manager. + +[para][image arch_core_eplugins][para] + + +[section API] + +The API provided by this package satisfies the specification of the +Converter API found in the [manpage {Parser Tools Export API}] +specification. 
+ + +[list_begin definitions] + +[call [cmd pt::peg::to::[vset PACKAGE]] [method reset]] + +This command resets the configuration of the package to its default +settings. + +[call [cmd pt::peg::to::[vset PACKAGE]] [method configure]] + +This command returns a dictionary containing the current configuration +of the package. + +[call [cmd pt::peg::to::[vset PACKAGE]] [method configure] [arg option]] + +This command returns the current value of the specified configuration +[arg option] of the package. For the set of legal options, please read +the section [sectref Options]. + +[call [cmd pt::peg::to::[vset PACKAGE]] [method configure] [arg option] [arg value]...] + +This command sets the given configuration [arg option]s of the +package, to the specified [arg value]s. For the set of legal options, +please read the section [sectref Options]. + +[call [cmd pt::peg::to::[vset PACKAGE]] [method convert] [arg serial]] + +This command takes the canonical serialization of a parsing expression +grammar, as specified in section [sectref {PEG serialization format}], +and contained in [arg serial], and generates [vset NAME] markup +encoding the grammar, per the current package configuration. + +The created string is then returned as the result of the command. 
+ +[list_end] + +[vset TPREFIX {}] +[include config/to_[vset CONFIG].inc] +[include ../format/[vset PACKAGE].inc] +[include ../serial/pegrammar.inc] +[include ../serial/pexpression.inc] +[include ../feedback.inc] +[manpage_end] diff --git a/tcllib/modules/pt/include/feedback.inc b/tcllib/modules/pt/include/feedback.inc new file mode 100644 index 0000000..8fc01a0 --- /dev/null +++ b/tcllib/modules/pt/include/feedback.inc @@ -0,0 +1,3 @@ +[comment {--- Standard trailer for all manpages in this module --}] +[vset CATEGORY pt] +[include ../../doctools2base/include/feedback.inc] diff --git a/tcllib/modules/pt/include/format/container.inc b/tcllib/modules/pt/include/format/container.inc new file mode 100644 index 0000000..7cb464d --- /dev/null +++ b/tcllib/modules/pt/include/format/container.inc @@ -0,0 +1,21 @@ + +[section {Grammar Container}] + +[include whatis_container.inc] +[para] + +It has no direct formal specification beyond what was said above. + +[subsection Example] + +Assuming the following PEG for simple mathematical expressions + +[para] +[include ../example/expr_peg.inc] +[para] + +one possible CONTAINER serialization for it is + +[para] +[include ../example/expr_container.inc] +[para] diff --git a/tcllib/modules/pt/include/format/cparam.inc b/tcllib/modules/pt/include/format/cparam.inc new file mode 100644 index 0000000..703bd81 --- /dev/null +++ b/tcllib/modules/pt/include/format/cparam.inc @@ -0,0 +1,38 @@ + +[section {C/PARAM code representation of parsing expression grammars}] + +[include whatis_cparam_rawc.inc] +[para] + +The bulk of such a framework has to be specified through the option +[option -template]. 
The additional options + +[list_begin options] +[opt_def -fun-qualifier string] +[opt_def -main string] +[opt_def -namespace string] +[opt_def -prelude string] +[opt_def -self-command string] +[opt_def -state-decl string] +[opt_def -state-ref string] +[opt_def -string-varname string] +[list_end] + +provide code snippets which help to glue framework and generated code +together. Their placeholders are in the [emph generated] code. + +Further the options + +[list_begin options] +[opt_def -indent integer] +[opt_def -comments boolean] +[list_end] + +allow for the customization of the code indent (default none), and +whether to generate comments showing the parsing expressions a +function is for (default on). + +[subsection Example] + +We are forgoing an example of this representation, with apologies. +It would be way too large for this document. diff --git a/tcllib/modules/pt/include/format/json.inc b/tcllib/modules/pt/include/format/json.inc new file mode 100644 index 0000000..468d889 --- /dev/null +++ b/tcllib/modules/pt/include/format/json.inc @@ -0,0 +1,3 @@ + +[section {JSON Grammar Exchange Format}] +[include json_core.inc] diff --git a/tcllib/modules/pt/include/format/json_core.inc b/tcllib/modules/pt/include/format/json_core.inc new file mode 100644 index 0000000..b19c39b --- /dev/null +++ b/tcllib/modules/pt/include/format/json_core.inc @@ -0,0 +1,103 @@ + +[include whatis_json.inc] +[para] + +It is formally specified by the rules below: + +[list_begin enumerated][comment {-- json points --}] +[enum] +The JSON of any PEG is a JSON object. + +[enum] +This object holds a single key, [const pt::grammar::peg], and its +value. This value holds the contents of the grammar. + +[enum] +The contents of the grammar are a JSON object holding the set of +nonterminal symbols and the starting expression. 
The relevant keys and +their values are + +[list_begin definitions][comment {-- grammar keywords --}] +[def [const rules]] + +The value is a JSON object whose keys are the names of the nonterminal +symbols known to the grammar. + +[list_begin enumerated][comment {-- nonterminals --}] +[enum] +Each nonterminal symbol may occur only once. + +[enum] +The empty string is not a legal nonterminal symbol. + +[enum] +The value for each symbol is a JSON object itself. The relevant +keys and their values in this dictionary are + +[list_begin definitions][comment {-- nonterminal keywords --}] +[def [const is]] + +The value is a JSON string holding the Tcl serialization of the +parsing expression describing the symbol's sentential structure, as +specified in the section [sectref {PE serialization format}]. + +[comment { + This part we could try to expand further into a json data structure +(array of (objects of) arrays ?) +}] + +[def [const mode]] + +The value is a JSON string holding one of three values specifying how +a parser should handle the semantic value produced by the symbol. + +[include ../modes.inc] +[list_end][comment {-- nonterminal keywords --}] +[list_end][comment {-- nonterminals --}] + +[def [const start]] + +The value is a JSON string holding the Tcl serialization of the start +parsing expression of the grammar, as specified in the section +[sectref {PE serialization format}]. + +[list_end][comment {-- grammar keywords --}] + +[enum] +The terminal symbols of the grammar are specified implicitly as the +set of all terminal symbols used in the start expression and on the +RHS of the grammar rules. + +[list_end][comment {-- json points --}] + +[para] + +As an aside to the advanced reader, this is pretty much the same as +the Tcl serialization of PE grammars, as specified in section +[sectref {PEG serialization format}], except that the Tcl dictionaries +and lists of that format are mapped to JSON objects and arrays. 
Only +the parsing expressions themselves are not translated further, but +kept as JSON strings containing a nested Tcl list, and there is no +concept of canonicity for the JSON either. + +[subsection Example] + +Assuming the following PEG for simple mathematical expressions + +[para] +[include ../example/expr_peg.inc] +[para] + +a JSON serialization for it is + +[para] +[include ../example/expr_json.inc] +[para] + +and a Tcl serialization of the same is + +[para] +[include ../example/expr_serial.inc] +[para] + +The similarity of the latter to the JSON should be quite obvious. diff --git a/tcllib/modules/pt/include/format/options_container.inc b/tcllib/modules/pt/include/format/options_container.inc new file mode 100644 index 0000000..4ebff51 --- /dev/null +++ b/tcllib/modules/pt/include/format/options_container.inc @@ -0,0 +1,60 @@ +[list_begin options] +[include options_std.inc] + +[opt_def -mode [const bulk]|[const incremental]] + +The value of this option controls which methods of + +[package pt::peg::container] instances are used to specify the +grammar, i.e. preload it into the container. There are two legal +values, as listed below. The default is [const bulk]. + +[list_begin definitions] +[def [const bulk]] + +In this mode the methods [method start], [method add], [method modes], +and [method rules] are used to specify the grammar in a bulk manner, +i.e. as a set of nonterminal symbols, and two dictionaries mapping +from the symbols to their semantic modes and parsing expressions. + +[para] + +This mode is the default. + +[def [const incremental]] + +In this mode the methods [method start], [method add], [method mode], +and [method rule] are used to specify the grammar piecemeal, with each +nonterminal having its own block of defining commands. + +[list_end] + +[opt_def -template string] + +The value of this option is a string into which to put the generated +code and the other configuration settings.
The various locations for +user-data are expected to be specified with the placeholders listed +below. The default value is "[const @code@]". + +[list_begin definitions] + +[def [const @user@]] +To be replaced with the value of the option [option -user]. + +[def [const @format@]] +To be replaced with the constant [const CONTAINER]. + +[def [const @file@]] +To be replaced with the value of the option [option -file]. + +[def [const @name@]] +To be replaced with the value of the option [option -name]. + +[def [const @mode@]] +To be replaced with the value of the option [option -mode]. + +[def [const @code@]] +To be replaced with the generated code. + +[list_end] +[list_end] diff --git a/tcllib/modules/pt/include/format/options_cparam_critcl.inc b/tcllib/modules/pt/include/format/options_cparam_critcl.inc new file mode 100644 index 0000000..30b5d66 --- /dev/null +++ b/tcllib/modules/pt/include/format/options_cparam_critcl.inc @@ -0,0 +1,34 @@ + +[list_begin options] +[include options_std.inc] + +[comment {= = == === ===== ======== =============}] +[opt_def -class string] + +The value of this option is the name of the class to generate, without +leading colons. + +The default value is [const CLASS]. + +[para] + +For a simple value [var X] without colons, like CLASS, the parser +command will be [var X]::[var X]. Whereas for a namespaced value +[var X::Y] the parser command will be [var X::Y]. + +[comment {= = == === ===== ======== =============}] +[opt_def -package string] + +The value of this option is the name of the package to generate. + +The default value is [const PACKAGE]. + +[comment {= = == === ===== ======== =============}] +[opt_def -version string] + +The value of this option is the version of the package to generate. + +The default value is [const 1].
+ +[comment {= = == === ===== ======== =============}] +[list_end] diff --git a/tcllib/modules/pt/include/format/options_cparam_rawc.inc b/tcllib/modules/pt/include/format/options_cparam_rawc.inc new file mode 100644 index 0000000..01021f9 --- /dev/null +++ b/tcllib/modules/pt/include/format/options_cparam_rawc.inc @@ -0,0 +1,142 @@ + +[list_begin options] +[include options_std.inc] + +[opt_def -template string] + +The value of this option is a string into which to put +the generated text and the other configuration settings. The various +locations for user-data are expected to be specified with the +placeholders listed below. The default value is "[const @code@]". + +[list_begin definitions] + +[def [const @user@]] +To be replaced with the value of the option [option -user]. + +[def [const @format@]] +To be replaced with the constant [const C/PARAM]. + +[def [const @file@]] +To be replaced with the value of the option [option -file]. + +[def [const @name@]] +To be replaced with the value of the option [option -name]. + +[def [const @code@]] +To be replaced with the generated C code. + +[list_end] + +The following options are special, in that they will +occur within the generated code, and are replaced there as well. + +[list_begin definitions] + +[def [const @statedecl@]] +To be replaced with the value of the option [option -state-decl]. + +[def [const @stateref@]] +To be replaced with the value of the option [option -state-ref]. + +[def [const @strings@]] +To be replaced with the value of the option [option -string-varname]. + +[def [const @self@]] +To be replaced with the value of the option [option -self-command]. + +[def [const @def@]] +To be replaced with the value of the option [option -fun-qualifier]. + +[def [const @ns@]] +To be replaced with the value of the option [option -namespace]. + +[def [const @main@]] +To be replaced with the value of the option [option -main].
+ +[def [const @prelude@]] +To be replaced with the value of the option [option -prelude]. + +[list_end] + +[opt_def -state-decl string] + +A C string representing the argument declaration to use in the +generated parsing functions to refer to the parsing state. In essence +type and argument name. + +The default value is the string [const {RDE_PARAM p}]. + + +[opt_def -state-ref string] + +A C string representing the argument name used in the generated +parsing functions to refer to the parsing state. + +The default value is the string [const p]. + + +[opt_def -self-command string] + +A C string representing the reference needed to call the generated +parser function (methods ...) from another parser function, per the +chosen framework (template). + +The default value is the empty string. + + +[opt_def -fun-qualifier string] + +A C string containing the attributes to give to the generated +functions (methods ...), per the chosen framework (template). + +The default value is [const static]. + + +[opt_def -namespace string] + +The name of the C namespace the parser functions (methods, ...) shall +reside in, or a general prefix to add to the function names. + +The default value is the empty string. + + +[opt_def -main string] + +The name of the main function (method, ...) to be called by the chosen +framework (template) to start parsing input. + +The default value is [const __main]. + + +[opt_def -string-varname string] + +The name of the variable used for the table of strings used by the +generated parser, i.e. error messages, symbol names, etc. + +The default value is [const p_string]. + + +[opt_def -prelude string] + +A snippet of code to be inserted at the head of each generated parsing +function. + +The default value is the empty string. + + +[opt_def -indent integer] + +The number of characters to indent each line of the generated code by. + +The default value is [const 0].
+ + +[opt_def -comments boolean] + +A flag controlling the generation of code comments containing the +original parsing expression a parsing function is for. + +The default value is [const on]. + +[list_end] diff --git a/tcllib/modules/pt/include/format/options_json.inc b/tcllib/modules/pt/include/format/options_json.inc new file mode 100644 index 0000000..a6b472c --- /dev/null +++ b/tcllib/modules/pt/include/format/options_json.inc @@ -0,0 +1,31 @@ + +[list_begin options] +[include options_std.inc] + +[opt_def -indented boolean] + +If this option is set the system will break the generated JSON across +lines and indent it according to its inner structure, with each key of +a dictionary on a separate line. + +[para] + +If the option is not set (the default), the whole JSON object will be +written on a single line, with minimum spacing between all elements. + + +[opt_def -aligned boolean] + +If this option is set the system will ensure that the values for the +keys in a dictionary are vertically aligned with each other, for a +nice table effect. + +To make this work this also implies that [option -indented] is set. + +[para] + +If the option is not set (the default), the output is formatted as per +the value of [option -indented], without trying to align the values for +dictionary keys. + +[list_end] diff --git a/tcllib/modules/pt/include/format/options_peg.inc b/tcllib/modules/pt/include/format/options_peg.inc new file mode 100644 index 0000000..4b85984 --- /dev/null +++ b/tcllib/modules/pt/include/format/options_peg.inc @@ -0,0 +1,30 @@ + +[list_begin options] +[include options_std.inc] + +[opt_def -template string] + +The value of this option is a string into which to put the generated +text and the values of the other options. The various locations for +user-data are expected to be specified with the placeholders listed +below. The default value is "[const @code@]".
+ +[list_begin definitions] + +[def [const @user@]] +To be replaced with the value of the option [option -user]. + +[def [const @format@]] +To be replaced with the constant [const PEG]. + +[def [const @file@]] +To be replaced with the value of the option [option -file]. + +[def [const @name@]] +To be replaced with the value of the option [option -name]. + +[def [const @code@]] +To be replaced with the generated text. + +[list_end] +[list_end] diff --git a/tcllib/modules/pt/include/format/options_std.inc b/tcllib/modules/pt/include/format/options_std.inc new file mode 100644 index 0000000..3b156ee --- /dev/null +++ b/tcllib/modules/pt/include/format/options_std.inc @@ -0,0 +1,16 @@ + +[opt_def -file string] + +The value of this option is the name of the file or other entity from +which the grammar came, for which the command is run. The default +value is [const unknown]. + +[opt_def -name string] + +The value of this option is the name of the grammar we are processing. +The default value is [const a_pe_grammar]. + +[opt_def -user string] + +The value of this option is the name of the user for which the command +is run. The default value is [const unknown]. diff --git a/tcllib/modules/pt/include/format/options_tclparam_oo.inc b/tcllib/modules/pt/include/format/options_tclparam_oo.inc new file mode 100644 index 0000000..d57d717 --- /dev/null +++ b/tcllib/modules/pt/include/format/options_tclparam_oo.inc @@ -0,0 +1,32 @@ + +[list_begin options] +[include options_std.inc] + +[comment {================================================================================}] +[opt_def -class string] + +The value of this option is the name of the class to generate, without +leading colons. Note, it serves double-duty as the name of the package +to generate too, if option [option -package] is not specified, see below. + +The default value is [const CLASS], applying if neither option +[option -class] nor [option -package] were specified.
+ +[comment {================================================================================}] +[opt_def -package string] + +The value of this option is the name of the package to generate, without +leading colons. Note, it serves double-duty as the name of the class +to generate too, if option [option -class] is not specified, see above. + +The default value is [const PACKAGE], applying if neither option +[option -package] nor [option -class] were specified. + +[comment {================================================================================}] +[opt_def -version string] + +The value of this option is the version of the package to generate. + +The default value is [const 1]. + +[list_end] diff --git a/tcllib/modules/pt/include/format/options_tclparam_snit.inc b/tcllib/modules/pt/include/format/options_tclparam_snit.inc new file mode 100644 index 0000000..d57d717 --- /dev/null +++ b/tcllib/modules/pt/include/format/options_tclparam_snit.inc @@ -0,0 +1,32 @@ + +[list_begin options] +[include options_std.inc] + +[comment {================================================================================}] +[opt_def -class string] + +The value of this option is the name of the class to generate, without +leading colons. Note, it serves double-duty as the name of the package +to generate too, if option [option -package] is not specified, see below. + +The default value is [const CLASS], applying if neither option +[option -class] nor [option -package] were specified. + +[comment {================================================================================}] +[opt_def -package string] + +The value of this option is the name of the package to generate, without +leading colons. Note, it serves double-duty as the name of the class +to generate too, if option [option -class] is not specified, see above. + +The default value is [const PACKAGE], applying if neither option +[option -package] nor [option -class] were specified. 
+ +[comment {================================================================================}] +[opt_def -version string] + +The value of this option is the version of the package to generate. + +The default value is [const 1]. + +[list_end] diff --git a/tcllib/modules/pt/include/format/param.inc b/tcllib/modules/pt/include/format/param.inc new file mode 100644 index 0000000..b99139b --- /dev/null +++ b/tcllib/modules/pt/include/format/param.inc @@ -0,0 +1,21 @@ + +[section {PARAM code representation of parsing expression grammars}] + +[include whatis_param.inc] +[para] + +It has no direct formal specification beyond what was said above. + +[subsection Example] + +Assuming the following PEG for simple mathematical expressions + +[para] +[include ../example/expr_peg.inc] +[para] + +one possible PARAM serialization for it is + +[para] +[include ../example/expr_param.inc] +[para] diff --git a/tcllib/modules/pt/include/format/peg.inc b/tcllib/modules/pt/include/format/peg.inc new file mode 100644 index 0000000..c68fdb1 --- /dev/null +++ b/tcllib/modules/pt/include/format/peg.inc @@ -0,0 +1,119 @@ + +[section {PEG Specification Language}] + +[include whatis_peg.inc] +[para] + +It is formally specified by the grammar shown below, written in +itself. For a tutorial / introduction to the language please go and +read the [manpage {PEG Language Tutorial}]. + +[para] +[example { +PEG pe-grammar-for-peg (Grammar) + + # -------------------------------------------------------------------- + # Syntactical constructs + + Grammar <- WHITESPACE Header Definition* Final EOF ; + + Header <- PEG Identifier StartExpr ; + Definition <- Attribute? Identifier IS Expression SEMICOLON ; + Attribute <- (VOID / LEAF) COLON ; + Expression <- Sequence (SLASH Sequence)* ; + Sequence <- Prefix+ ; + Prefix <- (AND / NOT)? Suffix ; + Suffix <- Primary (QUESTION / STAR / PLUS)? 
; + Primary <- ALNUM / ALPHA / ASCII / CONTROL / DDIGIT / DIGIT + / GRAPH / LOWER / PRINTABLE / PUNCT / SPACE / UPPER + / WORDCHAR / XDIGIT + / Identifier + / OPEN Expression CLOSE + / Literal + / Class + / DOT + ; + Literal <- APOSTROPH (!APOSTROPH Char)* APOSTROPH WHITESPACE + / DAPOSTROPH (!DAPOSTROPH Char)* DAPOSTROPH WHITESPACE ; + Class <- OPENB (!CLOSEB Range)* CLOSEB WHITESPACE ; + Range <- Char TO Char / Char ; + + StartExpr <- OPEN Expression CLOSE ; +void: Final <- "END" WHITESPACE SEMICOLON WHITESPACE ; + + # -------------------------------------------------------------------- + # Lexing constructs + + Identifier <- Ident WHITESPACE ; +leaf: Ident <- ([_:] / <alpha>) ([_:] / <alnum>)* ; + Char <- CharSpecial / CharOctalFull / CharOctalPart + / CharUnicode / CharUnescaped + ; + +leaf: CharSpecial <- "\\" [nrt'"\[\]\\] ; +leaf: CharOctalFull <- "\\" [0-2][0-7][0-7] ; +leaf: CharOctalPart <- "\\" [0-7][0-7]? ; +leaf: CharUnicode <- "\\" 'u' HexDigit (HexDigit (HexDigit HexDigit?)?)? ; +leaf: CharUnescaped <- !"\\" . ; + +void: HexDigit <- [0-9a-fA-F] ; + +void: TO <- '-' ; +void: OPENB <- "[" ; +void: CLOSEB <- "]" ; +void: APOSTROPH <- "'" ; +void: DAPOSTROPH <- '"' ; +void: PEG <- "PEG" !([_:] / <alnum>) WHITESPACE ; +void: IS <- "<-" WHITESPACE ; +leaf: VOID <- "void" WHITESPACE ; # Implies that definition has no semantic value. +leaf: LEAF <- "leaf" WHITESPACE ; # Implies that definition has no terminals. +void: SEMICOLON <- ";" WHITESPACE ; +void: COLON <- ":" WHITESPACE ; +void: SLASH <- "/" WHITESPACE ; +leaf: AND <- "&" WHITESPACE ; +leaf: NOT <- "!" WHITESPACE ; +leaf: QUESTION <- "?" WHITESPACE ; +leaf: STAR <- "*" WHITESPACE ; +leaf: PLUS <- "+" WHITESPACE ; +void: OPEN <- "(" WHITESPACE ; +void: CLOSE <- ")" WHITESPACE ; +leaf: DOT <- "." 
WHITESPACE ; + +leaf: ALNUM <- "<alnum>" WHITESPACE ; +leaf: ALPHA <- "<alpha>" WHITESPACE ; +leaf: ASCII <- "<ascii>" WHITESPACE ; +leaf: CONTROL <- "<control>" WHITESPACE ; +leaf: DDIGIT <- "<ddigit>" WHITESPACE ; +leaf: DIGIT <- "<digit>" WHITESPACE ; +leaf: GRAPH <- "<graph>" WHITESPACE ; +leaf: LOWER <- "<lower>" WHITESPACE ; +leaf: PRINTABLE <- "<print>" WHITESPACE ; +leaf: PUNCT <- "<punct>" WHITESPACE ; +leaf: SPACE <- "<space>" WHITESPACE ; +leaf: UPPER <- "<upper>" WHITESPACE ; +leaf: WORDCHAR <- "<wordchar>" WHITESPACE ; +leaf: XDIGIT <- "<xdigit>" WHITESPACE ; + +void: WHITESPACE <- (" " / "\t" / EOL / COMMENT)* ; +void: COMMENT <- '#' (!EOL .)* EOL ; +void: EOL <- "\n\r" / "\n" / "\r" ; +void: EOF <- !. ; + + # -------------------------------------------------------------------- +END; +}] + +[subsection Example] + +Our example specifies the grammar for a basic 4-operation calculator. + +[para] +[include ../example/expr_peg.inc] +[para] + +Using higher-level features of the notation, i.e. the character +classes (predefined and custom), this example can be rewritten as + +[para] +[include ../example/expr_peg_compact.inc] +[para] diff --git a/tcllib/modules/pt/include/format/tclparam.inc b/tcllib/modules/pt/include/format/tclparam.inc new file mode 100644 index 0000000..2183bd9 --- /dev/null +++ b/tcllib/modules/pt/include/format/tclparam.inc @@ -0,0 +1,30 @@ + +[section {Tcl/PARAM code representation of parsing expression grammars}] + +The Tcl/PARAM representation of parsing expression grammars is Tcl +code whose execution will parse input per the grammar. The code is +based on the virtual machine documented in the +[manpage {PackRat Machine Specification}], using its instructions +and a few more to handle control flow. + +[para] + +Note that the generated code by itself is not functional. It expects +to be embedded into a framework which provides services like the PARAM +state, implementations for the PARAM instructions, etc. 
+ +The bulk of such a framework has to be specified through the option +[option -template]. The additional options + +[list_begin options] +[opt_def -indent integer] +[opt_def -main string] +[opt_def -namespace string] +[opt_def -prelude string] +[opt_def -proc-command string] +[opt_def -runtime-command string] +[opt_def -self-command string] +[list_end] + +provide code snippets which help to glue framework and generated code +together. Their placeholders are in the [emph generated] code. diff --git a/tcllib/modules/pt/include/format/whatis_container.inc b/tcllib/modules/pt/include/format/whatis_container.inc new file mode 100644 index 0000000..6ed7034 --- /dev/null +++ b/tcllib/modules/pt/include/format/whatis_container.inc @@ -0,0 +1,13 @@ + +The [const container] format is another form of describing parsing +expression grammars. While data in this format is executable it does +not constitute a parser for the grammar. It always has to be used in +conjunction with the package [package pt::peg::interp], a grammar +interpreter. + +[para] + +The format represents grammars by a [cmd snit::type], i.e. class, +whose instances are API-compatible to the instances of the +[package pt::peg::container] package, and which are preloaded with the +grammar in question. diff --git a/tcllib/modules/pt/include/format/whatis_cparam_critcl.inc b/tcllib/modules/pt/include/format/whatis_cparam_critcl.inc new file mode 100644 index 0000000..6e669b9 --- /dev/null +++ b/tcllib/modules/pt/include/format/whatis_cparam_critcl.inc @@ -0,0 +1,4 @@ + +The [const critcl] format is executable code, a parser for the +grammar. It is a Tcl package with the actual parser implementation +written in C and embedded in Tcl via the [package critcl] package. 
diff --git a/tcllib/modules/pt/include/format/whatis_cparam_rawc.inc b/tcllib/modules/pt/include/format/whatis_cparam_rawc.inc new file mode 100644 index 0000000..19f532f --- /dev/null +++ b/tcllib/modules/pt/include/format/whatis_cparam_rawc.inc @@ -0,0 +1,9 @@ + +The [const c] format is executable code, a parser for the grammar. The +parser implementation is written in C and can be tweaked to the users' +needs through a multitude of options. + +[para] + +The [cmd critcl] format, for example, is implemented as a canned +configuration of these options on top of the generator for [const c]. diff --git a/tcllib/modules/pt/include/format/whatis_json.inc b/tcllib/modules/pt/include/format/whatis_json.inc new file mode 100644 index 0000000..09dbca2 --- /dev/null +++ b/tcllib/modules/pt/include/format/whatis_json.inc @@ -0,0 +1,5 @@ + +The [const json] format for parsing expression grammars was written as +a data exchange format not bound to Tcl. It was defined to allow the +exchange of grammars with PackRat/PEG based parser generators for +other languages. diff --git a/tcllib/modules/pt/include/format/whatis_param.inc b/tcllib/modules/pt/include/format/whatis_param.inc new file mode 100644 index 0000000..46e75da --- /dev/null +++ b/tcllib/modules/pt/include/format/whatis_param.inc @@ -0,0 +1,12 @@ + +The PARAM code representation of parsing expression grammars is +assembler-like text using the instructions of the virtual machine +documented in the [manpage {PackRat Machine Specification}], plus a +few more for control flow (jump ok, jump fail, call symbol, return). + +[para] + +It is not really useful, except possibly as a tool demonstrating how a +grammar is compiled in general, without getting distracted by the +incidentals of a framework, i.e. like the supporting C and Tcl code +generated by the other PARAM-derived formats.
diff --git a/tcllib/modules/pt/include/format/whatis_peg.inc b/tcllib/modules/pt/include/format/whatis_peg.inc new file mode 100644 index 0000000..ae49cc3 --- /dev/null +++ b/tcllib/modules/pt/include/format/whatis_peg.inc @@ -0,0 +1,7 @@ + +[const peg], a language for the specification of parsing expression +grammars, is meant to be human readable, and writable as well, yet +strict enough to allow its processing by machine. Like any computer +language. It was defined to make writing the specification of a +grammar easy, something the other formats found in the Parser Tools do +not lend themselves to. diff --git a/tcllib/modules/pt/include/format/whatis_tclparam_oo.inc b/tcllib/modules/pt/include/format/whatis_tclparam_oo.inc new file mode 100644 index 0000000..da29b1e --- /dev/null +++ b/tcllib/modules/pt/include/format/whatis_tclparam_oo.inc @@ -0,0 +1,4 @@ + +The [const oo] format is executable code, a parser for the grammar. It +is a Tcl package holding a [package TclOO] class, whose instances are +parsers for the input grammar. diff --git a/tcllib/modules/pt/include/format/whatis_tclparam_snit.inc b/tcllib/modules/pt/include/format/whatis_tclparam_snit.inc new file mode 100644 index 0000000..622240e --- /dev/null +++ b/tcllib/modules/pt/include/format/whatis_tclparam_snit.inc @@ -0,0 +1,4 @@ + +The [const snit] format is executable code, a parser for the +grammar. It is a Tcl package holding a [cmd snit::type], i.e. a class, +whose instances are parsers for the input grammar. diff --git a/tcllib/modules/pt/include/gen.inc b/tcllib/modules/pt/include/gen.inc new file mode 100644 index 0000000..d2f8743 --- /dev/null +++ b/tcllib/modules/pt/include/gen.inc @@ -0,0 +1,6 @@ +[def [const [vset NAME]]] +[include format/whatis_[vset WHATIS].inc] +[para] + +The set of options supported by the generator for this format is +listed and explained in section [sectref [vset SECT]].
diff --git a/tcllib/modules/pt/include/gen_options.dia b/tcllib/modules/pt/include/gen_options.dia new file mode 100644 index 0000000..4d75d22 --- /dev/null +++ b/tcllib/modules/pt/include/gen_options.dia @@ -0,0 +1,7 @@ +# -*- tcl -*- tcl.tk//DSL diagram//EN//1.0 +set boxwidth [4 cm] +line ;group { + arc cw ; arc ; arrow ; box "interpreted (Tcl)" ; arrow ; box container +} ; arc ; line ; arc cw ; arrow ; box "specialized" ; group { + arc ; arc cw ; arrow ; box "C" ; arrow ; box "critcl / C" +} ; arc cw ; arc ; arrow ; box "Tcl" ; arrow ; box "snit / oo" diff --git a/tcllib/modules/pt/include/gen_options.inc b/tcllib/modules/pt/include/gen_options.inc new file mode 100644 index 0000000..9559fa7 --- /dev/null +++ b/tcllib/modules/pt/include/gen_options.inc @@ -0,0 +1 @@ +[para][image gen_options][para] diff --git a/tcllib/modules/pt/include/gen_options.pic b/tcllib/modules/pt/include/gen_options.pic new file mode 100644 index 0000000..78f723e --- /dev/null +++ b/tcllib/modules/pt/include/gen_options.pic @@ -0,0 +1,9 @@ +.nf + + --- C ---> critcl, c + | + + --- specialized -+ + | | + ---+ + --- Tcl -> snit, oo + | + + --- interpreted (Tcl) ------> container +.fi diff --git a/tcllib/modules/pt/include/gen_options.png b/tcllib/modules/pt/include/gen_options.png Binary files differnew file mode 100644 index 0000000..9d64a55 --- /dev/null +++ b/tcllib/modules/pt/include/gen_options.png diff --git a/tcllib/modules/pt/include/gen_options.txt b/tcllib/modules/pt/include/gen_options.txt new file mode 100644 index 0000000..a8d02e5 --- /dev/null +++ b/tcllib/modules/pt/include/gen_options.txt @@ -0,0 +1,7 @@ + + --- C ---> critcl, c + | + + --- specialized -+ + | | + ---+ + --- Tcl -> snit, oo + | + + --- interpreted (Tcl) ------> container diff --git a/tcllib/modules/pt/include/gen_verticals.inc b/tcllib/modules/pt/include/gen_verticals.inc new file mode 100644 index 0000000..90cada1 --- /dev/null +++ b/tcllib/modules/pt/include/gen_verticals.inc @@ -0,0 +1,22 @@ +[example { 
+ USER + | + Common API + | + +---------------------------+---------------------------+ + | | | + + Interpreted Specialized Tcl Specialized C + +------------------------+ +------------------------+ +------------------------+ + | [container] + | | [snit or TclOO] | | C code alone or embed- | + | pt::peg::interp | | | | ded in Tcl (Critcl) | + +========================+ +========================+ | | + | pt::rde | | pt::rde | | | + | | | | | | + | Tcl | Critcl | | Tcl | Critcl | | | + +===============+ | +===============+ | | | + | struct::stack | | | struct::stack | | | | + | | | | | | | | + | Tcl | Critcl | | | Tcl | Critcl | | | | + +------+--------+--------+ +------+--------+--------+ +------------------------+ +}] diff --git a/tcllib/modules/pt/include/import/format/json.inc b/tcllib/modules/pt/include/import/format/json.inc new file mode 100644 index 0000000..193b279 --- /dev/null +++ b/tcllib/modules/pt/include/import/format/json.inc @@ -0,0 +1,2 @@ +[require pt::peg] +[require json] diff --git a/tcllib/modules/pt/include/import/format/peg.inc b/tcllib/modules/pt/include/import/format/peg.inc new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/tcllib/modules/pt/include/import/format/peg.inc diff --git a/tcllib/modules/pt/include/import/from.inc b/tcllib/modules/pt/include/import/from.inc new file mode 100644 index 0000000..9a0acc3 --- /dev/null +++ b/tcllib/modules/pt/include/import/from.inc @@ -0,0 +1,49 @@ +[comment {-*- tcl -*- --- !doctools ---}] +[manpage_begin pt::peg::from::[vset PACKAGE] n [vset VERSION]] +[include ../module.inc] +[include ../keywords_convert.inc] +[titledesc "PEG Conversion. Read [vset NAME] format"] +[require pt::peg::from::[vset PACKAGE] [opt [vset VERSION]]] +[include format/[vset REQUIRE].inc] +[description] +[include ../ref_intro.inc] + +This package implements the converter from [vset NAME] markup to +parsing expression grammars. 
+ +[para] + +It resides in the Import section of the Core Layer of Parser Tools, +and can be used either directly with the other packages of this layer, +or indirectly through the import manager provided by +[package pt::peg::import]. The latter is intended for use in untrusted +environments and done through the corresponding import plugin +[package pt::peg::import::[vset PACKAGE]] sitting between converter +and import manager. + +[para][image arch_core_iplugins][para] + +[section API] + +The API provided by this package satisfies the specification of the +Converter API found in the [manpage {Parser Tools Import API}] +specification. + +[list_begin definitions] + +[call [cmd pt::peg::from::[vset PACKAGE]] [method convert] [arg text]] + +This command takes the [vset NAME] markup encoding a parsing +expression grammar and contained in [arg text], and generates the +canonical serialization of said grammar, as specified in section +[sectref {PEG serialization format}]. + +The created value is then returned as the result of the command. + +[list_end] + +[include ../format/[vset PACKAGE].inc] +[include ../serial/pegrammar.inc] +[include ../serial/pexpression.inc] +[include ../feedback.inc] +[manpage_end] diff --git a/tcllib/modules/pt/include/import/plugin.inc b/tcllib/modules/pt/include/import/plugin.inc new file mode 100644 index 0000000..a9aec5f --- /dev/null +++ b/tcllib/modules/pt/include/import/plugin.inc @@ -0,0 +1,69 @@ +[comment {-*- tcl -*- --- !doctools ---}] +[manpage_begin pt::peg::import::[vset PACKAGE] n [vset VERSION]] +[include ../module.inc] +[include ../keywords_import.inc] +[titledesc "PEG Import Plugin. Read [vset NAME] format"] +[require pt::peg::import::[vset PACKAGE] [opt [vset VERSION]]] +[require pt::peg::from::[vset PACKAGE]] +[description] +[include ../ref_intro.inc] + +This package implements the parsing expression grammar import plugin +processing [vset NAME] markup.
+ +[para] + +It resides in the Import section of the Core Layer of Parser Tools and +is intended to be used by [package pt::peg::import], the import +manager, sitting between it and the corresponding core conversion +functionality provided by [package pt::peg::from::[vset PACKAGE]]. + +[para][image arch_core_iplugins][para] +[para] + +While the direct use of this package with a regular interpreter is +possible, this is strongly disrecommended and requires a number of +contortions to provide the expected environment. + +The proper way to use this functionality depends on the situation: + +[list_begin enumerated] +[enum] + +In an untrusted environment the proper access is through the package +[package pt::peg::import] and the import manager objects it +provides. + +[enum] + +In a trusted environment however simply use the package +[package pt::peg::from::[vset PACKAGE]] and access the core +conversion functionality directly. + +[list_end] + + +[section API] + +The API provided by this package satisfies the specification of the +Plugin API found in the [manpage {Parser Tools Import API}] +specification. + +[list_begin definitions] + +[call [cmd import] [arg text]] + +This command takes the [vset NAME] markup encoding a parsing +expression grammar and contained in [arg text], and generates the +canonical serialization of said grammar, as specified in section +[sectref {PEG serialization format}]. + +The created value is then returned as the result of the command. 
+ +[list_end] + +[include ../format/[vset PACKAGE].inc] +[include ../serial/pegrammar.inc] +[include ../serial/pexpression.inc] +[include ../feedback.inc] +[manpage_end] diff --git a/tcllib/modules/pt/include/keywords.inc b/tcllib/modules/pt/include/keywords.inc new file mode 100644 index 0000000..aaad9db --- /dev/null +++ b/tcllib/modules/pt/include/keywords.inc @@ -0,0 +1,17 @@ +[comment {--- Keywords common to all packages in Parser Tools ---}] +[keywords {EBNF}] +[keywords {LL(k)}] +[keywords {PEG}] +[keywords {TDPL}] +[keywords {context-free languages}] +[keywords {expression}] +[keywords {grammar}] +[keywords {matching}] +[keywords {parser}] +[keywords {parsing expression grammar}] +[keywords {parsing expression}] +[keywords {push down automaton}] +[keywords {recursive descent}] +[keywords {state}] +[keywords {top-down parsing languages}] +[keywords {transducer}] diff --git a/tcllib/modules/pt/include/keywords_convert.inc b/tcllib/modules/pt/include/keywords_convert.inc new file mode 100644 index 0000000..acf9161 --- /dev/null +++ b/tcllib/modules/pt/include/keywords_convert.inc @@ -0,0 +1,5 @@ +[comment {--- Keywords shared among all the conversion packages ---}] +[keywords [vset NAME]] +[keywords {conversion}] +[keywords {format conversion}] +[keywords {serialization}] diff --git a/tcllib/modules/pt/include/keywords_export.inc b/tcllib/modules/pt/include/keywords_export.inc new file mode 100644 index 0000000..ad5ca06 --- /dev/null +++ b/tcllib/modules/pt/include/keywords_export.inc @@ -0,0 +1,5 @@ +[comment {--- Keywords shared among all the export plugins ---}] +[keywords [vset NAME]] +[keywords {export}] +[keywords {plugin}] +[keywords {serialization}] diff --git a/tcllib/modules/pt/include/keywords_import.inc b/tcllib/modules/pt/include/keywords_import.inc new file mode 100644 index 0000000..9a633bc --- /dev/null +++ b/tcllib/modules/pt/include/keywords_import.inc @@ -0,0 +1,5 @@ +[comment {--- Keywords shared among all the import plugins ---}] 
+[keywords [vset NAME]] +[keywords {import}] +[keywords {plugin}] +[keywords {serialization}] diff --git a/tcllib/modules/pt/include/modes.inc b/tcllib/modules/pt/include/modes.inc new file mode 100644 index 0000000..4fce8d0 --- /dev/null +++ b/tcllib/modules/pt/include/modes.inc @@ -0,0 +1,21 @@ +[list_begin definitions][comment {-- modes --}] + +[def [const value]] + +The semantic value of the nonterminal symbol is an abstract syntax +tree consisting of a single node node for the nonterminal itself, +which has the ASTs of the symbol's right hand side as its children. + +[def [const leaf]] + +The semantic value of the nonterminal symbol is an abstract syntax +tree consisting of a single node node for the nonterminal, without any +children. Any ASTs generated by the symbol's right hand side are +discarded. + +[def [const void]] + +The nonterminal has no semantic value. Any ASTs generated by the +symbol's right hand side are discarded (as well). + +[list_end][comment {-- modes --}] diff --git a/tcllib/modules/pt/include/module.inc b/tcllib/modules/pt/include/module.inc new file mode 100644 index 0000000..8a41ee7 --- /dev/null +++ b/tcllib/modules/pt/include/module.inc @@ -0,0 +1,6 @@ +[comment {--- Standard header for all manpages in this module --}] +[copyright {2009 Andreas Kupries <andreas_kupries@users.sourceforge.net>}] +[moddesc {Parser Tools}] +[include keywords.inc] +[category {Parsing and Grammars}] +[require Tcl 8.5] diff --git a/tcllib/modules/pt/include/param_1is.inc b/tcllib/modules/pt/include/param_1is.inc new file mode 100644 index 0000000..6b9d39b --- /dev/null +++ b/tcllib/modules/pt/include/param_1is.inc @@ -0,0 +1,3 @@ +[para] +This part of the machine's state is used and modified by the +instructions defined in the section [sectref [vset INS0]]. 
diff --git a/tcllib/modules/pt/include/param_2is.inc b/tcllib/modules/pt/include/param_2is.inc new file mode 100644 index 0000000..15362f0 --- /dev/null +++ b/tcllib/modules/pt/include/param_2is.inc @@ -0,0 +1,4 @@ +[para] +This part of the machine's state is used and modified by the +instructions defined in the sections [sectref [vset INS0]], and +[sectref [vset INS1]]. diff --git a/tcllib/modules/pt/include/param_3is.inc b/tcllib/modules/pt/include/param_3is.inc new file mode 100644 index 0000000..d6e9364 --- /dev/null +++ b/tcllib/modules/pt/include/param_3is.inc @@ -0,0 +1,4 @@ +[para] +This part of the machine's state is used and modified by the +instructions defined in the sections [sectref [vset INS0]], +[sectref [vset INS1]], and [sectref [vset INS2]]. diff --git a/tcllib/modules/pt/include/param_okfail.inc b/tcllib/modules/pt/include/param_okfail.inc new file mode 100644 index 0000000..0751f77 --- /dev/null +++ b/tcllib/modules/pt/include/param_okfail.inc @@ -0,0 +1,7 @@ +[para] + +Success and failure of the test are both recorded directly in ST. +Success further clears ES, wheras failure sets the pair of CL and +expected input (encoded as a leaf parsing expression) as the new ES +and then rewinds CL by one character, preparing the machine for +another parse attempt by a possible alternative. diff --git a/tcllib/modules/pt/include/param_special.inc b/tcllib/modules/pt/include/param_special.inc new file mode 100644 index 0000000..55cc600 --- /dev/null +++ b/tcllib/modules/pt/include/param_special.inc @@ -0,0 +1,4 @@ +This instruction implements the special PE operator "[vset OP]", which +checks if CC falls into the character class of the same name, or not. 
+ +[include param_okfail.inc] diff --git a/tcllib/modules/pt/include/rde_0cins.inc b/tcllib/modules/pt/include/rde_0cins.inc new file mode 100644 index 0000000..8e44138 --- /dev/null +++ b/tcllib/modules/pt/include/rde_0cins.inc @@ -0,0 +1,5 @@ +[call [arg objectName] [method i_[vset IFAIL]/[vset IOKX]]] + +This method is a convenient combination of control flow and the two +PARAM instructions [cmd [vset IFAIL]] and [cmd [vset IOK]]. The former +is executed for "ST == fail", the latter for "ST == ok". diff --git a/tcllib/modules/pt/include/rde_0gins.inc b/tcllib/modules/pt/include/rde_0gins.inc new file mode 100644 index 0000000..5a64b26 --- /dev/null +++ b/tcllib/modules/pt/include/rde_0gins.inc @@ -0,0 +1,4 @@ +[call [arg objectName] [method i:[vset G]_[vset INS]]] + +This guarded method, a variant of [method i_[vset INS]], executes only +for "ST == [vset G]". diff --git a/tcllib/modules/pt/include/rde_0ginsb.inc b/tcllib/modules/pt/include/rde_0ginsb.inc new file mode 100644 index 0000000..5e2a0fb --- /dev/null +++ b/tcllib/modules/pt/include/rde_0ginsb.inc @@ -0,0 +1,4 @@ +[call [arg objectName] [method i:[vset G]_[vset INS]]] + +This method implements a guarded variant of the the PARAM instruction +[cmd [vset INS]], which executes only for "ST == [vset G]". diff --git a/tcllib/modules/pt/include/rde_0ins.inc b/tcllib/modules/pt/include/rde_0ins.inc new file mode 100644 index 0000000..38f8848 --- /dev/null +++ b/tcllib/modules/pt/include/rde_0ins.inc @@ -0,0 +1,3 @@ +[call [arg objectName] [method i_[vset INS]]] + +This method implements the PARAM instruction [cmd [vset INS]]. diff --git a/tcllib/modules/pt/include/rde_1ins.inc b/tcllib/modules/pt/include/rde_1ins.inc new file mode 100644 index 0000000..c51a43b --- /dev/null +++ b/tcllib/modules/pt/include/rde_1ins.inc @@ -0,0 +1,3 @@ +[call [arg objectName] [method i_[vset INS]] [arg [vset IA0]]] + +This method implements the PARAM instruction [cmd [vset INS]]. 
diff --git a/tcllib/modules/pt/include/rde_2ins.inc b/tcllib/modules/pt/include/rde_2ins.inc new file mode 100644 index 0000000..eb8f314 --- /dev/null +++ b/tcllib/modules/pt/include/rde_2ins.inc @@ -0,0 +1,3 @@ +[call [arg objectName] [method i_[vset INS]] [arg [vset IA0]] [arg [vset IA1]]] + +This method implements the PARAM instruction [cmd [vset INS]]. diff --git a/tcllib/modules/pt/include/ref_intro.inc b/tcllib/modules/pt/include/ref_intro.inc new file mode 100644 index 0000000..a8f7484 --- /dev/null +++ b/tcllib/modules/pt/include/ref_intro.inc @@ -0,0 +1,12 @@ + +[para] + +Are you lost ? + +Do you have trouble understanding this document ? + +In that case please read the overview provided by the +[manpage {Introduction to Parser Tools}]. This document is the +entrypoint to the whole system the current package is a part of. + +[para] diff --git a/tcllib/modules/pt/include/serial/ast.inc b/tcllib/modules/pt/include/serial/ast.inc new file mode 100644 index 0000000..090a8c2 --- /dev/null +++ b/tcllib/modules/pt/include/serial/ast.inc @@ -0,0 +1,104 @@ +[comment {-*- text -*-}] +[section {AST serialization format}] + +Here we specify the format used by the Parser Tools to serialize +Abstract Syntax Trees (ASTs) as immutable values for transport, +comparison, etc. + +[para] + +Each node in an AST represents a nonterminal symbol of a grammar, and +the range of tokens/characters in the input covered by it. ASTs do not +contain terminal symbols, i.e. tokens/characters. These can be +recovered from the input given a symbol's location. + +[para] + +We distinguish between [term regular] and [term canonical] +serializations. + +While a tree may have more than one regular serialization only exactly +one of them will be [term canonical]. + + +[list_begin definitions][comment {-- serializations --}] +[def {Regular serialization}] + +[list_begin enumerated][comment {-- regular points --}] + +[enum] +The serialization of any AST is the serialization of its root node. 
+ +[enum] +The serialization of any node is a Tcl list containing at least three +elements. + +[list_begin enumerated][comment {-- node elements --}] +[enum] +The first element is the name of the nonterminal symbol stored in the +node. + +[enum] +The second and third element are the locations of the first and last +token in the token stream the node represents (covers). + +[list_begin enumerated][comment {--- location constraints}] +[enum] +Locations are provided as non-negative integer offsets from the +beginning of the token stream, with the first token found in the +stream located at offset 0 (zero). + +[enum] +The end location has to be equal to or larger than the start location. + +[list_end][comment {--- location constraints}] + +[enum] +All elements after the first three represent the children of the node, +which are themselves nodes. This means that the serializations of +nodes without children, i.e. leaf nodes, have exactly three elements. + +The children are stored in the list with the leftmost child first, and +the rightmost child last. + +[list_end][comment {-- node elements --}] +[list_end][comment {-- regular points --}] + +[def {Canonical serialization}] + +The canonical serialization of an abstract syntax tree has the format +as specified in the previous item, and then additionally satisfies the +constraints below, which make it unique among all the possible +serializations of this tree. + +[list_begin enumerated][comment {-- canonical points --}] +[enum] + +The string representation of the value is the canonical representation +of a pure Tcl list. I.e. it does not contain superfluous whitespace. 
+ +[list_end][comment {-- canonical points --}] +[list_end][comment {-- serializations --}] +[para] + +[subsection Example] + +Assuming the parsing expression grammar below + +[para] +[include ../example/expr_peg.inc] +[para] + +and the input string + +[example { 120+5 }] + +then a parser should deliver the abstract syntax tree below (except for whitespace) + +[para] +[include ../example/expr_ast.inc] +[para] + +Or, more graphical + +[para][image expr_ast][para] diff --git a/tcllib/modules/pt/include/serial/pegrammar.inc b/tcllib/modules/pt/include/serial/pegrammar.inc new file mode 100644 index 0000000..4dbdb56 --- /dev/null +++ b/tcllib/modules/pt/include/serial/pegrammar.inc @@ -0,0 +1,114 @@ +[section {PEG serialization format}] + +Here we specify the format used by the Parser Tools to serialize +Parsing Expression Grammars as immutable values for transport, +comparison, etc. + +[para] + +We distinguish between [term regular] and [term canonical] +serializations. + +While a PEG may have more than one regular serialization only exactly +one of them will be [term canonical]. + + +[list_begin definitions][comment {-- serializations --}] +[def {regular serialization}] + +[list_begin enumerated][comment {-- regular points --}] +[enum] +The serialization of any PEG is a nested Tcl dictionary. + +[enum] +This dictionary holds a single key, [const pt::grammar::peg], and its +value. This value holds the contents of the grammar. + +[enum] +The contents of the grammar are a Tcl dictionary holding the set of +nonterminal symbols and the starting expression. The relevant keys and +their values are + +[list_begin definitions][comment {-- grammar keywords --}] +[def [const rules]] + +The value is a Tcl dictionary whose keys are the names of the +nonterminal symbols known to the grammar. + +[list_begin enumerated][comment {-- nonterminals --}] +[enum] +Each nonterminal symbol may occur only once. + +[enum] +The empty string is not a legal nonterminal symbol. 
+ +[enum] +The value for each symbol is a Tcl dictionary itself. The relevant +keys and their values in this dictionary are + +[list_begin definitions][comment {-- nonterminal keywords --}] +[def [const is]] + +The value is the serialization of the parsing expression describing +the symbols sentennial structure, as specified in the section +[sectref {PE serialization format}]. + +[def [const mode]] + +The value can be one of three values specifying how a parser should +handle the semantic value produced by the symbol. + +[include ../modes.inc] +[list_end][comment {-- nonterminal keywords --}] +[list_end][comment {-- nonterminals --}] + +[def [const start]] + +The value is the serialization of the start parsing expression of the +grammar, as specified in the section [sectref {PE serialization format}]. + +[list_end][comment {-- grammar keywords --}] + +[enum] +The terminal symbols of the grammar are specified implicitly as the +set of all terminal symbols used in the start expression and on the +RHS of the grammar rules. + + +[list_end][comment {-- regular points --}] + +[def {canonical serialization}] + +The canonical serialization of a grammar has the format as specified +in the previous item, and then additionally satisfies the constraints +below, which make it unique among all the possible serializations of +this grammar. + +[list_begin enumerated][comment {-- canonical points --}] +[enum] + +The keys found in all the nested Tcl dictionaries are sorted in +ascending dictionary order, as generated by Tcl's builtin command +[cmd {lsort -increasing -dict}]. + +[enum] + +The string representation of the value is the canonical representation +of a Tcl dictionary. I.e. it does not contain superfluous whitespace. 
+ +[list_end][comment {-- canonical points --}] +[list_end][comment {-- serializations --}] + +[subsection Example] + +Assuming the following PEG for simple mathematical expressions + +[para] +[include ../example/expr_peg.inc] +[para] + +then its canonical serialization (except for whitespace) is + +[para] +[include ../example/expr_serial.inc] +[para] diff --git a/tcllib/modules/pt/include/serial/pexpression.inc b/tcllib/modules/pt/include/serial/pexpression.inc new file mode 100644 index 0000000..c0b2255 --- /dev/null +++ b/tcllib/modules/pt/include/serial/pexpression.inc @@ -0,0 +1,245 @@ +[comment {-*- text -*-}] +[section {PE serialization format}] + +Here we specify the format used by the Parser Tools to serialize +Parsing Expressions as immutable values for transport, comparison, +etc. + +[para] + +We distinguish between [term regular] and [term canonical] +serializations. + +While a parsing expression may have more than one regular +serialization only exactly one of them will be [term canonical]. + +[list_begin definitions][comment {-- serializations --}] +[def {Regular serialization}] + +[list_begin definitions][comment {-- regular points --}] + +[def [const {Atomic Parsing Expressions}]] +[list_begin enumerated][comment {-- atomic points --}] + +[enum] +The string [const epsilon] is an atomic parsing expression. It matches +the empty string. + +[enum] +The string [const dot] is an atomic parsing expression. It matches +any character. + +[enum] +The string [const alnum] is an atomic parsing expression. It matches +any Unicode alphabet or digit character. This is a custom extension of +PEs based on Tcl's builtin command [cmd {string is}]. + +[enum] +The string [const alpha] is an atomic parsing expression. It matches +any Unicode alphabet character. This is a custom extension of PEs +based on Tcl's builtin command [cmd {string is}]. + +[enum] +The string [const ascii] is an atomic parsing expression. It matches +any Unicode character below U0080. 
This is a custom extension of PEs +based on Tcl's builtin command [cmd {string is}]. + +[enum] +The string [const control] is an atomic parsing expression. It matches +any Unicode control character. This is a custom extension of PEs based +on Tcl's builtin command [cmd {string is}]. + +[enum] +The string [const digit] is an atomic parsing expression. It matches +any Unicode digit character. Note that this includes characters +outside of the [lb]0..9[rb] range. This is a custom extension of PEs +based on Tcl's builtin command [cmd {string is}]. + +[enum] +The string [const graph] is an atomic parsing expression. It matches +any Unicode printing character, except for space. This is a custom +extension of PEs based on Tcl's builtin command [cmd {string is}]. + +[enum] +The string [const lower] is an atomic parsing expression. It matches +any Unicode lower-case alphabet character. This is a custom extension +of PEs based on Tcl's builtin command [cmd {string is}]. + +[enum] +The string [const print] is an atomic parsing expression. It matches +any Unicode printing character, including space. This is a custom +extension of PEs based on Tcl's builtin command [cmd {string is}]. + +[enum] +The string [const punct] is an atomic parsing expression. It matches +any Unicode punctuation character. This is a custom extension of PEs +based on Tcl's builtin command [cmd {string is}]. + +[enum] +The string [const space] is an atomic parsing expression. It matches +any Unicode space character. This is a custom extension of PEs based +on Tcl's builtin command [cmd {string is}]. + +[enum] +The string [const upper] is an atomic parsing expression. It matches +any Unicode upper-case alphabet character. This is a custom extension +of PEs based on Tcl's builtin command [cmd {string is}]. + +[enum] +The string [const wordchar] is an atomic parsing expression. It +matches any Unicode word character. 
This is any alphanumeric character +(see alnum), and any connector punctuation characters (e.g. +underscore). This is a custom extension of PEs based on Tcl's builtin +command [cmd {string is}]. + +[enum] +The string [const xdigit] is an atomic parsing expression. It matches +any hexadecimal digit character. This is a custom extension of PEs +based on Tcl's builtin command [cmd {string is}]. + +[enum] +The string [const ddigit] is an atomic parsing expression. It matches +any decimal digit character. This is a custom extension of PEs based +on Tcl's builtin command [cmd regexp]. + +[enum] +The expression + [lb]list t [var x][rb] +is an atomic parsing expression. It matches the terminal string [var x]. + +[enum] +The expression + [lb]list n [var A][rb] +is an atomic parsing expression. It matches the nonterminal [var A]. + +[list_end][comment {-- atomic points --}] + +[def [const {Combined Parsing Expressions}]] +[list_begin enumerated][comment {-- combined points --}] + +[enum] +For parsing expressions [var e1], [var e2], ... the result of + + [lb]list / [var e1] [var e2] ... [rb] + +is a parsing expression as well. + +This is the [term {ordered choice}], aka [term {prioritized choice}]. + +[enum] +For parsing expressions [var e1], [var e2], ... the result of + + [lb]list x [var e1] [var e2] ... [rb] + +is a parsing expression as well. + +This is the [term {sequence}]. + +[enum] +For a parsing expression [var e] the result of + + [lb]list * [var e][rb] + +is a parsing expression as well. + +This is the [term {kleene closure}], describing zero or more +repetitions. + +[enum] +For a parsing expression [var e] the result of + + [lb]list + [var e][rb] + +is a parsing expression as well. + +This is the [term {positive kleene closure}], describing one or more +repetitions. + +[enum] +For a parsing expression [var e] the result of + + [lb]list & [var e][rb] + +is a parsing expression as well. + +This is the [term {and lookahead predicate}]. 
+ +[enum] +For a parsing expression [var e] the result of + + [lb]list ! [var e][rb] + +is a parsing expression as well. + +This is the [term {not lookahead predicate}]. + + +[enum] +For a parsing expression [var e] the result of + + [lb]list ? [var e][rb] + +is a parsing expression as well. + +This is the [term {optional input}]. + + +[list_end][comment {-- combined points --}] +[list_end][comment {-- regular points --}] + +[def {Canonical serialization}] + +The canonical serialization of a parsing expression has the format as +specified in the previous item, and then additionally satisfies the +constraints below, which make it unique among all the possible +serializations of this parsing expression. + +[list_begin enumerated][comment {-- canonical points --}] +[enum] + +The string representation of the value is the canonical representation +of a pure Tcl list. I.e. it does not contain superfluous whitespace. + +[enum] + +Terminals are [emph not] encoded as ranges (where start and end of the +range are identical). + +[comment { + Thinking about this I am not sure if that was a good move. + There are a lot more equivalent encodings around that just + the one I used above. Examples + + {x {t a} {t b} {tc } {t d}} + {x {x {t a} {t b}} {x {tc } {t d}}} + {x {x {t a} {t b} {tc } {t d}}} + + etc. Having the t/.. equivalence added it can now be argued + that we should handle these as well. Which essentially + amounts to a whole-sale system to simplify parsing + expressions. This moves expression equality from intensional + to extensional, or as near as is possible. + + The only counter-argument I have is that the t/.. equivalence + is restricted to leaves of the tree, or alternatively, to + terminal symbol operators. 
+}] + +[list_end][comment {-- canonical points --}] +[list_end][comment {-- serializations --}] +[para] + +[subsection Example] + +Assuming the parsing expression shown on the right-hand side of the +rule + +[para] +[include ../example/expr_pe.inc] +[para] + +then its canonical serialization (except for whitespace) is + +[para] +[include ../example/expr_pe_serial.inc] +[para] diff --git a/tcllib/modules/pt/include/std_parser_object_api.inc b/tcllib/modules/pt/include/std_parser_object_api.inc new file mode 100644 index 0000000..71b70d9 --- /dev/null +++ b/tcllib/modules/pt/include/std_parser_object_api.inc @@ -0,0 +1,71 @@ + +[call [arg objectName] [method destroy]] + +This method destroys the parser instance, releasing all claimed memory +and other resources, and deleting the instance command. + +[para] + +The result of the command is the empty string. + + +[call [arg objectName] [method parse] [arg chan]] + +This method runs the parser using the contents of [arg chan] as input +(starting at the current location in the channel), until parsing is +not possible anymore, either because parsing has completed, or run +into a syntax error. + +[include channel_notes.inc] + +[para] + +Upon successful completion the command returns an abstract syntax tree +as its result. + +This AST is in the form specified in section +[sectref {AST serialization format}]. + +As a plain nested Tcl-list it can then be processed with any Tcl +commands the user likes, doing transformations, semantic checks, etc. + +To help in this the package [package pt::ast] provides a set of +convenience commands for validation of the tree's basic structure, +printing it for debugging, and walking it either from the bottom up, +or top down. + +[para] + +When encountering a syntax error the command will throw an error instead. 
+ +This error will be a 4-element Tcl-list, containing, in the order +listed below: + +[list_begin enumerated] +[enum] +The string [const pt::rde] identifying it as parser runtime error. + +[enum] +The location of the parse error, as character offset from the +beginning of the parsed input. + +[enum] +The location of parse error, now as a 2-element list containing +line-number and column in the line. + +[enum] +A set of atomic parsing expressions indicating encoding the characters +and/or nonterminal symbols the parser expected to see at the location +of the parse error, but did not get. + + For the specification of atomic parsing expressions please see the +section [sectref {PE serialization format}]. + +[list_end] + + +[call [arg objectName] [method parset] [arg text]] + +This method runs the parser using the string in [arg text] as input. +In all other ways it behaves like the method [method parse], shown +above. |