1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
|
#----------------------------------------------------------------------
#
# aycock-runtime.tcl --
#
# Procedures needed to execute an Aycock-Horspool-Earley parser.
#
# Copyright (c) 2006 by Kevin B. Kenny. All rights reserved.
#
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
# RCS: @(#) $Id: aycock-runtime.tcl,v 1.2 2011/01/13 02:47:47 andreas_kupries Exp $
#
#----------------------------------------------------------------------
package provide grammar::aycock::runtime 1.0
package require Tcl 8.5
# Define the directory containing this package's scripts
namespace eval grammar {}
namespace eval grammar::aycock {
variable parserCount 0
}
# grammar::aycock::Restore --
#
# Restores a parser from saved state.
#
# Parameters;
# rules - Saved rule set
# automaton - Saved automaton
# args - Saved action procedures
#
# Results:
# Returns the constructed parser's name
#
# Side effects:
# Reconstructs the parser
proc ::grammar::aycock::Restore {rules automaton args} {
set name [MakeParser]
variable ${name}::RuleSet
variable ${name}::Completions
variable ${name}::Edges
set RuleSet $rules
set Edges [dict create]
set Completions {}
set i 0
foreach {completions edges} $automaton {
lappend Completions $completions
dict set Edges $i $edges
incr i
}
foreach {actionName actionBody} $args {
namespace eval ${name} \
[list proc $actionName {_ clientData} $actionBody]
}
return ${name}
}
# grammar::aycock::MakeParser --
#
# Constructs the ensemble that will contain an Aycock parser.
#
# Results:
# Returns the name of the parser, which is an ensemble within
# the "aycock" namespace.
#
# The following commands are members of the ensemble:
# parse -- Parses a sequence of symbols and returns its lexical
# value.
# destroy -- Destroys the parser.
# terminals -- Lists the terminal symbols accepted by the parser
# nonterminals -- Lists the nonterminal symbols reduced by the parser
# save -- Returns a command to recreate the parser without needing
# to analyze the rule set.
proc ::grammar::aycock::MakeParser {} {
variable parserCount
set name [namespace current]::parser[incr parserCount]
namespace eval $name {
namespace export parse destroy
namespace export terminals nonterminals save
}
proc ${name}::parse {symList vallist {clientData {}}} \
[string map [list \
PROC [namespace current]::Parse \
PARSER $name] {
PROC PARSER $symList $vallist $clientData
}]
proc ${name}::terminals {} \
[list [namespace current]::Terminals $name]
proc ${name}::nonterminals {} \
[list [namespace current]::Nonterminals $name]
proc ${name}::save {} \
[list [namespace current]::Save $name]
proc ${name}::destroy {} \
[list namespace delete $name]
namespace eval $name {
namespace ensemble create
}
return $name
}
# grammar::aycock::MakeSet --
#
# Run one step of an Earley parse.
#
# Parameters:
# parser -- Name of the parser
# setsVar -- Sets of parser states already constructed
# sym -- Input symbol
#
# Results:
# Returns the sets of parser states updated with the transition on the
# given input
#
# Each parser state is an ordered pair (automaton state, parent)
# where parent is the position in the input string where the substring
# matching the given state begins. A state set is a dictionary whose
# keys are parser states and whose values are "links" - a link consists of
# the automaton state, parent, and state set of the predecessor,
# the automaton state, parent and state set of the cause, and
# the LRE(0) parser state of the symbol being reduced - see Aycock's
# paper for the details on how these are interpreted.
proc ::grammar::aycock::MakeSet {parser setsVar sym} {
upvar 1 $setsVar sets
namespace upvar $parser \
Completions Completions \
Edges Edges
# Find the state index and set up "current" and "next" state sets.
set ip1 [llength $sets]
set i [expr {$ip1 - 1}]
set curSet [lindex $sets end]
set newSet {}
# Work through the "current" set to determine "next state" transitions.
set j 0
set worklist $curSet
while {$j < [llength $worklist]} {
set item [lindex $worklist $j]
incr j 2
foreach {state parent} $item break
# Advance using the 'goto' on the current input symbol
if {$sym ne {} && [dict exists $Edges $state $sym]} {
set k [dict get $Edges $state $sym]
set createdItem [list $k $parent]
set links [list $state $parent $i]
dict set newSet $createdItem $links {}
# Also add the epsilon-transition from that state
if {[dict exists $Edges $k {}]} {
set nk [dict get $Edges $k {}]
set createdItem [list $nk [expr {$i+1}]]
dict set newSet $createdItem {} {}
}
}
if {$parent != $i} {
# Reduce any completions in the current state, adding
# them to the worklist because their 'goto' items may
# also be shifted.
foreach {lhs rhs pos} [lindex $Completions $state] {
if {$lhs eq {}} continue
foreach pitem [lindex $sets $parent] {
foreach {pstate pparent} $pitem break
if {[dict exists $Edges $pstate $lhs]} {
# goto on the newly-reduced nonterminal
set k [dict get $Edges $pstate $lhs]
set createdItem [list $k $pparent]
set links [list $pstate $pparent $parent \
$state $parent $i \
$lhs $rhs $pos]
if {![dict exists $curSet $createdItem]} {
lappend worklist $createdItem $links
}
dict set curSet $createdItem $links {}
if {[dict exists $Edges $k {}]} {
# epsilon-transition from the nonterminal's goto
set nk [dict get $Edges $k {}]
set createdItem [list $nk $i]
if {![dict exists $curSet $createdItem]} {
lappend worklist $createdItem {}
}
dict set curSet $createdItem {} {}
}
}
}
}
}
}
set sets [lreplace $sets[set sets {}] end end $curSet $newSet]
}
# grammar::aycock::Parse --
#
# Runs an Aycock-Earley parser
#
# Usage:
# $parser parse symlist vallist
#
# Parameters:
# symlist - List of token names created by scanning an input
# vallist - List of semantic values corresponding to the
# tokens in $symlist
# clientData - Client data to be passed to semantic action procedures
#
# Results:
# Returns whatever the semantic action in the top-level reduction
# of the parse returns.
proc ::grammar::aycock::Parse {parser symlist vallist {clientData {}}} {
namespace upvar $parser \
RuleSet RuleSet \
Edges Edges
set sets [list [dict create [list 1 0] {} [list 2 0] {}]]
set i 0
foreach sym $symlist {
MakeSet $parser sets $sym
if {[llength [lindex $sets end]] == 0} {
return -code error "syntax error before symbol $i ($sym: [lindex $vallist $i])"
}
incr i
}
MakeSet $parser sets {}
set startSym [lindex [dict get $RuleSet {}] 0 1]
#set finalState [dict get $Edges 2 $startSym]
set finalState [dict get $Edges 1 $startSym]
# TODO - check that the final state *is* final... it has to contain an
# acceptor somewhere.
return [Reconstruct $parser {} $finalState 0 $vallist $sets \
[expr {[llength $sets] - 2}] $clientData]
}
# grammar::aycock::Reconstruct --
#
# Reconstructs the parse that leads to reducing a given nonterminal
# symbol, and determines the nonterminal's semantic value.
#
# Parameters:
# parser -- Aycock parser
# nt - Name of the nonterminal being reduced
# state - Parser state that contains the reduction
# parent - Position in the input list of the start of the reduction
# vallist - List of semantic values corresponding the the symbols
# on the right hand side of the reduction
# sets - List of sets generated by grammar::aycock::MakeSet
# k - Position in the input list at the start of the reduction
# clientData - Client data for semantic actions
#
# Results:
# Returns the semantic value of the left-hand side of the reduction
proc ::grammar::aycock::Reconstruct {parser nt state parent vallist sets k clientData} {
namespace upvar $parser \
RuleSet RuleSet \
Completions Completions \
Edges Edges
set choices {}
# Here it's possible that Completions contains completions for the
# wrong nonterminal?
set complete [lindex $Completions $state]
if {[llength $complete] != 3} {
set complete {}
foreach {lhs rhs pos} [lindex $Completions $state] {
if {$lhs eq $nt} {
lappend complete $lhs $rhs $pos
}
}
}
set compIdx [ChooseReduction $parser $complete]
foreach {lhs rhsIndex pos} \
[lrange $complete [expr {3*$compIdx}] [expr {3*$compIdx+2}]] break
foreach {rhs action} [lrange [dict get $RuleSet $lhs] $rhsIndex [expr {$rhsIndex+1}]] break
set cmd [list ${parser}::$action]
set args {}
foreach sym $rhs {
lappend args {}
}
for {set i [expr {[llength $rhs]-1}]} {$i >= 0} {incr i -1} {
set sym [lindex $rhs $i]
if {![dict exists $RuleSet $sym]} {
# terminal symbol
if {$sym != "\u22a2"} {
lset args $i [lindex $vallist [expr {$k-1}]]
set predecessors {}
dict for {key v} \
[dict get [lindex $sets $k] [list $state $parent]] {
foreach {pstate pparent pk cstate cparent ck
lhs rhsIndex pos} $key break
# should be only one transition on a terminal
break
}
set state $pstate
set parent $pparent
set k $pk
}
} elseif {[string range $sym end-2 end] == "\{\u00d8\}"} {
lset args $i [DeriveEpsilon $parser $sym $clientData]
} elseif {[dict exists [lindex $sets $k] [list $state $parent]]} {
set causes {}
set links [dict get [lindex $sets $k] [list $state $parent]]
set keys {}
set reductions {}
dict for {key v} $links {
foreach {pstate pparent pk cstate cparent ck \
lhs rhsIndex pos} $key break
lappend reductions $lhs $rhsIndex $pos
lappend keys $key
}
set keyIdx [ChooseReduction $parser $reductions]
set key [lindex $keys $keyIdx]
foreach {pstate pparent pk cstate cparent ck \
lhs rhsIndex pos} $key break
lset args $i \
[Reconstruct $parser $sym $cstate $cparent $vallist \
$sets $ck $clientData]
set state $pstate
set parent $pparent
set k $pk
} else {
return -code error "syntax error: incomplete parse"
}
}
set v [eval [list $cmd $args $clientData]]
return $v
}
# grammar::aycock::ChooseReduction --
#
# Resolves an ambiguity in an Aycock-Earley parse
#
# Parameters:
# parser - Parser structure
# lritems - List of LR items that could be reduced.
#
# Results:
# Returns the ordinal number of the reduction to choose
#
# Always resolves in favour of the shortest right-hand side. This choice
# is equivalent to choosing "resolve shift/reduce conflicts in favour
# of shifting" in an LR parser, and is adequate to handling situations
# like "dangling ELSE." It is not adequate for handling things like a
# YACC-style ambiguous expression grammar with precedence and associativity;
# that sort of processing would need additional investigation.
proc ::grammar::aycock::ChooseReduction {parser lritems} {
# if {[llength $lritems] != 3} {
# puts "Need to choose which item to reduce:"
# DumpItemSet $parser $lritems
# }
# choose the shortest reduction - this is equivalent to
# "resolve in favour of shift"
set ind -1
set shortest 99999
set i 0
foreach {lhs rhsIndex pos} $lritems {
if {$pos < $shortest} {
set shortest $pos
set ind $i
}
incr i
}
return $ind
}
# grammar::aycock::DeriveEpsilon --
#
# Performs a set of semantic actions needed to derive the
# empty string within a set of reductions in an Aycock-Earley parser.
#
# Parameters:
# parser -- Parser data structure
# sym -- Non-terminal symbol that reduces to the empty string.
# clientData - Client data for semantic actions
#
# Results:
# Returns the semantic value of the given symbol
proc ::grammar::aycock::DeriveEpsilon {parser sym clientData} {
# need to find the rule that derives the null string, and
# expand it out.
namespace upvar $parser RuleSet RuleSet
set rules [dict get $RuleSet $sym]
set idx 0
if { [llength $rules] != 2 } {
set items {}
set i 0
foreach {rhs action} $rules {
lappend items $sym $i [llength $rhs]
incr i 2
}
set idx [expr {2 * [ChooseReduction $parser $items]}]
}
set rhs [lindex $rules $idx]
set action [lindex $rules [expr {$idx + 1}]]
set cmd [list ${parser}::$action]
set args {}
foreach sym $rhs {
lappend args {}
}
for {set i [expr {[llength $rhs] - 1}]} {$i >= 0} {incr i -1} {
lset args $i [DeriveEpsilon $parser [lindex $rhs $i] $clientData]
}
set r [eval [list $cmd $args $clientData]]
return $r
}
|