# ACTIVESTATE TEAPOT-PKG BEGIN TM -*- tcl -*- # -- Tcl Module # @@ Meta Begin # Package pt::ast 1 # Meta as::build::date 2010-07-14 # Meta as::origin http://sourceforge.net/projects/tcllib # Meta category Parser Tools # Meta description Abstract Syntax Tree Serialization # Meta license BSD # Meta platform tcl # Meta require {Tcl 8.5} # Meta require char # Meta subject expression {push down automaton} state EBNF # Meta subject {context-free languages} matching PEG TDPL # Meta subject {parsing expression} parser {recursive descent} # Meta subject grammar transducer {top-down parsing languages} # Meta subject {parsing expression grammar} LL(k) # Meta summary pt::ast # @@ Meta End # ACTIVESTATE TEAPOT-PKG BEGIN REQUIREMENTS package require Tcl 8.5 package require char # ACTIVESTATE TEAPOT-PKG END REQUIREMENTS # ACTIVESTATE TEAPOT-PKG BEGIN DECLARE package provide pt::ast 1 # ACTIVESTATE TEAPOT-PKG END DECLARE # ACTIVESTATE TEAPOT-PKG END TM # -*- tcl -*- # Copyright (c) 2009 Andreas Kupries # Verification of serialized parsing expressions, conversion # between such and other data structures, and their construction. # # ## ### ##### ######## ############# ##################### ## Requirements package require Tcl 8.5 ; # Required runtime. package require char ; # Character quoting utilities. # # ## ### ##### ######## ############# ##################### ## namespace eval ::pt::ast { namespace export \ verify verify-as-canonical canonicalize \ equal bottomup topdown \ print new namespace ensemble create } # # ## ### ##### ######## ############# ## Public API # Check that the proposed serialization of an abstract syntax tree is # indeed such. proc ::pt::ast::verify {serial {canonvar {}}} { variable ourprefix #puts "V <$serial> /[llength [info level 0]] / [info level 0]" if {$canonvar ne {}} { upvar 1 $canonvar iscanonical set iscanonical [string equal $serial [list {*}$serial]] } topdown [list [namespace current]::Verify] $serial return } proc ::pt::ast::verify-as-canonical {serial} { verify $serial iscanonical if {!$iscanonical} { variable ourprefix variable ourimpure return -code error $ourprefix$ourimpure } return } proc ::pt::ast::Verify {ast} { variable ourprefix variable ourbadrange variable ourbadend variable ourbadstart variable ourshort if {[llength $ast] < 3} { return -code error $ourprefix$ourshort } # Open Questions # - Should we constrain the locations of the children to be # inside of the parent ? # - Should we constrain the locations of the children to not # overlap ? # Note: Gaps we have to allow, comments and whitespace and such. lassign $ast type start end if {![string is integer -strict $start]} { return -code error $ourprefix[format $ourbadstart $start] } elseif {$start < 0} { return -code error $ourprefix[format $ourbadstart $start] } if {![string is integer -strict $end] || ($end < 0)} { return -code error $ourprefix[format $ourbadend $end] } if {$end < $start} { return -code error $ourprefix$ourbadrange } upvar 1 iscanonical iscanonical if { [info exists iscanonical] && ($ast ne [list {*}$ast]) } { # Reject coding with superfluous whitespace as non-canonical. set iscanonical 0 } return } # # ## ### ##### ######## ############# proc ::pt::ast::canonicalize {serial} { verify $serial iscanonical if {$iscanonical} { return $serial } return [bottomup [list [namespace current]::Canonicalize] $serial] } proc ::pt::ast::Canonicalize {ast} { # We construct a pure list out of the node data. return [list {*}$ast] } # # ## ### ##### ######## ############# # Converts a parsing expression serialization into a human readable # string for test results. It assumes that the serialization is at # least structurally sound. proc ::pt::ast::print {serial} { return [join [bottomup [list [namespace current]::Print] $serial] \n] } proc ::pt::ast::Print {ast} { set children [lassign $ast type start end] set result [list [list <$type> :: $start $end]] # The arguments are already processed for printing foreach c $children { foreach line $c { lappend result " $line" } } return $result } # # ## ### ##### ######## ############# proc ::pt::ast::equal {seriala serialb} { return [string equal \ [canonicalize $seriala] \ [canonicalize $serialb]] } # # ## ### ##### ######## ############# proc ::pt::ast::bottomup {cmdprefix ast} { Bottomup 2 $cmdprefix $ast } proc ::pt::ast::Bottomup {level cmdprefix ast} { set children [lassign $ast type start end] set new [list $type $start $end] set clevel $level incr clevel foreach c $children { lappend new [Bottomup $clevel $cmdprefix $c] } return [uplevel $level [list {*}$cmdprefix $new]] } proc ::pt::ast::topdown {cmdprefix ast} { Topdown 2 $cmdprefix $ast return } proc ::pt::ast::Topdown {level cmdprefix ast} { uplevel $level [list {*}$cmdprefix $ast] incr level foreach c [lrange $ast 3 end] { Topdown $level $cmdprefix $c } return } # # ## ### ##### ######## ############# proc ::pt::ast::new {sym start end args} { variable ourbadstart variable ourbadend variable ourbadrange if {![string is integer -strict $start] || ($start < 0)} { return -code error [format $ourbadstart $start] } if {![string is integer -strict $end] || ($end < 0)} { return -code error [format $ourbadend $end] } if {$end < $start} { return -code error $ourbadrange } return [list $sym $start $end {*}$args] } namespace eval ::pt::ast { # # ## ### ##### ######## ############# ## Strings for error messages. variable ourprefix "error in serialization:" variable ourbadstart " expected integer >= 0 as start of range, got \"%s\"" variable ourbadend " expected integer >= 0 as end of range, got \"%s\"" variable ourbadrange " expected start <= end for range" variable ourshort " expected at least 3 elements for node" variable ourimpure " has irrelevant whitespace" ## # # ## ### ##### ######## ############# } # # ## ### ##### ######## ############# ##################### ## Ready package provide pt::ast 1 return