# ACTIVESTATE TEAPOT-PKG BEGIN TM -*- tcl -*-
# -- Tcl Module

# @@ Meta Begin
# Package ralutil 0.10.2
# Meta as::author      {Andrew Mangogna}
# Meta as::build::date 2014-02-10
# Meta as::origin      http://sourceforge.net/projects/tclral
# Meta category        Database
# Meta description     Utilities for Relational Algebra for Tcl
# Meta license         BSD
# Meta platform        tcl
# Meta require         {Tcl 8.4}
# Meta require         {ral 0.10.2}
# Meta subject         relational algebra sql database
# Meta summary         Relational Algebra for Tcl, Utilities
# @@ Meta End


# ACTIVESTATE TEAPOT-PKG BEGIN REQUIREMENTS

package require Tcl 8.4
package require ral 0.10.2

# ACTIVESTATE TEAPOT-PKG END REQUIREMENTS

# ACTIVESTATE TEAPOT-PKG BEGIN DECLARE

package provide ralutil 0.10.2

# ACTIVESTATE TEAPOT-PKG END DECLARE
# ACTIVESTATE TEAPOT-PKG END TM
# This software is copyrighted 2006, 2007, 2008, 2009 by G. Andrew Mangogna.
# The following terms apply to all files associated with the software unless
# explicitly disclaimed in individual files.
# 
# The authors hereby grant permission to use, copy, modify, distribute,
# and license this software and its documentation for any purpose, provided
# that existing copyright notices are retained in all copies and that this
# notice is included verbatim in any distributions. No written agreement,
# license, or royalty fee is required for any of the authorized uses.
# Modifications to this software may be copyrighted by their authors and
# need not follow the licensing terms described here, provided that the
# new terms are clearly indicated on the first page of each file where
# they apply.
# 
# IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY FOR
# DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING
# OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY DERIVATIVES
# THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
# 
# THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
# INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT.  THIS SOFTWARE
# IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE
# NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
# OR MODIFICATIONS.
# 
# GOVERNMENT USE: If you are acquiring this software on behalf of the
# U.S. government, the Government shall have only "Restricted Rights"
# in the software and related documentation as defined in the Federal
# Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2).  If you
# are acquiring the software on behalf of the Department of Defense,
# the software shall be classified as "Commercial Computer Software"
# and the Government shall have only "Restricted Rights" as defined in
# Clause 252.227-7013 (c) (1) of DFARs.  Notwithstanding the foregoing,
# the authors grant the U.S. Government and others acting in its behalf
# permission to use and distribute the software in accordance with the
# terms specified in this license.
# 
#  *++
# MODULE:
# ralutil.tcl -- Convenience scripts for use with TclRAL
# 
# ABSTRACT:
# This file contains a set of utility procs that are useful in
# conjunction with TclRAL. N.B. that many of these are experimental
# in nature and interfaces are subject to much change. Part of the
# purpose for this package is to determine what works well in practice
# without cluttering the TclRAL package proper.
# 
# $RCSfile: ralutil.tcl,v $
# $Revision: 1.7 $
# $Date: 2012/02/26 19:09:04 $
#  *--

package provide ralutil 0.10.2

package require ral 0.10.2

namespace eval ::ralutil {
    namespace export pipe
    namespace export sysIdsInit
    namespace export sysIdsGenSystemId
    namespace export crosstab
    namespace export rvajoin

    namespace import ::ral::*

    # We define the very special relation values of DEE and DUM.
    # DUM is the relation value that has an empty heading and an empty body.
    # DEE is the relation value that has an empty heading and a body consisting
    # of one tuple that is the empty tuple (the only body that an empty heading
    # relation value could have).
    variable DUM {{} {}}
    variable DEE {{} {{}}}
}

# Relational expressions are frequently nested to achieve some desired
# result. This nesting in normal Tcl syntax quickly gets difficult to read.
# Often the expression is naturally a pipe of the output of one command
# to the input of another. This idea came from the "Commands pipe" page
# on the wiki: http://wiki.tcl.tk/17419. The code here is different,
# but the idea is the same.
proc ::ralutil::pipe {fns {var {}} {sep |~}} {
    # Split out the separator characters
    lassign [split $sep ""] s p
    # create a valid tcl command
    set cmd [nestcmds [string trim [split $fns $s]] $p]
    # perform command
    if {$var eq ""} {
	return [uplevel $cmd]
    } else {
	upvar $var v
	set v $cmd
    }
}

# nest one command inside another in reverse list order.
# "p" is a place holder character denoting where the previous result
# is to be placed in the command. If "p" is absent in the command,
# then the previous result is simply appended to the command.
proc ::ralutil::nestcmds {cmdList p} {
    set cmd [string trim [lindex $cmdList end]]
    if {[llength $cmdList] > 1} {
	set innercmd "\[[nestcmds [lrange $cmdList 0 end-1] $p]\]"
	set cmd [expr {[string first $p $cmd] < 0 ?\
	    "$cmd $innercmd" : [string map [list $p $innercmd] $cmd]}]
    }
    return $cmd
}

# These procs form a scheme for system defined identifiers.
# This scheme works as a reasonable way to assign system identifiers
# that are based on incrementing integers.
# Note that is possible to save and restore the contents of the
# relvar holding the identifiers, although that is not done here.
proc ::ralutil::sysIdsInit {} {
    if {![relvar exists ::__ral_systemids__]} {
	relvar create ::__ral_systemids__\
            {RelvarName string IdAttr string IdNum int} {RelvarName IdAttr}
    }
}

# A convenience proc that installs a trace on an attribute of
# a relvar in order to have the system assign a unique value
# to that attribute.
proc ::ralutil::sysIdsGenSystemId {relvarName attrName} {
    relvar trace add variable [uplevel 1 relvar path $relvarName] insert\
	[list ::ralutil::sysIdsCreateIdFor $attrName]
}

# This proc can be used in a relvar trace on insert to
# create a unique identifier for an attribute.
proc ::ralutil::sysIdsCreateIdFor {attrName op relvarName tup} {
    relvar eval {
	set updated [\
	    relvar updateone __ral_systemids__ sysid\
		[list RelvarName $relvarName IdAttr $attrName] {
		set idValue [tuple extract $sysid IdNum]
		tuple update $sysid IdNum [incr idValue]
	    }]
	if {[relation isempty $updated]} {
	    # If we don't update then we need to 
	    # find the maximum value of the atttribute in any tuple
	    # to use as our counter value.
	    set idValue [findMaxAttrValue $relvarName $attrName]
	    # The next one to use is one past the max
	    relvar insert __ral_systemids__ [list RelvarName $relvarName\
		IdAttr $attrName IdNum [incr idValue]]
	}
    }
    # It is possible that no value was given for the attribute that is to be
    # generated. In that case we need to add it in using the type we find in
    # the relation heading.
    if {$attrName in [tuple attributes $tup]} {
        set newTup [tuple update $tup $attrName $idValue]
    } else {
        set heading [relation heading [relvar set $relvarName]]
        set attrType [dict get $heading $attrName]
        set newTup [tuple extend $tup $attrName $attrType $idValue]
    }
    return $newTup
}

# A helper procedure in finding the value for a system identifiers
proc ::ralutil::findMaxAttrValue {relvarName attrName} {
    set relValue [relvar set $relvarName]
    # If we don't find any, then we just start at 0
    set idValue 0
    if {[relation isnotempty $relValue]} {
	# We find the maximum by summarizing over DEE
	set idValue [pipe {
	    relation summarizeby $relValue {} r\
		maxValue int {rmax($r, $attrName)} |
	    relation extract ~ maxValue
        }]
    }
    return $idValue
}

# crosstab relValue crossAttr ?attr1 attr2 ...?
#
# Generate a cross tabulation of "relValue" for the "crossAttr" against the
# variable number of attributes given. The "crossAttr" argument is the name of
# an attribute of "relValue". The idea is to create new relation that contains
# all the attributes in "args" plus a new attribute for each distinct value of
# "crossAttr". The value of the new attributes is the count of tuples that have
# the corresponding value of "crossAttr".  Relationally, the "summarize"
# command is used when computations are required across groups of tuples.
proc ::ralutil::crosstab {relValue crossAttr args} {
    # We start by projecting the attributes that will be retained
    # in the resulting relation.
    set subproj [relation project $relValue {*}$args]
    # The strategy is to build up a summarize command on the fly, adding new
    # attributes. So we start with the constant part of the command.
    set sumCmd [list relation summarize $relValue $subproj r]

    # By projecting on the "crossAttr" we get the unique set of values
    # for that attribute since there are no duplicates in relations.
    set crossproj [relation project $relValue $crossAttr]
    # For each distinct value of the "crossAttr" extend the relation with
    # a new attribute by the same name as the value and whose value is
    # the number of tuples which match the value.
    foreach val [lsort [relation list $crossproj]] {
	 set sumexpr [format\
	     {[relation cardinality [relation restrictwith $r {$%s == "%s"}]]}\
	     $crossAttr $val]
	 lappend sumCmd $val int $sumexpr
    }
    # Finally we want the total for all the "crossAttr" matches.
    lappend sumCmd Total int {[relation cardinality $r]}
    return [eval $sumCmd]
}

# Perform the relational equivalent to an "outer join". The idea is to end up
# with a relation that contains a relation valued attribute that contains the
# tuples that match in the join and that is empty for those tuples in the
# relation that do not match across the join attributes (the "natural
# join" leaving such tuples out of the result altogether).
# "r1" the first relation to join
# "r2" the second relation to join
# "newAttr" the name of the new relation valued attribute
# The remaining optional arguments are the attributes across which the join is
# to be made.  If the list is empty, then the commonly named attributes are
# found.
# The result has the same heading as "r1" plus an attribute "newAttr".
# The type of "newAttr" is Relation with the heading of "r2" minus
# the join attributes.

proc ::ralutil::rvajoin {r1 r2 newAttr args} {
    # If the join attributes are not given, then determine them by finding the
    # intersection of the attributes in r1 and r2.
    set joinAttrs $args
    if {[llength $joinAttrs] == 0} {
        set a1 [relation attributes $r1]
        set a2 [relation attributes $r2]
        foreach attr $a1 {
            if {$attr in $a2} {
                lappend joinAttrs $attr
            }
        }
    }
    # N.B. if there are no commonly named attributes, then we get the usual
    # degenerate behavior of a join, namely, every tuple in the result relation
    # will have its relation valued attribute set to "r2" in a sort of
    # interesting take on the Cartesian product (I think if the result is
    # "ungroup"ed it is the Cartesian product.)

    # Compute the heading of the relation valued attribute by removing the
    # join attributes from the heading of "r2".
    set rvaHeading [relation heading $r2]
    foreach attr $joinAttrs {
        set attrIndex [lsearch -exact $rvaHeading $attr]
        if {$attrIndex == -1} {
            error "attribute, \"$attr\", does not appear in,\
                    \"$[relation attributes $r2]\""
        } else {
            set rvaHeading [lreplace $rvaHeading $attrIndex $attrIndex+1]
        }
    }
    # Finally, compute the result by extending "r1" with the relation valued
    # attribute. The tuples of the relation valued attribute are those where
    # the corresponding attributes in "r1" match those in "r2".
    return [relation extend $r1 r1tup $newAttr [list Relation $rvaHeading] {
        [relation project [relation restrict $r2 r2tup {
            [tuple equal [tuple project $r2tup {*}$joinAttrs]\
                    [tuple project $r1tup {*}$joinAttrs]]
        }] {*}[dict keys $rvaHeading]]
    }]
}