# ACTIVESTATE TEAPOT-PKG BEGIN TM -*- tcl -*- # -- Tcl Module # @@ Meta Begin # Package procwatch 1.0 # Meta as::author {Emmanuel Frecon} # Meta as::build::date 2015-05-22 # Meta as::license BSD # Meta as::origin http://sourceforge.net/projects/til # Meta description This module provides all the support mechanism for # Meta description starting a number of processes and ensuring that # Meta description they will continuously run at any time. The module # Meta description provides a number of heuristics for restarting badly # Meta description behaved processes. It will also continuously log the # Meta description output of processes to files. # Meta platform tcl # Meta require {Tcl 8.4} # Meta require diskutil # Meta require java # Meta require logger # Meta require logwatch # Meta require process # Meta require struct::list # Meta require uobj # @@ Meta End # ACTIVESTATE TEAPOT-PKG BEGIN REQUIREMENTS package require Tcl 8.4 package require diskutil package require java package require logger package require logwatch package require process package require struct::list package require uobj # ACTIVESTATE TEAPOT-PKG END REQUIREMENTS # ACTIVESTATE TEAPOT-PKG BEGIN DECLARE package provide procwatch 1.0 # ACTIVESTATE TEAPOT-PKG END DECLARE # ACTIVESTATE TEAPOT-PKG END TM # procwatch.tcl -- Service controller support # # This module provides all the support mechanism for starting a # number of processes and ensuring that they will continuously # run at any time. The module provides a number of heuristics # for restarting badly behaved processes. It will also # continuously log the output of processes to files. # # Copyright (c) 2004-2007 by the Swedish Institute of Computer Science. # # See the file 'license.terms' for information on usage and redistribution # of this file, and for a DISCLAIMER OF ALL WARRANTIES. package require Tcl 8.4 package require logger package require struct::list package require uobj package require process package require java package require logwatch package require diskutil namespace eval ::procwatch { variable PW if {![::info exists PW]} { array set PW { cidgene 0 sidgene 0 widgene 0 vm_tclsh "" vm_wish "" vm_java "" comments "\#!;" svc_run_file "running.%hostname%.%context%.svc" svc_pid_file "context.%hostname%.%context%.pid" -logfmt "%progname%.%hostname%.log" -log "log" -run "run" -uniqueness "::procwatch::__exit" -maxsize 65536 -autostart on -home "%progdir%" -pause 1 -watch 10 -type "" -main "" -vmargs "" -args "" cx_inheritance "-log -run -home -pause -uniqueness -watch -autostart" srv_inheritance "-logfmt -maxsize -type -main -vmargs -args" } variable libdir [file dirname [file normalize [info script]]] ::uobj::install_log procwatch PW; # Creates log namespace variable ::uobj::install_defaults procwatch PW; # Creates defaults procedure } } # This module handles two different types of "objects": Contexts and # Services. Contexts provides the resources for the controlling of a # number of processes, this includes things such as location for files # on disk. Processes that should be watched are added to contexts and # are named Services, since they should be kept running at all time # and under any circumstances. # ::procwatch::isa -- Detect object type # # Detect if an object is of a given type, possibly returning # back it creation iteration identifier. # # Arguments: # id Identifier of object # type type that we are requesting for # iter_p Pointer to iteration creation identifier # # Results: # 1 if object is of correct type, 0 otherwise # # Side Effects: # None. proc ::procwatch::isa { id type {iter_p ""} } { variable PW variable log if { $iter_p ne "" } { upvar $iter_p creationid } set creationid -1 if { [string match "[namespace current]::${type}_*" $id] } { set uscore [string last _ $id] set creationid [string range $id [expr $uscore +1] end] if { [info vars $id] ne "" } { return 1 } else { return 0 } } return 0 } # ::procwatch::__initdirs -- Create directories # # This procedure will create the resource directories needed for # the run-time controlling of services within a context. # # Arguments: # id Identifier of a context, as returned by ::procwatch::new # # Results: # None. # # Side Effects: # Creates logging and run control directories if necessary. # Turn off facilities on errors. proc ::procwatch::__initdirs { id } { variable PW variable log upvar #0 $id CONTEXT # Create run directory if necessary. Disable running facilities # if we could not create it. set CONTEXT(rundir) "" if { $CONTEXT(-run) ne "" } { ${log}::notice "Turning on run control facilities" set CONTEXT(rundir) \ [file join [::diskutil::fname_resolv $CONTEXT(-home)] \ $CONTEXT(-run)] if { ! [file exists $CONTEXT(rundir)] } { ${log}::notice "Creating directory $CONTEXT(rundir)" file mkdir $CONTEXT(rundir) } elseif { ! [file isdirectory $CONTEXT(rundir)] } { ${log}::warn "Run directory $CONTEXT(rundir) is not a directory!" set CONTEXT(rundir) "" } if { $CONTEXT(rundir) != "" } { set CONTEXT(rundir) [::diskutil::absolute_path $CONTEXT(rundir)] } } # Create log directory if necessary. Disable logging facilities # if we could not create it. set CONTEXT(logdir) "" if { $CONTEXT(-log) != "" } { ${log}::notice "Turning on logging facilities" set CONTEXT(logdir) \ [file join [::diskutil::fname_resolv $CONTEXT(-home)] \ $CONTEXT(-log)] if { ! [file exists $CONTEXT(logdir)] } { ${log}::notice "Creating logging directory $CONTEXT(logdir)" file mkdir $CONTEXT(logdir) } elseif { ! [file isdirectory $CONTEXT(logdir)] } { ${log}::warn \ "Logging directory $CONTEXT(logdir) is not a directory!" set CONTEXT(logdir) "" } if { $CONTEXT(logdir) != "" } { set CONTEXT(logdir) [::diskutil::absolute_path $CONTEXT(logdir)] } } } # ::procwatch::__exit -- Exit on duplicate controllers # # This procedure is the default behaviour when the same context # is controlled by another process. It will exit this process # after printing out an error. # # Arguments: # pid Process identifier of the other controlling process # # Results: # None. # # Side Effects: # Will actively terminate the current process proc ::procwatch::__exit { pid } { variable PW variable log ${log}::critical "Another process at $pid is already in control of\ this context, exiting" exit 1 } # ::procwatch::__checkuniqueness -- Scream/exit when non-unique # # This procedure tries to ensure that a single process, on that # host, is controlling a given context. If several were, things # would end into chaos (since we would attempt to start several # times the same service). # # Arguments: # id Identifier of the context, as returned by ::procwatch::new # # Results: # None. # # Side Effects: # None. proc ::procwatch::__uniqueness { id } { variable PW variable log upvar #0 $id CONTEXT # Now create a pidfile for ourselves and make sure that no other # instance of ourselves is already running (otherwise, we would have # chaotic behaviour!) if { $CONTEXT(rundir) != "" && $CONTEXT(-uniqueness) ne "" } { isa $id context cxid # Refuse to start if another starter is already running on that host set pidfile [::diskutil::fname_resolv \ [file join $CONTEXT(rundir) $PW(svc_pid_file)]] set pidfile [regsub -all "%context%" $pidfile $cxid] if { [catch "open $pidfile" fd] == 0 } { set pid [gets $fd] close $fd # Search for processes which have the same name as # executable (which will work both if we are a wrapped # script or a script started from wish or tclsh). set rx "[file tail [info nameofexecutable]]" set other_progs [::process::find $rx] if { [lsearch $other_progs $pid] >= 0 && $pid != [pid]} { ${log}::error "Another process is already controlling\ context $id at $pid!" if { [catch {eval $CONTEXT(-uniqueness) $pid} err] } { ${log}::error \ "Error when calling $CONTEXT(-uniqueness): $err" } } } # Now writing pidfile ${log}::info "Writing pid file: $pidfile" set fd [open $pidfile w] puts $fd [pid] close $fd } } # ::procwatch::runfile:write -- Write run-time information file # # Write down a run description file that shortly describes the # state of the services currently running in a context. # # Arguments: # id Identifier of the context # runfile Path to file, will be generated otherwise # # Results: # Return the number of services for which information was written, -1 # if the file could not be opened for writing. # # Side Effects: # None. proc ::procwatch::runfile:write { id {runfile ""} } { variable PW variable log if { ![isa $id context cxid] } { ${log}::warn "$id is not a known context" return -code error "$id is not a known context" } upvar #0 $id CONTEXT # Generate a valid and intelligent runfile name if none was # provided. if { $runfile == "" } { set runfile [::diskutil::fname_resolv \ [file join $CONTEXT(rundir) $PW(svc_run_file)]] set runfile [regsub -all "%context%" $runfile $cxid] } # And now write information for all services. ${log}::info "Writing running state to $runfile" set nbservices 0 if { [catch "open $runfile w" fd] == 0 } { foreach s [get $id services] { upvar \#0 $s SERVICE if { $SERVICE(state) eq "RUNNING" && [llength $SERVICE(pid)] > 0 } { puts $fd "$SERVICE(id) \"$SERVICE(pid)\" $SERVICE(-main)" } incr nbservices } close $fd } else { ${log}::error "Failed writing running state to $runfile" return -1 } return $nbservices } # ::procwatch::runfile:read -- Read run-time information file # # Write down a run description file that shortly describes the # state of the services currently running in a context. # # Arguments: # id Identifier of the context # runfile Path to file, will be generated otherwise # # Results: # Return a list describing these (empty list on no processes or error). # # Side Effects: # None. proc ::procwatch::runfile:read { id {runfile ""} } { variable PW variable log if { ![isa $id context cxid] } { ${log}::warn "$id is not a known context" return -code error "$id is not a known context" } upvar #0 $id CONTEXT # Generate a valid and intelligent runfile name if none was # provided. if { $runfile == "" } { set runfile [::diskutil::fname_resolv \ [file join $CONTEXT(rundir) $PW(svc_run_file)]] set runfile [regsub -all "%context%" $runfile $cxid] } # And now read information for all services. set services [list] if { [catch "open $runfile r" fd] == 0 } { while { ! [eof $fd] } { set line [gets $fd] if { $line != "" } { foreach {id pids script} $line break lappend services [list $id $pids $script] } } close $fd } return $services } # ::procwatch::find -- Find a service # # Find a service by its PID. # # Arguments: # pid PID of service # # Results: # Return the identifier of the service or -1 # # Side Effects: # None. proc ::procwatch::find:bypid { pid } { variable PW variable log foreach v [info vars ::procwatch::service_*] { upvar #0 $v SERVICE if { [lindex $SERVICE(pid) 0] == $pid } { return $v } } return "" } # ::procwatch::find -- Find a service # # Find a service by its internal id (preferred), its current pid # or its script name # # Arguments: # id Internal id, pid or name # # Results: # Return the identifer of the service, empty when not found # # Side Effects: # None. proc ::procwatch::find { id } { variable PW variable log if { [isa $id service] } { return $id } set servid "" # First try to understand id as an internal service id if { [regexp "\\d+" $id] } { set vname [info vars ::procwatch::service_*_$id] if { $vname ne "" } { return $vname } } # Next get on with name foreach v [info vars ::procwatch::service_*] { upvar #0 $v SERVICE if { [lindex $SERVICE(pid) 0] == $id \ || [string match $id $SERVICE(-main)] } { return $v } } return "" } # ::procwatch::__vmfind -- Find a Virtual Machine # # This procedure finds a virtual machine of a given type. The # results are cached for quickening further lookups. Recognised # virtual machines are tclsh, wish and java. # # Arguments: # type Type of the virtual machine that we are looking for # # Results: # Return the full path to the execution binary for the VM, or an # empty strings on errors. # # Side Effects: # None. proc ::procwatch::__vmfind { type } { variable PW variable log # Initialise the tclsh/wish cache. if { $PW(vm_tclsh) eq "" } { set starter [file tail [info nameofexecutable]] if { [string match wish* $starter] \ || [string match tclsh* $starter] } { set exedir [file dirname [info nameofexecutable]] set ext [file extension [info nameofexecutable]] set raw [file rootname [file tail [info nameofexecutable]]] set version "" if { [regexp "\\w+\\d+" $raw] } { set len [string length $raw] for { set vidx 0 } { $vidx < $len } { incr vidx } { if { [string is digit [string index $raw $vidx]] } { break } } set version [string range $raw $vidx end] } set PW(vm_tclsh) [file join $exedir "tclsh${version}${ext}"] set PW(vm_wish) [file join $exedir "wish${version}${ext}"] } else { set PW(vm_tclsh) [auto_execok tclsh] set PW(vm_wish) [auto_execok wish] } if { ![file executable $PW(vm_tclsh)] } { ${log}::warn "$PW(vm_tclsh) is not a valid tclsh VM!" set PW(vm_tclsh) "" } else { ${log}::info "Using \"$PW(vm_tclsh)\" for tcl-only scripts" } if { ![file executable $PW(vm_wish)] } { ${log}::warn "$PW(vm_wish) is not a valid wish VM!" set PW(vm_wish) "" } else { ${log}::info "Using \"$PW(vm_wish)\" for tk scripts" } } # Now return the location of the requested VM or an empty string # on errors. set type [string tolower $type] switch -glob -- $type { t* { return $PW(vm_tclsh) } w* { return $PW(vm_wish) } j* { if { $PW(vm_java) eq "" } { set PW(vm_java) [::java::find vnumber] if { $PW(vm_java) eq "" } { ${log}::warn "Could not find java" } else { ${log}::notice "Using \"$PW(vm_java)\" ($vnumber)\ for java programs" } } return $PW(vm_java) } } return "" } # ::procwatch::__resolv -- Resolve service run-time support # # This procedure initialises the necessary variables that will # be essential to the run-time support of a service: where to # log, what to start, where to find VM interpreters (tcl, java, # etc.), etc. # # Arguments: # id Identifier of a service, as returned by ::procwatch::add # # Results: # Return a boolean value telling if the script should be # installed and started or not. # # Side Effects: # None. proc ::procwatch::__resolv { id } { variable PW variable log upvar #0 $id SERVICE upvar #0 $SERVICE(context) CONTEXT # Decide where we should log and whether we should log or not. set SERVICE(logfile) "" if { $CONTEXT(logdir) ne "" } { set raw [file tail [::diskutil::fname_resolv \ $SERVICE(-logfmt) $SERVICE(-main)]] set SERVICE(logfile) [file join $CONTEXT(logdir) $raw] } # Decide which executable to start set exe [string tolower $SERVICE(-type)] set install_it 0 set test_existence 0 switch -glob -- $exe { t* { set SERVICE(exe) [__vmfind tclsh] set test_existence 1 set install_it 1 } w* { set SERVICE(exe) [__vmfind wish] set test_existence 1 set install_it 1 } j* { set SERVICE(exe) [__vmfind java] set install_it 1 } s* - e* - b* { set SERVICE(exe) "" set test_existence 1 set install_it 1 } } # Check if script actually points to an existing file if { $test_existence && ! [file exists $SERVICE(-main)] } { ${log}::warn "\"$SERVICE(-main)\" is not a readable file!" set install_it 0 } if { $install_it } { set SERVICE(pid) "" } else { set SERVICE(state) DISABLED } return $install_it } # ::procwatch::__buildcmd -- Build command # # This procedure builds and remember the command necessary to # start a service. # # Arguments: # id Identifier of the service. # # Results: # None. # # Side Effects: # None. proc ::procwatch::__buildcmd { id } { variable PW variable log variable libdir # Look for the official script used for timestamping output set tstamper [file join $libdir .. bin timestamper.tcl] if { ! [file exists $tstamper] || ! [file readable $tstamper] } { set tstamper "" } else { if { [catch "::diskutil::absolute_path $tstamper" tstamper] != 0 } { set tstamper "" } } # Decide upon the command to start the service, this is still in # pseudo-code: %dir% will be replaced by the directory of the # script or by an empty directory if it is possible to change to # SERVICE(cmddir). upvar #0 $id SERVICE set SERVICE(cmddir) [file dirname $SERVICE(-main)] if { $SERVICE(exe) == "" } { set SERVICE(cmd) "exec \"%dir%[file tail $SERVICE(-main)]\"" append SERVICE(cmd) " $SERVICE(-args)" } else { set SERVICE(cmd) "exec \"$SERVICE(exe)\"" append SERVICE(cmd) " $SERVICE(-vmargs)" append SERVICE(cmd) " \"%dir%[file tail $SERVICE(-main)]\"" append SERVICE(cmd) " $SERVICE(-args)" } # Append log dump information if necessary. if { $SERVICE(logfile) ne "" } { set tclsh [__vmfind tclsh] if { $tstamper == "" || $tclsh eq "" } { set SERVICE(timestamper) "" append SERVICE(cmd) " >>& $SERVICE(logfile)" } else { set SERVICE(timestamper) "\"$tclsh\" \"$tstamper\" -ignorepreformat -datesensitive -outfile \"$SERVICE(logfile)\"" append SERVICE(cmd) " |& $SERVICE(timestamper)" } } ${log}::debug "Built pseudo-command for service $id: $SERVICE(cmd)" return $SERVICE(cmd) } # ::procwatch::__exec -- Execute service command # # This procedure executes the command that should be used for # starting the service. # # Arguments: # id Identifier of the service. # # Results: # Return the list of pids for the service, empty on errors. # # Side Effects: # Will spawn one or more processes! proc ::procwatch::__exec { id } { variable PW variable log upvar #0 $id SERVICE upvar #0 $SERVICE(context) CONTEXT # Build command if it does not exist. if { $SERVICE(cmd) eq "" } { __buildcmd $id } # Append restarting header with time to logfile. if { $SERVICE(logfile) != "" } { if { [catch "open $SERVICE(logfile) a" lf] == 0 } { puts $lf "" puts $lf \ "\tStarting service at [clock format [clock seconds]]" puts $lf \ "\t$SERVICE(exe) $SERVICE(-main) $SERVICE(-args)" close $lf } else { $ST(_log)::warn "Cannot open log file $SERVICE(logfile)" } } # Remember which directory we are in and change to the appropriate # directory, if possible. Try to adapt if we could not. if { [catch "pwd" curdir] != 0 } { $ST(_log)::warn "Could not remember current directory!" set curdir "" } if { [catch "cd $SERVICE(cmddir)"] == 0 } { set cmd [regsub %dir% $SERVICE(cmd) ""] } else { set cmd [regsub %dir% $SERVICE(cmd) "$SERVICE(cmddir)/"] } # Start the service ${log}::notice "Spawning: $cmd" append cmd " &" set res [catch $cmd pid] if { $res == 0 } { set SERVICE(pid) $pid set SERVICE(state) RUNNING if { [llength $pid] == 1 } { if { $SERVICE(timestamper) ne "" } { ${log}::info "Could not start service correctly, aborting" ::process::kill $pid set SERVICE(pid) $pid set SERVICE(state) DEAD } else { ${log}::info "Started with pid: $pid" } } else { ${log}::info "Started with pid: [lindex $pid 0]\ (logger: [lindex $pid 1])" } } else { set SERVICE(pid) "" set SERVICE(state) DEAD ${log}::warn "Could not start $cmd" } # Change back to the starting directory if { $curdir ne "" } { if { [catch "cd $curdir"] != 0 } { ${log}::warn "Could not change back to directory $curdir" } } # Remember the state of the current context. if { $CONTEXT(rundir) ne "" } { if { [runfile:write $CONTEXT(id)] < 0 } { ${log}::warn "Could not create running state file!" } } return $SERVICE(pid) } # ::procwatch::__recapture -- Recapture running processes # # Recapture already running processes into the services of a # context. The last running state is read and a comparison is # made between those. This comparison is based on the current # list of running processes at startup. # # Arguments: # cx Identifier of context # # Results: # None. # # Side Effects: # None. proc ::procwatch::__recapture { cx } { variable PW variable log # Reject if wrong context if { ![isa $cx context] } { ${log}::warn "$cx is not a known context" return -code error "$cx is not a known context" } # Skip recapturing if the context does not have any running # directory for process information storage. upvar #0 $cx CONTEXT if { $CONTEXT(rundir) eq "" } { return } ${log}::notice "Recapturing possibly lost services..." set running [runfile:read $cx] set all_pids [::process::list] # Now, for all old processes in the last running state, see if # these are still running (i.e. one of all_pids variable) and that # the running processes actually match the name of the script # identifying the process (anything else can have been starting # inbetween runs and got the same pid!). foreach s_info $running { set s_pids [lindex $s_info 1] set found 0 if { [lsearch $all_pids [lindex $s_pids 0]] >= 0 } { set m_pids [::process::find [lindex $s_info 2]] if { [lsearch $m_pids [lindex $s_pids 0]] >= 0 } { set found 1 } } if { $found && [llength $s_pids] > 1 } { set found 0 if { [lsearch $all_pids [lindex $s_pids 1]] >= 0 } { set m_pids [::process::find timestamper.tcl] if { [lsearch $m_pids [lindex $s_pids 1]] >= 0 } { set found 1 } } } # We have found and matched both old pids against the actual # running state. Recapture through setting the Service(pid) # list back to these pids. if { $found } { ${log}::info "Recaptured [lindex $s_info 2]..." set id [find [lindex $s_info 2]] if { $id ne "" } { upvar #0 $id SERVICE set SERVICE(pid) $s_pids set SERVICE(state) RUNNING } } } } # ::procwatch::start -- Start services # # This procedure will start a number of services. Do not start # them if they are alread on. When several services are # started, the -pause option of the context will be honoured and # services will be started with pauses in between them. # # Arguments: # cx Identifier of context # svcs List of services to start (empty means all) # force Force restarting of processes? # # Results: # Return the list of identifiers of the processes that were # effectively started. # # Side Effects: # None. proc ::procwatch::start { cx {svcs {}} {force off}} { variable PW variable log # Reject if wrong context if { ![isa $cx context] } { ${log}::warn "$cx is not a known context" return -code error "$cx is not a known context" } upvar #0 $cx CONTEXT # Find all services of the context if we did not specify any. if { [llength $svcs] == 0 } { set svcs [get $cx services] } # Now start services. set started "" set when 0 foreach s $svcs { upvar #0 $s SERVICE if { [llength $SERVICE(pid)] > 0 && $force } { ${log}::debug "Killing processes of service $SERVICE(id)" foreach pid $SERVICE(pid) { ::process::kill $pid } set SERVICE(pid) [list] set SERVICE(state) DEAD } if { [llength $SERVICE(pid)] == 0 || $force } { if { $SERVICE(state) ne "DISABLED" } { ${log}::info "Scheduling service $s to start in $when ms." set SERVICE(state) STARTING set SERVICE(starting) [after $when ::procwatch::__exec $s] incr when [expr int($CONTEXT(-pause) * 1000)] } } } } # ::procwatch::__projection -- List projection # # This procedure returns the n'th index of a list. # # Arguments: # n index # l List to extract from # # Results: # Return the n'th index of a list # # Side Effects: # None. proc ::procwatch::__projection { n l } { ::lindex $l $n } # ::procwatch::__intersect3 -- List intersection # # Perform list intersection. # # Arguments: # list1 First list # list2 Second list # # Results: # Returns a list containing three lists. The first list is # everything in the first list that wasn't in the second, the # second list contains the intersection of the two lists, the # third list contains everything in the second list that wasn't # in the first. # # Side Effects: # None. # # Copyright 1992-1999 Karl Lehenbauer and Mark Diekhans. # # Permission to use, copy, modify, and distribute this software and its # documentation for any purpose and without fee is hereby granted, provided # that the above copyright notice appear in all copies. Karl Lehenbauer and # Mark Diekhans make no representations about the suitability of this # software for any purpose. It is provided "as is" without express or # implied warranty. proc ::procwatch::__intersect3 {list1 list2} { set la1(0) {} ; unset la1(0) set lai(0) {} ; unset lai(0) set la2(0) {} ; unset la2(0) foreach v $list1 { set la1($v) {} } foreach v $list2 { set la2($v) {} } foreach elem [concat $list1 $list2] { if {[info exists la1($elem)] && [info exists la2($elem)]} { unset la1($elem) unset la2($elem) set lai($elem) {} } } list [lsort [array names la1]] [lsort [array names lai]] \ [lsort [array names la2]] } # ::procwatch::__watch -- Watch services # # This procedure will arrange to regularly watch a number of # (started) services, restarting them if necessary. # # Arguments: # cx Identifier of context # svcs List of services to start (empty means all) # # Results: # None. # # Side Effects: # None. proc ::procwatch::__watch { cx { svcs "" } } { variable PW variable log # Reject if wrong context if { ![isa $cx context] } { ${log}::warn "$cx is not a known context" return -code error "$cx is not a known context" } upvar #0 $cx CONTEXT # Find all services of the context if we did not specify any. if { [llength $svcs] == 0 } { set svc_watch [get $cx services] } else { set svc_watch $svcs } # Gather all pids of the services that we should watch set svc_pids [list] foreach s $svc_watch { upvar #0 $s SERVICE if { [llength $SERVICE(pid)] > 0 } { lappend svc_pids [lindex $SERVICE(pid) 0] } } ${log}::debug "pids for our services should be: $svc_pids" # Gather information for all currently processes running on this # machine. From this list, isolate services that should be # running but are not and restart services which virtual memory # usage has become greater than the allowed limit. set restart [list] if { [catch "::process::full_list" f_pids] == 0 } { # First restart processes that have disappeared and crashed set pids [::struct::list map $f_pids "__projection 0"] foreach {others running died} [__intersect3 $pids $svc_pids] {} # Parse through the list of processes that still do not exist # and find their corresponding service if any. Restart all # those that are enabled and not currently already starting. if { [llength $died] > 0 } { foreach d $died { set sid [find:bypid $d] if { $sid ne "" } { upvar #0 $sid SERVICE # Filter away services that we know are already # starting or are disabled. if { $SERVICE(state) ne "DISABLED" \ && $SERVICE(state) ne "STARTING" } { lappend restart $sid } } } ${log}::notice "Some services have died, restarting: $restart" } # Now, restart processes that have grown too big. set toobig [list] foreach p_info $f_pids { foreach {pid usr prg ppid msize} $p_info {} set sid [find:bypid $pid] if { $sid ne "" } { upvar #0 $sid SERVICE if { $SERVICE(state) eq "RUNNING" \ && $SERVICE(-maxsize) > 0 && $SERVICE(-maxsize) ne "" \ && $msize > $SERVICE(-maxsize) } { ${log}::notice "In service $sid, $pid is $msize kB, i.e.\ larger than the allowed $SERVICE(-maxsize)" lappend toobig $sid } } } if { [llength $toobig] > 0 } { ${log}::notice "Some services are too big, restarting: $toobig" } # Do the restarting of all these in one pass only. set restart [lsort -unique [concat $restart $toobig]] if { [llength $restart] > 0 } { start $cx $restart 1 } } else { ${log}::warn "Cannot get process list for now, sleeping again" } # Start the services that were still not started if we have autostart on if { [string is true $CONTEXT(-autostart)] } { foreach {tocheck unused1 unused2} [__intersect3 $svc_watch $restart] {} ${log}::notice "Automatically checking [llength $tocheck] service(s)" if { [llength $tocheck] > 0 } { start $cx $tocheck } } # Reschedule next check set CONTEXT(schedule) [after [expr $CONTEXT(-watch) * 1000] \ "::procwatch::__watch $cx \"$svcs\""] } # ::procwatch::kill -- Kill processes # # Force killing of some or all of the current processes # associated to the services. This procedure does not restart # these processes, however, if the processes are being watched, # they will automatically be restarted at the next contextual # check. # # Arguments: # svcs List of service identifiers to kill or contexts, in which # case all the services of these context will be killed. # # Results: # Return the identifiers of the processes that were killed. # # Side Effects: # None. proc ::procwatch::kill { { svcs {}} } { variable PW variable log set tokill [list] if { [llength $svcs] == 0 } { set tokill [info vars ::procwatch::service_*] } else { foreach id $svcs { if { [isa $id context] } { foreach sid [get $id services] { lappend tokill $sid } } elseif { [isa $id service] } { lappend tokill $id } else { ${log}::warn "$id is neither a process context or a process" } } } set killed [list] foreach s $tokill { upvar #0 $s SERVICE switch $SERVICE(state) { STARTING { after cancel $SERVICE(starting) set SERVICE(pid) [list] set SERVICE(state) DEAD } RUNNING { foreach pid $SERVICE(pid) { if { [::process::kill $pid] == $pid } { lappend killed $pid } else { ${log}::warn "Failed killing process $pid in service $s" } } set SERVICE(pid) [list] set SERVICE(state) DEAD upvar #0 $SERVICE(context) CONTEXT if { $CONTEXT(rundir) ne "" } { runfile:write $CONTEXT(id) } } } } return $killed } # ::procwatch::__logout -- Callback log watches # # This procedure is called wheneven a line has been output to # the log file of a watched service. It relays the callbacks # that are associated to the watch. # # Arguments: # sid Identifier of service # wid logwatch identifier (unused) # line log line # # Results: # None. # # Side Effects: # None. proc ::procwatch::__logout { sid wid line } { variable PW variable log puts "incoming line $line" upvar #0 $sid SERVICE foreach wid [get $sid logwatches] { upvar #0 $wid LOGWATCH ${log}::debug "Forwarding '$line' to $wid" if { [catch {eval $LOGWATCH(command) $wid \$line} err] } { ${log}::warn "Error when calling watch callback\ $LOGWATCH(command): $err" } } } # ::procwatch::log:watch -- Start watching a log # # This command register a command to be called whenever output # is sent to the log file associated to a service. The callback # will never be called back if the context has not been setup to # accomodate logs. # # Arguments: # sid Identifier of a service # cmd Command to call back, the identifier of the watch and # the log line will be appended to the command on log output. # # Results: # Return an identifier that can be used to unregister the watch, # empty string on error (no logfile, thus no log watch # possible). # # Side Effects: # None. proc ::procwatch::log:watch { sid cmd } { variable PW variable log # Reject if wrong context if { ! [isa $sid service srid] } { ${log}::warn "$sid is not a known service" return -code error "$sid is not a known service" } upvar #0 $sid SERVICE if { $SERVICE(wid) eq "" && $SERVICE(logfile) ne "" } { set SERVICE(wid) [::logwatch::new $SERVICE(logfile) \ [list ::procwatch::__logout $sid]] ${log}::debug "Created log watch context for $SERVICE(logfile):\ $SERVICE(wid)" } if { $SERVICE(wid) ne "" } { isa $SERVICE(context) context cxid set varname \ [namespace current]::logwatch_${cxid}_${srid}_[incr PW(widgene)] upvar #0 $varname LOGWATCH set LOGWATCH(id) $varname set LOGWATCH(service) $sid set LOGWATCH(command) $cmd ${log}::debug "Created new log watch on $sid, forwarding\ $SERVICE(logfile) to '$cmd'" return $LOGWATCH(id) } return "" } # ::procwatch::log:unwatch -- Stop one or several log watch # # Remove one or several log watches. # # Arguments: # ids List of identifiers, either log watches, service or context # identifiers, in which case all log watches are stopped. An # empty list means all the logwatches of all contexts # # Results: # Return the list of log watches that were removed # # Side Effects: # None. proc ::procwatch::log:unwatch { {ids {}}} { variable PW variable log # Gather in "unwatch" the list of logwatch identifiers that we # should stop watching. This means recursing through the # contexts, and services if necessary. set unwatch [list] if { [llength $ids] == 0 } { set unwatch [info vars ::procwatch::logwatch_*] } else { foreach id $ids { if { [isa $id context] } { foreach sid [get $id services] { upvar #0 $sid SERVICE foreach wid [get $sid logwatches] { lappend unwatch $wid } } } elseif { [isa $id service] } { foreach wid [get $id logwatches] { lappend unwatch $wid } } elseif { [isa $id logwatch] } { lappend unwatch $id } else { ${log}::warn "$id is neither a process context, a process or\ a log watch" } } } # Now remove the logwatch, gathering in "svcs" the list of # services that hosted them. set svcs [list] foreach id $unwatch { upvar #0 $id LOGWATCH lappend svcs $LOGWATCH(service) unset LOGWATCH } # Stop watching the logfile for all services that do not have any # logwatch associated to them anymore, this is to avoid # unnecessary resource hog (shouldn't be, but I like to clean up). foreach sid [lsort -unique $svcs] { upvar #0 $sid SERVICE set watches [get $sid logwatches] if { [llength $watches] == 0 } { ::logwatch::delete $SERVICE(wid) set SERVICE(wid) "" } } return $unwatch } # ::procwatch::remove -- Remove services and contexts # # Remove one or more services or contexts, killing their # associated processes if necessary. Return the list of # processes that were killed. # # Arguments: # svcs List of service identifiers to kill or contexts, in which # case all the services of these context will be killed. # force Force killing of processes # # Results: # Return the identifiers of the processes that were killed. # # Side Effects: # None. proc ::procwatch::remove { {svcs {}} {force 1}} { variable PW variable log # Arrange to gather the contexts and services that should be # removed in two different lists. set svc_toremove [list] set cx_toremove [list] if { [llength $svcs] == 0 } { set svc_toremove [info vars ::procwatch::service_*] set cx_toremove [info vars ::procwatch::context_*] } else { foreach id $svcs { if { [isa $id context] } { foreach sid [get $id services] { lappend svc_toremove $sid } lappend cx_toremove $id } elseif { [isa $id service] } { lappend svc_toremove $id } else { ${log}::warn "$id is neither a process context or a process" } } } # Kill services and remove their state ${log}::debug "Suppressing services $svc_toremove..." set killed [list] foreach s $svc_toremove { if { [string is true $force] } { set killed [concat $killed [kill $s]] } upvar #0 $s SERVICE # Stop watching the log output if { $SERVICE(wid) ne "" } { ::logwatch::delete $SERVICE(wid) } # Remove all logwatches on the log output, if any foreach v [get $s logwatches] { unset $v } # Finally remove the service object unset SERVICE } # Remove all contexts and remove their state ${log}::debug "Removing contexts $cx_toremove..." foreach c $cx_toremove { upvar #0 $c CONTEXT if { $CONTEXT(schedule) ne "" } { after cancel $CONTEXT(schedule) } unset CONTEXT } return $killed } # ::procwatch::get -- Get object properties # # This procedure returns some semi-internal objects properties # to other modules. The properties that are recognised are # dependent on the identifier of the object passed as a # parameter. Any property led by a dash is a configuration # option. # # Arguments: # id Identifier of object (context, service or logwatch). # type Property to get # # Results: # The value of the property or an empty string # # Side Effects: # None. proc ::procwatch::get { id type } { variable PW variable log if { ![isa $id context] && ![isa $id service] \ && ![isa $id logwatch] } { ${log}::warn "$id is neither a process context, a service or a logwatch" return -code error \ "$id is neither a process context, a service or a logwatch" } if { [isa $id context] } { upvar #0 $id CONTEXT switch -glob -- $type { "id" - "logdir" - "rundir" { return $CONTEXT($type) } "services" { set uscore [string last _ $id] set cxid [string range $id [expr $uscore +1] end] return [info vars "::procwatch::service_${cxid}_*"] } "-*" { return [config $id $type] } } } if { [isa $id service] } { upvar #0 $id SERVICE switch -glob -- $type { "id" - "context" - "exe" - "cmd" - "cmddir" - "logfile" - "state" - "pid" { return $SERVICE($type) } "logwatches" { set uscore [string last _ $id] set srid [string range $id [expr $uscore +1] end] set uscore [string last _ $SERVICE(context)] set cxid [string range $SERVICE(context) \ [expr $uscore +1] end] return [info vars "::procwatch::logwatch_${cxid}_${srid}_*"] } "-*" { return [config $id $type] } } } if { [isa $id logwatch] } { upvar #0 $id LOGWATCH switch -glob -- $type { "id" - "service" - "command" { return $LOGWATCH($type) } } } return "" } # ::procwatch::config -- Get/Set configuration options # # This procedure will set or get the options of a service # context or a service. # # Arguments: # id Identifier of the service or the context # args list of options # # Results: # Return all options, the option requested or set the options # # Side Effects: # None. proc ::procwatch::config { id args } { variable PW variable log if { [info vars $id] eq "" } { ${log}::warn "$id is neither a process context or a process" return -code error "$id is neither a process context or a process" } if { [isa $id context] } { upvar #0 $id CONTEXT set result [eval ::uobj::config CONTEXT "-*" $args] if { $result eq "" } { __initdirs $id __uniqueness $id } if { $CONTEXT(schedule) eq "" } { set CONTEXT(schedule) [after idle ::procwatch::__watch $id] } return $result } else { upvar #0 $id SERVICE set result [eval ::uobj::config SERVICE "-*" $args] if { $result eq "" } { __resolv $id } return $result } } # ::procwatch::add -- Create a new service # # This procedure creates a new service and associates it to the # controller which identifier is passed as a first argument. # The service will be continuously watched and restarted as soon # as it dies. # # Arguments: # cx Idendifier of the controller, as returned by ::procwatch::new # args A list of dash led options and their values # # Results: # Return the identifier of the service. This identifier will be # used in many other procedures. # # Side Effects: # None. proc ::procwatch::add { cx args } { variable PW variable log if { ![isa $cx context] } { ${log}::warn "$cx is not a known context" return -code error "$cx is not a known context" } upvar #0 $cx CONTEXT set uscore [string last _ $cx] set cxid [string range $cx [expr $uscore +1] end] set varname [namespace current]::service_${cxid}_[incr PW(sidgene)] upvar #0 $varname SERVICE set SERVICE(id) $varname set SERVICE(context) $cx set SERVICE(exe) "" set SERVICE(cmd) "" set SERVICE(cmddir) "" set SERVICE(wid) "" set SERVICE(state) DEAD ::uobj::inherit PW SERVICE $PW(srv_inheritance) eval config $SERVICE(id) $args if { [string is true CONTEXT(autocapture)] } { __recapture $cx } return $SERVICE(id) } # ::procwatch::read -- Read service descriptions # # This procedure will read a service description file into a # service context, thus creating new services. The services # will be started or not, depending on the -autostart option of # the context. # # Arguments: # cx Identifier of context # fname Name of file # # Results: # Return the list of services that were created. # # Side Effects: # None. proc ::procwatch::read { cx fname } { variable PW variable log if { ![isa $cx context] } { ${log}::warn "$cx is not a known context" return -code error "$cx is not a known context" } upvar #0 $cx CONTEXT set CONTEXT(autocapture) off set services [list] if { [catch "open $fname r" sf] != 0 } { ${log}::error "Unable to open $fname" } else { while { ! [eof $sf] } { set line [string trim [gets $sf]] if { $line ne "" } { set firstchar [string index $line 0] # Skip all lines that are commented. if { [string first $firstchar $PW(comments)] < 0 } { set type [lindex $line 0] set script [lindex $line 1] set params [lrange $line 2 end] set idx [string first "-- " $params] if { $idx < 0 } { lappend services [add $cx -main $script -type $type \ -args $params] } else { set vmparams [string range $params 0 [expr $idx - 1]] set params [string range $params [expr $idx + 3] end] lappend services [add $cx -main $script -type $type \ -args $params -vmargs $vmparams] } } } } close $sf } # Recapture running processes. __recapture $cx set CONTEXT(autocapture) on return $services } # ::procwatch::new -- Create new service controller # # This procedure creates a new service controller context. The # context will hold information about resources such as log # directories or run-time controlling directories. Services # will have to be associated to a context to be created. # # Arguments: # args A list of dash led options and their values # # Results: # Return the identifier of the context. This identifier will be # used in many other procedures. # # Side Effects: # None. proc ::procwatch::new { args } { variable PW variable log set varname [namespace current]::context_[incr PW(cidgene)] upvar #0 $varname CONTEXT set CONTEXT(id) $varname set CONTEXT(rundir) "" set CONTEXT(schedule) "" set CONTEXT(autocapture) on ::uobj::inherit PW CONTEXT $PW(cx_inheritance) eval config $CONTEXT(id) $args return $CONTEXT(id) } package provide procwatch 1.0