# ACTIVESTATE TEAPOT-PKG BEGIN TM -*- tcl -*- # -- Tcl Module # @@ Meta Begin # Package textutil::adjust 0.7.1 # Meta as::build::date 2015-01-23 # Meta as::origin http://sourceforge.net/projects/tcllib # Meta category Text and string utilities, macro processing # Meta description Procedures to adjust, indent, and undent paragraphs # Meta license BSD # Meta platform tcl # Meta require {Tcl 8.2} # Meta require textutil::repeat # Meta require textutil::string # Meta subject undenting justification indenting TeX adjusting # Meta subject paragraph string hyphenation formatting # Meta summary textutil::adjust # @@ Meta End # ACTIVESTATE TEAPOT-PKG BEGIN REQUIREMENTS package require Tcl 8.2 package require textutil::repeat package require textutil::string # ACTIVESTATE TEAPOT-PKG END REQUIREMENTS # ACTIVESTATE TEAPOT-PKG BEGIN DECLARE package provide textutil::adjust 0.7.1 # ACTIVESTATE TEAPOT-PKG END DECLARE # ACTIVESTATE TEAPOT-PKG END TM # trim.tcl -- # # Various ways of trimming a string. # # Copyright (c) 2000 by Ajuba Solutions. # Copyright (c) 2000 by Eric Melski # Copyright (c) 2002-2004 by Johannes-Heinrich Vogeler # Copyright (c) 2001-2006 by Andreas Kupries # # See the file "license.terms" for information on usage and redistribution # of this file, and for a DISCLAIMER OF ALL WARRANTIES. # # RCS: @(#) $Id: adjust.tcl,v 1.16 2011/12/13 18:12:56 andreas_kupries Exp $ # ### ### ### ######### ######### ######### ## Requirements package require Tcl 8.2 package require textutil::repeat package require textutil::string namespace eval ::textutil::adjust {} # ### ### ### ######### ######### ######### ## API implementation namespace eval ::textutil::adjust { namespace import -force ::textutil::repeat::strRepeat } proc ::textutil::adjust::adjust {text args} { if {[string length [string trim $text]] == 0} { return "" } Configure $args Adjust text newtext return $newtext } proc ::textutil::adjust::Configure {args} { variable Justify left variable Length 72 variable FullLine 0 variable StrictLength 0 variable Hyphenate 0 variable HyphPatterns ; # hyphenation patterns (TeX) set args [ lindex $args 0 ] foreach { option value } $args { switch -exact -- $option { -full { if { ![ string is boolean -strict $value ] } then { error "expected boolean but got \"$value\"" } set FullLine [ string is true $value ] } -hyphenate { # the word exceeding the length of line is tried to be # hyphenated; if a word cannot be hyphenated to fit into # the line processing stops! The length of the line should # be set to a reasonable value! if { ![ string is boolean -strict $value ] } then { error "expected boolean but got \"$value\"" } set Hyphenate [string is true $value] if { $Hyphenate && ![info exists HyphPatterns(_LOADED_)]} { error "hyphenation patterns not loaded!" } } -justify { set lovalue [ string tolower $value ] switch -exact -- $lovalue { left - right - center - plain { set Justify $lovalue } default { error "bad value \"$value\": should be center, left, plain or right" } } } -length { if { ![ string is integer $value ] } then { error "expected positive integer but got \"$value\"" } if { $value < 1 } then { error "expected positive integer but got \"$value\"" } set Length $value } -strictlength { # the word exceeding the length of line is moved to the # next line without hyphenation; words longer than given # line length are cut into smaller pieces if { ![ string is boolean -strict $value ] } then { error "expected boolean but got \"$value\"" } set StrictLength [ string is true $value ] } default { error "bad option \"$option\": must be -full, -hyphenate, \ -justify, -length, or -strictlength" } } } return "" } # ::textutil::adjust::Adjust # # History: # rewritten on 2004-04-13 for bugfix tcllib-bugs-882402 (jhv) proc ::textutil::adjust::Adjust { varOrigName varNewName } { variable Length variable FullLine variable StrictLength variable Hyphenate upvar $varOrigName orig upvar $varNewName text set pos 0; # Cursor after writing set line "" set text "" if {!$FullLine} { regsub -all -- "(\n)|(\t)" $orig " " orig regsub -all -- " +" $orig " " orig regsub -all -- "(^ *)|( *\$)" $orig "" orig } set words [split $orig] set numWords [llength $words] set numline 0 for {set cnt 0} {$cnt < $numWords} {incr cnt} { set w [lindex $words $cnt] set wLen [string length $w] # the word $w doesn't fit into the present line # case #1: we try to hyphenate if {$Hyphenate && ($pos+$wLen >= $Length)} { # Hyphenation instructions set w2 [textutil::adjust::Hyphenation $w] set iMax [llength $w2] if {$iMax == 1 && [string length $w] > $Length} { # word cannot be hyphenated and exceeds linesize error "Word \"$w2\" can\'t be hyphenated\ and exceeds linesize $Length!" } else { # hyphenating of $w was successfull, but we have to look # that every sylable would fit into the line foreach x $w2 { if {[string length $x] >= $Length} { error "Word \"$w\" can\'t be hyphenated\ to fit into linesize $Length!" } } } for {set i 0; set w3 ""} {$i < $iMax} {incr i} { set syl [lindex $w2 $i] if {($pos+[string length " $w3$syl-"]) > $Length} {break} append w3 $syl } for {set w4 ""} {$i < $iMax} {incr i} { set syl [lindex $w2 $i] append w4 $syl } if {[string length $w3] && [string length $w4]} { # hyphenation was successfull: redefine # list of words w => {"$w3-" "$w4"} set x [lreplace $words $cnt $cnt "$w4"] set words [linsert $x $cnt "$w3-"] set w [lindex $words $cnt] set wLen [string length $w] incr numWords } } # the word $w doesn't fit into the present line # case #2: we try to cut the word into pieces if {$StrictLength && ([string length $w] > $Length)} { # cut word into two pieces set w2 $w set over [expr {$pos+2+$wLen-$Length}] set w3 [string range $w2 0 $Length] set w4 [string range $w2 [expr {$Length+1}] end] set x [lreplace $words $cnt $cnt $w4] set words [linsert $x $cnt $w3 ] set w [lindex $words $cnt] set wLen [string length $w] incr numWords } # continuing with the normal procedure if {($pos+$wLen < $Length)} { # append word to current line if {$pos} {append line " "; incr pos} append line $w incr pos $wLen } else { # line full => write buffer and begin a new line if {[string length $text]} {append text "\n"} append text [Justification $line [incr numline]] set line $w set pos $wLen } } # write buffer and return! if {[string length $text]} {append text "\n"} append text [Justification $line end] return $text } # ::textutil::adjust::Justification # # justify a given line # # Parameters: # line text for justification # index index for line in text # # Returns: # the justified line # # Remarks: # Only lines with size not exceeding the max. linesize provided # for text formatting are justified!!! proc ::textutil::adjust::Justification { line index } { variable Justify variable Length variable FullLine set len [string length $line]; # length of current line if { $Length <= $len } then { # the length of current line ($len) is equal as or greater than # the value provided for text formatting ($Length) => to avoid # inifinite loops we leave $line unchanged and return! return $line } # Special case: # for the last line, and if the justification is set to 'plain' # the real justification is 'left' if the length of the line # is less than 90% (rounded) of the max length allowed. This is # to avoid expansion of this line when it is too small: without # it, the added spaces will 'unbeautify' the result. # set justify $Justify if { ( "$index" == "end" ) && \ ( "$Justify" == "plain" ) && \ ( $len < round($Length * 0.90) ) } then { set justify left } # For a left justification, nothing to do, but to # add some spaces at the end of the line if requested if { "$justify" == "left" } then { set jus "" if { $FullLine } then { set jus [strRepeat " " [ expr { $Length - $len } ]] } return "${line}${jus}" } # For a right justification, just add enough spaces # at the beginning of the line if { "$justify" == "right" } then { set jus [strRepeat " " [ expr { $Length - $len } ]] return "${jus}${line}" } # For a center justification, add half of the needed spaces # at the beginning of the line, and the rest at the end # only if needed. if { "$justify" == "center" } then { set mr [ expr { ( $Length - $len ) / 2 } ] set ml [ expr { $Length - $len - $mr } ] set jusl [strRepeat " " $ml] set jusr [strRepeat " " $mr] if { $FullLine } then { return "${jusl}${line}${jusr}" } else { return "${jusl}${line}" } } # For a plain justification, it's a little bit complex: # # if some spaces are missing, then # # 1) sort the list of words in the current line by decreasing size # 2) foreach word, add one space before it, except if it's the # first word, until enough spaces are added # 3) rebuild the line if { "$justify" == "plain" } then { set miss [ expr { $Length - [ string length $line ] } ] # Bugfix tcllib-bugs-860753 (jhv) set words [split $line] set numWords [llength $words] if {$numWords < 2} { # current line consists of less than two words - we can't # insert blanks to achieve a plain justification => leave # $line unchanged and return! return $line } for {set i 0; set totalLen 0} {$i < $numWords} {incr i} { set w($i) [lindex $words $i] if {$i > 0} {set w($i) " $w($i)"} set wLen($i) [string length $w($i)] set totalLen [expr {$totalLen+$wLen($i)}] } set miss [expr {$Length - $totalLen}] # len walks through all lengths of words of the line under # consideration for {set len 1} {$miss > 0} {incr len} { for {set i 1} {($i < $numWords) && ($miss > 0)} {incr i} { if {$wLen($i) == $len} { set w($i) " $w($i)" incr wLen($i) incr miss -1 } } } set line "" for {set i 0} {$i < $numWords} {incr i} { set line "$line$w($i)" } # End of bugfix return "${line}" } error "Illegal justification key \"$justify\"" } proc ::textutil::adjust::SortList { list dir index } { if { [ catch { lsort -integer -$dir -index $index $list } sl ] != 0 } then { error "$sl" } return $sl } # Hyphenation utilities based on Knuth's algorithm # # Copyright (C) 2001-2003 by Dr.Johannes-Heinrich Vogeler (jhv) # These procedures may be used as part of the tcllib # textutil::adjust::Hyphenation # # Hyphenate a string using Knuth's algorithm # # Parameters: # str string to be hyphenated # # Returns: # the hyphenated string proc ::textutil::adjust::Hyphenation { str } { # if there are manual set hyphenation marks e.g. "Recht\-schrei\-bung" # use these for hyphenation and return if {[regexp {[^\\-]*[\\-][.]*} $str]} { regsub -all {(\\)(-)} $str {-} tmp return [split $tmp -] } # Don't hyphenate very short words! Minimum length for hyphenation # is set to 3 characters! if { [string length $str] < 4 } then { return $str } # otherwise follow Knuth's algorithm variable HyphPatterns; # hyphenation patterns (TeX) set w ".[string tolower $str]."; # transform to lower case set wLen [string length $w]; # and add delimiters # Initialize hyphenation weights set s {} for {set i 0} {$i < $wLen} {incr i} { lappend s 0 } for {set i 0} {$i < $wLen} {incr i} { set kmax [expr {$wLen-$i}] for {set k 1} {$k < $kmax} {incr k} { set sw [string range $w $i [expr {$i+$k}]] if {[info exists HyphPatterns($sw)]} { set hw $HyphPatterns($sw) set hwLen [string length $hw] for {set l1 0; set l2 0} {$l1 < $hwLen} {incr l1} { set c [string index $hw $l1] if {[string is digit $c]} { set sPos [expr {$i+$l2}] if {$c > [lindex $s $sPos]} { set s [lreplace $s $sPos $sPos $c] } } else { incr l2 } } } } } # Replace all even hyphenation weigths by zero for {set i 0} {$i < [llength $s]} {incr i} { set c [lindex $s $i] if {!($c%2)} { set s [lreplace $s $i $i 0] } } # Don't start with a hyphen! Take also care of words enclosed in quotes # or that someone has forgotten to put a blank between a punctuation # character and the following word etc. for {set i 1} {$i < ($wLen-1)} {incr i} { set c [string range $w $i end] if {[regexp {^[:alpha:][.]*} $c]} { for {set k 1} {$k < ($i+1)} {incr k} { set s [lreplace $s $k $k 0] } break } } # Don't separate the last character of a word with a hyphen set max [expr {[llength $s]-2}] if {$max} {set s [lreplace $s $max end 0]} # return the syllabels of the hyphenated word as a list! set ret "" set w ".$str." for {set i 1} {$i < ($wLen-1)} {incr i} { if {[lindex $s $i]} { append ret - } append ret [string index $w $i] } return [split $ret -] } # textutil::adjust::listPredefined # # Return the names of the hyphenation files coming with the package. # # Parameters: # None. # # Result: # List of filenames (without directory) proc ::textutil::adjust::listPredefined {} { variable here return [glob -type f -directory $here -tails *.tex] } # textutil::adjust::getPredefined # # Retrieve the full path for a predefined hyphenation file # coming with the package. # # Parameters: # name Name of the predefined file. # # Results: # Full path to the file, or an error if it doesn't # exist or is matching the pattern *.tex. proc ::textutil::adjust::getPredefined {name} { variable here if {![string match *.tex $name]} { return -code error \ "Illegal hyphenation file \"$name\"" } set path [file join $here $name] if {![file exists $path]} { return -code error \ "Unknown hyphenation file \"$path\"" } return $path } # textutil::adjust::readPatterns # # Read hyphenation patterns from a file and store them in an array # # Parameters: # filNam name of the file containing the patterns proc ::textutil::adjust::readPatterns { filNam } { variable HyphPatterns; # hyphenation patterns (TeX) # HyphPatterns(_LOADED_) is used as flag for having loaded # hyphenation patterns from the respective file (TeX format) if {[info exists HyphPatterns(_LOADED_)]} { unset HyphPatterns(_LOADED_) } # the array xlat provides translation from TeX encoded characters # to those of the ISO-8859-1 character set set xlat(\"s) \337; # 223 := sharp s " set xlat(\`a) \340; # 224 := a, grave set xlat(\'a) \341; # 225 := a, acute set xlat(\^a) \342; # 226 := a, circumflex set xlat(\"a) \344; # 228 := a, diaeresis " set xlat(\`e) \350; # 232 := e, grave set xlat(\'e) \351; # 233 := e, acute set xlat(\^e) \352; # 234 := e, circumflex set xlat(\`i) \354; # 236 := i, grave set xlat(\'i) \355; # 237 := i, acute set xlat(\^i) \356; # 238 := i, circumflex set xlat(\~n) \361; # 241 := n, tilde set xlat(\`o) \362; # 242 := o, grave set xlat(\'o) \363; # 243 := o, acute set xlat(\^o) \364; # 244 := o, circumflex set xlat(\"o) \366; # 246 := o, diaeresis " set xlat(\`u) \371; # 249 := u, grave set xlat(\'u) \372; # 250 := u, acute set xlat(\^u) \373; # 251 := u, circumflex set xlat(\"u) \374; # 252 := u, diaeresis " set fd [open $filNam RDONLY] set status 0 while {[gets $fd line] >= 0} { switch -exact $status { PATTERNS { if {[regexp {^\}[.]*} $line]} { # End of patterns encountered: set status # and ignore that line set status 0 continue } else { # This seems to be pattern definition line; to process it # we have first to do some editing # # 1) eat comments in a pattern definition line # 2) eat braces and coded linefeeds set z [string first "%" $line] if {$z > 0} { set line [string range $line 0 [expr {$z-1}]] } regsub -all {(\\n|\{|\})} $line {} tmp set line $tmp # Now $line should consist only of hyphenation patterns # separated by white space # Translate TeX encoded characters to ISO-8859-1 characters # using the array xlat defined above foreach x [array names xlat] { regsub -all {$x} $line $xlat($x) tmp set line $tmp } # split the line and create a lookup array for # the repective hyphenation patterns foreach item [split $line] { if {[string length $item]} { if {![string match {\\} $item]} { # create index for hyphenation patterns set var $item regsub -all {[0-9]} $var {} idx # store hyphenation patterns as elements of an array set HyphPatterns($idx) $item } } } } } EXCEPTIONS { if {[regexp {^\}[.]*} $line]} { # End of patterns encountered: set status # and ignore that line set status 0 continue } else { # to be done in the future } } default { if {[regexp {^\\endinput[.]*} $line]} { # end of data encountered, stop processing and # ignore all the following text .. break } elseif {[regexp {^\\patterns[.]*} $line]} { # begin of patterns encountered: set status # and ignore that line set status PATTERNS continue } elseif {[regexp {^\\hyphenation[.]*} $line]} { # some particular cases to be treated separately set status EXCEPTIONS continue } else { set status 0 } } } } close $fd set HyphPatterns(_LOADED_) 1 return } ####################################################### # @c The specified block is indented # @c by ing each line. The first # @c lines ares skipped. # # @a text: The paragraph to indent. # @a prefix: The string to use as prefix for each line # @a prefix: of with. # @a skip: The number of lines at the beginning to leave untouched. # # @r Basically , but indented a certain amount. # # @i indent # @n This procedure is not checked by the testsuite. proc ::textutil::adjust::indent {text prefix {skip 0}} { set text [string trimright $text] set res [list] foreach line [split $text \n] { if {[string compare "" [string trim $line]] == 0} { lappend res {} } else { set line [string trimright $line] if {$skip <= 0} { lappend res $prefix$line } else { lappend res $line } } if {$skip > 0} {incr skip -1} } return [join $res \n] } # Undent the block of text: Compute LCP (restricted to whitespace!) # and remove that from each line. Note that this preverses the # shaping of the paragraph (i.e. hanging indent are _not_ flattened) # We ignore empty lines !! proc ::textutil::adjust::undent {text} { if {$text == {}} {return {}} set lines [split $text \n] set ne [list] foreach l $lines { if {[string length [string trim $l]] == 0} continue lappend ne $l } set lcp [::textutil::string::longestCommonPrefixList $ne] if {[string length $lcp] == 0} {return $text} regexp "^(\[\t \]*)" $lcp -> lcp if {[string length $lcp] == 0} {return $text} set len [string length $lcp] set res [list] foreach l $lines { if {[string length [string trim $l]] == 0} { lappend res {} } else { lappend res [string range $l $len end] } } return [join $res \n] } # ### ### ### ######### ######### ######### ## Data structures namespace eval ::textutil::adjust { variable here [file dirname [info script]] variable Justify left variable Length 72 variable FullLine 0 variable StrictLength 0 variable Hyphenate 0 variable HyphPatterns namespace export adjust indent undent } # ### ### ### ######### ######### ######### ## Ready package provide textutil::adjust 0.7.1