package Dean::Util; use 5.006; use strict; use warnings; use Carp; use vars qw(@ISA @EXPORT_OK @EXPORT %EXPORT_TAGS $VERSION); require Exporter; @ISA = qw(Exporter); @EXPORT = qw/insert_Dean_Util_functions list_Dean_Util_functions get_Dean_Util_function_string get_Dean_Util_code check_Dean_Util_functions upgrade_Dean_Util_functions/; $EXPORT_TAGS{_fake} = [qw/INCLUDE_POD POD_ONLY/]; $EXPORT_TAGS{all} = \@EXPORT_OK; $VERSION = '1.038'; sub INCLUDE_POD { 1 }; sub POD_ONLY { 1 }; sub TO_FINISH { 1 }; =pod =head1 NAME Dean::Util - Utilities created by Dean Serenevy =head1 SYNOPSIS use Dean::Util qw/map_pair nsign min_max/; ... Then later, to remove dependance on Dean::Util perl -MDean::Util -we insert_Dean_Util_functions The/Module.pm =head1 DESCRIPTION This is a set of utility functions that I find myself rewriting frequently. Normally, putting functions into a module introduces a dependency on that module which can be a hassle in some situations. This is a "smart" module which is capable of replacing the C line with the code for the requested functions. Thus, machines that have Dean::Util installed can use it as a module, but when requested, a (Dean::Util) dependency-free version of the file may be made. =head1 EXPORTED FUNCTIONS =cut $EXPORT_TAGS{utility} = [qw/list_Dean_Util_functions check_Dean_Util_functions get_Dean_Util_code insert_Dean_Util_functions get_Dean_Util_function_string upgrade_Dean_Util_functions/]; =head2 :utility - Using Dean::Util =head3 list_Dean_Util_functions This function prints a column-formatted list of the functions included in the Dean::Util package. =cut #BEGIN: list_Dean_Util_functions, 1 line; depends: get_Dean_Util_code, format_cols sub list_Dean_Util_functions { print format_cols( [sort(keys %{ get_Dean_Util_code() })], col_space => 2 ) } =head3 check_Dean_Util_functions This function attempts to verify that the Dean/Util.pm is properly structured. This function is intended to be run only by people who make changes to the Dean/Util.pm file to check that their code is properly formatted for the module to parse. =cut #BEGIN: check_Dean_Util_functions, depends: get_Dean_Util_code, str my %_Dean_Util_ignore_paren_rule = (unique => 1, NOCOLOR => 1); my %_Dean_Util_ignore_dependencies = (NOCOLOR => 1); sub check_Dean_Util_functions { my %opt = @_; my %x = %{get_Dean_Util_code()}; for (grep {!exists $x{$_} and !/^(?:INCLUDE_POD|POD_ONLY)$/} @EXPORT_OK) { print "$_ in \@EXPORT_OK but does not exist.\n"; } for my $f (keys %x) { $x{$f}{line} = str($x{$f}{line}) || "???"; unless (grep $f eq $_, @EXPORT_OK) { print "$f not exportable, add to \@EXPORT_OK\n" } unless ($x{$f}{count}) { print "$f never defined, line $x{$f}{line}\n"; $x{$f}{count}=0; } unless ($x{$f}{code}) { print "$f has no code, line $x{$f}{line}\n" } if ($x{$f}{count} > 1) { print "$f multiply defined, line $x{$f}{line}\n" } if ($x{$f}{pod_redefined}) { print "POD for $f redefined, line $x{$f}{pod_redefined}\n" } if ($x{$f}{line} eq '???') { print "Can't determine line number of $f!?!\n" } if ($opt{check_pod} and !$x{$f}{pod}) { print "$f has no POD documentation\n" } # prepare to analize code more thoroughly my $code = str($x{$f}{code}); # so we can apply regexps $code =~ s/(?"'])\&(\w+)[^\w\(]/) { print "& subroutine, &$1, used without ( ) in $f, line $x{$f}{line}\n" unless exists $_Dean_Util_ignore_paren_rule{$f} } for my $g (keys %x) { # check for uses of $g as a function (not a variable, or a string) if ($g =~ /^\w/ and $code =~ /(?\*\w\$\@\%\"'\{:]\Q$g\E(?=[^\w]|$)(?!\s*=>|\()/s) { print "$g used without () in $f, line $x{$f}{line}\n" unless exists $_Dean_Util_ignore_paren_rule{$f} } if ($code =~ /(?)\Q$g\E(?!\s*=>)(?:[^\w]|$)/s and not grep /^\Q$g\E$/, $f, @{$x{$f}{depends}}) { print "$g used in $f but not included in dependencies, line $x{$f}{line}\n" unless exists $_Dean_Util_ignore_dependencies{$f} } } for my $g (@{$x{$f}{depends}}) { unless (exists $x{$g}) { print "$f depends on $g, but $g is not available\n" } unless ($code =~ /\Q$g\E/) { print "$f depends on $g, but doesn't seem to use it, line $x{$f}{line}\n" } } } } #END: check_Dean_Util_functions =head3 get_Dean_Util_code Returns a hash ref with an entry of the following type for each function and variable defined in Dean::Util. name => { code => '...', pod => '...', depends => [ 'thing 1', 'thing 2', ... ] } Some additional information may be included in each sub-hash for debugging purposes or internal use. =cut #BEGIN: get_Dean_Util_code #----------------------------------------------------------------- # Returns a hash ref of all functions defined in Dean::Util together with # their code and dependencies. # { name => { code => "code", depends => [ qw/func1 func2 .../ ] } } sub get_Dean_Util_code { my ($file) = ($INC{"Dean/Util.pm"}); my $UTIL; die "Can't "."find Dean::Util in %INC" unless $file and -r $file; open $UTIL, "<", $file or die "Error can't open $file for reading: $!"; my $token = '[\$\@\%\&\*]?[_a-zA-Z]\w*'; my $nontoken = '[^\$\@\%\&\*\w]'; my ($lines,$func,%code,$pod_func); while (<$UTIL>) { if ($lines and $func) { $code{$func}{code} .= $_; $func = undef unless --$lines } elsif ($pod_func) { $code{$pod_func}{pod} .= $_ } elsif ($func) { $code{$func}{code} .= $_ } # We break the if statement here for some extra error checking. if (/^#BEGIN(?:$nontoken)+($token)/) { die "Error: BEGINing function $1 before function $func has ended, at Dean::Util line $.\n" if $lines or $func; print STDERR "POD for $pod_func terminated by BEGIN for $1: line $.\n" if $pod_func; $func = $1; $code{$func}{count}++; $code{$func}{line} = $.; if (/\W(\d+)\s+line/i) { $code{$func}{code} = $_; $lines = $1 } else { $code{$func}{code} = $_ } if (/\W(?i:requires?|depends?)(?:$nontoken)+(.+?)\s*$/) { my $x = $1; $code{$func}{depends} = [ $x =~ /($token)/g ] } } elsif (/^#END(?:$nontoken)+($token)/) { $code{$func}{code} .= $/; $func = undef; } elsif (/^=head3(?:$nontoken)+($token)/) { die "Error: Starting POD for $1 before function $func has ended: line $.\n" if $lines or $func; print STDERR "POD for $pod_func terminated by POD for $1: line $.\n" if $pod_func; $pod_func = $1; $code{$pod_func}{pod_redefined} = $. if $code{$pod_func}{pod}; $code{$pod_func}{pod} = $_; } elsif (/^=cut\s*$/) { undef $pod_func } } close $UTIL; return \%code; } #END: get_Dean_Util_code =head3 insert_Dean_Util_functions Replaces all occurances of "use Dean::Util ...;" ("..." is everything up to first semi-colon, so don't use C) with the actual source code of the functions requested from Dean::Util. The original files are saved to a backup file which is just the original filename with a C<~> appended. The list of files to modify is either included as a list of arguments or is read from C<@ARGV>. As in the function L, the special symbols C and C may be used to indicate that all further inclusions (restricted to each individual "use" block) should include their POD documentation before the code, or exclude the code and only output the POD documentation. Example: use Dean::Util qw/max min INCLUDE_POD join_multi map_pair/; use Dean::Util qw/is_num is_int/; # ... later, possibly even after __END__ use Dean::Util qw/POD_ONLY is_num is_int/; Would include code and POD documentation for L and L. The code and POD documentation for L and L would be inserted separately. Note: Multiple C inclusions may result in multiple subroutine definitions so don't C the same function twice unless they are in different scopes. =cut #BEGIN: insert_Dean_Util_functions; depends: get_Dean_Util_function_string, get_Dean_Util_code #----------------------------------------------------------------- # Replaces all occurances of "use Dean::Util ...;" with the actual # source code of the functions requested from Dean::Util. sub insert_Dean_Util_functions { my @files = @_ ? @_ : @ARGV; my $code = get_Dean_Util_code(); my ($IN,$OUT); local ($,, $\); for my $file (@files) { rename $file, $file."~" or die "Error making backup of file $file: $!"; open $IN, "<", $file."~" or die "Error opening $file~ for reading: $!"; open $OUT, ">", $file or die "Error opening $file for writing: $!"; while (local $_ = <$IN>) { unless (/^\s*use Dean::Util\b(.*)/) { print $OUT $_; next } my ($list, $include_pod) = ($1, 0); until ($list =~ /;/) { $list .= <$IN> } # determine requested functions my @funcs = eval $list; $list =~ s/\n/\n# /g; $list =~ s/#\s+$//; print $OUT "#", "-"x65, $/, "# BEGIN Dean::Util code version $VERSION\n#\n"; print $OUT "# use Dean::Util$list\n\n"; print $OUT +get_Dean_Util_function_string($code, @funcs); print $OUT "#\n# END Dean::Util code version $VERSION\n", "#", "-"x65, $/; } close $IN; close $OUT; } } #END: insert_Dean_Util_functions =head3 upgrade_Dean_Util_functions Once C has been used to "export" a list of Dean::Util functions, this command will replace Dean::Util function blocks with more recent function versions, thus upgrading the exported script. =cut #BEGIN: upgrade_Dean_Util_functions; depends: get_Dean_Util_function_string, get_Dean_Util_code #----------------------------------------------------------------- sub upgrade_Dean_Util_functions { my @files = @_ ? @_ : @ARGV; my $code = get_Dean_Util_code(); my ($IN,$OUT); local ($,, $\, $_); my $token = '[\$\@\%\&\*]?[_a-zA-Z]\w*'; my $nontoken = '[^\$\@\%\&\*\w]'; for my $file (@files) { rename $file, $file."~" or die "Error making backup of file $file: $!"; open $IN, "<", $file."~" or die "Error opening $file~ for reading: $!"; open $OUT, ">", $file or die "Error opening $file for writing: $!"; my ($inblock, $useline, $version, @funcs); while ($_ = <$IN>) { if (/^# BEGIN(?>\s+)Dean::Util code version ([\d.]+)/) { $version = $1; if ($version < $VERSION) { $inblock = 1; next; } elsif ($version == $VERSION) { print STDERR "found Dean::Util block of same version as current version...skipping\n"; } else { print STDERR "found Dean::Util block of NEWER version as current version! ...skipping\n"; } } print $OUT $_ and next unless $inblock; next if $useline and !/^# END(?>\s+)Dean::Util code version/; if (!defined($useline) and /^# use Dean::Util(.*)/m) { $useline = $1; $useline =~ s/^\s+[\d.]+//; until ($useline =~ /;/) { $useline .= <$IN> } $useline =~ s/^# //mg; @funcs = eval $useline; next; } if (/^# END(?>\s+)Dean::Util code version/) { # Finalize! $useline =~ s/\n/\n# /g if $useline; $useline =~ s/#\s+$// if $useline; print $OUT "# BEGIN Dean::Util code version $VERSION\n#\n"; print $OUT "# use Dean::Util$useline\n\n" if $useline; print $OUT +get_Dean_Util_function_string($code, @funcs); print $OUT "#\n# END Dean::Util code version $VERSION\n"; $inblock = 0; undef $useline; next; } $useline = 0 unless /^#/; push @funcs, $1 if /^#BEGIN(?:$nontoken)+($token)/; } close $IN; close $OUT; } } #END: upgrade_Dean_Util_functions =head3 get_Dean_Util_function_string Returns the source code for the functions provided as arguments. If the argument list is empty, the function list is taken from C<@ARGV>. The special symbols C and C may be used to indicate that all further inclusions should include their POD documentation before the code, or exclude the code and only output the POD documentation. Example: get_Dean_Util_function_string qw/max min INCLUDE_POD join_multi map_pair/; Would include the POD documentation for only L and L. get_Dean_Util_function_string qw/POD_ONLY format_cols/; Would return just the POD documentation for L. =cut #BEGIN: get_Dean_Util_function_string, depends: get_Dean_Util_code, str, first_pos sub get_Dean_Util_function_string { my %code = (@_ and ref($_[0]) eq 'HASH') ? %{shift()} : %{ get_Dean_Util_code() }; my @funcs = (@_) ? @_ : @ARGV; my ($list, $include_pod, $out) = ($1, 0, ''); # determine requested functions my @pod_only = splice @funcs, $_ if defined($_ = first_pos(sub { str($_) eq "POD_ONLY" }, @funcs)); my %funcs = map( ($_ => 1), @funcs ); shift @pod_only; # the first entry is "POD_ONLY" # Calculate function dependencies my @temp; do { @temp = (); for my $f (@funcs) { push @temp, grep 1 == ++$funcs{$_}, @{$code{$f}{depends}} if $code{$f}{depends}; } unshift @funcs, @temp; } while (@temp); # Print out the requested code and POD for (@funcs) { if ($_ eq "INCLUDE_POD") { $include_pod++; next } print STDERR "$_ has no POD, Dean::Util line ", (str($code{$_}{line})||"???"), $/ if $include_pod && !$code{$_}{pod}; print STDERR "$_ has no code, Dean::Util line ", (str($code{$_}{line})||"???"), $/ unless $code{$_}{code}; $out .= $/ . str($code{$_}{pod}) . $/ if $include_pod; $out .= str($code{$_}{code}); } for (@pod_only) { print STDERR "$_ has no POD, Dean::Util line ", (str($code{$_}{line})||"???"), $/ unless $code{$_}{pod}; $out .= $/ . str($code{$_}{pod}) . $/; } return $out; } #END: get_Dean_Util_function_string =head1 EXPORTABLE FUNCTIONS =cut #----------------------------------------------------------------- $EXPORT_TAGS{numerical} = [qw/max min fmax fmin minimizer maximizer ceil floor round unbiased_round sum product average min_max max_min max_dirty min_dirty fmax_dirty fmin_dirty minimizer_dirty maximizer_dirty ceil_dirty floor_dirty sum_dirty product_dirty average_dirty min_max_dirty max_min_dirty is_prime sieve_of_eratosthenes next_prime base_hash base2base base2decimal base2integer decimal2base factorial $pi $e/]; #----------------------------------------------------------------- =head2 :numerical - Numerical Functions =head3 $pi The string, pi, to 30 digits after the decimal. =cut #BEGIN: $pi, 1 line our $pi = "3.141592653589793238462643383280"; =head3 $e The string, e, to 30 digits after the decimal. =cut #BEGIN: $e, 1 lines our $e = "2.718281828459045235360287471353"; =head3 max See also: List::Util max Return the maximum number in a list of values. All arguments must be numeric, use L for untrusted or mixed data. =cut =head3 min See also: List::Util min Return the minimum number in a list of values. All arguments must be numeric, use L for untrusted or mixed data. =cut #BEGIN: max, 1 line sub max { my $x = shift; for (@_) { $x = $_ if $_ > $x } $x } #BEGIN: min, 1 line sub min { my $x = shift; for (@_) { $x = $_ if $_ < $x } $x } =head3 max_dirty Return the maximum number in a list of values. This version of max should be used for untrusted data since undefined or non-numeric values are silently ignored rather than trowing errors. =cut =head3 min_dirty Return the minimum number in a list of values. This version of min should be used for untrusted data since undefined or non-numeric values are silently ignored rather than trowing errors. =cut #BEGIN: max_dirty, 2 lines; depends: is_num, strip_space sub max_dirty { shift while @_ and !is_num($_[0]); my $x = shift; for (map(strip_space($_),@_)) { $x = $_ if is_num($_) and $_ > $x } $x } #BEGIN: min_dirty, 2 lines; depends: is_num, strip_space sub min_dirty { shift while @_ and !is_num($_[0]); my $x = shift; for (map(strip_space($_),@_)) { $x = $_ if is_num($_) and $_ < $x } $x } =head3 fmax fmax { block } @list fmax \&sub, @list Return the maximum function value given by evaluating the given code at each element of the list. The code may be either a subroutine reference or a code block. C<$_> will be set to each list entry and will also be passed in as the first (and only) argument. If the code returns any undefined or non-numeric values, perl will issue warnings. =cut =head3 fmin fmin { block } @list fmin \&sub, @list Return the minimum function value given by evaluating the given code at each element of the list. The code may be either a subroutine reference or a code block. C<$_> will be set to each list entry and will also be passed in as the first (and only) argument. If the code returns any undefined or non-numeric values, perl will issue warnings. =cut #BEGIN: fmax, 1 line sub fmax(&@) { local $_; my ($f,$x,$m) = shift; return unless @_; $x = $f->($_ = shift); for (@_) { $x=$m if $x < ($m=$f->($_)) } $x } #BEGIN: fmin, 1 line sub fmin(&@) { local $_; my ($f,$x,$m) = shift; return unless @_; $x = $f->($_ = shift); for (@_) { $x=$m if $x > ($m=$f->($_)) } $x } =head3 fmax_dirty fmax_dirty { block } @list fmax_dirty \&sub, @list Return the maximum function value given by evaluating the given code at each element of the list. The code may be either a subroutine reference or a code block. C<$_> will be set to each list entry and will also be passed in as the first (and only) argument. If the code returns any undefined or non-numeric values, they will be ignored. =cut =head3 fmin_dirty fmin_dirty { block } @list fmin_dirty \&sub, @list Return the minimum function value given by evaluating the given code at each element of the list. The code may be either a subroutine reference or a code block. C<$_> will be set to each list entry and will also be passed in as the first (and only) argument. If the code returns any undefined or non-numeric values, they will be ignored. =cut #BEGIN: fmax_dirty, 2 lines; depends: is_num, strip_space sub fmax_dirty(&@) { local $_; my ($f, $x, $m) = shift; 1 while @_ and !is_num($x = strip_space($f->($_ = shift))); for (@_) { $x = $m if is_num($m = strip_space($f->($_))) and $m > $x } is_num($x) ? $x : undef } #BEGIN: fmin_dirty, 2 lines; depends: is_num, strip_space sub fmin_dirty(&@) { local $_; my ($f, $x, $m) = shift; 1 while @_ and !is_num($x = strip_space($f->($_ = shift))); for (@_) { $x = $m if is_num($m = strip_space($f->($_))) and $m < $x } is_num($x) ? $x : undef } =head3 minimizer minimizer { block } @list minimizer \&sub, @list Return the item of C<@list> which yields the minimum value when evaluated by the given code. The code may be provided either as a subroutine reference or a code block. C<$_> will be set to each list entry and will also be passed in as the first (and only) argument. If the code returns any undefined or non-numeric values, perl will issue warnings. =cut =head3 maximizer maximizer { block } @list maximizer \&sub, @list Return the item of C<@list> which yields the maximum value when evaluated by the given code. The code may be provided either as a subroutine reference or a code block. C<$_> will be set to each list entry and will also be passed in as the first (and only) argument. If the code returns any undefined or non-numeric values, perl will issue warnings. =cut #BEGIN: maximizer, 2 lines sub maximizer(&@) { local $_; my ($f,$x,$m,$y) = shift; return unless @_; $x = $f->($y = $_ = shift); for (@_) { if ($x < ($m = $f->($_))) { $x=$m; $y=$_ } } $y } #BEGIN: minimizer, 2 lines sub minimizer(&@) { local $_; my ($f,$x,$m,$y) = shift; return unless @_; $x = $f->($y = $_ = shift); for (@_) { if ($x > ($m = $f->($_))) { $x=$m; $y=$_ } } $y } =head3 minimizer_dirty minimizer_dirty { block } @list minimizer_dirty \&sub, @list Return the item of C<@list> which yields the minimum value when evaluated by the code. C may be either a subroutine reference or a code block. C<$_> will be set to each list entry and will also be passed in as the first (and only) argument. If the code returns any undefined or non-numeric values, they will be ignored and the corresponding list item will not be considered as a minimizer. Note however that no filtering is performed on C<@list> so undefined values I be passed to the subroutine as a normal element. =cut =head3 maximizer_dirty maximizer_dirty { block } @list maximizer_dirty \&sub, @list Return the item of C<@list> which yields the maximum value when evaluated by the code. C may be either a subroutine reference or a code block. C<$_> will be set to each list entry and will also be passed in as the first (and only) argument. If the code returns any undefined or non-numeric values, they will be ignored and the corresponding list item will not be considered as a minimizer. Note however that no filtering is performed on C<@list> so undefined values I be passed to the subroutine as a normal element. =cut #BEGIN: maximizer_dirty, 2 lines; depends: is_num, strip_space sub maximizer_dirty(&@) { local $_; my ($f, $x, $m, $y) = shift; 1 while @_ and !is_num($x = strip_space($f->($y = $_ = shift))); for(@_){ if (is_num($m = strip_space($f->($_))) and $m > $x) {$x=$m;$y=$_} } is_num($x) ? $y : undef } #BEGIN: minimizer_dirty, 2 lines; depends: is_num, strip_space sub minimizer_dirty(&@) { local $_; my ($f, $x, $m, $y) = shift; 1 while @_ and !is_num($x = strip_space($f->($y = $_ = shift))); for(@_){ if (is_num($m = strip_space($f->($_))) and $m < $x) {$x=$m;$y=$_} } is_num($x) ? $y : undef } =head3 ceil($) If the argument is numeric, then returns the smallest integer which is greater than or equal to the given argument. Otherwise this function will spew warnings. See also: POSIX::ceil [identical functionality] =cut #BEGIN: ceil, 1 line sub ceil($) { my $x = shift; return ($x == int $x) ? $x : ($x > 0) ? int( $x+1 ) : int($x) } =head3 ceil_dirty($) If the argument is numeric, then returns the smallest integer which is greater than or equal to the given argument. Otherwise this function will return undef. =cut #BEGIN: ceil_dirty, 2 line; depends: is_num, strip_space sub ceil_dirty($) { my $x = strip_space(shift()); return unless defined $x and is_num($x); return ($x == int $x) ? $x : ($x > 0) ? int( $x+1 ) : int($x) } =head3 floor($) If the argument is numeric, then returns the largest integer which is less than or equal to the given argument. Otherwise this function spwes warnings. See also: POSIX::floor [identical functionality] =cut #BEGIN: floor, 1 line sub floor($) { my $x = shift; return ($x == int $x) ? $x : ($x > 0) ? int($x) : int($x - 1) } =head3 floor_dirty($) If the argument is numeric, then returns the largest integer which is less than or equal to the given argument. Otherwise this function returns undef. =cut #BEGIN: floor_dirty, 2 line; depends: is_num, strip_space sub floor_dirty($) { my $x = strip_space(shift()); return unless defined $x and is_num($x); return ($x == int $x) ? $x : ($x > 0) ? int($x) : int($x - 1) } =head3 round round( $value ) # round to integer round( $value, 2 ) # round to even round( $value, "0.01" ) # round to cent Round C<$value> to multiple of second parameter. Applies traditional algorithm. Namely, C. Internal comparisons are performed at "string precision" to combat numerical precision problems. Thus, do not expect to to be able to round to too many digits. =cut #BEGIN: round sub round { my ($value, $r) = @_; return int($value + .5) unless $r;# shortcut the common case $r = "$r"; $r =~ s/0+$// if index($r,'.') >= 0; my $pow = ($r =~ s/\.(.*)/$1/) ? length($1) : 0; my $sign = ($value < 0) ? "-" : ""; $value = abs($value) * 10 ** $pow; # some integer math my $lower = int($value) - (int($value) % $r); my $upper = $lower + $r; my $new = ("$value" - $lower < $upper - "$value") ? $lower : $upper; # shift decimal on the string for precision if ($pow) { if (length($new) > $pow) { substr( $new, length($new)-$pow, 0, '.' ) } else { $new = "0.".("0"x($pow-length($new)))."$new"; } } return "$sign$new"; } #END: round =head3 unbiased_round unbiased_round( $value ) # round to integer unbiased_round( $value, 2 ) # round to even unbiased_round( $value, "0.01" ) # round to cent An unbiased round removes the upward bias of the traditional rounding algorithm by rounding the midpoint value up sometimes and down other times. The convention is to round midpoint values to even multiples, and round all other values normally. For example, C since 2 is even, however C as well since 2 is even. This system can be extended to the generalized rounding algorithm: unbiased_round( 1, 2 ) == 0 # since 0 is an even multiple of 2 unbiased_round( 3, 2 ) == 4 # since 4 is an even multiple of 2 =cut #BEGIN: unbiased_round sub unbiased_round { my ($value, $r) = @_; unless ($r) { # shortcut the common case my $v = int($value + .5); return ($value =~ /\.5$/ and $v % 2) ? $v - 1 : $v; } $r = "$r"; $r =~ s/0+$// if index($r,'.') >= 0; my $pow = ($r =~ s/\.(.*)/$1/) ? length($1) : 0; my $sign = ($value < 0) ? "-" : ""; $value = abs($value) * 10 ** $pow; # some integer math my $lower = int($value) - (int($value) % $r); my $upper = $lower + $r; my $new = ("$value" - $lower < $upper - "$value") ? $lower : ("$value" - $lower > $upper - "$value") ? $upper : (($lower / $r) % 2) ? $upper : $lower; # shift decimal on the string for precision if ($pow) { if (length($new) > $pow) { substr( $new, length($new)-$pow, 0, '.' ) } else { $new = "0.".("0"x($pow-length($new)))."$new"; } } return "$sign$new"; } #END: unbiased_round =head3 sum See also: List::Util sum Returns the sum of all numeric entries in a list. Undefined/non-numeric values cause warnings. =cut =head3 product See also: List::Util reduce Returns the product of all numeric entries in a list. Undefined/non-numeric values cause warnings. =cut =head3 average Returns the average over all entries in a list. Undefined or non-numeric entries will spew warnings. =cut #BEGIN: sum, 1 line sub sum { my $x=0; $x += $_ for @_; $x} #BEGIN: product, 1 line sub product { my $x=1; $x *= $_ for @_; $x} #BEGIN: average, 1 line; depends: sum sub average { @_ ? sum(@_)/@_ : undef } =head3 sum_dirty Returns the sum of all numeric entries in a list. Undefined/non-numeric values are ignored. =cut =head3 product_dirty Returns the product of all numeric entries in a list. Undefined/non-numeric values are ignored. =cut =head3 average_dirty Returns the average over all entries in a list. Undefined or non-numeric entries contribute a 0 to the average. =cut #BEGIN: sum_dirty, 1 line; depends: is_num, strip_space sub sum_dirty { my $x=0; is_num() && ($x += strip_space($_)) for @_; $x} #BEGIN: product_dirty, 1 line; depends: is_num, strip_space sub product_dirty { my $x=1; is_num() && ($x *= strip_space($_)) for @_; $x} #BEGIN: average_dirty, 1 line; depends: sum_dirty sub average_dirty { @_ ? sum_dirty(@_)/@_ : undef } =head3 min_max Returns a pair C<($m, $M)> which is the minimum and maximum numbers, respectively, in a list of values without looping over the list twice. Undefined or non-numeric values will cause warnings. =cut =head3 max_min Returns a pair C<($M, $m)> which is the maximum and minimum numbers, respectively, in a list of values without looping over the list twice. Undefined or non-numeric values will cause warnings. =cut #BEGIN: min_max, 1 line; depends: max_min sub min_max { return reverse max_min(@_) } #BEGIN: max_min sub max_min { my $M = shift; my $m = $M; for (@_) { $M = $_ if $_ > $M; $m = $_ if $_ < $m; } return( $M, $m ); } #END: max_min =head3 min_max_dirty Returns a pair C<($m, $M)> which is the minimum and maximum numbers, respectively, in a list of values without looping over the list twice. Undefined or non-numeric values are silently ignored. =cut =head3 max_min_dirty Returns a pair C<($M, $m)> which is the maximum and minimum numbers, respectively, in a list of values without looping over the list twice. Undefined or non-numeric values are silently ignored. =cut #BEGIN: min_max_dirty, 1 line; depends: max_min_dirty sub min_max_dirty { return reverse max_min_dirty(@_) } #BEGIN: max_min_dirty; depends is_num, strip_space sub max_min_dirty { shift while @_ and !is_num($_[0]); my $M = strip_space(shift); my $m = $M; for (map strip_space($_), @_) { next unless is_num(); $M = $_ if $_ > $M; $m = $_ if $_ < $m; } return( $M, $m ); } #END: max_min_dirty =head3 sieve_of_eratosthenes my $sieve = sieve_of_eratosthenes( $n ); sieve_of_eratosthenes( $m, $sieve ); Constructs a bitstring C<$sieve> using the Sieve of Eratosthenes so that: vec($sieve, $n, 1) == 1 iff $n is prime If a sieve (or an undefined scalar) is provided as a second argument, it will be appended to. Note: Since perl's C command deals only in bytes, this subroutine will round C<$n> up to make sure that C<$sieve> is correct to a whole number of bytes. In particular, you are guaranteed to be able to trust C<$sieve> up to C<$n = 8 * length($sieve) - 1>. =cut #BEGIN: sieve_of_eratosthenes sub sieve_of_eratosthenes { my ($n, $sieve) = @_; # "length" rounds to the neqarest byte, so we make sure that our sieve is accurate to the byte. $n = ($n % 8 == 7) ? $n : 8 * int($n/8) + 7; $_[1] = '' unless defined $_[1]; my $a = length($_[1]) ? 8*length($_[1]) : 2; return $_[1] if $n <= $a; # good enough # expand sieve vec($_[1], $_, 1) = 1 for $a..$n; # useful values my $q = int(sqrt($n)); my $i = 2; my $k; # print "Computing sieve from '$a' to '$n' sieve = ", unpack("b*", $_[1]), "\n"; while ($i <= $q) { next unless vec($_[1], $i, 1); vec($_[1], $_ * $i, 1) = 0 for ((($k = int($a/$i)) > 2) ? $k : 2)..int($n/$i); } continue { $i++ } return $_[1]; } #END: sieve_of_eratosthenes =head3 is_prime Determine primality. Constructs the Sieve of Eratosthenes to determine primality. The sieve is reused for each call to C so scripts are encouraged to prepare the sieve by calling is_prime on a large number before making multiple calls to is_prime. # SLOW: takes 21.89 seconds @primes = grep is_prime($_), 1..400000; # FAST: takes 1.387 seconds @primes = reverse grep is_prime($_), reverse 1..400000; This function may take some shortcuts if it can so if you want to prepare the sieve append the option "force_sieve", # SLOW: is_prime( 400000 ); # this test shortcuts since 400000 is even @primes = grep is_prime($_), 1..400000; # FAST: is_prime( 400000, force_sieve => 1 ); @primes = grep is_prime($_), 1..400000; =cut #BEGIN: is_prime, Depends: sieve_of_eratosthenes { my $sieve; sub is_prime { return $sieve unless @_; my ($n, %o) = @_; return vec($sieve, $n, 1) == 1 if $n < 8*length($sieve || ''); unless ($o{force_sieve}) { (0 == ($n % $_)) and return $n == $_ for 2,3,5,7,11; } sieve_of_eratosthenes($n, $sieve); return vec($sieve, $n, 1) == 1; } } #END: is_prime =head3 next_prime my $m = next_prime( $n ) Compute the next prime integer larger than C<$n>. =cut #BEGIN: next_prime, Depends: is_prime # Algorithm taken from GAP's NextPrimeInt function sub next_prime { my ($n) = @_; if ($n < 2) { return 2 } elsif ($n % 2 == 0) { $n++ } else { $n += 2 } # jump start since building up from low numbers is so slow is_prime(10_000 + $n, force_sieve => 1) unless $n+1000 < 8*length(is_prime()) - 1; $n += ($n % 6 == 1) ? 4 : 2 until is_prime($n); return $n; } #END: next_prime =head3 base_hash Given a base, this function returns a hash which may be used in future calls to the other base functions. A base is described by: integer <= 36 (0-9 a-z) array ref (list of symbols, length == base, index i == i, yes you get to define zero) string (string of symbols, shortcut for [split //, $str] hash ref (the output of a previous call to base_hash, this is silly in this case) =cut =head3 base2base base2base( string, base, base ) String may be decimal. The following symbols are tried (in order) to be used as the punctuation between the integer and fraction part of the number: . , : ; _ | / \ - + ' ` " Bases are described by: integer <= 36 (0-9 a-z) array ref (list of symbols, length == base, index i == i, yes you get to define zero) string (string of symbols, shortcut for [split //, $str] hash ref (the output of base_hash) =cut =head3 base2integer base2integer( string, base ) Convert a string to another base. The string may not be a decimal. Base is described by: integer <= 36 (0-9 a-z) array ref (list of symbols, length == base, index i == i, yes you get to define zero) string (string of symbols, shortcut for [split //, $str] hash ref (the output of base_hash or symbol => value pairs) =cut =head3 base2decimal base2decimal( string, base ) String may be decimal. The following symbols are tried (in order) to be used as the punctuation between the integer and fraction part of the number: . , : ; _ | / \ - + ' ` " Base is described by: integer <= 36 (0-9 a-z) array ref (list of symbols, length == base, index i == i, yes you get to define zero) string (string of symbols, shortcut for [split //, $str] hash ref (the output of base_hash) =cut =head3 decimal2base decimal2base( string, base ) String may be decimal. The following symbols are tried (in order) to be used as the punctuation between the integer and fraction part of the number: . , : ; _ | / \ - + ' ` " Base is described by: integer <= 36 (0-9 a-z) array ref (list of symbols, length == base, index i == i, yes you get to define zero) string (string of symbols, shortcut for [split //, $str] hash ref (the output of base_hash) =cut #BEGIN: base_hash sub base_hash { local $_ = shift; my (@x,%x); if (ref($_) =~ /HASH/) { return $_ } elsif (ref($_) =~ /ARRAY/) { @x = @{ $_ } } elsif (/^0/ or length > 2 or $_ > 36) { @x = split // } else { @x = (0..9,'a'..'z')[0..$_-1] } $x{$x[$_]} = $_ for 0..$#x; $x{base} = @x; return \%x; } #END: base_hash #BEGIN: base2base, 1 line; depends decimal2base, base2decimal sub base2base { decimal2base(base2decimal($_[0],$_[1]), $_[2]) } #BEGIN: base2integer, depends: base_hash sub base2integer { local $_ = shift; return unless @_ and defined and defined $_[0]; my $hash = base_hash($_[0]); my $base = (exists $hash->{base}) ? $hash->{base} : 0+(keys %$hash); my $num = 0; $num = ($num * $base) + ($hash->{$_}) for /./g; return $num; } #END: base2integer #BEGIN: base2decimal, depends: base_hash sub base2decimal { my $n = shift; return unless @_ and defined $n and defined $_[0]; my $base = base_hash($_[0]); my ($int,$dec); $n =~ /(?:\.|\,|\:|\;|\_|\||\/|\\|\-|\+|\'|\`|\")/ and do { # separate "." and "," to avoid "Possible attempt to separate words with commas" warning for (".", ",", qw/: ; _ | \/ \\ - + ' ` "/) { next if exists $base->{$_} or not $n =~ /\Q$_\E/; ($int, $dec) = split /\Q$_\E/, $n; $dec = '' unless defined $dec; }}; ($int,$dec) = ($n, '') unless defined $int; my $p = 1; $n = 0; for (reverse split //, $int) { $n += $p * $base->{$_}; $p *= $base->{base}; } $p = 1/$base->{base}; for (split //, $dec) { $n += $p * $base->{$_}; $p /= $base->{base}; } return $n; } #END: base2decimal #BEGIN: decimal2base, depends: base_hash sub decimal2base { my $n = shift; my $precision = (@_ > 1) ? pop : 0.0000000000001; return unless @_ and defined $n and defined $_[0]; my %base = %{base_hash($_[0])}; my ($int,$comma,$base,$res,$p,$inv_base); { no warnings; for (qw/. , : ; _ | \/ \\ - + ' ` "/) { unless (exists $base{$_}) { $comma = $_; last } } } $base = delete $base{base}; %base = reverse %base; $res = ''; $inv_base = 1/$base; $p = $base ** int(log($n)/log($base)); while ($n and $p > $precision) { $res .= $comma if $p == $inv_base; $res .= $base{$int = int($n/$p)}; $n -= $p * $int; $p *= $inv_base; } while ($p > $inv_base) { $res .= $base{0}; $p /= $base } return $res; } #END: decimal2base =head3 factorial factorial( $n ) Returns $n! if $n is a non-negative integer. =cut #BEGIN: factorial sub factorial { return if not defined $_[0] or $_[0] =~ /\D/; return 1 if $_[0] < 2; my $n = 1; $n *= $_ for 2..$_[0]; return $n; } #END: factorial #----------------------------------------------------------------- $EXPORT_TAGS{stat_prob} = [qw/roll_dice randomize one_var percentile npdf ncdf permutations k_arrangements arrangements k_combinations combinations correlation prob_model_invariants /]; #----------------------------------------------------------------- =head2 :stat_prob - Statistical / Probability =head3 prob_model_invariants prob_model_invariants( \%model, %options ) The model is a hash with keys the outcomes and values the corresponding probabilities. At most one of the probabilities may be undefined in which case it will be computed automatically (as $1 - \sum p_i$) and added to your passed probability model. =cut #BEGIN: prob_model_invariants sub prob_model_invariants { my $model = shift; my $missing_key; my ($x, $mu, $px2, $sd, $P); for $x (keys %$model) { if (not defined $$model{$x}) { croak "both P($missing_key) and P($x) are undefined when at most one may be missing" if defined $missing_key; $missing_key = $x; next; } $P += $$model{$x}; $mu += $x * $$model{$x}; $px2 += $x * $x * $$model{$x}; } if (defined $missing_key) { $x = $missing_key; $$model{$x} = 1 - $P; $P += $$model{$x};# Had better give us 1 $mu += $x * $$model{$x}; $px2 += $x * $x * $$model{$x}; } carp "Warning: Probabilities do not "."sum to 1 (\\"."sum p_i = $P)" unless 1 == sprintf("%.10f",$P); return { mean => $mu, sd => sqrt($px2 - $mu * $mu) }; } #END: prob_model_invariants =head3 roll_dice Roll I dice (default 1) and return the results. In scalar context, only the sum is returned. In list context, the individual rolls are returned as well as the final sum of the values (the sum is returned in the last position). =cut #BEGIN: roll_dice sub roll_dice { my ($s, %o, @R) = (0); @_ = ($_[0]."d6") if @_ == 1 and $_[0] =~ /^\d+$/; @_ = ("1d6") unless @_; for (@_) { my ($r, $n) = /^(\d+)d(\d+)$/ ? ($1,$2) : /^d(\d+)$/ ? (1,$1) : die "unable to parse die: '$_'"; for (1..$r) { push @R, 1+int(rand($n)); $s += $R[-1] } } wantarray ? (@R, $s) : $s } #END: roll_dice =head3 randomize See also: List::Util shuffle Randomize a list of values. Essentially the Fisher-Yates shuffle code from L ("How do I shuffle an array randomly?"). If the array is passed by reference then it will be altered, otherwise a copy is made. Returns a new list or a reference to a list depending on context. =cut #BEGIN: randomize sub randomize { my $x = (@_ == 1 and ref($_[0]) eq 'ARRAY') ? shift() : \@_; my $i; for ($i = @$x; --$i; ) { my $j = int rand ($i+1); @$x[$i,$j] = @$x[$j,$i]; } return wantarray ? @$x : $x; } #END: randomize =head3 one_var one_var( @data ); one_var( \@data ); one_var( \@data, $sorted ); Returns a hash (or hash reference if called in scalar context) of one-variable statistics on the input data. If the C<$sorted> parameter is not defined then the data is assumed to be not sorted and the subroutine will make its own sorted copy of the data. If the C<$sorted> parameter is defined but false, then the subroutine will sort C<@data> in place (C<@data> will be altered). If the C<$sorted> parameter is true then the data will be assumed to be already sorted. The returned hash will have the following keys: =over 4 =item average =item mean =item x-bar The average value of the data =item sum =item sum x The summation of the data =item sum_sq =item sum x^2 The sum of the squares of the data =item Svar =item sample_variance The sample variance, C<1/n-1 * sum (x_i - average)^2> =item Sx =item sample_standard_deviation The sample standard deviation, C =item variance =item sigma_sq The population variance, C =item sigma =item standard_deviation The population standard deviation, C =item n The number of measurements in the sample =item min The smallest data element =item max The smallest data element =item Q1 The first quartile computed using broken "Basic Math Course Method". =item Q2 =item med =item median The sample median =item Q3 The third quartile computed using broken "Basic Math Course Method". =item char:sum =item char:Sigma =item char:sigma The corresponding Unicode characters: "\x{2211}", "\x{03A3}", "\x{03C3}". Be warned that char:sum is a different symbol than char:Sigma and that the terminal that you are writing to will need to understand UTF-8 font encodings. =back Note: the list only needs to be sorted to compute the quartiles, min, median, and max values. If you are not interested in these values then you can speed up the computation by providing C<$sorted> with a true valued (regardless of whether the data is sorted) and simply ignore those values in the output. =cut #BEGIN: one_var, depends: sum, percentile sub one_var { my ($data, %s); return unless @_; if (ref $_[0] eq 'ARRAY') { $data = shift; if (@_) { @$data = sort {$a<=>$b} @$data unless $_[0] } else { $data = [sort {$a<=>$b} @$data] } } else { $data = [sort {$a<=>$b} @_] } # Need this before we can compute sample variance, so we do it separately: my $n = $s{n} = @$data; $s{sum} = $s{"sum x"} = sum(@$data); my $mean = $s{mean} = $s{average} = $s{"x-bar"} = 1/$n * $s{sum}; my ($sum_sq, $Svar) = (0,0); for (@$data) { $sum_sq += $_ * $_; $Svar += ($_ - $mean)*($_ - $mean); } $s{"char:sigma"} = "\x{03C3}"; $s{"char:Sigma"} = "\x{03A3}"; $s{"char:"."sum"} = "\x{2211}"; $s{sum_sq} = $s{"sum x^2"} = $sum_sq; $s{Svar} = $s{sample_variance} = 1/($n-1) * $Svar; $s{Sx} = $s{sample_standard_deviation} = sqrt($s{Svar}); $s{variance} = $s{sigma_sq} = 1/$n * $sum_sq - $mean * $mean; $s{standard_deviation} = $s{sigma} = sqrt($s{variance}); $s{min} = $$data[0]; $s{max} = $$data[-1]; my $med_idx = percentile(.5, $data, sorted => 1, method => "midpoint", return => "index"); $s{Q1} = percentile(.5, [@$data[0..(($med_idx == int($med_idx)) ? $med_idx-1 : $med_idx)]], sorted => 1, method => "midpoint"); $s{Q2} = $s{med} = $s{median} = percentile(.5, $data, sorted => 1, method => "midpoint"); $s{Q3} = percentile(.5, [@$data[($med_idx+1)..$#{$data}]], sorted => 1, method => "midpoint"); # $s{Q1} = percentile(.25, $data, sorted => 1, method => "midpoint"); # $s{Q2} = $s{med} = $s{median} = percentile(.5, $data, sorted => 1, method => "midpoint"); # $s{Q3} = percentile(.75, $data, sorted => 1, method => "midpoint"); wantarray ? %s : \%s; } #END: one_var =head3 percentile percentile($p, @data) percentile($p, \@data) percentile($p, \@data, $sorted) percentile($p, \@data, %options) Return the C<$p>-th percentile using the weighted average at X_{(n+1)p} method (http://www.xycoon.com/method_2.htm) That is, the number such that approximately C<100 * $p> of the data values are less than or equal to the given value. If an array reference is given as well as a third true value, the data will be assumed to be already sorted. The following options are available. =over 4 =item sorted Boolean value indicating whether the data are sorted already. If not, they will be sorted numerically. =item method One of "midpoint", "floor", "ceil", or "scaled". This controls what to do when a percentile divider is between two entries. The default behavior is "scaled", the returned percentile will be an appropriate linear combination of the neighboring entries. The "midpoint" method always returns the midpoint of the neighboring entries. Finally, the "floor" and "ceil" methods always return the lower or higher neighbor respectively. The "method" also affects the return value when C "index"> is enabled. =item return Either "value" or "index". Affects whether we return the actual percentile value, or simply its index in the array. =back =cut #BEGIN: percentile sub percentile { my ($p,$data,$n,$f,%o,@a) = (shift); return unless @_ and $p >= 0 and $p <= 1; if (ref $_[0] eq 'ARRAY' and 0 == (@_ % 2)) { $data = shift; %o = (sorted => @_) if @_ } elsif (ref $_[0] eq 'ARRAY') { ($data, %o) = @_ } else { $data = \@_ } $n = int($p * (1+@$data)); $f = $p * (1+@$data) - $n; $n-- if $n; # perl arrays start at zero if (!$o{method} or $o{method} =~ /^[Ss]/) { 1 } elsif ($o{method} =~ /^[Mm]/) { $f = .5 if $f != 0 } elsif ($o{method} =~ /^[Ff]/) { $f = 0 } elsif ($o{method} =~ /^[Cc]/) { ++$n and ($f = 0) if $f != 0 } return $n + $f if $o{return} and $o{return} =~ /^[Ii]/; if ($o{sorted}) { @a = @$data[$n,(($n == $#{$data}) ? $n : $n+1)] } else { @a = (sort {$a<=>$b} @$data)[$n,$n+1] } if ($n+1 > $#{$data}) { return $a[0] } return $a[0] + $f * ($a[1] - $a[0]); } #END: percentile =head3 correlation my $r = correlation( \@X, \@Y ); my %I = correlation( \@X, \@Y ); my $r = correlation( \@X, \@Y, %options ); Pearson product-moment correlation coefficient. =over 4 =item one_var_x =item one_var_y The result hash from C =item sd_x =item sd_y =item mean_x =item mean_y The sample standard deviation and mean of x and y. =back =cut #BEGIN: correlation, depends: one_var sub correlation { my ($X, $Y, %o, $r) = @_; my $n = (@$X < @$Y) ? $#{$X} : $#{$Y}; $o{one_var_x} = one_var($X) unless $o{one_var_x} or (defined($o{sd_x}) and defined($o{mean_x})); $o{sd_x} ||= $o{one_var_x}{Sx}; $o{mean_x} ||= $o{one_var_x}{mean}; $o{one_var_y} = one_var($Y) unless $o{one_var_y} or (defined($o{sd_y}) and defined($o{mean_y})); $o{sd_y} ||= $o{one_var_y}{Sx}; $o{mean_y} ||= $o{one_var_y}{mean}; my ($x_, $y_) = @o{qw/mean_x mean_y/}; $r += ($$X[$_] - $x_) * ($$Y[$_] - $y_) for 0..$n; $r *= 1 / ($o{sd_x} * $o{sd_y} * $n); # The following pseudocode (from wikipedia) computes correlation in a single pass #-------------------------------------------------------------------------------- # sum_sq_x = 0 # sum_sq_y = 0 # sum_coproduct = 0 # mean_x = x[1] # mean_y = y[1] # for i in 2 to N: # sweep = (i - 1.0) / i # delta_x = x[i] - mean_x # delta_y = y[i] - mean_y # sum_sq_x += delta_x * delta_x * sweep # sum_sq_y += delta_y * delta_y * sweep # sum_coproduct += delta_x * delta_y * sweep # mean_x += delta_x / i # mean_y += delta_y / i # pop_sd_x = sqrt( sum_sq_x / N ) # pop_sd_y = sqrt( sum_sq_y / N ) # cov_x_y = sum_coproduct / N # correlation = cov_x_y / (pop_sd_x * pop_sd_y) if (($o{one_var_x} or $o{one_var_y}) and wantarray) { return (correlation => $r, ($o{one_var_x} ? (one_var_x => $o{one_var_x}) : ()), ($o{one_var_x} ? (one_var_y => $o{one_var_y}) : ())); } else { return $r } } #END: correlation =head3 permutations permutations( $n ); permutations( @list ); # 1 < @list !! permutations( \@list ); Return a list of all permutations of the given input list. Note: This subroutine is slow and inefficient. If you want to use this for any real purpose then you should consider using Algorithm::Permute or Algorithm::FastPermute from cpan. =cut #BEGIN: permutations: depends k_arrangements, factorial sub permutations { return factorial($_[0]) if @_ == 1 and not ref($_[0]); return k_arrangements($_[0], 0+@{$_[0]}) if @_ == 1 and ref($_[0]) eq 'ARRAY'; return k_arrangements(\@_, 0+@_); } #END: permutations =head3 k_arrangements k_arrangements( \@list, $k ); k_arrangements( $n, $k ); Return a list of all arrangements (sub-permutations) of the given input list of length $k. If C<$n> and C<$k> are both integers, then simply the number of C<$k> arrangements is returned. Note: This subroutine is slow and inefficient. If you want to use this for any real purpose then you should consider looking up an XS module on CPAN. =cut =head3 arrangements arrangements( $n ); arrangements( \@list ); arrangements( \@list, $k ); arrangements( $n, $k ); arrangements( @list ); # @list > 2 !!! Return a list of all arrangements (sub-permutations) of the given input list (regardless of length). If the list is provided as a reference and an integer $k is provided then the results will be restrictetd to length $k as in the L subroutine. Note: This subroutine is slow and inefficient. If you want to use this for any real purpose then you should consider looking up an XS module on CPAN. =cut #BEGIN: k_arrangements, depends: product, sum, factorial sub k_arrangements { my ($n, $k) = @_; ($n, $k) = ($k, $n) if ref($k); # provided array in wrong order # provided two numbers if (not ref($n)) { return sum(map(factorial($_), 0..$n)) unless defined $k; return 0 if $k < 0 or $k > $n; return 1 if $k == 0; return product($n-$k+1..$n); } return wantarray ? [] : [[]] if $k == 0; return wantarray ? () : [] if $k > @$n or $k < 0; return wantarray ? map([$_],@$n) : [map([$_],@$n)] if $k == 1; my @A; for my $i (0..$#{$n}) { for (k_arrangements([@$n[0..$i-1,$i+1..$#{$n}]], $k-1)) { push @$_, $$n[$i]; push @A, $_; } } return wantarray ? @A : \@A; } #END: k_arrangements #BEGIN: arrangements: depends k_arrangements, sum sub arrangements { my ($n, $k) = @_; ($n, $k) = ($k, $n) if ref($k); # provided array in wrong order if (@_ == 1) { if (ref($n)) { return wantarray ? map(k_arrangements($n, $_), 0..@$n) : [ map(k_arrangements($n, $_), 0..@$n) ]; } else { return sum(map(k_arrangements($n,$_), 0..$n)) } } return k_arrangements($n, $k) if @_ == 2; # meant to call k_arrangements # otherwise, more that 2 elements. wantarray ? map k_arrangements(\@_, $_), 0..@_ : [ map k_arrangements(\@_, $_), 0..@_ ]; } #END: arrangements =head3 k_combinations k_combinations( \@list, $k ); k_combinations( $n, $k ); Return a list of all combinations of the given input list of length $k. Note: This subroutine is slow and inefficient. If you want to use this for any real purpose then you should consider looking up an XS module on CPAN. =cut =head3 combinations combinations( $n ); combinations( \@list ); combinations( \@list, $k ); combinations( $n, $k ); combinations( @list ); # @list > 2 !!! Return a list of all combinations of the given input list (regardless of length). If the list is provided as a reference and an integer $k is provided then the results will be restrictetd to length $k as in the L subroutine. Note: This subroutine is slow and inefficient. If you want to use this for any real purpose then you should consider looking up an XS module on CPAN. =cut #BEGIN k_combinations, depends: product sub k_combinations { my ($n, $k) = @_; ($n, $k) = ($k, $n) if ref($k); # provided array in wrong order # provided two numbers if (not ref($n)) { return 2 ** $n unless defined $k; return 0 if $k < 0 or $k > $n; return 1 if $k == 0 or $k == $n; # provide both as a (slight?) optimization return product($n-$k+1..$n)/product(2..$k) if $k < $n/2; return product($k+1..$n)/product(2..$n-$k);# if $k > $n/2; } return wantarray ? [] : [[]] if $k == 0; return wantarray ? () : [] if $k > @$n or $k < 0; return wantarray ? map([$_],@$n) : [map([$_],@$n)] if $k == 1; return wantarray ? [@$n] : [[@$n]] if $k == $n; return wantarray ? map([@$n[0..$_-1,$_+1..$#{$n}]],0..$#{$n}) : [map([@$n[0..$_-1,$_+1..$#{$n}]],0..$#{$n})] if $k == $n-1; my @A; for my $i (0..$#{$n}-$k+1) { for (k_combinations([@$n[$i+1..$#{$n}]], $k-1)) { unshift @$_, $$n[$i]; # unshift to make combinations be in expected order push @A, $_; } } return wantarray ? @A : \@A; } #END k_combinations #BEGIN: combinations: depends k_combinations sub combinations { my ($n, $k) = @_; ($n, $k) = ($k, $n) if ref($k); # provided array in wrong order if (@_ == 1) { if (ref($n)) { return wantarray ? map(k_combinations($n, $_), 0..@$n) : [ map(k_combinations($n, $_), 0..@$n) ]; } else { return 2 ** $n } } return k_combinations($n, $k) if @_ == 2; # meant to call k_combinations # otherwise, more that 2 elements. wantarray ? map k_combinations(\@_, $_), 0..@_ : [ map k_combinations(\@_, $_), 0..@_ ]; } #END: combinations =head3 npdf npdf $x npdf $x, $mu npdf $x, $mu, $sigma Compute the probability S )> assuming a normal distribution with mean C<$mu> and standard deviation C<$sigma>. C<$mu> and C<$sigma> are assumed to be C<0> and C<1> respectively if they are missing. C<$sigma> must be positive. =cut #BEGIN: npdf, depends: $pi sub npdf { return exp(-$_[0]*$_[0]/2) / 2.506628274631000502415765284811 if @_ == 1; my ($x,$mu,$s) = @_; $mu ||= 0; $s ||= 1; $s = abs($s); return exp(($mu-$x)*($x-$mu)/(2*$s*$s))/(sqrt(2*$pi) * $s) } #END: npdf =head3 ncdf ncdf $x ncdf $x, $mu ncdf $x, $mu, $sigma Compute the probability S= C<$x> )> assuming a normal distribution with mean C<$mu> and standard deviation C<$sigma>. C<$mu> and C<$sigma> are assumed to be C<0> and C<1> respectively if they are missing. C<$sigma> must be positive. =cut #BEGIN: ncdf, depends: interpolating_function { my @data = qw/ 0.503989 0.507978 0.511966 0.515953 0.519939 0.523922 0.527903 0.531881 0.535856 0.539828 0.543795 0.547758 0.551717 0.55567 0.559618 0.563559 0.567495 0.571424 0.575345 0.57926 0.583166 0.587064 0.590954 0.594835 0.598706 0.602568 0.60642 0.610261 0.614092 0.617911 0.62172 0.625516 0.6293 0.633072 0.636831 0.640576 0.644309 0.648027 0.651732 0.655422 0.659097 0.662757 0.666402 0.670031 0.673645 0.677242 0.680822 0.684386 0.687933 0.691462 0.694974 0.698468 0.701944 0.705401 0.70884 0.71226 0.715661 0.719043 0.722405 0.725747 0.729069 0.732371 0.735653 0.738914 0.742154 0.745373 0.748571 0.751748 0.754903 0.758036 0.761148 0.764238 0.767305 0.77035 0.773373 0.776373 0.77935 0.782305 0.785236 0.788145 0.79103 0.793892 0.796731 0.799546 0.802337 0.805105 0.80785 0.81057 0.813267 0.81594 0.818589 0.821214 0.823814 0.826391 0.828944 0.831472 0.833977 0.836457 0.838913 0.841345 0.843752 0.846136 0.848495 0.85083 0.853141 0.855428 0.85769 0.859929 0.862143 0.864334 0.8665 0.868643 0.870762 0.872857 0.874928 0.876976 0.879 0.881 0.882977 0.88493 0.886861 0.888768 0.890651 0.892512 0.89435 0.896165 0.897958 0.899727 0.901475 0.9032 0.904902 0.906582 0.908241 0.909877 0.911492 0.913085 0.914657 0.916207 0.917736 0.919243 0.92073 0.922196 0.923641 0.925066 0.926471 0.927855 0.929219 0.930563 0.931888 0.933193 0.934478 0.935745 0.936992 0.93822 0.939429 0.94062 0.941792 0.942947 0.944083 0.945201 0.946301 0.947384 0.948449 0.949497 0.950529 0.951543 0.95254 0.953521 0.954486 0.955435 0.956367 0.957284 0.958185 0.95907 0.959941 0.960796 0.961636 0.962462 0.963273 0.96407 0.964852 0.96562 0.966375 0.967116 0.967843 0.968557 0.969258 0.969946 0.970621 0.971283 0.971933 0.972571 0.973197 0.97381 0.974412 0.975002 0.975581 0.976148 0.976705 0.97725 0.977784 0.978308 0.978822 0.979325 0.979818 0.980301 0.980774 0.981237 0.981691 0.982136 0.982571 0.982997 0.983414 0.983823 0.984222 0.984614 0.984997 0.985371 0.985738 0.986097 0.986447 0.986791 0.987126 0.987455 0.987776 0.988089 0.988396 0.988696 0.988989 0.989276 0.989556 0.98983 0.990097 0.990358 0.990613 0.990863 0.991106 0.991344 0.991576 0.991802 0.992024 0.99224 0.992451 0.992656 0.992857 0.993053 0.993244 0.993431 0.993613 0.99379 0.993963 0.994132 0.994297 0.994457 0.994614 0.994766 0.994915 0.99506 0.995201 0.995339 0.995473 0.995604 0.995731 0.995855 0.995975 0.996093 0.996207 0.996319 0.996427 0.996533 0.996636 0.996736 0.996833 0.996928 0.99702 0.99711 0.997197 0.997282 0.997365 0.997445 0.997523 0.997599 0.997673 0.997744 0.997814 0.997882 0.997948 0.998012 0.998074 0.998134 0.998193 0.99825 0.998305 0.998359 0.998411 0.998462 0.998511 0.998559 0.998605 0.99865 0.998694 0.998736 0.998777 0.998817 0.998856 0.998893 0.99893 0.998965 0.998999 0.999032 0.999065 0.999096 0.999126 0.999155 0.999184 0.999211 0.999238 0.999264 0.999289 0.999313 0.999336 0.999359 0.999381 0.999402 0.999423 0.999443 0.999462 0.999481 0.999499 0.999517 0.999534 0.99955 0.999566 0.999581 0.999596 0.99961 0.999624 0.999638 0.999651 0.999663 0.999675 0.999687 0.999698 0.999709 0.99972 0.99973 0.99974 0.999749 0.999758 0.999767 0.999776 0.999784 0.999792 0.9998 0.999807 0.999815 0.999822 0.999828 0.999835 0.999841 0.999847 0.999853 0.999858 0.999864 0.999869 0.999874 0.999879 0.999883 0.999888 0.999892 0.999896 0.9999 0.999904 0.999908 0.999912 0.999915 0.999918 0.999922 0.999925 0.999928 0.999931 0.999933 0.999936 0.999938 0.999941 0.999943 0.999946 0.999948 0.99995 0.999952 0.999954 0.999956 0.999958 0.999959 0.999961 0.999963 0.999964 0.999966 0.999967 0.999968 0.99997 0.999971 0.999972 0.999973 0.999974 0.999975 0.999976 0.999977 0.999978 0.999979 0.99998 0.999981 0.999982 0.999983 0.999983 0.999984 0.999985 0.999985 0.999986 0.999987 0.999987 0.999988 0.999988 0.999989 0.999989 0.99999 0.99999 0.999991 0.999991 0.999991 0.999992 0.999992 0.999993 0.999993 0.999993 0.999993 0.999994 0.999994 0.999994 0.999995 0.999995 0.999995 0.999995 0.999996 0.999996 0.999996 0.999996 0.999996 0.999996 0.999997 0.999997 0.999997 0.999997 0.999997 0.999997 0.999997 0.999998 0.999998 0.999998 0.999998 0.999998 0.999998 0.999998 0.999998 0.999998 0.999998 0.999998 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 0.999999 1 1 1 1 1/; my $_f = { "0" => 0.5 }; my $x = 0; for (@data) { $x += 0.01; $$_f{$x} = $_; $$_f{-$x} = 1 - $_; } my $_std_ncdf = interpolating_function($_f,"",1); sub ncdf { return &$_std_ncdf(@_) if @_ == 1; my ($x,$mu,$s) = @_; $mu ||= 0; $s ||= 1; $s = abs($s); return &$_std_ncdf( ($x-$mu)/$s ); } } #END: ncdf #----------------------------------------------------------------- $EXPORT_TAGS{math} = [qw/Nintegrate interpolating_function interpolate continuous_compounding discrete_compounding savings_plan loan_payment union intersection difference frac gcd extended_euclidean_algorithm modular_inverse ndiff dotprod /]; #----------------------------------------------------------------- =head2 :math - Mathematical Functions =head3 dotprod(\@\@) my $d = dotprod @x, @y; my $d = &dotprod(\@x, [1,2,3]); Compute the dot product of two vectors =cut #BEGIN: dotprod sub dotprod(\@\@) { Carp::croak("dotprod: Vectors must have same length ($#{$_[0]} != $#{$_[1]})") unless $#{$_[0]} == $#{$_[1]}; my $p = 0; $p += $_[0][$_] * $_[1][$_] for 0..$#{$_[0]}; $p; } #END: dotprod =head3 modular_inverse $inverse = modular_inverse( $x, $m ); Compute the inverse of $x in the group Z_m. The inverse will be within the set [0..$m-1]. Note: $x must be relatively prime to $m. =cut #BEGIN: modular_inverse, depends: extended_euclidean_algorithm sub modular_inverse { my ($i, undef, $d ) = extended_euclidean_algorithm(@_); return $i % $_[1] if $d == 1 and $i; } #END: modular_inverse =head3 gcd Compute the Greatest Common Divisor of a list of integers using the Euclidean algorithm. Negative numbers are treated as positives by this routine. =cut #BEGIN: gcd sub gcd { return $_[0] if @_ == 1; my ($a, $b, $r, $d) = splice @_, 0, 2; for ($a, $b) { $_ = -$_ if $_ < 0 } return @_?gcd($a, @_):$a if $a == $b; ($b, $a) = ($a, $b) if $b < $a; return @_?gcd($b, @_):$b if $a == 0; return @_?gcd($a, @_):$a if $b == 0; $d = $a; $r = $b % $a; ($a, $b, $d, $r) = ($r, $a, $r, $a % $r) while $r != 0; return @_?gcd($d, @_):$d; } #END: gcd =head3 extended_euclidean_algorithm ($alphs, $beta, $d) = extended_euclidean_algorithm($a, $b) For a pair of integers, a and b, perform the extended Euclidean algorithm to compute alpha, beta, and d such that: d = alpha * a + beta * b In particular, if d = 1 then alpha = a^-1 mod b. =cut #BEGIN: extended_euclidean_algorithm sub extended_euclidean_algorithm { my ($A, $B) = @_; my ($x0, $x1, $y0, $y1, $q) = (1, 0, 0, 1); while ($B != 0) { ($q, $B, $A) = ((int($A/$B)), ($A % $B), $B); ($x0, $x1) = ($x1, ($x0 - ($q * $x1))); ($y0, $y1) = ($y1, ($y0 - ($q * $y1))); } return ($x0, $y0, $A); } #END: extended_euclidean_algorithm =head3 frac my ($N, $D) = frac( $dec ) Convert a decimal to a fraction. Returns undef if number is not rationalizable (must have repeating decimals). =cut #BEGIN: frac, depends: gcd $_Util::frac::decimallength = 16; sub frac { my $n = shift; my $gcd; return $n unless defined $n and $n =~ /\./; if ($n =~ /([-+]?)([\d\.]+?)(\d+?)(?:\3)+((??{ '\d{0,'.(length($3)-1).'}' }))$/) { my ($sign, $pre, $pat, $post, $mul) = ($1,$2,$3,$4, 0); return unless $post eq substr($pat, 0, length($post)); if ($pre =~ /^\d+\.(\d+)/) { $mul = length($1) } $pre =~ s/\.//; $pre =~ s/^0+(\d)/$1?$1:0/e; my ($N, $D) = ("$pre$pat"-$pre, ("9"x(length $pat)).("0"x$mul)); $gcd = gcd($N,$D); return wantarray ? ("$sign$N"/$gcd, $D/$gcd) : ("$sign$N"/$gcd).'/'.($D/$gcd); } else { my ($a, $b) = split /\./, $n; return unless $_Util::frac::decimallength > length("$a.$b"); ($a, $b) = ($a.$b, "1".("0"x(length $b))); $gcd = gcd($a,$b); return wantarray ? ($a/$gcd, $b/$gcd) : ($a/$gcd).'/'.($b/$gcd); } } #END: frac =head3 ndiff(&;@) my $df = ndiff \&f; my $df = ndiff \&f, $x; Perform numerical differentiation using the central difference formula. f'(a) \approx ( f(a+h) - f(a-h) ) / (2h) If M \approx f(a) \approx f''(c) for all c \in [a-h, a+h], then the total error (truncation plus round-off) is on the order of: error = M * (h^2/6 + eps/h) where eps is the machine epsilon (eps = 2E-16 on 32-bit perl; (1 + 2E-16 != 1), however (1 + (2E-16)/2 == 1) ). Thus, error is minimized when h \approx \sqrt[3]{eps}. We choose h = 2**(-20) = 0.00000095367431640625. Examples: sub f { $_[0]**2 } my $df = ndiff \&f; printf "%.5f | %.5f\n", f($_), $df->($_) for 0..10; say "f'(3) = ", ndiff(\&f, 3); $df = ndiff { $_ ** 2 }; =cut #BEGIN: ndiff { my $delta = 0.00000095367431640625; my $ddelta = 0.0000019073486328125; sub ndiff(&;@) { my $f = shift; my $df = sub { local $_; ($f->($_ = $_[0]+$delta) - $f->($_ = $_[0]-$delta)) / $ddelta }; @_ ? $df->(@_) : $df; } } #END: ndiff =head3 Nintegrate Nintegrate { block } $a, $b, $n Nintegrate \&sub, $a, $b, $n Integrate a function between two values using a composite Simpson's rule. The last argument C<$n> is optional and specifies the number of intervals to divide the region into. The default is 1000. The function is assumed to be continuous with continuous derivatives up to order 4. C<$n> should be even, but we adjust it if it is not. The error is given by, 5 (b-a) (4) err = -------- f ( x ) 4 180 n for some x in the interval (a,b). =cut #BEGIN: Nintegrate sub Nintegrate(&@) { local $_; my ($f, $A, $B, $n) = @_; $n ||= 1000; $n += 1 if $n % 2; my $h = ($B-$A)/$n; my ($x0, $x1, $x2) = (0, 0, 0); $_ = $A; $x0 += &$f($A); $_ = $B; $x0 += &$f($B); for my $i (1..$n-1) { $_ = $A + $i * $h; ($i % 2) ? $x1 = $x1 + &$f($_) : $x2 = $x2 + &$f($_); } return $h * ($x0 + 2 * $x2 + 4 * $x1) / 3; } #END: Nintegrate =head3 interpolating_function interpolating_function \%function, $message, $nowarn Returns a perl subroutine which interpolates C<%function> linearly using L. C<$message> is an optional message that will be used if an input value is given which is out of range of the interpolator. =cut #BEGIN: interpolating_function, depends: interpolate sub interpolating_function { my ($func, $message, $nowarn) = @_; $message = "" unless defined $message; my $keys = [ sort {$a<=>$b} keys %$func ]; return sub { interpolate(shift(), $func, $keys, $message, $nowarn) } } #END: interpolating_function =head3 interpolate interpolate $x, \%function, \@keys, $message, $nowarn Perform an interpolation of the provided function at the point C<$x>. The keys of the function need not be evenly spaced, the value is approximated linearly. The last two parameters are optional, C<@keys> is a sorted list of the keys of the function and C<$message> is used in the error message that is printed if C<$x> is out of range of the interpolator. =cut #BEGIN: interpolate { use Carp; sub interpolate { my ($x, $hash, $keys, $mesg, $nowarn) = @_; carp "Use of uninitialized value in interpolation" unless defined $x; $keys = [ sort {$a<=>$b} keys %$hash ] unless $keys; $mesg = "" unless defined $mesg; if ($x < $$keys[0]) { print STDERR "\nWarning! Hit lower endpoint in interpolation. $mesg\n", " Requested: x = $x Using: (x, y) = ($$keys[0], $$hash{$$keys[0]})\n" unless $nowarn; return $$hash{$$keys[0]}; } elsif ($x > $$keys[-1]) { print STDERR "\nWarning! Hit upper endpoint in interpolation. $mesg\n", " Requested: x = $x Using: (x, y) = ($$keys[-1], $$hash{$$keys[-1]})\n" unless $nowarn; return $$hash{$$keys[-1]}; } elsif ($x == $$keys[-1]) { return $$hash{$$keys[-1]} } elsif ($x == $$keys[0]) { return $$hash{$$keys[0]} } else { # find $i0 so that $keys[$i0] < $x < $keys[$i0+1]. my ($i0,$i1,$i) = (0,$#{$keys}); while ($i1-$i0 > 1) { $i = int(($i1+$i0)/2); if ($x == $$keys[$i]) { $i0 = $i; last } ($x < $$keys[$i]) ? ($i1 = $i) : ($i0 = $i); } # Note: This gives an exact answer if $x == $$keys[$i] for some $i. my ($x1, $x2, $y1, $y2) = ($$keys[$i0], $$keys[$i0+1], $$hash{$$keys[$i0]}, $$hash{$$keys[$i0+1]}); return ($y2-$y1)/($x2-$x1) * ($x-$x1) + $y1; } } } #END: interpolate =head3 continuous_compounding continuous_compounding P => $P, r => $r, t => $t; continuous_compounding A => $A, P => $P, r => $r, t => $t, solve_for => $q; Given any three of "A" (Accumulated balance), "P" (Principal balance), "r" (interest Rate), and "t" (Time to withdrawal), this function will return the fourth. If all four values are provided (presumedly one of them will be undefined or contain garbage) then you must provide a "solve_for" key which points to one of "A", "P", "r", or "t". All values are case insensitive. =cut #BEGIN: continuous_compounding, depends: str sub continuous_compounding { my %opt = map lc(str($_)), @_; unless ($opt{solve_for}) { my $pat = join "|", map quotemeta, keys %opt; my @missing = grep !/^(?:$pat)$/, qw/a p r t/; return unless @missing == 1; $opt{solve_for} = $missing[0]; } my ($A, $P, $r, $t) = @opt{qw/a p r t/}; for ($opt{solve_for}) { /a/ and return $P * exp($r*$t); /p/ and return $A * exp(-$r*$t); /r/ and return log($A/$P) / $t; /t/ and return log($A/$P) / $r; } } #END: continuous_compounding =head3 discrete_compounding discrete_compounding P => $P, r => $r, t => $t, n => $n; discrete_compounding A => $A, P => $P, r => $r, t => $t, n => $n, solve_for => $q; Given "n" (Number of compoundings per year) and any three of "A" (Accumulated balance), "P" (Principal balance), "r" (interest Rate), and "t" (Time to withdrawal), this function will return the fourth. If all five values are provided (presumedly one of them will be undefined or contain garbage) then you must provide a "solve_for" key which points to one of "A", "P", "r", or "t". All values are case insensitive. =cut #BEGIN: discrete_compounding, depends: str sub discrete_compounding { my %opt = map lc(str($_)), @_; unless ($opt{solve_for}) { my $pat = join "|", map quotemeta, keys %opt; my @missing = grep !/^(?:$pat)$/, qw/a p r t/; return unless @missing == 1; $opt{solve_for} = $missing[0]; } my ($A, $P, $r, $n, $t) = @opt{qw/a p r n t/}; for ($opt{solve_for}) { /a/ and return $P*(1+$r/$n)**($n*$t); /p/ and return $A*(1+$r/$n)**(-$n*$t); /r/ and return $n*(exp(log($A/$P)/($n*$t))-1); /t/ and return log($A/$P)/($n*log(1+$r/$n)); } } #END: discrete_compounding =head3 savings_plan savings_plan pmt => $pmt, r => $r, t => $t, n => $n; savings_plan A => $A, pmt => $pmt, r => $r, t => $t, n => $n, solve_for => $q; Given "n" (Number of deposits per year), "r" (interest Rate), and any two of "A" (Accumulated balance), "pmt" (Payment amount), and "t" (Time to withdrawal), this function will return the third. If all five values are provided (presumedly one of them will be undefined or contain garbage) then you must provide a "solve_for" key which points to one of "A", "pmt", "r", or "t". All values are case insensitive. =cut #BEGIN: savings_plan, depends: str sub savings_plan { my %opt = map lc(str($_)), @_; unless ($opt{solve_for}) { my $pat = join "|", map quotemeta, keys %opt; my @missing = grep !/^(?:$pat)$/, qw/a pmt t/; return unless @missing == 1; $opt{solve_for} = $missing[0]; } my ($A, $pmt, $r, $n, $t) = @opt{qw/a pmt r n t/}; for ($opt{solve_for}) { /a/ and return $pmt*( (1+$r/$n)**($n*$t)-1 )/($r/$n); /p/ and return $A*($r/$n)/( (1+$r/$n)**($n*$t)-1 ); /t/ and return log(1+($A*$r)/($pmt*$n))/($n*log(1+$r/$n)); } } #END: savings_plan =head3 loan_payment loan_payment pmt => $pmt, r => $r, t => $t, n => $n; loan_payment L => $L, pmt => $pmt, r => $r, t => $t, n => $n, solve_for => $q; Given "n" (Number of deposits per year), "r" (interest Rate), and any two of "L" (Loan amount), "pmt" (Payment amount), and "t" (Time to full payback), this function will return the third. If all five values are provided (presumedly one of them will be undefined or contain garbage) then you must provide a "solve_for" key which points to one of "A", "pmt", "r", or "t". All values are case insensitive. =cut #BEGIN: loan_payment, depends: str sub loan_payment { my %opt = map lc(str($_)), @_; unless ($opt{solve_for}) { my $pat = join "|", map quotemeta, keys %opt; my @missing = grep !/^(?:$pat)$/, qw/l pmt t/; return unless @missing == 1; $opt{solve_for} = $missing[0]; } my ($L, $pmt, $r, $n, $t) = @opt{qw/l pmt r n t/}; for ($opt{solve_for}) { /l/ and return $pmt/($r/$n+($r/$n)/((1+$r/$n)**($n*$t)-1)); /p/ and return $L*($r/$n+($r/$n)/((1+$r/$n)**($n*$t)-1)); /t/ and return log(1+1/( ($n*$pmt)/($r*$L)-1 ))/($n*log(1+$r/$n)); } } #END: loan_payment =head3 union union( $L1, $L2, ... ) Return the list of (string) elements which appear in any of the given arrays. Objects are stringified, and the string values are returned. This may be upgraded to be smarter someday. =cut #BEGIN: union, 1 line sub union { my %x; for (@_) { next unless @$_; undef(@x{@$_}) } keys %x } =head3 intersection intersection( $L1, $L2, ... ) Return the list of (string) elements which appear in all of the given arrays. Objects are stringified, and the string values are returned. This may be upgraded to be smarter someday. =cut #BEGIN: intersection, 1 line sub intersection { my %x; for (@_) { my %i; $i{$_}++ or $x{$_}++ for @$_ } grep($x{$_} == @_, keys %x) } =head3 difference difference( $L1, $L2, ... ) Return the list of (string) elements which appear in C<$L1> but not in any of the subsequent arrays. Objects are stringified, and the string values are returned. This may be upgraded to be smarter someday. =cut #BEGIN: difference, 1 line sub difference { my %x; undef $x{$_} for @{+shift}; for (@_) { delete($x{$_}) for @$_ } keys %x } #----------------------------------------------------------------- $EXPORT_TAGS{list} = [qw/find_index find_index_with_memory first first_pos partition even_positions odd_positions suggestion_sort unique lex_sort flatten transposed cartesian natural_sort natural_cmp binary_search bucketize /]; #----------------------------------------------------------------- =head2 :list - List Utilities =head3 binary_search(&@) binary_search { $_ > 4 } @sorted_nums; binary_search \&f, @sorted_nums; Implements a binary search. Second argument must be an array (not a list) and must be sorted. Returns the index of the first element for which the function C<&f> returns true. Returns C if there is no such element. Function must return true for all elements larger than desired element. To search for a particular element, the following must be done: my $i = binary_search { $_ >= 4 } @sorted_nums; $i = undef unless $sorted_nums[$i] == 4; =cut #BEGIN: binary_search sub binary_search(&\@) { my ($f, $x) = @_; local $_; return 0 if &$f($_ = $$x[0]); return unless &$f($_ = $$x[-1]); my ($a, $b, $i) = (0, $#{$x}, int($#{$x}/2)); while ($i != $a) { &$f($_ = $$x[$i]) ? ($b = $i) : ($a = $i); } continue { $i = $a + int(($b-$a)/2) } # Note: $i = int(($a+$b)/2) has possible overflow issues since a+b may be large return $b; } #END: binary_search =head3 natural_sort A "fast, flexible, stable sort" that sorts strings naturally (that is, numerical substrings are compared as numbers). Code lifted from tye on perlmonks: http://www.perlmonks.org/?node_id=442285 Limitations: http://www.perlmonks.org/?node_id=483466 It doesn't "properly" sort negative numbers, non-fixed decimal values, nor integers larger than 2^32-1. =cut #BEGIN: natural_sort sub natural_sort { @_[ map { unpack "N", substr($_,-4) } sort map { my $key = $_[$_]; $key =~ s[(\d+)][ pack "N", $1 ]ge; $key . pack "N", $_ } 0..$#_ ]; } #END: natural_sort =head3 natural_cmp A fast, flexible, stable comparator that sorts strings naturally (that is, numerical substrings are compared as numbers). Code lifted from tye on perlmonks: http://www.perlmonks.org/?node_id=442285 Limitations: http://www.perlmonks.org/?node_id=483466 It doesn't "properly" sort negative numbers, non-fixed decimal values, nor integers larger than 2^32-1. =cut #BEGIN: natural_cmp sub natural_cmp { my ($x,$y) = map { my $key = $_; $key =~ s[(\d+)][ pack "N", $1 ]ge; $key } @_; $x cmp $y; } #END: natural_cmp =head3 cartesian cartesian \@list1, \@list2, ... cartesian $n1, $n2, ... Form the cartesian product of the elements in the lists. That is, all lists of the form C<[ $e1, $e2, ... ]> where C<$e1> comes from C<@list1>, and so on. This function returns an array reference in scalar context, and a list in list context. In the second form, the lists C<[1..$n1]>, C<[1..$n2]>, ... will be constructed, and the cartesian product of those lists will be computed. Note however, that the two forms can not be combined, you must either provide only arrays or only numbers. =cut #BEGIN: cartesian { my $_X = sub { my ($A, $b) = @_; my @X; $b = [1..$b] unless ref($b); for my $a (@$A) { push @X, [@$a, $_] for @$b; } return \@X; }; sub cartesian { return wantarray ? () : [] unless @_; my $X = [map [$_], @{ref($_[0]) ? shift() : [1..shift]}]; $X = $_X->($X, $_) for @_; wantarray ? @$X : $X; } } #END: cartesian =head3 transposed transposed \@LoL Transpose the (possibly non-regular) list of lists C<@LoL>. Returns a new list reference containing the objects in C<@LoL>. =cut #BEGIN: transposed sub transposed { my $M = shift; my $N = []; my $i = 0; for (@$M) { $$N[$_][$i] = $$M[$i][$_] for 0..$#{$_}; } continue { $i++ } return $N; } #END: transposed =head3 flatten flatten @LoLoLoL Will recursively run through each element of the input list and will return all components as a single large list. Lists may be arbitrarily nested and any objects which are not perl ARRAY's will be considered plain elements. The expansion is done depth-first. Returns a reference in scalar context, and the list of elements in list context. Example: @y = flatten [1, 2, 3], [4, 5], [[6, 7], 8, 9]; say "Hooray!" if "@y" eq "1 2 3 4 5 6 7 8 9"; =cut #BEGIN: flatten sub flatten { my @x; for (@_) { if ('ARRAY' eq ref) { push @x, flatten( @$_ ) } else { push @x, $_ } } return wantarray ? @x : \@x; } #END: flatten =head3 find_index find_index \&f, \@array find_index { BLOCK } \@array find_index { BLOCK } \@array, $start, $stop, $step May be called with either a function or a block as the first argument. The function will then begin at C<$start> (or zero) and then step by C<$step> (or 1) until we reach C<$stop> (or the end of the array). C<$_> will be set to the current array entry which will also be passed to the function as its only argument. Thus you may use either C<$_> or C<$_[0]> within your function. C<$start> may be greater then C<$stop> in which case we will proceed backwards. In all cases the sign of C<$d> will be adjusted if necessary so that we finish in finite time. =cut =head3 find_index_with_memory find_index_with_memory \&f, \@array find_index_with_memory { BLOCK } \@array find_index_with_memory { BLOCK } \@array, $start, $stop, $step May be called with either a function or a block as the first argument. The function will then begin at C<$start> (or zero) and then step by C<$step> (or 1) until we reach C<$stop> (or the end of the array). The function will set the caller's C<$a> to the previous array entry and C<$b> to the current array entry and will also pass the two entries to the function as its only arguments. Thus you may use either C<$a, $b> or C<$_[0], $_[1]> as the previous and current entries respectively. C<$start> may be greater then C<$stop> in which case we will proceed backwards. In all cases the sign of C<$d> will be adjusted if necessary so that we finish in finite time. =cut #BEGIN: find_index sub find_index(&@) { local $_; my ($f, $A, $i, $n, $d) = @_; $i = 0 unless defined $i; $n = $#{$A} unless defined $n; $d ||= 1; # May not be zero! $d = -$d if $d*($n-$i) < 0; if ($i < $n) { while ($i <= $n) { return $i if &$f($_ = $$A[$i]); $i += $d } } else { while ($i >= $n) { return $i if &$f($_ = $$A[$i]); $i += $d } } return; } #END: find_index # local ($a, $b); # $a = $$A[$i]; $i += $d; # if ($i < $n) { while ($i <= $n) { return $i if &$f($a, $b = $$A[$i]); $a = $b; $i += $d } } # else { while ($i >= $n) { return $i if &$f($a, $b = $$A[$i]); $a = $b; $i += $d } } #BEGIN: find_index_with_memory sub find_index_with_memory(&@) { my ($f, $A, $i, $n, $d) = @_; $i = 0 unless defined $i; $n = $#{$A} unless defined $n; $d ||= 1; # May not be zero! $d = -$d if $d*($n-$i) < 0; # Ah, glorious Perl! no strict 'refs'; no warnings 'once'; my $caller = caller; local(*{$caller."::a"}) = \my $a; local(*{$caller."::b"}) = \my $b; $a = $$A[$i]; $i += $d; if ($i < $n) { while ($i <= $n) { return $i if &$f($a, $b = $$A[$i]); $a = $b; $i += $d } } else { while ($i >= $n) { return $i if &$f($a, $b = $$A[$i]); $a = $b; $i += $d } } return; } #END: find_index_with_memory =head3 first See also: List::Util first first \&sub, @list # if @list is not list of arrays first { block } @list # if @list is not list of arrays first { block } \@list first { block } \@list, $start_pos Return the first item of C<@list> for which the code returns true. Code may be either a subroutine reference or a code block. C<$_> will be set to each list entry and will also be passed in as the first (and only) argument. You may pass C<@list> by reference (which means that you must pass it by reference if it contains an array reference in its first entry). If you pass C<@list> by reference and provide a third argument, then the tird argument will be taken to be the first position that should be checked. =cut =head3 first_pos See also: List::MoreUtils first_index first_pos \&sub, @list first_pos { block } @list first_pos { block } \@list, $start_pos Return the index of the first item of C<@list> for which the code returns true. Code may be either a subroutine reference or a code block. C<$_> will be set to each list entry and will also be passed in as the first (and only) argument. You may pass C<@list> by reference (which means that you must pass it by reference if it contains an array reference in its first entry). If you pass C<@list> by reference and provide a third argument, then the tird argument will be taken to be the first position that should be checked. In this case the returned index will still correspond correctly to a position in C<@list>. =cut #BEGIN: first sub first(&@) { my $f = shift; if (ref $_[0] eq 'ARRAY') { if (@_ > 1) { for (@{$_[0]}[$_[1]..$#{$_[0]}]) { return $_ if &$f($_) } } else { for (@{$_[0]}) { return $_ if &$f($_) } } } else { for (@_) { return $_ if &$f($_) } } undef } #END: first #BEGIN: first_pos sub first_pos(&@) { my $f = shift; if (ref $_[0] eq 'ARRAY') { if (@_ > 1) { for my $i ($_[1]..$#{$_[0]}) { return $i if &$f(local $_ = $_[0][$i]) } } else { for my $i (0..$#{$_[0]}) { return $i if &$f(local $_ = $_[0][$i]) } } } else { for my $i (0..$#_) { return $i if &$f(local $_ = $_[$i]) } } undef } #END: first_pos =head3 bucketize my %buckets = bucketize { block } @list; my %buckets = bucketize \&tagger, @list; my $buckets = bucketize \&tagger, @list; Partition items into buckets given a generic tagger. Returns hash ref in scalar context. Tagger should accept a single argument (or use C<$_>) and should return a tag indicating the bucket to place the item in. Function is called in list context so that the following works as expected: %by_file_type = bucketize { /\.([^\.]+)$/ } @images; Also note that values are given as bound aliases, so they can also be "cleverly" modified: # ("foo-bar", "foo-baz", "bip-bop") # becomes: ( foo => ["bar","baz"], bip => ["bop"] ) my %buckets = bucketize { s/^([^-]+)-//; $1 } @x; =cut #BEGIN: bucketize sub bucketize(&@) { my ($f,%h) = (shift); for (@_) { my ($key) = $f->($_); push @{$h{$key}}, $_ } return wantarray ? %h : \%h; } #END: bucketize =head3 partition See also: List::MoreUtils part ($true, $false) = partition { block } @list ($true, $false) = partition \&test_func, @list Partitions a list into two lists based on the truth value of a subroutine or block. The return value is two array references, the first of which is the elements of the original list for which the function returned true, and the second are those elements for which the function returned false. =cut #BEGIN: partition sub partition(&@) { my ($f,@a,@b) = (shift); for (@_) { $f->($_) ? push(@a,$_) : push(@b,$_) } return (\@a, \@b) } #END: partition =head3 even_positions @list_2 = even_positions @list_1; @list_2 = even_positions \@list_1; Returns the elements of the list that have even indices. Argument may be list or arrayref, always returns a list of values. =cut =head3 odd_positions @list_2 = odd_positions @list_1; @list_2 = odd_positions \@list_1; Returns the elements of the list that have even indices. Argument may be list or arrayref, always returns a list of values. =cut #BEGIN: even_positions sub even_positions { return even_positions(\@_) if @_ > 1; return @_ if @_ == 0 or (@_ == 1 and ref($_[0]) ne 'ARRAY'); my $x = shift; @$x[map 2*$_, 0..$#{$x}/2]; } #END: even_positions #BEGIN: odd_positions sub odd_positions { return odd_positions(\@_) if @_ > 1; return () if @_ == 0 or (@_ == 1 and ref($_[0]) ne 'ARRAY'); my $x = shift; @$x[map 2*$_-1, 1..@$x/2]; } #END: odd_positions =head3 suggestion_sort suggestion_sort \@list, \@preferred Returns @list sorted by the order of the objects in @preferred. All elements are matched as strings and elements of @list that are not in @preferred are placed at the end of the resulting list in a way that preserves their original ordering within @list. Notes: Undefined entries will be ignored. Only the first appearence of an element in the C<@preferred> list will be considered. Repetions in C<@list> will be reduced to a single occurrence. =cut #BEGIN: suggestion_sort sub suggestion_sort { my ($toSort, $Suggestion) = @_; my ($i, %sugg, @sorted) = 1; for (@$Suggestion) { $sugg{$_} ||= $i++ }; for (@$toSort) { if (defined $sugg{$_}) { $sorted[$sugg{$_}] = $_ } else { $sorted[$i++] = $_ } } grep defined, @sorted; } #END: suggestion_sort =head3 unique See also: List::MoreUtils uniq my @u = unique @list; my @u = unique \@list; my $h = unique @list; my $h = unique \@list; Takes a list (or reference to an array) and returns a list of unique (up to stringification) objects in apparently random order. In scalar context, a histogram (hash with objects as keys, and counts as values) is returned. Note: List::MoreUtils::uniq preserves the original order of the elements. =cut #BEGIN: unique sub unique { my %x; my $A = (@_ == 1 and ref($_[0]) eq 'ARRAY') ? $_[0] : \@_; return unless @$A; if (wantarray) { # be more efficient when we only care about existence undef(@x{@$A}); return keys %x; } else { $x{$_}++ for @$A; return \%x; } } #END: unique =head3 lex_sort lex_sort @list_of_lists lex_sort sub{ }, @list_of_lists Sort the lists lexicographically element-wise. The sorting subroutine may use the package variables C<$a> and C<$b> or may take two arguments, but need only worry about element-wise comparison. Example: lex_sort( [qw/abc ac a/], [qw/abc ab c d/], [qw/x y z/], [qw/abc ab c/] ) # gives: # ( [qw/abc ab c/], # [qw/abc ab c d/], # [qw/abc ac a/], # [qw/x y z/] # ) Similarly with numerical data using: C=E $b }> =cut #BEGIN: lex_sort sub lex_sort { return unless @_; my $f = (ref($_[0]) eq 'CODE') ? shift() : sub{ $_[0] cmp $_[1] }; # Ah, glorious Perl! no strict 'refs'; no warnings 'once'; my $caller = caller; local(*{$caller."::a"}) = \my $a; local(*{$caller."::b"}) = \my $b; my ($rlex,$x); $rlex = sub($$){ my ($A,$B) = @_; return -1 if @$A == 0 and @$B > 0; return 1 if @$A > 0 and @$B == 0; $x = $f->($a = $$A[0], $b = $$B[0]); return $x if $x != 0; return $rlex->([@$A[1..$#{$A}]], [@$B[1..$#{$B}]]); }; sort $rlex @_; } #END: lex_sort #----------------------------------------------------------------- $EXPORT_TAGS{patterns} = [qw/$_re_int $_re_num $_re_exp $_re_wrd is_int is_num is_float is_word readonly like_array like_hash like_scalar $_re_image_ext is_image_file/]; #----------------------------------------------------------------- =head2 :patterns - Tests and Patterns =head3 $_re_int Pattern which matches an integer expression. Beware, this pattern allows whitespace in the string which perl may not allow when interpreting strings as numbers. You may need to remove all whitespace from strings which match this pattern. =cut =head3 $_re_num Pattern which matches an floating-point expression. Beware, this pattern allows whitespace in the string which perl may not allow when interpreting strings as numbers. You may need to remove all whitespace from strings which match this pattern. =cut =head3 $_re_exp Pattern which matches an exponent part (Ex: S<2.3 e -10>) of a floating-point expression. Beware, this pattern allows whitespace in the string which perl may not allow when interpreting strings as numbers. You may need to remove all whitespace from strings which match this pattern. =cut =head3 $_re_wrd Pattern which matches safe "word-like" data. This pattern does not match whitespace and most punctuation but does allow hyphens "-" and underscores. =cut #BEGIN: $_re_int, 1 line our $_re_int = '[\+\-]?\s*\d+'; #BEGIN: $_re_exp, 1 line; depends: $_re_int our $_re_exp = '[eE]\s*'.$_re_int; #BEGIN: $_re_num, 1 line; depends: $_re_exp our $_re_num = '[\+\-]?\s*(?:\d*\.\d+|\d+\.?\d*)(?:'.$_re_exp.')?'; #BEGIN: $_re_wrd, 1 line our $_re_wrd = '[\w\-]+'; =head3 is_int Returns a true value if the argument looks like an integer expression. If no argument is provided, C<$_> is examined. Beware, this subroutine allows whitespace in the string which perl may not allow when interpreting strings as numbers. You may need to remove all whitespace from strings for which this subroutine returns true. =cut =head3 is_num Returns a true value if the argument looks like a floating-point (or integer) expression. If no argument is provided, C<$_> is examined. Beware, this subroutine allows whitespace in the string which perl may not allow when interpreting strings as numbers. You may need to remove all whitespace from strings for which this subroutine returns true. =cut =head3 is_float Returns a true value if the argument looks like a floating-point (or integer) expression. If no argument is provided, C<$_> is examined. Beware, this subroutine allows whitespace in the string which perl may not allow when interpreting strings as numbers. You may need to remove all whitespace from strings for which this subroutine returns true. =cut =head3 is_word Returns a true value if the argument looks like a word. If no argument is provided, C<$_> is examined. Words do not have spaces and do not typically have punctuation, though hyphens "-" and underscores are allowed. =cut #BEGIN: is_num, 1 line; depends: str $_re_num sub is_num { @_ ? str($_[0]) =~ /^\s*$_re_num\s*$/o : str($_) =~ /^\s*$_re_num\s*$/o } #BEGIN: is_float, 1 line; depends: str $_re_num sub is_float { @_ ? str($_[0]) =~ /^\s*$_re_num\s*$/o : str($_) =~ /^\s*$_re_num\s*$/o } #BEGIN: is_int, 1 line; depends: str $_re_int sub is_int { @_ ? str($_[0]) =~ /^\s*$_re_int\s*$/o : str($_) =~ /^\s*$_re_int\s*$/o } #BEGIN: is_word, 1 line; depends: str $_re_wrd sub is_word { @_ ? str($_[0]) =~ /^$_re_wrd$/o : str($_) =~ /^$_re_wrd$/o } =head3 $_re_image_ext Pattern which matches image-type filename extensions. The list of extensions matched (case insensitive) are: BMP CMYK CMYKA DCM DCX DIB DPS DPX EPI EPS EPS2 EPS3 EPSF EPSI EPT FAX FITS FPX G3 GIF GIF87 GRAY ICB ICM ICO ICON IPTC JBG JBIG JP2 JPC JPEG JPG MAP MIFF MNG MONO MPC MTV MVG OTB P7 PAL PALM PBM PCD PCDS PCL PCT PCX PDB PGM PICON PICT PIX PLASMA PNG PNM PPM PSD PTIF RAS RGB RGBA RLA RLE ROSE SGI SUN SVG TGA TIF TIFF UYVY VDA VICAR VID VIFF VST WBMP X XBM XC XCF XPM XV XWD YUV =cut #BEGIN: $_re_image_ext, 1 line our $_re_image_ext = '(?i:bmp|cmyk|cmyka|dcm|dcx|dib|dps|dpx|epi|eps|eps2|eps3|epsf|epsi|ept|fax|fits|fpx|g3|gif|gif87|gray|icb|icm|ico|icon|iptc|jbg|jbig|jp2|jpc|jpeg|jpg|map|miff|mng|mono|mpc|mtv|mvg|otb|p7|pal|palm|pbm|pcd|pcds|pcl|pct|pcx|pdb|pgm|picon|pict|pix|plasma|png|pnm|ppm|psd|ptif|ras|rgb|rgba|rla|rle|rose|sgi|sun|svg|tga|tif|tiff|uyvy|vda|vicar|vid|viff|vst|wbmp|x|xbm|xc|xcf|xpm|xv|xwd|yuv)'; =head3 is_image_file Returns a true value if the argument looks like an image file. If no argument is provided, C<$_> is examined. The ist of extensions matched (case insensitive) are: BMP CMYK CMYKA DCM DCX DIB DPS DPX EPI EPS EPS2 EPS3 EPSF EPSI EPT FAX FITS FPX G3 GIF GIF87 GRAY ICB ICM ICO ICON IPTC JBG JBIG JP2 JPC JPEG JPG MAP MIFF MNG MONO MPC MTV MVG OTB P7 PAL PALM PBM PCD PCDS PCL PCT PCX PDB PGM PICON PICT PIX PLASMA PNG PNM PPM PSD PTIF RAS RGB RGBA RLA RLE ROSE SGI SUN SVG TGA TIF TIFF UYVY VDA VICAR VID VIFF VST WBMP X XBM XC XCF XPM XV XWD YUV =cut #BEGIN: is_image_file; depends: str $_re_image_ext sub is_image_file { my $pat = reverse substr($_re_image_ext, 4, -1); my $str = reverse( @_ ? str($_[0]) : str($_) ); $str =~ /^(?:$pat)\./oi; } #END: is_image_file =head3 readonly Returns true if scalar argument is readonly. (Taken from Scalar::Util.) =cut #BEGIN: readonly # Taken straight from Scalar::Util sub readonly { return 0 if tied($_[0]) || (ref(\($_[0])) ne "SCALAR"); local($@, $SIG{__DIE__}, $SIG{__WARN__}); my $tmp = $_[0]; !eval { $_[0] = $tmp; 1 }; } #END: readonly =head3 like_array Returns true if the object can behave like an array. (This is just a nicer way to call UNIVERSAL::isa) =cut =head3 like_hash Returns true if the object can behave like a hash. (This is just a nicer way to call UNIVERSAL::isa) =cut =head3 like_scalar Returns true if the object can behave like a scalar. (This is just a nicer way to call UNIVERSAL::isa) =cut #BEGIN: like_array sub like_array($) { require UNIVERSAL; UNIVERSAL::isa($_[0],'ARRAY'); } #END: like_array #BEGIN: like_hash sub like_hash($) { require UNIVERSAL; UNIVERSAL::isa($_[0],'HASH'); } #END: like_hash #BEGIN: like_scalar sub like_scalar($) { require UNIVERSAL; UNIVERSAL::isa($_[0],'SCALAR'); } #END: like_scalar #----------------------------------------------------------------- $EXPORT_TAGS{parse} = [qw/str2hash unformat parse_user_agent/]; #----------------------------------------------------------------- =head2 :parse - General Interpreters / Parsers =head3 parse_user_agent my $hashref = parse_user_agent( $string ); my %hash = parse_user_agent( $string ); Given a user-agent string returns a hash containing the following fields. Fields which can not be determined are left undefined. =over 4 =item generic_os Returns the generic operating system type: Windows, Mac, OS2, Linux, UNIX =item os Returns the specific operating system type: Windoiws Vista, Windows Server 2003, Windows XP, Windows 2000, Debian, ... =item type One of: browser, textbrowser, bot, downloader, mobile Note: For this field, we try to make our best guess at which class the agent string fits into. =item program Quasi-canonicalized program name: Internet Explorer, Netscape, Mozilla, Firefox, wget, ... =item version Our best guess at the program version =item engine The Browser's rendering engine: Gecko, KHTML, MSHTML, Presto (opera), WebCore (apple), custom (other custom engines) =item engine-version The version of the rendering engine =item user-agent The unmodified user-agent string =item obsolete If true, the agent appears to be an obsolete web browser =back =cut #BEGIN: parse_user_agent, DEPENDS: fappend { my %kw = ( sie => "Siemens", sonyericsson => "SonyEricsson", ericsson => "SonyEricsson", blackberry => "BlackBerry", mot => "Motorola", palmos => "Palm OS", symbian => "Symbian OS", j2me => "Java Platform Micro Edition", beos => "BeOS", openvms => "OpenVMS", irix => "IRIX", sunos => "Solaris", seamonkey => "SeaMonkey", iceape => "IceApe", netscape6 => "Netscape", mdk => "Mandrake", "mac os x" => "Mac OS X", "america online browser" => "AOL Browser", ); sub parse_user_agent { return wantarray ? (qw/type bot user-agent -/) : {qw/type bot user-agent -/} if $_[0] eq '-'; my %ua = ("user-agent" => $_[0]); my $guessed_type; local $_ = lc shift; study; # OPERATING SYSTEM #----------------- if (/windows nt 6\.0/) { $ua{generic_os} = 'Windows'; $ua{os} = 'Windows Vista'; } elsif (/windows nt 5\.2/) { $ua{generic_os} = 'Windows'; $ua{os} = 'Windows Server 2003'; } elsif (/windows nt 5\.1/) { $ua{generic_os} = 'Windows'; $ua{os} = 'Windows XP'; } elsif (/windows nt 5\.0/) { $ua{generic_os} = 'Windows'; $ua{os} = 'Windows 2000'; $ua{obsolete} = 1; } elsif (/windows nt /) { $ua{generic_os} = 'Windows'; $ua{os} = 'Windows NT UNKNOWN'; $ua{obsolete} = 1; } elsif (/windows nt|winnt/) { $ua{generic_os} = 'Windows'; $ua{os} = 'Windows NT'; $ua{obsolete} = 1; } elsif (/windows ce/) { $ua{generic_os} = 'Windows'; $ua{os} = 'Windows CE'; $ua{type} = 'mobile'; } elsif (/windows 98|win98/) { $ua{generic_os} = 'Windows'; $ua{os} = 'Windows 98'; $ua{obsolete} = 1; } elsif (/windows 95|win95|win32/) { $ua{generic_os} = 'Windows'; $ua{os} = 'Windows 95'; $ua{obsolete} = 1; } elsif (/windows (16|3)|win16|16bit/) { $ua{generic_os} = 'Windows'; $ua{os} = 'Windows 3.x'; $ua{obsolete} = 1; } elsif (/windows/) { $ua{generic_os} = 'Windows'; $ua{os} = 'Windows UNKNOWN'; $ua{obsolete} = 1; } elsif (/OS\/2/) { $ua{generic_os} = 'OS/2'; $ua{os} = 'OS/2'; $ua{obsolete} = 1; } elsif (/linux/) { $ua{generic_os} = "Linux"; if (/\b(gentoo|debian|fedora|ubuntu|redhat|slackware|mdk|kanotix|suse|lycoris|knoppix|centos)\b/) { $ua{os} = (exists $kw{$1}) ? $kw{$1} : ucfirst $1; } } elsif (/\b(freebsd|openbsd|netbsd)\b/) { $ua{generic_os} = "BSD"; $ua{os} = (exists $kw{$1}) ? $kw{$1} : ucfirst $1; } elsif (/(macintosh|mac os x|mac os|macos|mac_\w+|mc68|\bmac\b)/) { $ua{generic_os} = "Mac OS"; $ua{os} = (exists $kw{$1}) ? $kw{$1} : ucfirst $1; $ua{obsolete} = 1 unless $ua{os} eq 'Mac OS X'; } elsif (/\b(blackberry|acer|philips|panasonic|alcatel|(?:sony)?ericsson|samsung|sie|mot|nokia|palmos|symbian)[ \-\/]?([\w.]*)\b/) { $ua{generic_os} = (exists $kw{$1}) ? $kw{$1} : ucfirst $1; $ua{os} = "$ua{generic_os} $2"; $ua{type} = 'mobile'; } elsif (/\b(beos|openvms|irix|amiga|sunos|j2me)\b/) { $ua{generic_os} = (exists $kw{$1}) ? $kw{$1} : ucfirst $1; $ua{os} = $ua{generic_os}; } elsif (/X11/) { $ua{generic_os} = "UNIX"; } # BROWSERS #--------- # Gecko-based browsers (netscape 6 and later) if (m!\bgecko/(\d+).*(seamonkey|firefox|bonecho|minefield|firebird|phoenix|iceape|iceweasel|camino|netscape6?|epiphany|galeon|flock|minimo|k\-meleon|k\-ninja|kazehakase)/([\d.]+)!) { $ua{program} = (exists $kw{$2}) ? $kw{$2} : ucfirst $2; $ua{version} = $3; $ua{type} = 'browser'; $ua{obsolete} = 1 if $1 < 19990000; } if (m!microsoft internet explorer!) { $ua{program} = 'Internet Explorer'; $ua{version} = "1.0"; $ua{type} = 'browser'; $ua{obsolete} = 1; } if (m!\bMSIE ([\d.]+)!) { $ua{program} = 'Internet Explorer'; $ua{version} = $1; $ua{type} = 'browser'; $ua{obsolete} = 1 if $1 < 6; } # Other non-obsolete browsers if (m!\b(shiira|omniweb|sunrisebrowser|icab|deskbrowse|safari|opera|konqueror|dillo)[ /]([\d.]+)!) { $ua{program} = (exists $kw{$1}) ? $kw{$1} : ucfirst $1; $ua{version} = $2; $ua{type} = 'browser'; } if (m!\b(lynx|e?links)[ /\(]+([\d.]+)!) { $ua{program} = (exists $kw{$1}) ? $kw{$1} : ucfirst $1; $ua{version} = $2; $ua{type} = 'textbrowser'; $ua{engine} = 'custom'; } if (m!(america online browser)[ /]([\d.]+)!) { $ua{program} = (exists $kw{$1}) ? $kw{$1} : ucfirst $1; $ua{version} = $2; $ua{type} = 'browser'; $ua{obsolete} = 1; $ua{engine} = 'custom'; } # Bots with versions if (m!\b(w3c\-checklink|googlebot(?:\-image)?|gigabot|w3c_(?:css_)validator(?:_[a-z]+)?|msnbot|cfetch|voyager|becomebot|grub-client|scooter|sbider|exabot)[/ \-]([\d.]*)!) { $ua{program} = (exists $kw{$1}) ? $kw{$1} : ucfirst $1; $ua{version} = $2; $ua{type} = 'bot'; } # downloaders if (m!\b(curl|wget|svn|apt\-http)/([\d.]+)!) { $ua{program} = (exists $kw{$1}) ? $kw{$1} : $1; $ua{version} = $2; $ua{type} = 'downloader'; } # Bots without versions if (m!\b(ia_archiver|ask\s+jeeves|baiduspider|gamespy|yahoo|looksmart\.net|slurp|http://[^/\s]+|\S*[Bb][Oo][Tt]\b\S*)\b! and !$ua{program}) { $ua{program} = (exists $kw{$1}) ? $kw{$1} : ucfirst $1; $ua{type} = 'bot'; } # ENGINES #-------- if (m!gecko/([0-9]+)!) { $ua{engine} = 'Gecko'; $ua{"engine-version"} = $1; delete $ua{obsolete} } elsif (m!khtml/([\d.]+)!) { $ua{engine} = 'KHTML'; $ua{"engine-version"} = $1; delete $ua{obsolete} } elsif (m!applewebkit/([\d.]+)!) { $ua{engine} = 'WebKit'; $ua{"engine-version"} = $1; delete $ua{obsolete} } if (exists($ua{program}) and $ua{program} eq 'Opera' and exists($ua{version})) { if ($ua{version} < 7) { $ua{engine} = 'Elektra'; $ua{obsolete} = 1 } else { $ua{engine} = 'Presto'; delete $ua{obsolete} } } if (exists($ua{program}) and $ua{program} eq 'Internet Explorer' and exists($ua{version})) { if ($ua{version} < 5) { $ua{engine} = 'Trident'; $ua{"engine-version"} = 1; $ua{obsolete} = 1 } elsif ($ua{version} < 5.5) { $ua{engine} = 'Trident'; $ua{"engine-version"} = 2; $ua{obsolete} = 1 } elsif ($ua{version} < 6) { $ua{engine} = 'Trident'; $ua{"engine-version"} = 3; $ua{obsolete} = 1 } elsif ($ua{version} < 7) { $ua{engine} = 'Trident'; $ua{"engine-version"} = 4; $ua{obsolete} = 1 } elsif ($ua{version} =~ /^7/) { $ua{engine} = 'Trident'; $ua{"engine-version"} = 5; $ua{obsolete} = 1 } else { $ua{engine} = 'Trident'; } } if (exists($ua{program}) and $ua{program} eq 'Konqueror' and exists($ua{version}) and !exists($ua{engine})) { $ua{engine} = 'KHTML'; $ua{"engine-version"} = $ua{version}; delete $ua{obsolete}; } # Old mozilla versions if (m|mozilla/([1234]\.\d+)|) { $ua{obsolete} = 1; if(!exists($ua{type}) and !exists($ua{program}) and (exists($ua{os}) or exists($ua{generic_os}))) { $ua{type} = 'browser'; $ua{program} = "Netscape"; $ua{version} = $1; $ua{engine} = "Mozilla"; $ua{"engine-version"} = $1; } } if (!exists($ua{type})) { if (exists($ua{engine})) { $guessed_type = $ua{type} = 'browser'; } elsif (exists($ua{generic_os})) { $guessed_type = $ua{type} = 'browser'; } else { $guessed_type = $ua{type} = 'bot'; } } if ((exists($ua{type}) and $ua{type} eq 'browser' and !exists($ua{engine})) or ($guessed_type) ) { require YAML; fappend("/tmp/unrecognized_user-agent_strings", YAML::Dump(\%ua)); } return wantarray ? %ua : \%ua; } } #END: parse_user_agent =head3 str2hash Parse a string into a hash using Text::Balanced::extract_delimited. This function recognises perl 5 style hashes as well as the basic perl 6 adverbial form. Items missing a value will set the corresponding hash value to true. Example: str2hash 'foo, bar => "Hmmm, a comma", :baz<23>, :!bip, quxx => Spaces are fine' Parses to: { foo => 1, bar => 'Hmmm, a comma', baz => 23, bip => 0, quxx => 'Spaces are fine', } Unfortunately, the adverbial form will behave strangely with embedded commas: str2hash ':baz' becomes { ':baz 1, 'how odd>' => 1, } =cut #BEGIN: str2hash; depends SPLIT { my $comma_splitter = SPLIT(qr/\s*,\s*/); my $pair_splitter = SPLIT(qr/\s*=>\s*/); sub str2hash { return unless defined $_[0]; my %o; for ($comma_splitter->($_[0])) { my ($x,$y) = $pair_splitter->($_); if (defined $y) { $o{$x} = ($y =~ /^([`'"])/ and substr($y,-1) eq $1) ? substr($y, 1, -1) : $y; } elsif (':!' eq substr($x,0,2)) { $o{substr($x,2)} = 0; } elsif (':' eq substr($x,0,1)) { if ($x =~ /^:(\w+)<(.*)>$/) { $o{$1} = $2; } else { $o{substr($x,1)} = 1; } } else { $o{$x} = 1; } } return %o; } } #END: str2hash =head3 unformat WARNING: still quite experimental! unformat $fmt, @strings unformat \%options, @strings Attempts to reverse the actions of L or other formatted output (for instance date formats or apache logs). The return value is a list of reports (see below) unless these was only a single input string to parse in which case C may be safely called in scalar context. =over 4 =item format The format string =item as Specify how to return the findings. By default just a list of matched components is returned however, we can also return the following reports: =over 4 =item hash A hash mapping conversions (or their corresponding names, if given) to their corresponding strings. BEWARE KEY COLLISION { ~conv, str, ~conv, str, ... } =item list The default, the return values are each an array of strings that could have been used to generate one of the input strings. [ str, str, ... ] =item list_list Each return value is an array of two arrays the first of which is the list of strings returned by the "list" option. The second is the conversion instructions giving each corresponding string. [ [ str, str, ... ], [ conv, conv, ... ] ] Note, in this case, each list of conversions is an array reference pointing to the same array, so altering one will alter them all. =item pairs Each return value is a flat array of pairs: [ conv, str, conv, str, ... ] =item regex Return a regular expression that will match the given pattern. In scalar context just the list is returned. In list context the conversions will be returned also. ( regex, conv, conv, ... ) =item tuples Each return value is an array of arrays each with two elements. First the conversion instruction and second the string that it matched. [ [conv, str], [conv, str], ... ] =back In all ases except for the hash, the conversion instructions are the precise ones given in the format string, including any formatting options. For the hash however, the conversion are the simplified two-character labels (E.g. "%s" instead of "% 35s"). Additionally, the escape '%%' is treated as a string literal '%' and will not appear in any of the report types. A "formatted percent" (for instance "%-05%") will pass through the conversions and will appear in the reports if you define a special conversion for it (since we define no standard conversion for this case). =item conversion_aliases A hash of aliases between conversion types. Use this to map your custom conversion (for instance from the date formatting commands) to standard perl conversions. Conversions of the form C<( a =E "s" )> will preserve formatting options while aliaes that start with '%' C<( Y =E "%04d" )> will use the formatting options "04" rather than any options that may have appeared before the "Y". (Which would presumedly cause "0035" to parse to 35.) Conversion aliases are searched before conversions or special conversions. Once can also add aliases that include the conversion options to override other behavior C<( '02Y' =E '%02d', Y =E 's' )>. =item special_conversions A hash of conversions as in the L option but these conversions will be added to the list of standard conversions and will be consulted first should a standard conversion type appear in this listing. =item conversions A hash of conversions C<( type =E action )>. Each "type" is simply the conversion type (E.g. the "s" in "%- 10s") and each action is a pattern that CAPTURES (preferrably non-greedily) the conversion type (for instance C<(s =E '(.*?)')>). The action could also be a subroutine which accepts two arguments. First the formatting options and second the conversion type. For instance, a sub action for the "f" conversion type might convert its arguments C<(".1", "f")> into the pattern '(\d+\.\d{1})'. Be sure that all of your conversions produce a pattern that captures exactly one substring. Specifying this option replaces the built-in conversions which attempt to reverse the sandard perl conversions listed in the L documentation. =item conversion_map If defined and a hash then the conversions in the above reports will be transformed by this hash. conversions will be first searched for in their full form (including formatting options) both with and without their leading '%', then searched for under only the converions type (both with and without the '%'). Anything not appearing in the conversion map will be treated normally as described above. =item conversion_pattern Default: '(%([^a-zA-Z%]*)([%a-zA-Z]))' Should capture three strings. The entire conversion pattern, any formatting options that may be present, and the conversion type. The default pattern captures single character conversions as well as the '%' escape ("%%"). See also the "Limitations" below. =back Limitations: format conversions are assumed to be one character long. That is, conversions like "%ld" will be interpreted as "%l". This can be fixed by altering the L but I don't have the need to be careful about it. If you code up a more careful parser and are willing to share, feel free to send it and I will add it in. Also, no locale information is considered. sprinf considers the "LC_NUMERIC" value to affect how numbers are formatted. We do not make such considerations here. =cut #BEGIN: unformat, depends: zip, max { my $s_like_sub_maker = sub { my $pat = shift; sub { local $_ = shift; return "($pat)" unless defined and length; return " {0,$1}($pat)" if /^ ?([1-9]\d*)$/; return "0{0,$1}($pat)" if /^0(\d+)$/; return "($pat) {0,$1}" if /^\- ?(\d+)$/; return "$1*($pat)" if /^( |0)?\*$/; return "($pat)$1*" if /^\-( |0)?\*$/; return; }; }; my $replace_null = sub { local $_ = shift; s/\0/$_[0]/g; $_; }; my $e_like_sub_maker = sub { my $pat = shift; my $simple = $s_like_sub_maker->($replace_null->($pat,"0,")); sub { local $_ = shift; my $ret = $simple->($_); return $ret if defined $ret; return "(".$replace_null->($pat,$1).")" if /^\.(\d+)$/; return " {0,$1}(".$replace_null->($pat,$2).")" if /^ ?([1-9]\d*)\.(\d+)$/; return "0{0,$1}(".$replace_null->($pat,$2).")" if /^0(\d+)\.(\d+)$/; return "(".$replace_null->($pat,$2).") {0,$1}" if /^\- ?(\d+)\.(\d+)$/; return "$1*(".$replace_null->($pat,$2).")" if /^( |0)?\*\.(\d+)$/; return "(".$replace_null->($pat,$2).")$1*" if /^\-( |0)?\*\.(\d+)$/; return; }; }; # XXX: Off by one error! Need $1-1 (since must have a digit), though it isn't that careful of a measurement anyway. my $d_like_sub_maker = sub { my $pat = shift; sub { local $_ = shift; return "(\\-?$pat)" unless defined and length; return " {0,$1}(\\-?$pat)" if /^([1-9]\d*)$/; return " {0,$1}(\\-?$pat)" if /^\+? +([1-9]\d*)$/; return " {0,$1}([+-]$pat)" if /^ *\+([1-9]\d*)$/; return "0{0,$1}(\\-?$pat)" if /^0(\d+)$/; return " 0{0,$1}(\\-?$pat)" if /^\+?(?: 0|0 ) *(\d+)$/; return "([+-]0{0,$1}$pat)" if /^(?: 0|0 ) *\+(\d+)$/; return "(\\-?$pat) {0,$1}" if /^\-(\d+)$/; return " (\\-?$pat) {0,$1}" if /^ *(?: \-|\- ) *(\d+)$/; return "([+-]$pat) {0,$1}" if /^(?:\-\+|\+\-)(\d+)$/; return " *(\\-?$pat)" if /^\*\s*$/; return "(\\-?$pat) *" if /^\-\*\s*$/; return "([+-]$pat) *" if /^(?:\-\+|\+\-)\*$/; return " *([+-]$pat)" if /^ *\+\*$/; return; }; }; # XXX: Will be incorrect for %.0f my $f_like_sub_maker = sub { my $pat = shift; my $simple = $d_like_sub_maker->($replace_null->($pat,"0,")); sub { local $_ = shift; my $ret = $simple->($_); return $ret if defined $ret; return "(\\-?".$replace_null->($pat,$1).")" if /^\.(\d+)$/; return " {0,$1}(\\-?".$replace_null->($pat,$2).")" if /^([1-9]\d*)\.(\d+)$/; return " {0,$1}(\\-?".$replace_null->($pat,$2).")" if /^\+? +([1-9]\d*)\.(\d+)$/; return " {0,$1}([+-]".$replace_null->($pat,$2).")" if /^ *\+([1-9]\d*)\.(\d+)$/; return "0{0,$1}(\\-?".$replace_null->($pat,$2).")" if /^0(\d+)\.(\d+)$/; return " 0{0,$1}(\\-?".$replace_null->($pat,$2).")" if /^\+?(?: 0|0 ) *(\d+)\.(\d+)$/; return "([+-]0{0,$1}".$replace_null->($pat,$2).")" if /^(?: 0|0 ) *\+(\d+)\.(\d+)$/; return "(\\-?".$replace_null->($pat,$2).") {0,$1}" if /^\-(\d+)\.(\d+)$/; return " (\\-?".$replace_null->($pat,$2).") {0,$1}" if /^ *(?: \-|\- ) *(\d+)\.(\d+)$/; return "([+-]".$replace_null->($pat,$2).") {0,$1}" if /^(?:\-\+|\+\-)(\d+)\.(\d+)$/; return " *(\\-?".$replace_null->($pat,$2).")" if /^\*\.(\d+)$/; return "(\\-?".$replace_null->($pat,$2).") *" if /^\-\*/; return "([+-]".$replace_null->($pat,$2).") *" if /^(?:\-\+|\+\-)\*$/; return " *([+-]".$replace_null->($pat,$2).")" if /^ *\+\*$/; return; }; }; my %conversions = ( s => $s_like_sub_maker->('.*?'), # XXX: length issues. Not sure how long these can be... b => $s_like_sub_maker->('[01]+?'), u => $s_like_sub_maker->('[0-9]+?'), o => $s_like_sub_maker->('[0-7]+?'), x => $s_like_sub_maker->('[0-9a-f]+?'), X => $s_like_sub_maker->('[0-9A-F]+?'), d => $d_like_sub_maker->('\d+'), e => $e_like_sub_maker->("\\-?\\d\\.\\d{\0}e[+-]\\d+"), E => $e_like_sub_maker->("\\-?\\d\\.\\d{\0}E[+-]\\d+"), f => $f_like_sub_maker->("\\d+\\.\\d{\0}"), g => $f_like_sub_maker->("\\d+\\.\\d{\0}(?:e[+-]\\d+)?"), # XXX: rather broken, but will probably work in the common case G => $f_like_sub_maker->("\\d+\\.\\d{\0}(?:E[+-]\\d+)?"), # XXX: rather broken, but will probably work in the common case ); @conversions{qw/i D U O F p/} = @conversions{qw/d d u o f x/}; my $DEBUG = 0; sub unformat { $DEBUG && (local $\ = "\n"); my %o; if (ref($_[0]) eq 'HASH') { %o = %{ shift() } } else { %o = ( format => shift ) } $o{conversion_pattern} = '(%([^a-zA-Z%]*)([%a-zA-Z]))' unless exists $o{conversion_pattern}; $o{conversions} = \%conversions unless exists $o{conversions}; my @format = split /$o{conversion_pattern}/, $o{format}; my $map = $o{conversion_map}; my $pat = ''; my @conv; while (@format) { my $oconv = shift @format; $DEBUG && print "Examining component: '$oconv'"; unless (substr($oconv,0,1) eq '%') { $pat .= quotemeta($oconv); $DEBUG && print " string literal -> pat = $pat"; next; } if ($oconv eq '%%') { # This is boring and is skipped $DEBUG && print " literal %"; $pat .= '%'; next; } my ($oopt, $otype) = splice(@format, 0, 2); my ($conv, $opt, $type) = ($oconv, $oopt, $otype); # Aliases for ($oopt.$otype, $otype) { next unless exists $o{conversion_aliases}{$_} and defined $o{conversion_aliases}{$_}; ($conv, $opt, $type) = substr($o{conversion_aliases}{$_},0,1) eq '%' ? ($o{conversion_aliases}{$_} =~ /$o{conversion_pattern}/) : ("%".$oopt.$o{conversion_aliases}{$_}, $oopt, $o{conversion_aliases}{$_}); $DEBUG && print " has alias $oconv -> $conv"; last; # stop when we get a hit. } # Conversions my $conversion_pattern; for (@o{qw/special_conversions conversions/}) { next unless exists $$_{$type} and defined $$_{$type}; if (ref($$_{$type}) eq 'CODE') { $conversion_pattern = $$_{$type}->($opt, $type); } elsif (!ref($$_{$type})) { $conversion_pattern = $$_{$type}; } else { die "Invalid conversion handler in "."unformat: '$type' => $$_{$type}"; } $DEBUG && print " converted '$type' -> '$conversion_pattern'"; last; } unless (defined $conversion_pattern) { die "Invalid conversion in "."unformat: '$oconv'" if $oconv eq $conv; die "Invalid conversion in "."unformat: '$oconv' (aliased to '$conv')"; } $pat .= $conversion_pattern; next if !$o{as} or $o{as} eq 'list'; my $flag; if ($map) { for ($oconv, substr($oconv, 1), '%'.$otype, $otype) { next unless exists $$map{$_}; $oconv = $$map{$_}; $flag = 1; last; } } if (!$flag and $o{as} and $o{as} eq 'hash') { $oconv = '%'.substr($oconv,-1); } push @conv, $oconv; } # DO IT! $DEBUG && print "FINAL PATTERN: ^$pat\$"; if ($o{as} and $o{as} eq 'regex') { return wantarray ? ($pat, @conv) : $pat; } my $match = qr/^$pat$/; my @res = map [$_ =~ $match], @_; # Generate the report if (!$o{as} or $o{as} eq 'list') { 1; # done, we'll just return res as it is. } elsif ($o{as} eq 'hash') { @res = map +{ @{$res[$_]} ? zip( \@conv, $res[$_] ) : () }, 0..$#res; } elsif ($o{as} eq 'list_list') { @res = map +( @{$res[$_]} ? [$res[$_], \@conv] : []), 0..$#res; } elsif ($o{as} eq 'pairs') { @res = map +( @{$res[$_]} ? [zip(\@conv, $res[$_])] : []), 0..$#res; } elsif ($o{as} eq 'tuples') { for my $a (0..$#res) { $res[$a] = [ @{$res[$a]} ? (map [$conv[$_], $res[$a][$_]], 0..max($#conv, $#{$res[$a]})) : () ]; } } else { die "Invalid report format for "."unformat: $o{as}"; } if (@_ == 1 and !wantarray) { return $res[0] } else { return @res } } } #END: unformat #----------------------------------------------------------------- $EXPORT_TAGS{canonicalize} = [qw/str replace_windows_characters strip_space sign nsign canonicalize_newlines canonicalize_newlines_copy canonicalize_timeword qbash stringify simple_range2list glob2regexp canonicalize_filename trim uri_rel2abs uri_rel2abs_fast length2pt nicef rtf2txt /]; #----------------------------------------------------------------- =head2 :canonicalize - Canonicalization =head3 rtf2txt rtf2txt( file => $filename_or_handle ) rtf2txt( string => $rtf_text ) rtf2txt( $existing_file ) rtf2txt( $rtf_text ) =cut #BEGIN: rtf2txt sub rtf2txt { require RTF::Tokenizer; unshift @_, (-e $_[0]) ? "file" : "string" if @_ == 1; my $tokenizer = RTF::Tokenizer->new( @_ ); my ($token_type, $argument, $parameter, $TEXT); my $level = 0; while (($token_type, $argument, $parameter) = $tokenizer->get_token()) { if ($token_type eq 'text') { $TEXT .= $argument if $level == 1; } elsif ($token_type eq 'control') { $TEXT .= "\n" if $argument eq 'par' and $level == 1; } elsif ($token_type eq 'group') { $level += $argument ? 1 : -1; } elsif ($token_type eq 'eof') { last; # } elsif ($token_type eq 'escape') { # } } return $TEXT; } #END: rtf2txt =head3 nicef nicef( $num, $digits ) Nicely formats sprintf("%.${digits}f", $num); =cut #BEGIN: nicef sub nicef { my ($n, $d) = @_; local $_ = sprintf("%.${d}f", $n); s/0+$//; s/\.$//; return $_; } #END: nicef =head3 length2pt Given a string like "4in" or "2ft - 7in", return the value as a number of points (72 points per inch). C is returned if we can't parse the string. Recognized units: pt in, ft, mi km, m, cm, mm, nm =cut #BEGIN: length2pt, depends: $_re_num { my %conv = qw/ pt 1 in 72 ft 864 mi 4561920 km 2834645.688 m 2834.645688 cm 28.34645688 mm 2.834645688 nm 0.000002834645688 /; sub length2pt { local $_ = shift || 0; s/\s+//g; return unless /$_re_num(?:pt|in|ft|mi|[kcmn]?m|)/i; my $total = 0; while (s/($_re_num)(pt|in|ft|mi|[kcmn]?m|)//i) { my ($num, $units) = ($1, lc $2); my $size; if (exists $conv{$units}) { $size = $num * $conv{$units}; } elsif (!length($units)) { $size = $num; } else { die "error parsing remainder: '$1$2$_'\n"; } $total += $size; } return if /\d/; # Can't parse string. return $total; } } #END: length2pt =head3 uri_rel2abs my $url = uri_rel2abs( $path, $base ) Converts a path into an absolute path based at the given base unless the path is already absolute. Any file part of the base is ignored. This subroutine is should be a proper rfc3986 uri parser as it is simply calls URI-Enew_abs. However, proper parsing pays a penalty in execution time. Compare the benchmarks between uri_rel2abs and uri_rel2abs_fast: Rate URI FAST URI 208/s -- -93% FAST 3012/s 1350% -- =cut #BEGIN: uri_rel2abs sub uri_rel2abs { require URI; URI->new_abs( @_ ) } #END: uri_rel2abs =head3 uri_rel2abs_fast my $url = uri_rel2abs_fast( $path, $base ) Converts a path into an absolute path based at the given base unless the path is already absolute. Any file part of the base is ignored. This subroutine is not and will likely never be a reasonable implementation of a proper rfc3986 uri parser. At the moment, however, it appears to be "good enough" for typical web address (http, ftp, mms, ...) handling. The uri_rel2abs function uses the URI module to properly produce an absolute uri, however at a significant speed cost. Rate URI FAST URI 208/s -- -93% FAST 3012/s 1350% -- =cut #BEGIN: uri_rel2abs_fast { my $scheme = '[a-zA-Z][a-zA-Z0-9\+\-.]*'; my $host = '[^/?]+'; sub uri_rel2abs_fast { my ($path, $base) = @_; return $path if $path =~ /^$scheme:/o; if ($path =~ m|^/|) { return $base if $base =~ s|^($scheme:/+$host).*|$1$path|o; } elsif ($base =~ m|^($scheme:/+$host)/*$|o) { return "$1/$path"; } else { return $base if $base =~ s|/[^/]*$|/$path|; } } } #END: uri_rel2abs_fast =head3 glob2regexp Constructs a regular expression pattern (string) that matches the same patterns as the given glob. The pattern matches a whole string and is anchored using C<^> and C<$> unless the glob ends with C<*> in which case the trailing C<.*$> will be removed. Keep this in mind if you wish to capture the pattern matched by the glob. Current capabilities: =over 4 =item Globby chars C<*> match many chars; C match one char =item Escaping of globby chars C<\**> matches C<'\*Hello'>, C<\\\**> matches C<"\\*Hello"> =item Grouping constructs C<[abc]> match a character, C<[^abc]> don't match chars, C<{foo,bar}> match options =back Current restrictions: =over 4 =item The globby chars '*' and '?' may not appear within grouping constructs ('[]' and '{}'). =item Can't match grouping chars in groups: '[ab\]]' does not work. =back =cut #BEGIN: glob2regexp sub glob2regexp { my @glob = reverse map scalar reverse, split /(\*|\?|\][^\[]+\[|\}[^\{]+\{)(?=(?:\\\\)*(?:[^\\]|$))/, reverse shift; @glob = grep +(defined and length), @glob; for (@glob) { if ($_ eq '?') { $_ = '.' } elsif ($_ eq '*') { $_ = '.*' } elsif (substr($_,0,1) eq '{' and substr($_,-1) eq '}') { $_ = '(?:' . join("|", map quotemeta, split /,/, substr($_,1,-1)) . ')'; } elsif (substr($_,0,1) eq '[' and substr($_,-1) eq ']') { my $s = ''; for (split /(\w\-\w|)/, substr($_,1,-1)) { if (/\w\-\w/) { $s .= $_ } else { $s .= quotemeta } } $_ = "[$s]"; } else { $_ = quotemeta } } if (@glob == 1 and $glob[0] eq '.*') { @glob = ('[^.]', '.*') } if (@glob and $glob[0] eq '.*') { $glob[0] = '^(?=[^.]).*' } else { unshift @glob, '^' } if (@glob and $glob[-1] eq '.*') { pop @glob } else { push @glob, '$' } join '', @glob; } #END: glob2regexp =head3 str($) Returns string form of argument (forces string context) if it is defined, otherwise returns the empty string. =cut #BEGIN: str, 1 line sub str($) {(defined$_[0])?''.$_[0]:''} =head3 replace_windows_characters Replaces unsightly Extended Windows characters with reasonable ASCII equivalents. See: http://www.cs.tut.fi/~jkorpela/www/windows-chars.html =cut #BEGIN: replace_windows_characters #----------------------------------------------------------------- # These lines fix the famous evil "Windows Characters" # http://www.cs.tut.fi/~jkorpela/www/windows-chars.html sub replace_windows_characters { for (@_) { next unless $_; # 130 -- 139 s/\x82/'/g;s/\x83/f/g;s/\x84/"/g;s/\x85/.../g;s/\x86/*/g;s/\x87/**/g;s/\x88/^/g;s|\x89|0/00|g;s/\x8a/S/g;s/\x8b//g;s/\x9c/oe/g;s/\x9d//g;s/\x9e//g;s/\x9f/Y/g; }} #END: replace_windows_characters =head3 strip_space Remove all space from the provided argument. If the argument is undefined, return the empty string. =cut #BEGIN: strip_space, 1 line sub strip_space($) {local $_=shift; defined || return ''; s/\s+//g; $_} =head3 sign($) Returns "+" or "-" depending on the sign of the argument. =cut =head3 nsign($) Returns "" or "-" depending on the sign of the argument. =cut #BEGIN: sign, 1 line sub sign($) { ($_[0] >= 0) ? '+' : '-' } #BEGIN: nsign, 1 line sub nsign($) { ($_[0] >= 0) ? '' : '-' } =head3 canonicalize_newlines Replace CRLF, CR, LF with the Perl magic C<\n>. Arguments are modified in-place. If no arguments are provided then C<$_> is altered instead. Any undefined arguments are ignored. (though C will not alter C<$_>). =cut =head3 canonicalize_newlines_copy Replace CRLF, CR, LF with the Perl magic C<\n>. Arguments are copied before canonicalization. If no arguments are provided then C<$_> is used instead. Any undefined arguments result in undefined output values. =cut #BEGIN: canonicalize_newlines, 1 line sub canonicalize_newlines { @_ ? do{ defined && s/(?:\015?\012|\015)/\n/ for @_ } : defined && s/(?:\015?\012|\015)/\n/; 1 } #BEGIN: canonicalize_newlines_copy sub canonicalize_newlines_copy { my $x; if (@_) { return wantarray ? map { defined() ? do { $x = $_; $x =~ s/(?:\015?\012|\015)/\n/; $x } : undef } @_ : do { $x = $_; $x =~ s/(?:\015?\012|\015)/\n/; $x } } else { $x = $_; $x =~ s/(?:\015?\012|\015)/\n/; return $x } } #END: canonicalize_newlines_copy =head3 canonicalize_timeword Transform a reasonable (case-insensitive) abbreviations (or plural forms) of "second", "minute", "hour", "day", "week", "month", "year" into one of these canonical forms. Whitespace and mumerical values are allowed at the beginning of the string and will be ignored (and not included in the return value). NOTE: minutes are preferred over months, thus "m" will return "minute" rather than "month". =cut #BEGIN: canonicalize_timeword, depends: $_re_num, str sub canonicalize_timeword { local $_ = (@_) ? str(shift) : str($_); s/^\s*(?:$_re_num)?\s*s(?:econds?|ecs?\.?)?//io && return "second"; s/^\s*(?:$_re_num)?\s*mo(?:\.|nths?|ns?\.?)?//io && return "month" ; # months before minutes since s/^\s*(?:$_re_num)?\s*m(?:inutes?|ins?\.?)?//io && return "minute"; # m defaults to minutes s/^\s*(?:$_re_num)?\s*h(?:ours?|rs?\.?)?//io && return "hour" ; s/^\s*(?:$_re_num)?\s*d(?:ays?|ys?\.?)?//io && return "day" ; s/^\s*(?:$_re_num)?\s*w(?:eeks?|ks?\.?)?//io && return "week" ; s/^\s*(?:$_re_num)?\s*y(?:ears?|rs?\.?)?//io && return "year" ; return; } #END: canonicalize_timeword, depends: $_re_num, str =head3 qbash($) Returns a string quoted for bash-like shells. The string must contin only printable characters or whitespace, otherwise the subroutine will C. The return value is an untainted string wrapped in single quotes C<'> that is ready (and safe) to pass to a shell. =cut #BEGIN: qbash { no warnings; eval { qr/[^\pL\pM\pN\pP\pS\pZ[:print:]\s]/ }; my $unprintable = ($@) ? '[^[:print:]\s]' : '[^\pL\pM\pN\pP\pS\pZ[:print:]\s]'; # Should be: \pL\pM\pN\pP\pS\pZ[:print:]\s (but we don't have unicode everywhere) # equivalently: \p{Letter}\p{Mark}\p{Number}\p{Punctuation}\p{Symbol}\p{Separator} # was: [:print:]\s sub qbash($) { local $_ = shift; die "Unquotable expression: $_" if /$unprintable/o; s/'/'\\''/g; /^([\w\-\+\.\/]+)$/s and return "$1"; # Pretty print simple things /^(.*)$/s and return "'$1'"; } } #END: qbash =head3 stringify stringify( $thing, %options ) Stringifies Perl objects (SCALAR, HASH, or ARRAY based). Stringifies only a single object at a time, and accepts the options below. Note: CODE, GLOB, LVALUE, and Regexp references are not supported. =over 4 =item stringify_underlying_object By default, overloaded stringification will be respected. Set this option to true to stringify the underlying object rather than use its overload function. =item list_type List which describes how lists are translated. DEFAULT: [ "[", ",", "]" ] =item hash_type List which describes how hashes are translated. DEFAULT: [ "{", "=>", ",", "}" ] =back =cut #BEGIN: stringify, depends: str sub stringify { my $data = shift; my $ref = ref($data); return str($data) if !$ref; my %opt = @_; if (!$opt{stringify_underlying_object} and $ref !~ /(?:SCALAR|ARRAY|HASH|CODE|REF|GLOB|LVALUE|Regexp)/) { # Is an interesting object, does it overload stringification? require overload; return "$data" unless overload::StrVal($data) eq "$data"; } if (UNIVERSAL::isa( $data, "SCALAR" ) or UNIVERSAL::isa( $data, "REF" )) { # XXX: perhaps not the right thing to do? return stringify($$data, %opt); } if (UNIVERSAL::isa( $data, "ARRAY" )) { $opt{list_type} ||= ["[", ",", "]"]; return $opt{list_type}->[0].join($opt{list_type}->[1], map(stringify($_, %opt), @$data)).$opt{list_type}->[2]; } if (UNIVERSAL::isa( $data, "HASH" )) { $opt{hash_type} ||= ["{", "=>", ",", "}"]; return $opt{hash_type}->[0] . join($opt{hash_type}->[2], map( stringify(