ICU-2508 perl performance driver. Initial revision

X-SVN-Rev: 10295
2002-11-19 19:32:32 +00:00 · 2002-11-19 19:32:32 +00:00 · 6d93dcd857
commit 6d93dcd857
parent 24bf088281
6 changed files with 987 additions and 0 deletions
--- a/icu4c/source/test/perf/normperf/NormPerf.pl
+++ b/icu4c/source/test/perf/normperf/NormPerf.pl
@ -0,0 +1,55 @@
+#!/usr/bin/perl -w
+
+use strict;
+
+use lib '../perldriver';
+
+use PerfFramework;
+
+
+my $options = {
+	       "title"=>"Normalization performance: ICU vs. Win",
+	       "headers"=>"ICU Win",
+	       "operationIs"=>"File size in code points",
+	       "timePerOperationIs"=>"Time per code point",
+	       #"passes"=>"10",
+	       #"time"=>"5",
+	       "dataDir"=>"c:/src/perf/data",
+	       "outputType"=>"HTML",
+	       "outputDir"=>"../results"
+	      };
+
+# programs
+# tests will be done for all the programs. Results will be stored and connected
+my $p = "normperf.exe -b -u";
+
+my $tests = { 
+	     "NFC_NFD_Text", ["$p TestICU_NFC_NFD_Text",  "$p TestWin_NFC_NFD_Text" ],
+	     "NFC_NFC_Text", ["$p TestICU_NFC_NFC_Text",  "$p TestWin_NFC_NFC_Text" ],
+	     "NFC_Orig_Text", ["$p TestICU_NFC_Orig_Text", "$p TestWin_NFC_Orig_Text"],
+	     "NFD_NFD_Text", ["$p TestICU_NFD_NFD_Text",  "$p TestWin_NFD_NFD_Text" ],
+	     "NFD_NFC_Text", ["$p TestICU_NFD_NFC_Text",  "$p TestWin_NFD_NFC_Text" ],
+	     "NFD_Orig_Text", ["$p TestICU_NFD_Orig_Text", "$p TestWin_NFD_Orig_Text"]
+	    };
+
+my $dataFiles = {
+		 "",
+		 [
+#		  "TestNames_Asian.txt",
+#		  "TestNames_Chinese.txt",
+		  "TestNames_Japanese.txt",
+		  "TestNames_Japanese_h.txt",
+		  "TestNames_Japanese_k.txt",
+#		  "TestNames_Korean.txt",
+#		  "TestNames_Latin.txt",
+#		  "TestNames_SerbianSH.txt",
+#		  "TestNames_SerbianSR.txt",
+#		  "TestNames_Thai.txt",
+#		  "Testnames_Russian.txt",
+#		  "th18057.txt",
+#		  "thesis.txt",
+#		  "vfear11a.txt",
+		 ]
+		};
+
+runTests($options, $tests, $dataFiles);
--- a/icu4c/source/test/perf/perldriver/Dataset.pm
+++ b/icu4c/source/test/perf/perldriver/Dataset.pm
@ -0,0 +1,132 @@
+package Dataset;
+use Statistics::Descriptive;
+use Statistics::Distributions;
+use strict;
+
+# Create a new Dataset with the given data.
+sub new {
+    my ($class) = shift;
+    my $self = bless {
+        _data => \@_,
+        _scale => 1.0,
+        _mean => 0.0,
+        _error => 0.0,
+    }, $class;
+
+    my $n = @_;
+    
+    if ($n >= 1) {
+        my $stats = Statistics::Descriptive::Full->new();
+        $stats->add_data(@{$self->{_data}});
+        $self->{_mean} = $stats->mean();
+
+        if ($n >= 2) {
+            # Use a t distribution rather than Gaussian because (a) we
+            # assume an underlying normal dist, (b) we do not know the
+            # standard deviation -- we estimate it from the data, and (c)
+            # we MAY have a small sample size (also works for large n).
+            my $t = Statistics::Distributions::tdistr($n-1, 0.005);
+            $self->{_error} = $t * $stats->standard_deviation();
+        }
+    }
+
+    $self;
+}
+
+# Set a scaling factor for all data; 1.0 means no scaling.
+# Scale must be > 0.
+sub setScale {
+    my ($self, $scale) = @_;
+    $self->{_scale} = $scale;
+}
+
+# Multiply the scaling factor by a value.
+sub scaleBy {
+    my ($self, $a) = @_;
+    $self->{_scale} *= $a;
+}
+
+# Return the mean.
+sub getMean {
+    my $self = shift;
+    return $self->{_mean} * $self->{_scale};
+}
+
+# Return a 99% error based on the t distribution.  The dataset
+# is desribed as getMean() +/- getError().
+sub getError {
+    my $self = shift;
+    return $self->{_error} * $self->{_scale};
+}
+
+# Divide two Datasets and return a new one, maintaining the
+# mean+/-error.  The new Dataset has no data points.
+sub divide {
+    my $self = shift;
+    my $rhs = shift;
+    
+    my $minratio = ($self->{_mean} - $self->{_error}) /
+                   ($rhs->{_mean} + $rhs->{_error});
+    my $maxratio = ($self->{_mean} + $self->{_error}) /
+                   ($rhs->{_mean} - $rhs->{_error});
+
+    my $result = Dataset->new();
+    $result->{_mean} = ($minratio + $maxratio) / 2;
+    $result->{_error} = $result->{_mean} - $minratio;
+    $result->{_scale} = $self->{_scale} / $rhs->{_scale};
+    $result;
+}
+
+# subtracts two Datasets and return a new one, maintaining the
+# mean+/-error.  The new Dataset has no data points.
+sub subtract {
+    my $self = shift;
+    my $rhs = shift;
+    
+    my $result = Dataset->new();
+    $result->{_mean} = $self->{_mean} - $rhs->{_mean};
+    $result->{_error} = $self->{_error} + $rhs->{_error};
+    $result->{_scale} = $self->{_scale};
+    $result;
+}
+
+# adds two Datasets and return a new one, maintaining the
+# mean+/-error.  The new Dataset has no data points.
+sub add {
+    my $self = shift;
+    my $rhs = shift;
+    
+    my $result = Dataset->new();
+    $result->{_mean} = $self->{_mean} + $rhs->{_mean};
+    $result->{_error} = $self->{_error} + $rhs->{_error};
+    $result->{_scale} = $self->{_scale};
+    $result;
+}
+
+# Divides a dataset by a scalar.
+# The new Dataset has no data points.
+sub divideByScalar {
+    my $self = shift;
+    my $s = shift;
+    
+    my $result = Dataset->new();
+    $result->{_mean} = $self->{_mean}/$s;
+    $result->{_error} = $self->{_error}/$s;
+    $result->{_scale} = $self->{_scale};
+    $result;
+}
+
+# Divides a dataset by a scalar.
+# The new Dataset has no data points.
+sub multiplyByScalar {
+    my $self = shift;
+    my $s = shift;
+    
+    my $result = Dataset->new();
+    $result->{_mean} = $self->{_mean}*$s;
+    $result->{_error} = $self->{_error}*$s;
+    $result->{_scale} = $self->{_scale};
+    $result;
+}
+
+1;
--- a/icu4c/source/test/perf/perldriver/Format.pm
+++ b/icu4c/source/test/perf/perldriver/Format.pm
@ -0,0 +1,159 @@
+my $PLUS_MINUS = "&plusmn;";
+
+#|#---------------------------------------------------------------------
+#|# Format a confidence interval, as given by a Dataset.  Output is as
+#|# as follows:
+#|#   241.23 - 241.98 => 241.5 +/- 0.3
+#|#   241.2 - 243.8 => 242 +/- 1
+#|#   211.0 - 241.0 => 226 +/- 15 or? 230 +/- 20
+#|#   220.3 - 234.3 => 227 +/- 7
+#|#   220.3 - 300.3 => 260 +/- 40
+#|#   220.3 - 1000 => 610 +/- 390 or? 600 +/- 400
+#|#   0.022 - 0.024 => 0.023 +/- 0.001
+#|#   0.022 - 0.032 => 0.027 +/- 0.005
+#|#   0.022 - 1.000 => 0.5 +/- 0.5
+#|# In other words, take one significant digit of the error value and
+#|# display the mean to the same precision.
+#|sub formatDataset {
+#|    my $ds = shift;
+#|    my $lower = $ds->getMean() - $ds->getError();
+#|    my $upper = $ds->getMean() + $ds->getError();
+#|    my $scale = 0;
+#|    # Find how many initial digits are the same
+#|    while ($lower < 1 ||
+#|           int($lower) == int($upper)) {
+#|        $lower *= 10;
+#|        $upper *= 10;
+#|        $scale++;
+#|    }
+#|    while ($lower >= 10 &&
+#|           int($lower) == int($upper)) {
+#|        $lower /= 10;
+#|        $upper /= 10;
+#|        $scale--;
+#|    }
+#|}
+
+#---------------------------------------------------------------------
+# Format a number, optionally with a +/- delta, to n significant
+# digits.
+#
+# @param significant digit, a value >= 1
+# @param multiplier
+# @param time in seconds to be formatted
+# @optional delta in seconds
+#
+# @return string of the form "23" or "23 +/- 10".
+#
+sub formatNumber {
+    my $sigdig = shift;
+    my $mult = shift;
+    my $a = shift;
+    my $delta = shift; # may be undef
+    
+    my $result = formatSigDig($sigdig, $a*$mult);
+    if (defined($delta)) {
+        my $d = formatSigDig($sigdig, $delta*$mult);
+        # restrict PRECISION of delta to that of main number
+        if ($result =~ /\.(\d+)/) {
+            # TODO make this work for values with all significant
+            # digits to the left of the decimal, e.g., 1234000.
+
+            # TODO the other thing wrong with this is that it
+            # isn't rounding the $delta properly.  Have to put
+            # this logic into formatSigDig().
+            my $x = length($1);
+            $d =~ s/\.(\d{$x})\d+/.$1/;
+        }
+        $result .= " $PLUS_MINUS " . $d;
+    }
+    $result;
+}
+
+#---------------------------------------------------------------------
+# Format a time, optionally with a +/- delta, to n significant
+# digits.
+#
+# @param significant digit, a value >= 1
+# @param time in seconds to be formatted
+# @optional delta in seconds
+#
+# @return string of the form "23 ms" or "23 +/- 10 ms".
+#
+sub formatSeconds {
+    my $sigdig = shift;
+    my $a = shift;
+    my $delta = shift; # may be undef
+
+    my @MULT = (1   , 1e3,  1e6,  1e9);
+    my @SUFF = ('s' , 'ms', 'us', 'ns');
+
+    # Determine our scale
+    my $i = 0;
+    #always do seconds if the following line is commented out
+    ++$i while ($a*$MULT[$i] < 1 && $i < @MULT);
+    
+    formatNumber($sigdig, $MULT[$i], $a, $delta) . ' ' . $SUFF[$i];
+}
+
+#---------------------------------------------------------------------
+# Format a percentage, optionally with a +/- delta, to n significant
+# digits.
+#
+# @param significant digit, a value >= 1
+# @param value to be formatted, as a fraction, e.g. 0.5 for 50%
+# @optional delta, as a fraction
+#
+# @return string of the form "23 %" or "23 +/- 10 %".
+#
+sub formatPercent {
+    my $sigdig = shift;
+    my $a = shift;
+    my $delta = shift; # may be undef
+    
+    formatNumber($sigdig, 100, $a, $delta) . ' %';
+}
+
+#---------------------------------------------------------------------
+# Format a number to n significant digits without using exponential
+# notation.
+#
+# @param significant digit, a value >= 1
+# @param number to be formatted
+#
+# @return string of the form "1234" "12.34" or "0.001234".  If
+#         number was negative, prefixed by '-'.
+#
+sub formatSigDig {
+    my $n = shift() - 1;
+    my $a = shift;
+
+    local $_ = sprintf("%.${n}e", $a);
+    my $sign = (s/^-//) ? '-' : '';
+
+    my $a_e;
+    my $result;
+    if (/^(\d)\.(\d+)e([-+]\d+)$/) {
+        my ($d, $dn, $e) = ($1, $2, $3);
+        $a_e = $e;
+        $d .= $dn;
+        $e++;
+        $d .= '0' while ($e > length($d));
+        while ($e < 1) {
+            $e++;
+            $d = '0' . $d;
+        }
+        if ($e == length($d)) {
+            $result = $sign . $d;
+        } else {
+            $result = $sign . substr($d, 0, $e) . '.' . substr($d, $e);
+        }
+    } else {
+        die "Can't parse $_";
+    }
+    $result;
+}
+
+1;
+
+#eof
--- a/icu4c/source/test/perf/perldriver/Output.pm
+++ b/icu4c/source/test/perf/perldriver/Output.pm
@ -0,0 +1,194 @@
+#!/usr/local/bin/perl
+
+use strict;
+
+my $TABLEATTR = 'BORDER="1" CELLPADDING="4" CELLSPACING="0"';
+my $outType = "HTML";
+my $html = "noName";
+my $inTable;
+my @headers;
+my @timetypes = ("per iteration", "per operation", "events", "per event");
+my %raw;
+my $current;
+my $exp = 0;
+
+sub startTest {
+  $current = shift;
+  $exp = 0;
+  outputData($current);
+}
+
+sub startTable {
+  my $printEvents = shift;
+  $inTable = 1;
+  print HTML "<table $TABLEATTR>\n";
+  if($#headers >= 0) {
+    my ($header, $i);
+    print HTML "<tr>";
+    print HTML "<th>Test Name</th>";
+    print HTML "<th>Operations</th>";
+    foreach $i (@timetypes) {
+      foreach $header (@headers) {
+	print HTML "<th>$header<br>$i</th>" unless ($i =~ /event/ && !$printEvents);
+      }
+    }
+    print HTML "</tr>\n";
+  }
+}
+
+sub closeTable {
+  if($inTable) {
+    undef $inTable;
+    print HTML "</tr>\n";
+    print HTML "</table>\n";
+  }
+}
+
+sub newRow {
+  if(!$inTable) {
+    startTable;
+  } else {
+    print HTML "</tr>\n";
+  }
+  print HTML "<tr>";
+}
+
+sub outputData {
+  my $message;
+  if($inTable) {
+    print HTML "<td>";
+    foreach $message (@_) {
+      print HTML "$message";
+    }
+    print HTML "</td>";
+  } else {
+    foreach $message (@_) {
+      print HTML "$message";
+    }
+  }
+}
+
+sub setupOutput {
+  my $date = localtime;
+  my $options = shift;
+  my %options = %{ $options };
+  my $title = $options{ "title" };
+  my $headers = $options{ "headers" };
+  @headers = split(/ /, $headers);
+  my ($t, $rest);
+  ($t, $rest) = split(/\.\w+/, $0);
+  $t =~ /^.*\W(\w+)$/;
+  $t = $1;
+  if($outType eq 'HTML') {
+    $html = $date;
+    $html =~ s/://g; # ':' illegal
+    $html =~ s/\s*\d+$//; # delete year
+    $html =~ s/^\w+\s*//; # delete dow
+    $html = "$t $html.html";
+    if($options{ "outputDir" }) {
+      $html = $options{ "outputDir" }."/".$html;
+    }
+
+    open(HTML,">$html") or die "Can't write to $html: $!";
+
+    print HTML <<EOF;
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+   "http://www.w3.org/TR/html4/strict.dtd">
+<HTML>
+   <HEAD>
+      <TITLE>$title</TITLE>
+   </HEAD>
+   <BODY>
+EOF
+    print HTML "<H1>$title</H1>\n";
+
+    #print HTML "<H2>$TESTCLASS</H2>\n";
+  }
+}
+
+sub closeOutput {
+  if($outType eq 'HTML') {
+    if($inTable) {
+      closeTable;
+    }
+    printRaw();
+    print HTML <<EOF;
+   </BODY>
+</HTML>
+EOF
+    close(HTML) or die "Can't close $html: $!";
+  }
+}
+
+
+sub printRaw {
+  print HTML "<h2>Raw data</h2>";
+  my $key;
+  my $i;
+  for $key (sort keys %raw) {
+    print HTML $key;
+     foreach $i ( 0 .. $#{ $raw{$key} } ) {
+         print HTML " $i = $raw{$key}[$i]";
+     }
+    
+    
+  }
+  print %raw;
+}
+
+sub outputRow {
+  $raw{$current}[$exp++] =  [@_];
+  my $testName = shift;
+  my @iterPerPass = @{shift(@_)};
+  my @noopers =  @{shift(@_)};
+  my @timedata =  @{shift(@_)};
+  my @noevents;
+  if($#_ >= 0) {
+    @noevents =  @{shift(@_)};
+  }
+  if(!$inTable) {
+    if(@noevents) {
+      debug("Have events header\n");
+      startTable(1);
+    } else {
+      debug("No events header\n");
+      startTable;
+    }
+  }
+  debug("No events: @noevents, $#noevents\n");
+
+  my $j;
+
+  # Finished one row of results. Outputting
+  newRow;
+  outputData($testName);
+  #outputData($iterCount);
+  outputData($noopers[0]);
+  for $j ( 0 .. $#timedata ) {
+    my $perOperation = $timedata[$j]->divideByScalar($iterPerPass[$j]); # time per operation
+    #debug("Time per operation: ".formatSeconds(4, $perOperation->getMean, $perOperation->getError)."\n");
+    outputData(formatSeconds(4, $perOperation->getMean, $perOperation->getError));
+  }
+  for $j ( 0 .. $#timedata ) {
+    my $perOperation = $timedata[$j]->divideByScalar($iterPerPass[$j]*$noopers[$j]); # time per operation
+    #debug("Time per operation: ".formatSeconds(4, $perOperation->getMean, $perOperation->getError)."\n");
+    outputData(formatSeconds(4, $perOperation->getMean, $perOperation->getError));
+  }
+
+  if(@noevents) {
+    for $j ( 0 .. $#timedata ) {
+      outputData($noevents[$j]);
+    }
+
+    for $j ( 0 .. $#timedata ) {
+      my $perEvent =  $timedata[$j]->divideByScalar($iterPerPass[$j]*$noevents[$j]); # time per event
+      #debug("Time per operation: ".formatSeconds(4, $perEvent->getMean, $perEvent->getError)."\n");
+      outputData(formatSeconds(4, $perEvent->getMean, $perEvent->getError));
+    }   
+  }
+}
+
+
+1;
+
+#eof
--- a/icu4c/source/test/perf/perldriver/PerfFramework.pm
+++ b/icu4c/source/test/perf/perldriver/PerfFramework.pm
@ -0,0 +1,371 @@
+#!/usr/local/bin/perl
+
+use strict;
+
+use Dataset;
+use Format;
+use Output;
+
+my $VERBOSE = 0;
+my $DEBUG   = 1;
+my $start_l = ""; #formatting help
+my $end_l   = "";
+my @testArgs; # different kinds of tests we want to do
+my $datadir = "data";
+my $extraArgs; # stuff that always gets passed to the test program
+
+
+my $iterCount = 0;
+my $NUMPASSES = 4;
+my $TIME = 2;
+my $DATADIR;
+
+sub setupOptions {
+  my %options = %{shift @_};
+
+  if($options{"time"}) {
+    $TIME = $options{"time"};
+  }
+
+  if($options{"passes"}) {
+    $NUMPASSES = $options{"passes"};
+  }
+	
+  if($options{"dataDir"}) {
+    $DATADIR = $options{"dataDir"};
+  }
+}
+
+sub runTests {
+  my $options = shift;
+  my @programs;
+  my $tests = shift;
+  my %datafiles;
+  if($#_ >= 0) { # maybe no files/locales
+    my $datafiles = shift;
+    if($datafiles) {
+      %datafiles = %{$datafiles};
+    }
+  }
+  setupOutput($options);
+  setupOptions($options);
+
+  my($locale, $iter, $data, $program, $args, $variable);
+#
+#  Outer loop runs through the locales to test
+#
+  if (%datafiles) {
+    foreach $locale (sort keys %datafiles ) {
+      foreach $data (@{ $datafiles{$locale} }) {
+	closeTable;
+	my $locdata = "";
+	if(!($locale eq "")) {
+	  $locdata = "<b>Locale:</b> $locale<br>";
+	}
+	$locdata .= "<b>Datafile:</b> $data<br>";
+	startTest($locdata);
+
+	if($DATADIR) {
+	  compareLoop ($tests, $locale, $DATADIR."/".$data);
+	} else {
+	  compareLoop ($tests, $locale, $data);
+	}
+      }
+    }
+  } else {
+    compareLoop($tests);
+  }
+  closeOutput();
+}
+
+sub compareLoop {
+  my $tests = shift;
+  #my @tests = @{$tests};
+  my %tests = %{$tests};
+  my $locale = shift;
+  my $datafile = shift;
+  my $locAndData = "";
+  if($locale) {
+    $locAndData .= " -L $locale";
+  }
+  if($datafile) {
+    $locAndData .= " -f $datafile";
+  }
+	  
+  my $args;
+  my ($i, $j, $aref);
+  foreach $i ( sort keys %tests ) {
+    debug("Test: $i\n");
+    $aref = $tests{$i};
+    my @timedata;
+    my @iterPerPass;
+    my @noopers;
+    my @noevents;
+
+    my $program;
+    my @argsAndTest;
+    for $j ( 0 .. $#{$aref} ) {
+    # first we calibrate. Use time from somewhere
+    # first test is used for calibration
+      ($program, @argsAndTest) = split(/\ /, @{ $tests{$i} }[$j]);
+      my @res = measure1("$program -t $TIME -p $NUMPASSES $locAndData @argsAndTest");
+
+      push(@iterPerPass, shift(@res));
+      push(@noopers, shift(@res));
+      my @data = @{ shift(@res) };
+      if($#res >= 0) {
+	push(@noevents, shift(@res));
+      }
+    
+
+      shift(@data) if (@data > 1); # discard first run
+
+      #debug("data is @data\n");
+      my $ds = Dataset->new(@data);
+
+      push(@timedata, $ds);
+    }
+
+    outputRow($i, \@iterPerPass, \@noopers, \@timedata, \@noevents);
+  }
+
+}
+
+#---------------------------------------------------------------------
+# Measure a given test method with a give test pattern using the
+# global run parameters.
+#
+# @param the method to run
+# @param the pattern defining characters to test
+# @param if >0 then the number of iterations per pass.  If <0 then
+#        (negative of) the number of seconds per pass.
+#
+# @return array of:
+#         [0] iterations per pass
+#         [1] events per iteration
+#         [2..] ms reported for each pass, in order
+#
+sub measure1 {
+    # run passes
+    my @t = callProg(shift); #"$program $args $argsAndTest");
+    my @ms = ();
+    my @b; # scratch
+    for my $a (@t) {
+        # $a->[0]: method name, corresponds to $method
+        # $a->[1]: 'begin' data, == $iterCount
+        # $a->[2]: 'end' data, of the form <ms> <eventsPerIter>
+        # $a->[3...]: gc messages from JVM during pass
+        @b = split(/\s+/, $a->[2]);
+        #push(@ms, $b[0]);
+        push(@ms, shift(@b));
+    }
+    my $iterCount = shift(@b);
+    my $operationsPerIter = shift(@b);
+    my $eventsPerIter;
+    if($#b >= 0) {
+      $eventsPerIter = shift(@b);
+    }
+
+#    out("Iterations per pass: $iterCount<BR>\n");
+#    out("Events per iteration: $eventsPerIter<BR>\n");
+#    debug("Iterations per pass: $iterCount<BR>\n");
+#    if($eventsPerIter) {
+#      debug("Events per iteration: $eventsPerIter<BR>\n");
+#    }
+
+    my @ms_str = @ms;
+    $ms_str[0] .= " (discarded)" if (@ms_str > 1);
+#    out("Raw times (ms/pass): ", join(", ", @ms_str), "<BR>\n");
+    debug("Raw times (ms/pass): ", join(", ", @ms_str), "<BR>\n");
+    if($eventsPerIter) {
+      ($iterCount, $operationsPerIter, \@ms, $eventsPerIter);
+    } else {
+      ($iterCount, $operationsPerIter, \@ms);
+    }
+}
+
+
+
+#---------------------------------------------------------------------
+# Measure a given test method with a give test pattern using the
+# global run parameters.
+#
+# @param the method to run
+# @param the pattern defining characters to test
+# @param if >0 then the number of iterations per pass.  If <0 then
+#        (negative of) the number of seconds per pass.
+#
+# @return a Dataset object, scaled by iterations per pass and
+#         events per iteration, to give time per event
+#
+sub measure2 {
+    my @res = measure1(@_);
+    my $iterPerPass = shift(@res);
+    my $operationsPerIter = shift(@res);
+    my @data = @{ shift(@res) };
+    my $eventsPerIter = shift(@res);
+    
+
+    shift(@data) if (@data > 1); # discard first run
+
+    my $ds = Dataset->new(@data);
+    #$ds->setScale(1.0e-3 / ($iterPerPass * $operationsPerIter));
+    ($ds, $iterPerPass, $operationsPerIter, $eventsPerIter);
+}
+
+
+#---------------------------------------------------------------------
+# Invoke program and capture results, passing it the given parameters.
+#
+# @param the method to run
+# @param the number of iterations, or if negative, the duration
+#        in seconds.  If more than on pass is desired, pass in
+#        a string, e.g., "100 100 100".
+# @param the pattern defining characters to test
+#
+# @return an array of results.  Each result is an array REF
+#         describing one pass.  The array REF contains:
+#         ->[0]: The method name as reported
+#         ->[1]: The params on the '= <meth> begin ...' line
+#         ->[2]: The params on the '= <meth> end ...' line
+#         ->[3..]: GC messages from the JVM, if any
+#
+sub callProg {
+    my $cmd = shift;
+    #my $pat = shift;
+    #my $n = shift;
+
+    #my $cmd = "java -cp c:\\dev\\myicu4j\\classes $TESTCLASS $method $n $pat";
+    debug( "[$cmd]\n"); # for debugging
+    open(PIPE, "$cmd|") or die "Can't run \"$cmd\"";
+    my @out;
+    while (<PIPE>) {
+        push(@out, $_);
+    }
+    close(PIPE) or die "Program failed: \"$cmd\"";
+
+    @out = grep(!/^\#/, @out);  # filter out comments
+
+    #debug( "[", join("\n", @out), "]\n");
+
+    my @results;
+    my $method = '';
+    my $data = [];
+    foreach (@out) {
+        next unless (/\S/);
+
+        if (/^=\s*(\w+)\s*(\w+)\s*(.*)/) {
+            my ($m, $state, $d) = ($1, $2, $3);
+            #debug ("$_ => [[$m $state !!!$d!!! $data ]]\n");
+            if ($state eq 'begin') {
+                die "$method was begun but not finished" if ($method);
+                $method = $m;
+                push(@$data, $d);
+                push(@$data, ''); # placeholder for end data
+            } elsif ($state eq 'end') {
+                if ($m ne $method) {
+                    die "$method end does not match: $_";
+                }
+                $data->[1] = $d; # insert end data at [1]
+                #debug( "#$method:", join(";",@$data), "\n");
+                unshift(@$data, $method); # add method to start
+                push(@results, $data);
+                $method = '';
+                $data = [];
+            } else {
+                die "Can't parse: $_";
+            }
+        }
+
+        elsif (/^\[/) {
+            if ($method) {
+                push(@$data, $_);
+            } else {
+                # ignore extraneous GC notices
+            }
+        }
+
+        else {
+            die "Can't parse: $_";
+        }
+    }
+
+    die "$method was begun but not finished" if ($method);
+
+    @results;
+}
+
+sub debug  {
+  my $message;
+  if($DEBUG != 0) {
+    foreach $message (@_) {
+      print STDERR "$message";
+    }
+  }
+}
+
+sub measure1Alan {
+  #Added here, was global
+  my $CALIBRATE = 2; # duration in seconds for initial calibration
+
+    my $method = shift;
+    my $pat = shift;
+    my $iterCount = shift; # actually might be -seconds/pass
+
+    out("<P>Measuring $method using $pat, ");
+    if ($iterCount > 0) {
+        out("$iterCount iterations/pass, $NUMPASSES passes</P>\n");
+    } else {
+        out(-$iterCount, " seconds/pass, $NUMPASSES passes</P>\n");
+    }
+
+    # is $iterCount actually -seconds?
+    if ($iterCount < 0) {
+
+        # calibrate: estimate ms/iteration
+        print "Calibrating...";
+        my @t = callJava($method, $pat, -$CALIBRATE);
+        print "done.\n";
+
+        my @data = split(/\s+/, $t[0]->[2]);
+        my $timePerIter = 1.0e-3 * $data[0] / $data[2];
+    
+        # determine iterations/pass
+        $iterCount = int(-$iterCount / $timePerIter + 0.5);
+
+        out("<P>Calibration pass ($CALIBRATE sec): ");
+        out("$data[0] ms, ");
+        out("$data[2] iterations = ");
+        out(formatSeconds(4, $timePerIter), "/iteration<BR>\n");
+    }
+    
+    # run passes
+    print "Measuring $iterCount iterations x $NUMPASSES passes...";
+    my @t = callJava($method, $pat, "$iterCount " x $NUMPASSES);
+    print "done.\n";
+    my @ms = ();
+    my @b; # scratch
+    for my $a (@t) {
+        # $a->[0]: method name, corresponds to $method
+        # $a->[1]: 'begin' data, == $iterCount
+        # $a->[2]: 'end' data, of the form <ms> <eventsPerIter>
+        # $a->[3...]: gc messages from JVM during pass
+        @b = split(/\s+/, $a->[2]);
+        push(@ms, $b[0]);
+    }
+    my $eventsPerIter = $b[1];
+
+    out("Iterations per pass: $iterCount<BR>\n");
+    out("Events per iteration: $eventsPerIter<BR>\n");
+
+    my @ms_str = @ms;
+    $ms_str[0] .= " (discarded)" if (@ms_str > 1);
+    out("Raw times (ms/pass): ", join(", ", @ms_str), "<BR>\n");
+
+    ($iterCount, $eventsPerIter, @ms);
+}
+
+
+1;
+
+#eof
--- a/icu4c/source/test/perf/ubrkperf/UBrkPerf.pl
+++ b/icu4c/source/test/perf/ubrkperf/UBrkPerf.pl
@ -0,0 +1,76 @@
+#!/usr/bin/perl
+
+use strict;
+
+use lib '../perldriver';
+
+use PerfFramework;
+
+my $options = {
+	       "title"=>"BreakIterator performance: ICU 2.0 vs. ICU 2.4",
+	       "headers"=>"ICU20 ICU24",
+	       "operationIs"=>"File size in code points",
+	       "timePerOperationIs"=>"Time per code point",
+	       "passes"=>"3",
+	       #"time"=>"1.1",
+	       "dataDir"=>"c:/src/perf/data",
+	       "outputType"=>"HTML",
+	       "outputDir"=>"../results"
+	      };
+
+# programs
+# tests will be done for all the programs. Results will be stored and connected
+my $m1 = "-- -m char";
+my $m2 = "-- -m word";
+my $m3 = "-- -m line";
+my $m4 = "-- -m sentence";
+
+my $m;
+
+if(@_ >= 0) {
+  $m = "-- -m ".shift;
+} else {
+  $m = $m1;
+}
+
+my $p1 = "ubrkperf20.exe";
+my $p2 = "ubrkperf24.exe";
+
+my $dataFiles = {
+"en", ["thesis.txt", 
+#       #"2drvb10.txt", 
+#       #"ulyss10.txt",  
+#       "nvsbl10.txt", 
+#       "vfear11a.txt", 		  
+#       "TestNames_Asian.txt",
+#       "TestNames_Chinese.txt",
+       "TestNames_Japanese.txt",
+#       "TestNames_Japanese_h.txt",
+#       "TestNames_Japanese_k.txt",
+#       "TestNames_Korean.txt",
+       "TestNames_Latin.txt",
+#       "TestNames_SerbianSH.txt",
+#       "TestNames_SerbianSR.txt",
+#       "TestNames_Thai.txt",
+#       "Testnames_Russian.txt",
+],
+#"th", ["TestNames_Thai.txt", "th18057.txt"]
+};
+
+
+my $tests = { 
+"TestForwardChar",     ["$p1 $m1 TestICUForward", "$p2 $m1 TestICUForward"],
+"TestForwardWord",     ["$p1 $m2 TestICUForward", "$p2 $m2 TestICUForward"],
+#"TestForwardLine",     ["$p1 $m3 TestICUForward", "$p2 $m3 TestICUForward"],
+#"TestForwardSentence", ["$p1 $m4 TestICUForward", "$p2 $m4 TestICUForward"],
+
+#"TestIsBoundChar",     ["$p1 $m1 TestICUIsBound", "$p2 $m1 TestICUIsBound"],
+#"TestIsBoundWord",     ["$p1 $m2 TestICUIsBound", "$p2 $m2 TestICUIsBound"],
+#"TestIsBoundLine",     ["$p1 $m3 TestICUIsBound", "$p2 $m3 TestICUIsBound"],
+#"TestIsBoundSentence", ["$p1 $m4 TestICUIsBound", "$p2 $m4 TestICUIsBound"],
+
+};
+
+runTests($options, $tests, $dataFiles);
+
+