diff --git a/icu4c/source/test/perf/normperf/NormPerf.pl b/icu4c/source/test/perf/normperf/NormPerf.pl
new file mode 100755
index 0000000000..78f365a60a
--- /dev/null
+++ b/icu4c/source/test/perf/normperf/NormPerf.pl
@@ -0,0 +1,55 @@
+#!/usr/bin/perl -w
+
+use strict;
+
+use lib '../perldriver';
+
+use PerfFramework;
+
+# Driver script: measures normalization performance of ICU against the
+# Windows implementation through the PerfFramework runTests() entry point.
+
+# Settings handed to runTests(); the commented-out entries are disabled.
+my $options = {
+    title              => "Normalization performance: ICU vs. Win",
+    headers            => "ICU Win",
+    operationIs        => "File size in code points",
+    timePerOperationIs => "Time per code point",
+    #passes            => "10",
+    #time              => "5",
+    dataDir            => "c:/src/perf/data",
+    outputType         => "HTML",
+    outputDir          => "../results",
+};
+
+# Command prefix shared by every test; the test method name is appended.
+my $p = "normperf.exe -b -u";
+
+# Test name => [ICU command, Windows command].  Each name is <stem>_Text,
+# so the table is generated from the list of stems.
+my $tests = {
+    map { ( "${_}_Text" => [ "$p TestICU_${_}_Text", "$p TestWin_${_}_Text" ] ) }
+        qw(NFC_NFD NFC_NFC NFC_Orig NFD_NFD NFD_NFC NFD_Orig)
+};
+
+# Key (empty here) => data files to process; commented-out files are disabled.
+my $dataFiles = {
+    "" => [
+        #"TestNames_Asian.txt",
+        #"TestNames_Chinese.txt",
+        "TestNames_Japanese.txt",
+        "TestNames_Japanese_h.txt",
+        "TestNames_Japanese_k.txt",
+        #"TestNames_Korean.txt",
+        #"TestNames_Latin.txt",
+        #"TestNames_SerbianSH.txt",
+        #"TestNames_SerbianSR.txt",
+        #"TestNames_Thai.txt",
+        #"Testnames_Russian.txt",
+        #"th18057.txt",
+        #"thesis.txt",
+        #"vfear11a.txt",
+    ],
+};
+
+runTests($options, $tests, $dataFiles);
diff --git a/icu4c/source/test/perf/perldriver/Dataset.pm b/icu4c/source/test/perf/perldriver/Dataset.pm
new file mode 100644
index 0000000000..744e728b0d
--- /dev/null
+++ b/icu4c/source/test/perf/perldriver/Dataset.pm
@@ -0,0 +1,132 @@
+package Dataset;
+use Statistics::Descriptive;
+use Statistics::Distributions;
+use strict;
+
+# Create a new Dataset with the given data.
+#
+# @param class name, followed by zero or more numeric data points
+#
+# @return the new Dataset.  The mean is computed when at least one
+#         point is given and the error when at least two are given;
+#         otherwise they stay 0.0.  The scale starts at 1.0.
+#
+sub new {
+    my ($class, @points) = @_;
+
+    # @points is a private copy.  Storing \@_ instead would keep aliases
+    # to the caller's variables inside the object.
+    my $self = bless {
+        _data  => \@points,
+        _scale => 1.0,
+        _mean  => 0.0,
+        _error => 0.0,
+    }, $class;
+
+    my $n = @points;
+    return $self if $n < 1;
+
+    my $stats = Statistics::Descriptive::Full->new();
+    $stats->add_data(@points);
+    $self->{_mean} = $stats->mean();
+
+    if ($n >= 2) {
+        # Use a t distribution rather than Gaussian because (a) we
+        # assume an underlying normal dist, (b) we do not know the
+        # standard deviation -- we estimate it from the data, and (c)
+        # we MAY have a small sample size (also works for large n).
+        #
+        # NOTE(review): this is t * s, not t * s / sqrt(n), so it is
+        # wider than a confidence interval for the mean -- confirm that
+        # this is intended.
+        my $t = Statistics::Distributions::tdistr($n-1, 0.005);
+        $self->{_error} = $t * $stats->standard_deviation();
+    }
+
+    return $self;
+}
+
+# Set a scaling factor for all data; 1.0 means no scaling.
+# Scale must be > 0 (this is not checked here).
+sub setScale {
+    my ($self, $scale) = @_;
+    $self->{_scale} = $scale;
+}
+
+# Multiply the scaling factor by a value.
+sub scaleBy {
+    my ($self, $factor) = @_;
+    $self->{_scale} *= $factor;
+}
+
+# Return the mean, with the scaling factor applied.
+sub getMean {
+    my ($self) = @_;
+    return $self->{_mean} * $self->{_scale};
+}
+
+# Return a 99% error based on the t distribution, with the scaling
+# factor applied.  The dataset is described as getMean() +/- getError().
+sub getError {
+    my ($self) = @_;
+    return $self->{_error} * $self->{_scale};
+}
+
+# Divide two Datasets and return a new one, maintaining the
+# mean+/-error.  The new Dataset has no data points.
+#
+# The result interval runs from (lowest numerator / highest denominator)
+# to (highest numerator / lowest denominator); its scale is the quotient
+# of the two scales.  Perl raises "Illegal division by zero" when the
+# divisor's mean + error or mean - error is 0.
+sub divide {
+    my ($self, $rhs) = @_;
+
+    my $minratio = ($self->{_mean} - $self->{_error}) /
+                   ($rhs->{_mean} + $rhs->{_error});
+    my $maxratio = ($self->{_mean} + $self->{_error}) /
+                   ($rhs->{_mean} - $rhs->{_error});
+
+    my $result = Dataset->new();
+    $result->{_mean}  = ($minratio + $maxratio) / 2;
+    $result->{_error} = $result->{_mean} - $minratio;
+    $result->{_scale} = $self->{_scale} / $rhs->{_scale};
+    return $result;
+}
+
+# Subtracts two Datasets and returns a new one, maintaining the
+# mean+/-error.  The new Dataset has no data points.
+sub subtract {
+    my ($self, $rhs) = @_;
+
+    # Express the right-hand values in this Dataset's scale, so that
+    # getMean() of the result is the difference of the two getMean()s.
+    my $k = _scaleRatio($self, $rhs);
+
+    my $result = Dataset->new();
+    $result->{_mean}  = $self->{_mean}  - $rhs->{_mean}  * $k;
+    $result->{_error} = $self->{_error} + $rhs->{_error} * $k;
+    $result->{_scale} = $self->{_scale};
+    return $result;
+}
+
+# Adds two Datasets and returns a new one, maintaining the
+# mean+/-error.  The new Dataset has no data points.
+sub add {
+    my ($self, $rhs) = @_;
+
+    # Express the right-hand values in this Dataset's scale, so that
+    # getMean() of the result is the sum of the two getMean()s.
+    my $k = _scaleRatio($self, $rhs);
+
+    my $result = Dataset->new();
+    $result->{_mean}  = $self->{_mean}  + $rhs->{_mean}  * $k;
+    $result->{_error} = $self->{_error} + $rhs->{_error} * $k;
+    $result->{_scale} = $self->{_scale};
+    return $result;
+}
+
+# Divides a dataset by a scalar.
+# The new Dataset has no data points.
+sub divideByScalar {
+    my ($self, $s) = @_;
+
+    my $result = Dataset->new();
+    $result->{_mean}  = $self->{_mean}/$s;
+    $result->{_error} = $self->{_error}/$s;
+    $result->{_scale} = $self->{_scale};
+    return $result;
+}
+
+# Multiplies a dataset by a scalar.
+# The new Dataset has no data points.
+sub multiplyByScalar {
+    my ($self, $s) = @_;
+
+    my $result = Dataset->new();
+    $result->{_mean}  = $self->{_mean}*$s;
+    $result->{_error} = $self->{_error}*$s;
+    $result->{_scale} = $self->{_scale};
+    return $result;
+}
+
+# Private: factor that converts unscaled values of $rhs into the scale
+# of $self.  Exactly 1 when both scales are equal, so Datasets sharing
+# a scale combine exactly as plain sums and differences.
+sub _scaleRatio {
+    my ($self, $rhs) = @_;
+    return 1 if $self->{_scale} == $rhs->{_scale};
+    return $rhs->{_scale} / $self->{_scale};
+}
+
+1;
diff --git a/icu4c/source/test/perf/perldriver/Format.pm b/icu4c/source/test/perf/perldriver/Format.pm
new file mode 100644
index 0000000000..253571db79
--- /dev/null
+++ b/icu4c/source/test/perf/perldriver/Format.pm
@@ -0,0 +1,159 @@
+use strict;
+
+# Separator printed between a value and its delta.
+my $PLUS_MINUS = "±";
+
+#|#---------------------------------------------------------------------
+#|# Format a confidence interval, as given by a Dataset.  Output is
+#|# as follows:
+#|#   241.23 - 241.98 => 241.5 +/- 0.3
+#|#   241.2 - 243.8 => 242 +/- 1
+#|#   211.0 - 241.0 => 226 +/- 15 or? 230 +/- 20
+#|#   220.3 - 234.3 => 227 +/- 7
+#|#   220.3 - 300.3 => 260 +/- 40
+#|#   220.3 - 1000 => 610 +/- 390 or?
+#|#                                 600 +/- 400
+#|#   0.022 - 0.024 => 0.023 +/- 0.001
+#|#   0.022 - 0.032 => 0.027 +/- 0.005
+#|#   0.022 - 1.000 => 0.5 +/- 0.5
+#|# In other words, take one significant digit of the error value and
+#|# display the mean to the same precision.
+#|sub formatDataset {
+#|    my $ds = shift;
+#|    my $lower = $ds->getMean() - $ds->getError();
+#|    my $upper = $ds->getMean() + $ds->getError();
+#|    my $scale = 0;
+#|    # Find how many initial digits are the same
+#|    while ($lower < 1 ||
+#|           int($lower) == int($upper)) {
+#|        $lower *= 10;
+#|        $upper *= 10;
+#|        $scale++;
+#|    }
+#|    while ($lower >= 10 &&
+#|           int($lower) == int($upper)) {
+#|        $lower /= 10;
+#|        $upper /= 10;
+#|        $scale--;
+#|    }
+#|}
+
+#---------------------------------------------------------------------
+# Format a number, optionally with a +/- delta, to n significant
+# digits.  Both the number and the delta are multiplied by the
+# multiplier before they are formatted.
+#
+# @param significant digit, a value >= 1
+# @param multiplier
+# @param number to be formatted
+# @optional delta, in the same unit as the number
+#
+# @return string of the form "23" or "23 +/- 10".
+#
+sub formatNumber {
+    my ($sigdig, $mult, $a, $delta) = @_;    # $delta may be undef
+
+    my $result = formatSigDig($sigdig, $a*$mult);
+    return $result unless defined($delta);
+
+    my $d = formatSigDig($sigdig, $delta*$mult);
+
+    # restrict PRECISION of delta to that of main number
+    if ($result =~ /\.(\d+)/) {
+        # TODO make this work for values with all significant
+        # digits to the left of the decimal, e.g., 1234000.
+
+        # TODO the other thing wrong with this is that it
+        # isn't rounding the $delta properly.  Have to put
+        # this logic into formatSigDig().
+        my $keep = length($1);
+        $d =~ s/\.(\d{$keep})\d+/.$1/;
+    }
+
+    return "$result $PLUS_MINUS $d";
+}
+
+#---------------------------------------------------------------------
+# Format a time, optionally with a +/- delta, to n significant
+# digits.
+#
+# The unit is the largest of s, ms, us, ns in which the value is at
+# least 1; values below 1 ns (including 0) are shown in ns.
+#
+# @param significant digit, a value >= 1
+# @param time in seconds to be formatted
+# @optional delta in seconds
+#
+# @return string of the form "23 ms" or "23 +/- 10 ms".
+#
+sub formatSeconds {
+    my ($sigdig, $a, $delta) = @_;    # $delta may be undef
+
+    my @MULT = (1   , 1e3 , 1e6 , 1e9 );
+    my @SUFF = ('s' , 'ms', 'us', 'ns');
+
+    # Determine our scale.  The index is tested first and capped at the
+    # last unit, so $i can never step past the end of @MULT / @SUFF.
+    my $i = 0;
+    #always do seconds if the following line is commented out
+    ++$i while ($i < $#MULT && $a*$MULT[$i] < 1);
+
+    formatNumber($sigdig, $MULT[$i], $a, $delta) . ' ' . $SUFF[$i];
+}
+
+#---------------------------------------------------------------------
+# Format a percentage, optionally with a +/- delta, to n significant
+# digits.
+#
+# @param significant digit, a value >= 1
+# @param value to be formatted, as a fraction, e.g. 0.5 for 50%
+# @optional delta, as a fraction
+#
+# @return string of the form "23 %" or "23 +/- 10 %".
+#
+sub formatPercent {
+    my ($sigdig, $a, $delta) = @_;    # $delta may be undef
+
+    formatNumber($sigdig, 100, $a, $delta) . ' %';
+}
+
+#---------------------------------------------------------------------
+# Format a number to n significant digits without using exponential
+# notation.
+#
+# @param significant digit, a value >= 1
+# @param number to be formatted
+#
+# @return string of the form "1234" "12.34" or "0.001234".  If
+#         number was negative, prefixed by '-'.  Dies if the
+#         sprintf output cannot be parsed.
+#
+sub formatSigDig {
+    my $n = shift() - 1;
+    my $a = shift;
+
+    # Let sprintf do the rounding, then move the decimal point by hand.
+    local $_ = sprintf("%.${n}e", $a);
+    my $sign = (s/^-//) ? '-' : '';
+
+    my $result;
+    # The fraction is optional: for one significant digit, "%.0e" prints
+    # e.g. "5e+00", which has no decimal point.
+    if (/^(\d)(?:\.(\d+))?e([-+]\d+)$/) {
+        my ($d, $dn, $e) = ($1, $2, $3);
+        $d .= $dn if defined($dn);
+        # From here on $e is the number of digits left of the decimal point.
+        $e++;
+        $d .= '0' while ($e > length($d));
+        while ($e < 1) {
+            $e++;
+            $d = '0' . $d;
+        }
+        if ($e == length($d)) {
+            $result = $sign . $d;
+        } else {
+            $result = $sign . substr($d, 0, $e) . '.' .
+                substr($d, $e);
+        }
+    } else {
+        die "Can't parse $_";
+    }
+    $result;
+}
+
+1;
+
+#eof
diff --git a/icu4c/source/test/perf/perldriver/Output.pm b/icu4c/source/test/perf/perldriver/Output.pm
new file mode 100644
index 0000000000..34119605df
--- /dev/null
+++ b/icu4c/source/test/perf/perldriver/Output.pm
@@ -0,0 +1,194 @@
+#!/usr/local/bin/perl
+
+use strict;
+
+# File-level state shared by the subs in this file.
+my $TABLEATTR = 'BORDER="1" CELLPADDING="4" CELLSPACING="0"';
+my $outType   = "HTML";
+my $html      = "noName";
+my $inTable;
+my @headers;
+my @timetypes = ("per iteration", "per operation", "events", "per event");
+my %raw;
+my $current;    # set by startTest()
+my $exp = 0;    # reset by startTest()
+
+# Begin a new test: remember its name as the current test, reset $exp
+# and hand the name to outputData().
+sub startTest {
+  my ($testName) = @_;
+  $exp     = 0;
+  $current = $testName;
+  outputData($current);
+}
+
+sub startTable {
+  my $printEvents = shift;
+  $inTable = 1;
+  print HTML "
Test Name | "; + print HTML "Operations | "; + foreach $i (@timetypes) { + foreach $header (@headers) { + print HTML "$header $i | " unless ($i =~ /event/ && !$printEvents);
+ }
+ }
+ print HTML "
---|
Measuring $method using $pat, "); + if ($iterCount > 0) { + out("$iterCount iterations/pass, $NUMPASSES passes
\n"); + } else { + out(-$iterCount, " seconds/pass, $NUMPASSES passes\n"); + } + + # is $iterCount actually -seconds? + if ($iterCount < 0) { + + # calibrate: estimate ms/iteration + print "Calibrating..."; + my @t = callJava($method, $pat, -$CALIBRATE); + print "done.\n"; + + my @data = split(/\s+/, $t[0]->[2]); + my $timePerIter = 1.0e-3 * $data[0] / $data[2]; + + # determine iterations/pass + $iterCount = int(-$iterCount / $timePerIter + 0.5); + + out("Calibration pass ($CALIBRATE sec): ");
+ out("$data[0] ms, ");
+ out("$data[2] iterations = ");
+ out(formatSeconds(4, $timePerIter), "/iteration
\n");
+ }
+
+ # run passes
+ print "Measuring $iterCount iterations x $NUMPASSES passes...";
+ my @t = callJava($method, $pat, "$iterCount " x $NUMPASSES);
+ print "done.\n";
+ my @ms = ();
+ my @b; # scratch
+ for my $a (@t) {
+ # $a->[0]: method name, corresponds to $method
+ # $a->[1]: 'begin' data, == $iterCount
+ # $a->[2]: 'end' data, of the form
\n");
+ out("Events per iteration: $eventsPerIter
\n");
+
+ my @ms_str = @ms;
+ $ms_str[0] .= " (discarded)" if (@ms_str > 1);
+ out("Raw times (ms/pass): ", join(", ", @ms_str), "
\n");
+
+ ($iterCount, $eventsPerIter, @ms);
+}
+
+
+1;
+
+#eof
diff --git a/icu4c/source/test/perf/ubrkperf/UBrkPerf.pl b/icu4c/source/test/perf/ubrkperf/UBrkPerf.pl
new file mode 100755
index 0000000000..128a939924
--- /dev/null
+++ b/icu4c/source/test/perf/ubrkperf/UBrkPerf.pl
@@ -0,0 +1,79 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+use lib '../perldriver';
+
+use PerfFramework;
+
+# Driver script: measures BreakIterator performance of two ubrkperf builds
+# (ICU 2.0 vs. ICU 2.4) through the PerfFramework runTests() entry point.
+
+# Settings handed to runTests(); "time" is a disabled alternative setting.
+my $options = {
+    "title"=>"BreakIterator performance: ICU 2.0 vs. ICU 2.4",
+    "headers"=>"ICU20 ICU24",
+    "operationIs"=>"File size in code points",
+    "timePerOperationIs"=>"Time per code point",
+    "passes"=>"3",
+    #"time"=>"1.1",
+    "dataDir"=>"c:/src/perf/data",
+    "outputType"=>"HTML",
+    "outputDir"=>"../results"
+    };
+
+# Break-iterator mode arguments, spliced into the command lines below.
+my $m1 = "-- -m char";
+my $m2 = "-- -m word";
+my $m3 = "-- -m line";
+my $m4 = "-- -m sentence";
+
+# Mode requested by the first command-line argument, defaulting to $m1.
+# A file-scope shift() reads @ARGV while @_ is empty there, so the earlier
+# "@_ >= 0" test was always true and never reached the default.
+# NOTE(review): $m is not referenced by the $tests table below.
+my $m = @ARGV ? "-- -m " . shift(@ARGV) : $m1;
+
+# programs
+# tests will be done for all the programs. Results will be stored and connected
+my $p1 = "ubrkperf20.exe";
+my $p2 = "ubrkperf24.exe";
+
+# Key (presumably a locale) => data files; commented-out entries are disabled.
+my $dataFiles = {
+    "en" => [
+        "thesis.txt",
+#       #"2drvb10.txt",
+#       #"ulyss10.txt",
+#       "nvsbl10.txt",
+#       "vfear11a.txt",
+#       "TestNames_Asian.txt",
+#       "TestNames_Chinese.txt",
+        "TestNames_Japanese.txt",
+#       "TestNames_Japanese_h.txt",
+#       "TestNames_Japanese_k.txt",
+#       "TestNames_Korean.txt",
+        "TestNames_Latin.txt",
+#       "TestNames_SerbianSH.txt",
+#       "TestNames_SerbianSR.txt",
+#       "TestNames_Thai.txt",
+#       "Testnames_Russian.txt",
+    ],
+    #"th", ["TestNames_Thai.txt", "th18057.txt"]
+};
+
+# Test name => [ICU 2.0 command, ICU 2.4 command]; commented-out tests are disabled.
+my $tests = {
+    "TestForwardChar", ["$p1 $m1 TestICUForward", "$p2 $m1 TestICUForward"],
+    "TestForwardWord", ["$p1 $m2 TestICUForward", "$p2 $m2 TestICUForward"],
+    #"TestForwardLine", ["$p1 $m3 TestICUForward", "$p2 $m3 TestICUForward"],
+    #"TestForwardSentence", ["$p1 $m4 TestICUForward", "$p2 $m4 TestICUForward"],
+
+    #"TestIsBoundChar", ["$p1 $m1 TestICUIsBound", "$p2 $m1 TestICUIsBound"],
+    #"TestIsBoundWord", ["$p1 $m2 TestICUIsBound", "$p2 $m2 TestICUIsBound"],
+    #"TestIsBoundLine", ["$p1 $m3 TestICUIsBound", "$p2 $m3 TestICUIsBound"],
+    #"TestIsBoundSentence", ["$p1 $m4 TestICUIsBound", "$p2 $m4 TestICUIsBound"],
+};
+
+runTests($options, $tests, $dataFiles);