From f69a374304620da74bc8af292130a0755daf222b Mon Sep 17 00:00:00 2001
From: Andrew J Macheret
Date: Fri, 8 Jun 2007 20:35:05 +0000
Subject: [PATCH] ICU-5738 ported performance tests for icu4jni charset apis
into icu4j
X-SVN-Rev: 21671
---
.../test/perf/ConverterPerformanceTest.java | 322 +++++++++++
.../com/ibm/icu/dev/test/perf/runPerfConv.pl | 507 ++++++++++++++++++
2 files changed, 829 insertions(+)
create mode 100644 icu4j/src/com/ibm/icu/dev/test/perf/ConverterPerformanceTest.java
create mode 100755 icu4j/src/com/ibm/icu/dev/test/perf/runPerfConv.pl
diff --git a/icu4j/src/com/ibm/icu/dev/test/perf/ConverterPerformanceTest.java b/icu4j/src/com/ibm/icu/dev/test/perf/ConverterPerformanceTest.java
new file mode 100644
index 0000000000..dde39bb4d0
--- /dev/null
+++ b/icu4j/src/com/ibm/icu/dev/test/perf/ConverterPerformanceTest.java
@@ -0,0 +1,322 @@
+package com.ibm.icu.dev.test.perf;
+
+import java.io.*;
+
+import sun.io.*;
+//import com.ibm.icu.converters.*;
+import com.ibm.icu.charset.*;
+import java.nio.charset.*;
+import java.nio.*;
+
+/**
+ * Copyright (c) 2002-2005, International Business Machines Corporation
+ * and others. All rights reserved.
+ *
+ * @author ram
+ */
+public class ConverterPerformanceTest extends PerfTest {
+ public static void main(String[] args) throws Exception {
+ new ConverterPerformanceTest().run(args);
+ }
+ String fileName=null;
+ String srcEncoding=null;
+ String testEncoderName=null;
+ char unicodeBuffer[] = null;
+ byte encBuffer[] = null;
+
+ protected void setup(String[] args) {
+ try{
+ // We only take 3 arguments file name and encoding,
+ if (args.length < 6 ) {
+ System.err.println("args.length = " + args.length);
+ for (int i=0; i src_encoding test ");
+ }
+ for(int i=0; i 1 then the first pass
+ # is discarded as a JIT warm-up pass.
+
+# Attribute string, presumably for the report's HTML tables.
+# NOTE(review): no use of it is visible in this copy -- confirm.
+my $TABLEATTR = q{BORDER="1" CELLPADDING="4" CELLSPACING="0"};
+
+# Separator placed between a value and its error term by formatNumber().
+my $PLUS_MINUS = "±";
+
+# Refuse to run with fewer than three passes; the message gives the reason.
+die "Need at least 3 passes. One is discarded (JIT warmup) and need two to have 1 degree of freedom (t distribution)."
+    if ($NUMPASSES < 3);
+
+my $OUT; # log text accumulated by out() and outln()
+
+main();
+
+#---------------------------------------------------------------------
+# Run every method pair in @METHODS against every entry of @OPTIONS and
+# write the results to a report file named "perf <date>.html" in the
+# current directory.
+#
+# For each pair, the test method and the baseline method are timed with
+# measure2().  The report shows the time per event of each, and the
+# ratio (test / baseline - 1) as a percentage, followed by the log text
+# collected in the global $OUT.
+#
+# Dies if the report file cannot be opened or closed.
+#
+# NOTE(review): the HTML tags inside the string literals and
+# here-documents appear to have been stripped from this copy of the
+# script (several print/out calls emit empty strings, and $color is
+# computed but never used).  Restore the markup from the repository
+# version before relying on the report layout.
+sub main {
+    my $date = localtime;
+    my $title = "ICU4J Performance Test $date";
+
+    # Derive the report file name from the timestamp.
+    my $html = $date;
+    $html =~ s/://g; # ':' illegal
+    $html =~ s/\s*\d+$//; # delete year
+    $html =~ s/^\w+\s*//; # delete dow
+    $html = "perf $html.html";
+
+    open(my $report, '>', $html) or die "Can't write to $html: $!";
+
+    print $report <<EOF;
+
+
+ $title
+
+
+EOF
+    print $report "$title\n\n";
+
+    print $report "$TESTCLASS\n\n";
+
+    for my $methodPair (@METHODS) {
+
+        my $testMethod = $methodPair->[0];
+        my $baselineMethod = $methodPair->[1];
+
+        print $report "\n";
+        print $report " $testMethod vs. $baselineMethod \n";
+
+        print $report "\n";
+        print $report "Options | $testMethod | ";
+        print $report "$baselineMethod | Ratio | \n";
+
+        # Start a fresh log for this method pair; measure1() appends to it.
+        $OUT = '';
+
+        for my $pat (@OPTIONS) {
+            print $report "@$pat[0], @$pat[2] | \n";
+
+            out("");
+
+            # measure the test method
+            out("");
+            print "\n$testMethod [@$pat]\n";
+            my $test = measure2($testMethod, $pat, -$DURATION);
+            out(" | ");
+            print $report "", formatSeconds(4, $test->getMean(), $test->getError);
+            print $report "/event | \n";
+
+            # measure baseline method
+            out("");
+            print "\n$baselineMethod [@$pat]\n";
+            my $base = measure2($baselineMethod, $pat, -$DURATION);
+            out(" | ");
+            # Report the baseline's own error, not the test method's.
+            print $report "", formatSeconds(4, $base->getMean(), $base->getError);
+            print $report "/event | \n";
+
+            out(" ");
+
+            # output ratio: 0 means equal, negative means the test
+            # method took less time per event than the baseline
+            my $r = $test->divide($base);
+            my $mean = $r->getMean() - 1;
+            # NOTE(review): $color is unused here; presumably it fed a
+            # colour attribute in the stripped markup -- confirm.
+            my $color = $mean < 0 ? "RED" : "BLACK";
+            print $report "", formatPercent(3, $mean, $r->getError);
+            print $report " | \n";
+        }
+
+        print $report " \n";
+
+        print $report "Raw data: \n";
+        print $report $OUT;
+        print $report " |\n\n";
+    }
+
+    print $report <<EOF;
+
+EOF
+    close($report) or die "Can't close $html: $!";
+}
+
+#---------------------------------------------------------------------
+# Append the given strings, concatenated without any separator, to
+# the global log buffer $OUT.
+#
+# @param list of strings to append
+#
+sub out {
+    my $text = join('', @_);
+    $OUT .= $text;
+}
+
+#---------------------------------------------------------------------
+# Append the given strings, concatenated without any separator and
+# followed by a single newline, to the global log buffer $OUT.
+#
+# @param list of strings to append
+#
+sub outln {
+    $OUT .= join('', @_, "\n");
+}
+
+#---------------------------------------------------------------------
+# Time one test method via measure1() and wrap the per-pass times in
+# a Dataset.
+#
+# All arguments are forwarded unchanged to measure1():
+# @param the method to run
+# @param the pattern defining characters to test
+# @param if >0 then the number of iterations per pass. If <0 then
+#        (negative of) the number of seconds per pass.
+#
+# @return a Dataset object built from the per-pass times (ms), with
+#         its scale set to 1e-3 / (iterations per pass * events per
+#         iteration), i.e. the factor from ms/pass to seconds/event.
+#         NOTE(review): Dataset is defined elsewhere; how it applies
+#         the scale is not visible here.
+#
+sub measure2 {
+    my ($iterPerPass, $eventPerIter, @passTimes) = measure1(@_);
+
+    # With more than one pass, the first one is dropped.
+    shift(@passTimes) if (@passTimes > 1);
+
+    my $ds = Dataset->new(@passTimes);
+    my $eventsPerPass = $iterPerPass * $eventPerIter;
+    $ds->setScale(1.0e-3 / $eventsPerPass);
+    return $ds;
+}
+
+#---------------------------------------------------------------------
+# Run $NUMPASSES timed passes of one test method through callJava()
+# and log the progress to $OUT (via out()) and to STDOUT.
+#
+# When the third argument is negative it is a duration: a calibration
+# run of $CALIBRATE seconds is made first, and the iteration count is
+# chosen so that one pass lasts roughly that many seconds.
+#
+# @param the method to run
+# @param the pattern defining characters to test
+# @param if >0 then the number of iterations per pass. If <0 then
+#        (negative of) the number of seconds per pass.
+#
+# @return array of:
+#         [0] iterations per pass
+#         [1] events per iteration (as reported by the last pass)
+#         [2..] ms reported for each pass, in order
+#
+sub measure1 {
+    my ($method, $pat, $iterCount) = @_; # $iterCount may be -seconds/pass
+
+    out("Measuring $method for input file @$pat[0] for encoding @$pat[2] , ");
+    if ($iterCount > 0) {
+        out("$iterCount iterations/pass, $NUMPASSES passes\n\n");
+    }
+    else {
+        out(-$iterCount, " seconds/pass, $NUMPASSES passes\n");
+    }
+
+    # A negative count is really -seconds/pass: turn it into an
+    # iteration count by timing a short calibration run.
+    if ($iterCount < 0) {
+        print "Calibrating...";
+        my ($calibration) = callJava($method, $pat, -$CALIBRATE, 1);
+        print "done.\n";
+
+        # 'end' data: first field is scaled by 1e3 and reported as ms,
+        # second field is the number of iterations performed.
+        my ($calMs, $calIters) = split(/\s+/, $calibration->[2]);
+        $calMs *= 1.0e+3;
+
+        my $timePerIter = 1.0e-3 * $calMs / $calIters;
+
+        # iterations/pass, rounded to the nearest integer
+        $iterCount = int(-$iterCount / $timePerIter + 0.5);
+
+        out("Calibration pass ($CALIBRATE sec): ",
+            "$calMs ms, ",
+            "$calIters iterations = ",
+            formatSeconds(4, $timePerIter), "/iteration\n\n");
+    }
+
+    # run passes
+    print "Measuring $iterCount iterations x $NUMPASSES passes...";
+    my @passes = callJava($method, $pat, $iterCount, $NUMPASSES);
+    print "done.\n";
+
+    # Each pass record from callJava():
+    #   ->[0]: method name, corresponds to $method
+    #   ->[1]: 'begin' data, == $iterCount
+    #   ->[2]: 'end' data, whitespace-separated; field 0 is the time
+    #          (scaled by 1e3 to ms), field 2 the events per iteration
+    #   ->[3...]: gc messages from JVM during pass
+    my @endFields = map { [ split(/\s+/, $_->[2]) ] } @passes;
+    my @ms = map { $_->[0] * 1.0e+3 } @endFields;
+
+    # events/iteration is taken from the last pass
+    my $eventsPerIter = @endFields ? $endFields[-1][2] : undef;
+
+    out("Iterations per pass: $iterCount\n\n");
+    out("Events per iteration: $eventsPerIter\n\n");
+
+    my @labelled = @ms;
+    $labelled[0] .= " (discarded)" if (@labelled > 1);
+    out("Raw times (ms/pass): ", join(", ", @labelled), "\n\n");
+
+    return ($iterCount, $eventsPerIter, @ms);
+}
+
+#---------------------------------------------------------------------
+# Invoke java to run $TESTCLASS, passing it the given parameters, and
+# parse the lines it prints.
+#
+# The java executable and classpath are hard-coded in the command
+# line below; the input file is $SOURCEDIR followed by pattern
+# element [0].
+#
+# @param the method to run
+# @param the pattern: array REF of [0] input file name,
+#        [1] source encoding, [2] encoding under test
+# @param the number of iterations per pass (passed as "-i N"), or if
+#        negative, the duration in seconds (passed as "-t N")
+# @param the number of passes (passed as "-p N")
+#
+# @return an array of results. Each result is an array REF
+#         describing one pass. The array REF contains:
+#         ->[0]: The method name as reported
+#         ->[1]: The params on the '= <method> begin ...' line
+#         ->[2]: The params on the '= <method> end ...' line
+#         ->[3..]: Lines starting with '[' seen between begin and
+#                  end (GC messages from the JVM), if any
+#
+# Dies if java cannot be started or exits with a failure status, or
+# if its output does not follow the begin/end protocol.
+#
+sub callJava {
+    my ($method, $pat, $n, $passes) = @_;
+
+    my $fileName = $SOURCEDIR . $pat->[0];
+    my $count = ($n < 0) ? "-t " . (-$n) : "-i " . $n;
+    my $cmd = "c:\\j2sdk1.4.2_14\\bin\\java -classpath ;c:\\svn\\icu4j\\classes; $TESTCLASS $method $count -p $passes file_name $fileName src_encoding @$pat[1] test @$pat[2]";
+    print "[$cmd]\n"; # for debugging
+
+    # Read everything java writes to its standard output.
+    open(my $pipe, '-|', $cmd) or die "Can't run \"$cmd\"";
+    my @out = <$pipe>;
+    close($pipe) or die "Java failed: \"$cmd\"";
+
+    @out = grep(!/^\#/, @out); # filter out comments
+
+    my @results;
+    my $current = ''; # method whose pass is open, '' when none is
+    my $data = [];    # record being built for the open pass
+    foreach my $line (@out) {
+        next unless ($line =~ /\S/);
+
+        if ($line =~ /^=\s*(\w+)\s*(\w+)\s*(.*)/) {
+            my ($m, $state, $d) = ($1, $2, $3);
+            if ($state eq 'begin') {
+                die "$current was begun but not finished" if ($current);
+                $current = $m;
+                push(@$data, $d);
+                push(@$data, ''); # placeholder for end data
+            }
+            elsif ($state eq 'end') {
+                if ($m ne $current) {
+                    die "$current end does not match: $line";
+                }
+                $data->[1] = $d; # insert end data at [1]
+                unshift(@$data, $current); # add method to start
+
+                push(@results, $data);
+                $current = '';
+                $data = [];
+            }
+            else {
+                die "Can't parse: $line";
+            }
+        }
+        elsif ($line =~ /^\[/) {
+            # GC notice: keep it only while a pass is open,
+            # ignore extraneous ones
+            push(@$data, $line) if ($current);
+        }
+        else {
+            die "Can't parse: $line";
+        }
+    }
+
+    die "$current was begun but not finished" if ($current);
+
+    return @results;
+}
+
+#|#---------------------------------------------------------------------
+#|# Format a confidence interval, as given by a Dataset. Output is as
+#|# as follows:
+#|# 241.23 - 241.98 => 241.5 +/- 0.3
+#|# 241.2 - 243.8 => 242 +/- 1
+#|# 211.0 - 241.0 => 226 +/- 15 or? 230 +/- 20
+#|# 220.3 - 234.3 => 227 +/- 7
+#|# 220.3 - 300.3 => 260 +/- 40
+#|# 220.3 - 1000 => 610 +/- 390 or? 600 +/- 400
+#|# 0.022 - 0.024 => 0.023 +/- 0.001
+#|# 0.022 - 0.032 => 0.027 +/- 0.005
+#|# 0.022 - 1.000 => 0.5 +/- 0.5
+#|# In other words, take one significant digit of the error value and
+#|# display the mean to the same precision.
+#|sub formatDataset {
+#| my $ds = shift;
+#| my $lower = $ds->getMean() - $ds->getError();
+#| my $upper = $ds->getMean() + $ds->getError();
+#| my $scale = 0;
+#| # Find how many initial digits are the same
+#| while ($lower < 1 ||
+#| int($lower) == int($upper)) {
+#| $lower *= 10;
+#| $upper *= 10;
+#| $scale++;
+#| }
+#| while ($lower >= 10 &&
+#| int($lower) == int($upper)) {
+#| $lower /= 10;
+#| $upper /= 10;
+#| $scale--;
+#| }
+#|}
+
+#---------------------------------------------------------------------
+# Format a scaled number, optionally with a +/- delta, to n
+# significant digits.
+#
+# @param significant digit, a value >= 1
+# @param multiplier applied to both the value and the delta
+# @param value to be formatted
+# @optional delta, in the same unit as the value
+#
+# @return string of the form "23" or "23 $PLUS_MINUS 10".
+#
+sub formatNumber {
+    my ($sigdig, $mult, $value, $delta) = @_; # $delta may be undef
+
+    my $text = formatSigDig($sigdig, $value * $mult);
+    return $text unless (defined($delta));
+
+    my $err = formatSigDig($sigdig, $delta * $mult);
+
+    # restrict PRECISION of delta to that of main number
+    if ($text =~ /\.(\d+)/) {
+        # TODO make this work for values with all significant
+        # digits to the left of the decimal, e.g., 1234000.
+
+        # TODO the other thing wrong with this is that it
+        # isn't rounding the $delta properly. Have to put
+        # this logic into formatSigDig().
+        my $places = length($1);
+        $err =~ s/\.(\d{$places})\d+/.$1/;
+    }
+
+    return $text . " $PLUS_MINUS " . $err;
+}
+
+#---------------------------------------------------------------------
+# Format a time, optionally with a +/- delta, to n significant
+# digits, choosing the largest of s, ms, us, ns in which the
+# magnitude of the time is at least 1.  Times smaller than 1 ns
+# (including 0) are shown in ns.
+#
+# @param significant digit, a value >= 1
+# @param time in seconds to be formatted
+# @optional delta in seconds
+#
+# @return string of the form "23 ms" or "23 +/- 10 ms".
+#
+sub formatSeconds {
+    my ($sigdig, $seconds, $delta) = @_; # $delta may be undef
+
+    my @MULT = (1 , 1e3, 1e6, 1e9);
+    my @SUFF = ('s' , 'ms', 'us', 'ns');
+
+    # Determine our scale.  The bound is tested first and stops at the
+    # last entry, so $i can never run off the end of @MULT/@SUFF.
+    my $i = 0;
+    ++$i while ($i < $#MULT && abs($seconds) * $MULT[$i] < 1);
+
+    return formatNumber($sigdig, $MULT[$i], $seconds, $delta) . ' ' . $SUFF[$i];
+}
+
+#---------------------------------------------------------------------
+# Format a fraction as a percentage, optionally with a +/- delta, to
+# n significant digits.  Both numbers are multiplied by 100 by
+# formatNumber().
+#
+# @param significant digit, a value >= 1
+# @param value to be formatted, as a fraction, e.g. 0.5 for 50%
+# @optional delta, as a fraction
+#
+# @return string of the form "23 %" or "23 +/- 10 %".
+#
+sub formatPercent {
+    my ($sigdig, $fraction, $delta) = @_; # $delta may be undef
+
+    my $formatted = formatNumber($sigdig, 100, $fraction, $delta);
+    return "$formatted %";
+}
+
+#---------------------------------------------------------------------
+# Format a number to n significant digits without using exponential
+# notation.
+#
+# The number is first rendered by sprintf() in "%e" form with n-1
+# decimals, then the mantissa digits are re-positioned around the
+# decimal point according to the exponent.
+#
+# @param significant digit, a value >= 1
+# @param number to be formatted
+#
+# @return string of the form "1234" "12.34" or "0.001234". If
+#         number was negative, prefixed by '-'.
+#
+# Dies with "Can't parse ..." if sprintf() does not produce a plain
+# mantissa/exponent string.
+#
+sub formatSigDig {
+    my ($sigdig, $number) = @_;
+    my $decimals = $sigdig - 1;
+
+    my $sci = sprintf("%.${decimals}e", $number);
+    my $sign = ($sci =~ s/^-//) ? '-' : '';
+
+    # With one significant digit sprintf() emits no decimal point
+    # ("5e+00"), so the fractional part of the mantissa is optional.
+    $sci =~ /^(\d)(?:\.(\d+))?e([-+]\d+)$/
+        or die "Can't parse $sci";
+    my $digits = $1 . (defined($2) ? $2 : '');
+    my $point = $3 + 1; # count of digits left of the decimal point
+
+    # Large exponent: pad with trailing zeros up to the decimal point.
+    $digits .= '0' while ($point > length($digits));
+
+    # Negative exponent: add leading zeros so one digit precedes the point.
+    while ($point < 1) {
+        $point++;
+        $digits = '0' . $digits;
+    }
+
+    # All digits are integral: no decimal point is printed.
+    return $sign . $digits if ($point == length($digits));
+
+    return $sign . substr($digits, 0, $point) . '.' . substr($digits, $point);
+}
+
+#eof