Do integer mod via sum-of-digits technique. This benefits the date

code. Review URL: http://codereview.chromium.org/2876011 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@4964 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
2010-06-28 11:47:23 +00:00 · 2010-06-28 11:47:23 +00:00 · bcfaba24c1
commit bcfaba24c1
parent eaaa2d8559
7 changed files with 393 additions and 24 deletions
--- a/src/arm/codegen-arm.cc
+++ b/src/arm/codegen-arm.cc
@ -6257,7 +6257,6 @@ bool CodeGenerator::HasValidEntryRegisters() { return true; }
 #undef __
 #define __ ACCESS_MASM(masm)

-
 Handle<String> Reference::GetName() {
  ASSERT(type_ == NAMED);
  Property* property = expression_->AsProperty();
@ -6621,7 +6620,7 @@ void ConvertToDoubleStub::Generate(MacroAssembler* masm) {
  __ bind(&not_special);
  // Count leading zeros.  Uses mantissa for a scratch register on pre-ARM5.
  // Gets the wrong answer for 0, but we already checked for that case above.
-  __ CountLeadingZeros(source_, mantissa, zeros_);
+  __ CountLeadingZeros(zeros_, source_, mantissa);
  // Compute exponent and or it into the exponent register.
  // We use mantissa as a scratch register here.  Use a fudge factor to
  // divide the constant 31 + HeapNumber::kExponentBias, 0x41d, into two parts
@ -7350,7 +7349,7 @@ void GenericBinaryOpStub::HandleBinaryOpSlowCases(

    // If we have floating point hardware, inline ADD, SUB, MUL, and DIV,
    // using registers d7 and d6 for the double values.
-    if (use_fp_registers) {
+    if (CpuFeatures::IsSupported(VFP3)) {
      CpuFeatures::Scope scope(VFP3);
      __ mov(r7, Operand(rhs, ASR, kSmiTagSize));
      __ vmov(s15, r7);
@ -7358,8 +7357,12 @@ void GenericBinaryOpStub::HandleBinaryOpSlowCases(
      __ mov(r7, Operand(lhs, ASR, kSmiTagSize));
      __ vmov(s13, r7);
      __ vcvt_f64_s32(d6, s13);
+      if (!use_fp_registers) {
+        __ vmov(r2, r3, d7);
+        __ vmov(r0, r1, d6);
+      }
    } else {
-      // Write Smi from rhs to r3 and r2 in double format.  r3 is scratch.
+      // Write Smi from rhs to r3 and r2 in double format.  r9 is scratch.
      __ mov(r7, Operand(rhs));
      ConvertToDoubleStub stub1(r3, r2, r7, r9);
      __ push(lr);
@ -7434,12 +7437,15 @@ void GenericBinaryOpStub::HandleBinaryOpSlowCases(
    __ AllocateHeapNumber(r5, r4, r7, heap_number_map, &slow);
    }

-    if (use_fp_registers) {
+    if (CpuFeatures::IsSupported(VFP3)) {
      CpuFeatures::Scope scope(VFP3);
      // Convert smi in r0 to double in d7.
      __ mov(r7, Operand(r0, ASR, kSmiTagSize));
      __ vmov(s15, r7);
      __ vcvt_f64_s32(d7, s15);
+      if (!use_fp_registers) {
+        __ vmov(r2, r3, d7);
+      }
    } else {
      // Write Smi from r0 to r3 and r2 in double format.
      __ mov(r7, Operand(r0));
@ -7490,12 +7496,15 @@ void GenericBinaryOpStub::HandleBinaryOpSlowCases(
    __ AllocateHeapNumber(r5, r4, r7, heap_number_map, &slow);
    }

-    if (use_fp_registers) {
+    if (CpuFeatures::IsSupported(VFP3)) {
      CpuFeatures::Scope scope(VFP3);
      // Convert smi in r1 to double in d6.
      __ mov(r7, Operand(r1, ASR, kSmiTagSize));
      __ vmov(s13, r7);
      __ vcvt_f64_s32(d6, s13);
+      if (!use_fp_registers) {
+        __ vmov(r0, r1, d6);
+      }
    } else {
      // Write Smi from r1 to r1 and r0 in double format.
      __ mov(r7, Operand(r1));
@ -7942,6 +7951,173 @@ static void MultiplyByKnownInt2(
 }


+// This uses versions of the sum-of-digits-to-see-if-a-number-is-divisible-by-3
+// trick.  See http://en.wikipedia.org/wiki/Divisibility_rule
+// Takes the sum of the digits base (mask + 1) repeatedly until we have a
+// number from 0 to mask.  On exit the 'eq' condition flags are set if the
+// answer is exactly the mask.
+void IntegerModStub::DigitSum(MacroAssembler* masm,
+                              Register lhs,
+                              int mask,
+                              int shift,
+                              Label* entry) {
+  ASSERT(mask > 0);
+  ASSERT(mask <= 0xff);  // This ensures we don't need ip to use it.
+  Label loop;
+  __ bind(&loop);
+  __ and_(ip, lhs, Operand(mask));
+  __ add(lhs, ip, Operand(lhs, LSR, shift));
+  __ bind(entry);
+  __ cmp(lhs, Operand(mask));
+  __ b(gt, &loop);
+}
+
+
+void IntegerModStub::DigitSum(MacroAssembler* masm,
+                              Register lhs,
+                              Register scratch,
+                              int mask,
+                              int shift1,
+                              int shift2,
+                              Label* entry) {
+  ASSERT(mask > 0);
+  ASSERT(mask <= 0xff);  // This ensures we don't need ip to use it.
+  Label loop;
+  __ bind(&loop);
+  __ bic(scratch, lhs, Operand(mask));
+  __ and_(ip, lhs, Operand(mask));
+  __ add(lhs, ip, Operand(lhs, LSR, shift1));
+  __ add(lhs, lhs, Operand(scratch, LSR, shift2));
+  __ bind(entry);
+  __ cmp(lhs, Operand(mask));
+  __ b(gt, &loop);
+}
+
+
+// Splits the number into two halves (bottom half has shift bits).  The top
+// half is subtracted from the bottom half.  If the result is negative then
+// rhs is added.
+void IntegerModStub::ModGetInRangeBySubtraction(MacroAssembler* masm,
+                                                Register lhs,
+                                                int shift,
+                                                int rhs) {
+  int mask = (1 << shift) - 1;
+  __ and_(ip, lhs, Operand(mask));
+  __ sub(lhs, ip, Operand(lhs, LSR, shift), SetCC);
+  __ add(lhs, lhs, Operand(rhs), LeaveCC, mi);
+}
+
+
+void IntegerModStub::ModReduce(MacroAssembler* masm,
+                               Register lhs,
+                               int max,
+                               int denominator) {
+  int limit = denominator;
+  while (limit * 2 <= max) limit *= 2;
+  while (limit >= denominator) {
+    __ cmp(lhs, Operand(limit));
+    __ sub(lhs, lhs, Operand(limit), LeaveCC, ge);
+    limit >>= 1;
+  }
+}
+
+
+void IntegerModStub::ModAnswer(MacroAssembler* masm,
+                               Register result,
+                               Register shift_distance,
+                               Register mask_bits,
+                               Register sum_of_digits) {
+  __ add(result, mask_bits, Operand(sum_of_digits, LSL, shift_distance));
+  __ Ret();
+}
+
+
+// See comment for class.
+void IntegerModStub::Generate(MacroAssembler* masm) {
+  __ mov(lhs_, Operand(lhs_, LSR, shift_distance_));
+  __ bic(odd_number_, odd_number_, Operand(1));
+  __ mov(odd_number_, Operand(odd_number_, LSL, 1));
+  // We now have (odd_number_ - 1) * 2 in the register.
+  // Build a switch out of branches instead of data because it avoids
+  // having to teach the assembler about intra-code-object pointers
+  // that are not in relative branch instructions.
+  Label mod3, mod5, mod7, mod9, mod11, mod13, mod15, mod17, mod19;
+  Label mod21, mod23, mod25;
+  { Assembler::BlockConstPoolScope block_const_pool(masm);
+    __ add(pc, pc, Operand(odd_number_));
+    // When you read pc it is always 8 ahead, but when you write it you always
+    // write the actual value.  So we put in two nops to take up the slack.
+    __ nop();
+    __ nop();
+    __ b(&mod3);
+    __ b(&mod5);
+    __ b(&mod7);
+    __ b(&mod9);
+    __ b(&mod11);
+    __ b(&mod13);
+    __ b(&mod15);
+    __ b(&mod17);
+    __ b(&mod19);
+    __ b(&mod21);
+    __ b(&mod23);
+    __ b(&mod25);
+  }
+
+  // For each denominator we find a multiple that is almost only ones
+  // when expressed in binary.  Then we do the sum-of-digits trick for
+  // that number.  If the multiple is not 1 then we have to do a little
+  // more work afterwards to get the answer into the 0-denominator-1
+  // range.
+  DigitSum(masm, lhs_, 3, 2, &mod3);  // 3 = b11.
+  __ sub(lhs_, lhs_, Operand(3), LeaveCC, eq);
+  ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_);
+
+  DigitSum(masm, lhs_, 0xf, 4, &mod5);  // 5 * 3 = b1111.
+  ModGetInRangeBySubtraction(masm, lhs_, 2, 5);
+  ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_);
+
+  DigitSum(masm, lhs_, 7, 3, &mod7);  // 7 = b111.
+  __ sub(lhs_, lhs_, Operand(7), LeaveCC, eq);
+  ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_);
+
+  DigitSum(masm, lhs_, 0x3f, 6, &mod9);  // 7 * 9 = b111111.
+  ModGetInRangeBySubtraction(masm, lhs_, 3, 9);
+  ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_);
+
+  DigitSum(masm, lhs_, r5, 0x3f, 6, 3, &mod11);  // 5 * 11 = b110111.
+  ModReduce(masm, lhs_, 0x3f, 11);
+  ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_);
+
+  DigitSum(masm, lhs_, r5, 0xff, 8, 5, &mod13);  // 19 * 13 = b11110111.
+  ModReduce(masm, lhs_, 0xff, 13);
+  ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_);
+
+  DigitSum(masm, lhs_, 0xf, 4, &mod15);  // 15 = b1111.
+  __ sub(lhs_, lhs_, Operand(15), LeaveCC, eq);
+  ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_);
+
+  DigitSum(masm, lhs_, 0xff, 8, &mod17);  // 15 * 17 = b11111111.
+  ModGetInRangeBySubtraction(masm, lhs_, 4, 17);
+  ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_);
+
+  DigitSum(masm, lhs_, r5, 0xff, 8, 5, &mod19);  // 13 * 19 = b11110111.
+  ModReduce(masm, lhs_, 0xff, 19);
+  ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_);
+
+  DigitSum(masm, lhs_, 0x3f, 6, &mod21);  // 3 * 21 = b111111.
+  ModReduce(masm, lhs_, 0x3f, 21);
+  ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_);
+
+  DigitSum(masm, lhs_, r5, 0xff, 8, 7, &mod23);  // 11 * 23 = b11111101.
+  ModReduce(masm, lhs_, 0xff, 23);
+  ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_);
+
+  DigitSum(masm, lhs_, r5, 0x7f, 7, 6, &mod25);  // 5 * 25 = b1111101.
+  ModReduce(masm, lhs_, 0x7f, 25);
+  ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_);
+}
+
+
 const char* GenericBinaryOpStub::GetName() {
  if (name_ != NULL) return name_;
  const int len = 100;
@ -8069,7 +8245,7 @@ void GenericBinaryOpStub::Generate(MacroAssembler* masm) {
    case Token::MOD: {
      Label not_smi;
      if (ShouldGenerateSmiCode() && specialized_on_rhs_) {
-        Label smi_is_unsuitable;
+        Label lhs_is_unsuitable;
        __ BranchOnNotSmi(lhs, &not_smi);
        if (IsPowerOf2(constant_rhs_)) {
          if (op_ == Token::MOD) {
@ -8090,14 +8266,14 @@ void GenericBinaryOpStub::Generate(MacroAssembler* masm) {
            __ eor(rhs, rhs, Operand(0x80000000u), SetCC);
            // Next two instructions are conditional on the answer being -0.
            __ mov(rhs, Operand(Smi::FromInt(constant_rhs_)), LeaveCC, eq);
-            __ b(eq, &smi_is_unsuitable);
+            __ b(eq, &lhs_is_unsuitable);
            // We need to subtract the dividend.  Eg. -3 % 4 == -3.
            __ sub(result, rhs, Operand(Smi::FromInt(constant_rhs_)));
          } else {
            ASSERT(op_ == Token::DIV);
            __ tst(lhs,
                   Operand(0x80000000u | ((constant_rhs_ << kSmiTagSize) - 1)));
-            __ b(ne, &smi_is_unsuitable);  // Go slow on negative or remainder.
+            __ b(ne, &lhs_is_unsuitable);  // Go slow on negative or remainder.
            int shift = 0;
            int d = constant_rhs_;
            while ((d & 1) == 0) {
@ -8110,7 +8286,7 @@ void GenericBinaryOpStub::Generate(MacroAssembler* masm) {
        } else {
          // Not a power of 2.
          __ tst(lhs, Operand(0x80000000u));
-          __ b(ne, &smi_is_unsuitable);
+          __ b(ne, &lhs_is_unsuitable);
          // Find a fixed point reciprocal of the divisor so we can divide by
          // multiplying.
          double divisor = 1.0 / constant_rhs_;
@ -8145,7 +8321,7 @@ void GenericBinaryOpStub::Generate(MacroAssembler* masm) {
          // (lhs / rhs) where / indicates integer division.
          if (op_ == Token::DIV) {
            __ cmp(lhs, Operand(scratch, LSL, required_scratch_shift));
-            __ b(ne, &smi_is_unsuitable);  // There was a remainder.
+            __ b(ne, &lhs_is_unsuitable);  // There was a remainder.
            __ mov(result, Operand(scratch2, LSL, kSmiTagSize));
          } else {
            ASSERT(op_ == Token::MOD);
@ -8153,14 +8329,21 @@ void GenericBinaryOpStub::Generate(MacroAssembler* masm) {
          }
        }
        __ Ret();
-        __ bind(&smi_is_unsuitable);
+        __ bind(&lhs_is_unsuitable);
      } else if (op_ == Token::MOD &&
                 runtime_operands_type_ != BinaryOpIC::HEAP_NUMBERS &&
                 runtime_operands_type_ != BinaryOpIC::STRINGS) {
        // Do generate a bit of smi code for modulus even though the default for
        // modulus is not to do it, but as the ARM processor has no coprocessor
-        // support for modulus checking for smis makes sense.
+        // support for modulus checking for smis makes sense.  We can handle
+        // 1 to 25 times any power of 2.  This covers over half the numbers from
+        // 1 to 100 including all of the first 25.  (Actually the constants < 10
+        // are handled above by reciprocal multiplication.  We only get here for
+        // those cases if the right hand side is not a constant or for cases
+        // like 192 which is 3*2^6 and ends up in the 3 case in the integer mod
+        // stub.)
        Label slow;
+        Label not_power_of_2;
        ASSERT(!ShouldGenerateSmiCode());
        ASSERT(kSmiTag == 0);  // Adjust code below.
        // Check for two positive smis.
@ -8168,13 +8351,42 @@ void GenericBinaryOpStub::Generate(MacroAssembler* masm) {
        __ tst(smi_test_reg, Operand(0x80000000u | kSmiTagMask));
        __ b(ne, &slow);
        // Check that rhs is a power of two and not zero.
+        Register mask_bits = r3;
        __ sub(scratch, rhs, Operand(1), SetCC);
        __ b(mi, &slow);
-        __ tst(rhs, scratch);
-        __ b(ne, &slow);
+        __ and_(mask_bits, rhs, Operand(scratch), SetCC);
+        __ b(ne, &not_power_of_2);
        // Calculate power of two modulus.
        __ and_(result, lhs, Operand(scratch));
        __ Ret();
+
+        __ bind(&not_power_of_2);
+        __ eor(scratch, scratch, Operand(mask_bits));
+        // At least two bits are set in the modulus.  The high one(s) are in
+        // mask_bits and the low one is scratch + 1.
+        __ and_(mask_bits, scratch, Operand(lhs));
+        Register shift_distance = scratch;
+        scratch = no_reg;
+
+        // The rhs consists of a power of 2 multiplied by some odd number.
+        // The power-of-2 part we handle by putting the corresponding bits
+        // from the lhs in the mask_bits register, and the power in the
+        // shift_distance register.  Shift distance is never 0 due to Smi
+        // tagging.
+        __ CountLeadingZeros(r4, shift_distance, shift_distance);
+        __ rsb(shift_distance, r4, Operand(32));
+
+        // Now we need to find out what the odd number is. The last bit is
+        // always 1.
+        Register odd_number = r4;
+        __ mov(odd_number, Operand(rhs, LSR, shift_distance));
+        __ cmp(odd_number, Operand(25));
+        __ b(gt, &slow);
+
+        IntegerModStub stub(
+            result, shift_distance, odd_number, mask_bits, lhs, r5);
+        __ Jump(stub.GetCode(), RelocInfo::CODE_TARGET);  // Tail call.
+
        __ bind(&slow);
      }
      HandleBinaryOpSlowCases(
--- a/src/arm/codegen-arm.h
+++ b/src/arm/codegen-arm.h
@ -881,6 +881,102 @@ class StringCompareStub: public CodeStub {
 };


+// This stub can do a fast mod operation without using fp.
+// It is tail called from the GenericBinaryOpStub and it always
+// returns an answer.  It never causes GC so it doesn't need a real frame.
+//
+// The inputs are always positive Smis.  This is never called
+// where the denominator is a power of 2.  We handle that separately.
+//
+// If we consider the denominator as an odd number multiplied by a power of 2,
+// then:
+// * The exponent (power of 2) is in the shift_distance register.
+// * The odd number is in the odd_number register.  It is always in the range
+//   of 3 to 25.
+// * The bits from the numerator that are to be copied to the answer (there are
+//   shift_distance of them) are in the mask_bits register.
+// * The other bits of the numerator have been shifted down and are in the lhs
+//   register.
+class IntegerModStub : public CodeStub {
+ public:
+  IntegerModStub(Register result,
+                 Register shift_distance,
+                 Register odd_number,
+                 Register mask_bits,
+                 Register lhs,
+                 Register scratch)
+      : result_(result),
+        shift_distance_(shift_distance),
+        odd_number_(odd_number),
+        mask_bits_(mask_bits),
+        lhs_(lhs),
+        scratch_(scratch) {
+    // We don't code these in the minor key, so they should always be the same.
+    // We don't really want to fix that since this stub is rather large and we
+    // don't want many copies of it.
+    ASSERT(shift_distance_.is(r9));
+    ASSERT(odd_number_.is(r4));
+    ASSERT(mask_bits_.is(r3));
+    ASSERT(scratch_.is(r5));
+  }
+
+ private:
+  Register result_;
+  Register shift_distance_;
+  Register odd_number_;
+  Register mask_bits_;
+  Register lhs_;
+  Register scratch_;
+
+  // Minor key encoding in 16 bits.
+  class ResultRegisterBits: public BitField<int, 0, 4> {};
+  class LhsRegisterBits: public BitField<int, 4, 4> {};
+
+  Major MajorKey() { return IntegerMod; }
+  int MinorKey() {
+    // Encode the parameters in a unique 16 bit value.
+    return ResultRegisterBits::encode(result_.code())
+           | LhsRegisterBits::encode(lhs_.code());
+  }
+
+  void Generate(MacroAssembler* masm);
+
+  const char* GetName() { return "IntegerModStub"; }
+
+  // Utility functions.
+  void DigitSum(MacroAssembler* masm,
+                Register lhs,
+                int mask,
+                int shift,
+                Label* entry);
+  void DigitSum(MacroAssembler* masm,
+                Register lhs,
+                Register scratch,
+                int mask,
+                int shift1,
+                int shift2,
+                Label* entry);
+  void ModGetInRangeBySubtraction(MacroAssembler* masm,
+                                  Register lhs,
+                                  int shift,
+                                  int rhs);
+  void ModReduce(MacroAssembler* masm,
+                 Register lhs,
+                 int max,
+                 int denominator);
+  void ModAnswer(MacroAssembler* masm,
+                 Register result,
+                 Register shift_distance,
+                 Register mask_bits,
+                 Register sum_of_digits);
+
+
+#ifdef DEBUG
+  void Print() { PrintF("IntegerModStub\n"); }
+#endif
+};
+
+
 // This stub can convert a signed int32 to a heap number (double).  It does
 // not work for int32s that are in Smi range!  No GC occurs during this stub
 // so you don't have to set up the frame.
--- a/src/arm/ic-arm.cc
+++ b/src/arm/ic-arm.cc
@ -1722,7 +1722,7 @@ static void ConvertIntToFloat(MacroAssembler* masm,
    // Count leading zeros.
    // Gets the wrong answer for 0, but we already checked for that case above.
    Register zeros = scratch2;
-    __ CountLeadingZeros(ival, scratch1, zeros);
+    __ CountLeadingZeros(zeros, ival, scratch1);

    // Compute exponent and or it into the exponent register.
    __ rsb(scratch1,
--- a/src/arm/macro-assembler-arm.cc
+++ b/src/arm/macro-assembler-arm.cc
@ -1686,14 +1686,19 @@ void MacroAssembler::AllocateHeapNumber(Register result,
 }


-void MacroAssembler::CountLeadingZeros(Register source,
-                                       Register scratch,
-                                       Register zeros) {
+void MacroAssembler::CountLeadingZeros(Register zeros,   // Answer.
+                                       Register source,  // Input.
+                                       Register scratch) {
+  ASSERT(!zeros.is(source) || !source.is(zeros));
+  ASSERT(!zeros.is(scratch));
+  ASSERT(!scratch.is(ip));
+  ASSERT(!source.is(ip));
+  ASSERT(!zeros.is(ip));
 #ifdef CAN_USE_ARMV5_INSTRUCTIONS
  clz(zeros, source);  // This instruction is only supported after ARM5.
 #else
  mov(zeros, Operand(0));
-  mov(scratch, source);
+  Move(scratch, source);
  // Top 16.
  tst(scratch, Operand(0xffff0000));
  add(zeros, zeros, Operand(16), LeaveCC, eq);
--- a/src/arm/macro-assembler-arm.h
+++ b/src/arm/macro-assembler-arm.h
@ -471,10 +471,12 @@ class MacroAssembler: public Assembler {

  // Count leading zeros in a 32 bit word.  On ARM5 and later it uses the clz
  // instruction.  On pre-ARM5 hardware this routine gives the wrong answer
-  // for 0 (31 instead of 32).
-  void CountLeadingZeros(Register source,
-                         Register scratch,
-                         Register zeros);
+  // for 0 (31 instead of 32).  Source and scratch can be the same in which case
+  // the source is clobbered.  Source and zeros can also be the same in which
+  // case scratch should be a different register.
+  void CountLeadingZeros(Register zeros,
+                         Register source,
+                         Register scratch);

  // ---------------------------------------------------------------------------
  // Runtime calls
--- a/src/code-stubs.h
+++ b/src/code-stubs.h
@ -46,6 +46,7 @@ namespace internal {
  V(RecordWrite)                         \
  V(ConvertToDouble)                     \
  V(WriteInt32ToHeapNumber)              \
+  V(IntegerMod)                          \
  V(StackCheck)                          \
  V(FastNewClosure)                      \
  V(FastNewContext)                      \
--- a/test/mjsunit/mod.js
+++ b/test/mjsunit/mod.js
@ -0,0 +1,53 @@
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+function foo() {
+  for (var i = 1; i < 100; i++) {
+    var answer = 1;
+    for (var j = 1; j < 100; j++) {
+      if (answer == i) answer = 0;
+      // Positive case.
+      print(j + " % " + i + " = " + answer);
+      m = j % i;
+      assertEquals(answer, m, j + " % " + i);
+      m = j % (-i);
+      assertEquals(answer, m, j + " % -" + i);
+      // Negative case.
+      m = (-j) % i;
+      assertEquals(-answer, m, j + " % " + i);
+      // Check for negative zero.
+      if (answer == 0) assertEquals(-Infinity, 1/m);
+      m = (-j) % (-i);
+      assertEquals(-answer, m, j + " % -" + i);
+      // Check for negative zero.
+      if (answer == 0) assertEquals(-Infinity, 1/m);
+      answer++;
+    }
+  }
+}
+
+foo();