scuffed-code/icu4c/source/i18n/digitlst.cpp
2003-11-17 17:47:54 +00:00

685 lines
20 KiB
C++

/*
**********************************************************************
* Copyright (C) 1997-2001, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
* File DIGITLST.CPP
*
* Modification History:
*
* Date Name Description
* 03/21/97 clhuang Converted from java.
* 03/21/97 clhuang Implemented with new APIs.
* 03/27/97 helena Updated to pass the simple test after code review.
* 03/31/97 aliu Moved isLONG_MIN to here, and fixed it.
* 04/15/97 aliu Changed MAX_COUNT to DBL_DIG. Changed Digit to char.
* Reworked representation by replacing fDecimalAt
* with fExponent.
* 04/16/97 aliu Rewrote set() and getDouble() to use sprintf/atof
* to do digit conversion.
* 09/09/97 aliu Modified for exponential notation support.
* 08/02/98 stephen Added nearest/even rounding
* Fixed bug in fitsIntoLong
******************************************************************************
*/
#include "unicode/putil.h"
#include "digitlst.h"
#include "cstring.h"
#include <stdlib.h>
#include <limits.h>
#include <string.h>
#include <stdio.h>
// ***************************************************************************
// class DigitList
// This class handles the transcoding between numeric values and strings of
// characters. The digits hold a non-negative magnitude; the sign is kept separately.
// ***************************************************************************
/**
* The zero digit. Array elements fDigits[i] hold characters in the range
* kZero to kZero + 9; here that is the character '0'.
*/
#define kZero '0'
// Decimal separator that sprintf emits for a double in this process. Stays 0
// until getDouble() probes it on first use and caches the result here.
static char gDecimal = 0;
/* Decimal digits of the magnitude of the most negative 32-bit integer (2^31), without the negative sign. */
static const char LONG_MIN_REP[] = "2147483648";
/* Decimal digits of the magnitude of the most negative 64-bit integer (2^63), without the negative sign. */
static const char I64_MIN_REP[] = "9223372036854775808";
// NOTE(review): not referenced by any code visible in this file -- confirm before removing.
static const int64_t I64_MIN_VALUE = U_INT64_MIN;
// Number of digits in each of the two strings above.
enum {
LONG_MIN_REP_LENGTH = sizeof(LONG_MIN_REP) - 1, // Exclude the terminating NUL
I64_MIN_REP_LENGTH = sizeof(I64_MIN_REP) - 1 // Exclude the terminating NUL
};
U_NAMESPACE_BEGIN
// -------------------------------------
// Default constructor: all initialisation is delegated to clear(), which
// leaves the list empty (no digits) with a positive sign.
DigitList::DigitList()
{
    clear();
}
// -------------------------------------
// Destructor: empty body, no cleanup is performed here.
DigitList::~DigitList() {}
// -------------------------------------
// Copy constructor. fDigits has to point into this object's own buffer
// before the assignment operator runs, because operator= copies the digit
// characters through fDigits.
DigitList::DigitList(const DigitList &other)
{
    // Slot 0 of fDecimalDigits is reserved for a sign / decimal character.
    fDigits = fDecimalDigits + 1;
    operator=(other);
}
// -------------------------------------
// Assignment operator: copies the sign, the decimal position, the digit
// count and the first fCount digit characters. Self-assignment is a no-op.
DigitList&
DigitList::operator=(const DigitList& other)
{
    if (this == &other) {
        return *this;
    }

    fIsPositive = other.fIsPositive;
    fDecimalAt = other.fDecimalAt;
    fCount = other.fCount;
    // fCount must already hold the new value: it is the copy length.
    uprv_strncpy(fDigits, other.fDigits, fCount);
    return *this;
}
// -------------------------------------
// Equality: two lists are equal when they are the same object, or when the
// decimal position, digit count, sign and the first fCount digits all match.
UBool
DigitList::operator==(const DigitList& that) const
{
    if (this == &that) {
        return TRUE;
    }
    if (fDecimalAt != that.fDecimalAt ||
        fCount != that.fCount ||
        fIsPositive != that.fIsPositive) {
        return FALSE;
    }
    return uprv_strncmp(fDigits, that.fDigits, fCount) == 0;
}
// -------------------------------------
// Resets the digit list to its empty state: no digits, decimal position 0,
// positive sign. The digit characters themselves are left untouched, since
// a count of zero means none of them is considered valid.
void
DigitList::clear()
{
    fIsPositive = TRUE;
    fCount = 0;
    fDecimalAt = 0;
    // Digits start one past the beginning of the buffer; slot 0 is reserved
    // for a sign / decimal character.
    fDigits = fDecimalDigits + 1;
}
// -------------------------------------
/**
 * Formats a number into a base 10 string representation, and NUL terminates it.
 * The output always starts with an explicit sign character ('+' or '-'),
 * followed by the digits. Zero produces just "+" with no digits.
 *
 * @param number The number to format
 * @param outputStr The string to output to
 * @param outputLen Limit on the digits to generate. Values above MAX_DIGITS
 *                  are clamped to MAX_DIGITS; values below 3 are rejected.
 *                  Digits are generated from the least significant end, so if
 *                  the value has more digits than the limit allows, the
 *                  high-order digits are the ones dropped. Note that up to
 *                  outputLen + 1 digits can be produced, plus the sign and
 *                  the terminating NUL.
 * @return the number of digits written, not including the sign. Returns 0
 *         without touching outputStr when outputLen is less than 3.
 */
static int32_t
formatBase10(int64_t number, char *outputStr, int32_t outputLen)
{
    char buffer[MAX_DIGITS + 1];
    int32_t bufferLen;
    int32_t digitCount;

    if (outputLen > MAX_DIGITS) {
        outputLen = MAX_DIGITS;     // Ignore NULL
    }
    else if (outputLen < 3) {
        return 0;                   // Not enough room
    }

    // Digits are produced least significant first and stored downwards,
    // starting at buffer[outputLen].
    bufferLen = outputLen;

    if (number < 0) {
        // Negative numbers are slightly larger than a positive: peel off the
        // last digit first so that the remainder can safely be made positive.
        buffer[bufferLen--] = (char)(-(number % 10) + kZero);
        number /= -10;
        *(outputStr++) = '-';
    }
    else {
        *(outputStr++) = '+';    // allow +0
    }
    while (bufferLen >= 0 && number) {      // Output the number
        buffer[bufferLen--] = (char)(number % 10 + kZero);
        number /= 10;
    }

    // bufferLen now sits one slot below the most significant digit.
    digitCount = outputLen - bufferLen;
    ++bufferLen;

    // Copy only the slots that were actually filled, i.e. up to index
    // outputLen. Copying up to MAX_DIGITS would append uninitialised bytes
    // whenever the caller asked for fewer than MAX_DIGITS characters.
    while (bufferLen <= outputLen) {
        *(outputStr++) = buffer[bufferLen++];
    }
    *outputStr = 0;     // NUL terminate.
    return digitCount;
}
/**
* Currently, getDouble() depends on atof() to do its conversion.
*
* WARNING!!
* This is an extremely costly function. ~1/2 of the conversion time
* can be linked to this function.
*/
double
DigitList::getDouble()
{
double value;
if (fCount == 0) {
value = 0.0;
}
else {
if (!gDecimal) {
char rep[MAX_DIGITS];
// For machines that decide to change the decimal on you,
// and try to be too smart with localization.
// This normally should be just a '.'.
sprintf(rep, "%+1.1f", 1.0);
gDecimal = rep[2];
}
*fDecimalDigits = gDecimal;
*(fDigits+fCount) = 'e'; // add an e after the digits.
formatBase10(fDecimalAt,
fDigits + fCount + 1, // skip the 'e'
MAX_DEC_DIGITS - fCount - 3); // skip the 'e' and '.'
value = atof(fDecimalDigits);
}
return fIsPositive ? value : -value;
}
// -------------------------------------
/**
 * Returns the value as a 32-bit integer.
 * Make sure that fitsIntoLong() is called before calling this function.
 */
int32_t DigitList::getLong()
{
    if (fCount != fDecimalAt) {
        // The digits do not line up exactly with the decimal point, so go
        // through the double conversion. For values that fit in 32 bits
        // nothing is lost in the conversion to double. If 64-bit longs are
        // to be supported later, getLong() must be rewritten. [LIU]
        return (int32_t)getDouble();
    }

    // Every digit is an integer digit: turn the buffer into "<sign><digits>"
    // and let atol() parse it.
    // This conversion is bad on 64-bit platforms when we want to
    // be able to return a 64-bit number [grhoten]
    fDigits[fCount] = 0;                            // NUL terminate
    fDecimalDigits[0] = fIsPositive ? '+' : '-';
    return (int32_t)atol(fDecimalDigits);
}
/**
* Make sure that fitsIntoInt64() is called before calling this function.
*/
int64_t DigitList::getInt64()
{
if (fCount == fDecimalAt) {
uint64_t value;
fDigits[fCount] = 0; // NULL terminate
// This conversion is bad on 64-bit platforms when we want to
// be able to return a 64-bit number [grhoten]
*fDecimalDigits = fIsPositive ? '+' : '-';
if (fCount < LONG_MIN_REP_LENGTH) {
return (int64_t)atol(fDecimalDigits);
}
// too big for atol, hand-roll atoi64
value = 0;
for (int i = 0; i < fCount; ++i) {
int v = fDigits[i] - kZero;
value = value * (uint64_t)10 + (uint64_t)v;
}
if (!fIsPositive) {
value = ~value;
value += 1;
}
int64_t svalue = (int64_t)value;
return svalue;
}
else {
// todo: figure out best approach
// This is 100% accurate in c++ because if we are representing
// an integral value, we suffer nothing in the conversion to
// double. If we are to support 64-bit longs later, getLong()
// must be rewritten. [LIU]
return (int64_t)getDouble();
}
}
/**
 * Return true if the number represented by this object can fit into
 * a long (32-bit). As a side effect, trailing zeros after the decimal
 * point are trimmed from the digit list; this does not change the
 * represented value.
 *
 * @param ignoreNegativeZero when the value is zero and negative, the
 *        result is this flag; positive zero always fits.
 */
UBool
DigitList::fitsIntoLong(UBool ignoreNegativeZero)
{
    // Drop zeros that sit after the decimal point.
    for (; fCount > fDecimalAt && fCount > 0; --fCount) {
        if (fDigits[fCount - 1] != kZero) {
            break;
        }
    }

    if (fCount == 0) {
        // Positive zero fits into a long, but negative zero can only
        // be represented as a double. - bug 4162852
        return fIsPositive || ignoreNegativeZero;
    }

    // Non-zero digits remain after the decimal point: not an integer.
    if (fDecimalAt < fCount) {
        return FALSE;
    }
    // More integer digits than the largest magnitude a long can hold.
    if (fDecimalAt > LONG_MIN_REP_LENGTH) {
        return FALSE;
    }
    // Fewer integer digits than that magnitude: always fits.
    if (fDecimalAt != LONG_MIN_REP_LENGTH) {
        return TRUE;
    }

    // Same number of integer digits as LONG_MIN_REP. Compare the stored
    // digits against it from the most significant end; the first digit
    // that differs decides.
    for (int32_t pos = 0; pos < fCount; ++pos) {
        const char digit = fDigits[pos];
        const char limit = LONG_MIN_REP[pos];
        if (digit != limit) {
            return digit < limit ? TRUE : FALSE;
        }
    }

    // Every stored digit matched. If fewer digits are stored than the
    // decimal position, the rest are zeros, so the magnitude is smaller.
    if (fCount < fDecimalAt) {
        return TRUE;
    }

    // The magnitude is exactly that of the most negative long. It fits
    // only when the value is negative.
    return !fIsPositive;
}
/**
 * Return true if the number represented by this object can fit into
 * an int64_t. As a side effect, trailing zeros after the decimal point
 * are trimmed from the digit list; this does not change the represented
 * value.
 *
 * @param ignoreNegativeZero when the value is zero and negative, the
 *        result is this flag; positive zero always fits.
 */
UBool
DigitList::fitsIntoInt64(UBool ignoreNegativeZero)
{
    // Drop zeros that sit after the decimal point.
    for (; fCount > fDecimalAt && fCount > 0; --fCount) {
        if (fDigits[fCount - 1] != kZero) {
            break;
        }
    }

    if (fCount == 0) {
        // Positive zero fits into an integer, but negative zero can only
        // be represented as a double. - bug 4162852
        return fIsPositive || ignoreNegativeZero;
    }

    // Non-zero digits remain after the decimal point: not an integer.
    if (fDecimalAt < fCount) {
        return FALSE;
    }
    // More integer digits than the largest magnitude an int64_t can hold.
    if (fDecimalAt > I64_MIN_REP_LENGTH) {
        return FALSE;
    }
    // Fewer integer digits than that magnitude: always fits.
    if (fDecimalAt != I64_MIN_REP_LENGTH) {
        return TRUE;
    }

    // Same number of integer digits as I64_MIN_REP. Compare the stored
    // digits against it from the most significant end; the first digit
    // that differs decides.
    for (int32_t pos = 0; pos < fCount; ++pos) {
        const char digit = fDigits[pos];
        const char limit = I64_MIN_REP[pos];
        if (digit != limit) {
            return digit < limit ? TRUE : FALSE;
        }
    }

    // Every stored digit matched. If fewer digits are stored than the
    // decimal position, the rest are zeros, so the magnitude is smaller.
    if (fCount < fDecimalAt) {
        return TRUE;
    }

    // The magnitude is exactly that of the most negative int64_t. It fits
    // only when the value is negative.
    return !fIsPositive;
}
// -------------------------------------
// 32-bit convenience overload: widens the value and forwards to the
// 64-bit overload.
void
DigitList::set(int32_t source, int32_t maximumDigits)
{
    const int64_t widened = source;
    set(widened, maximumDigits);
}
// -------------------------------------
/**
 * Set the digit list to a representation of the given 64-bit integer.
 * @param source The value to convert.
 * @param maximumDigits The maximum digits to be generated. If zero,
 * there is no maximum -- generate all digits.
 */
void
DigitList::set(int64_t source, int32_t maximumDigits)
{
    // formatBase10 writes "<sign><digits>" starting at fDecimalDigits, so
    // the sign lands in the slot in front of the digits.
    const int32_t digitCount = formatBase10(source, fDecimalDigits, MAX_DIGITS);
    fDecimalAt = digitCount;
    fCount = digitCount;
    fIsPositive = (*fDecimalDigits == '+');

    // Don't keep trailing zeros; fDecimalAt still records their position.
    for (; fCount > 1; --fCount) {
        if (fDigits[fCount - 1] != kZero) {
            break;
        }
    }

    if (maximumDigits > 0) {
        round(maximumDigits);
    }
}
/**
* Set the digit list to a representation of the given double value.
* This method supports both fixed-point and exponential notation.
* The value is printed with sprintf in exponential form and the digits and
* exponent are then picked out of that text.
* @param source Value to be converted; must not be Inf, -Inf, Nan,
* or a value <= 0.
* NOTE(review): the sign is recorded in fIsPositive below and +0/-0 are
* explicitly allowed, so the "<= 0" restriction looks stale -- confirm.
* @param maximumDigits The most fractional or total digits which should
* be converted. If it is a total-digit limit and is zero, then
* there is no maximum -- generate all digits.
* @param fixedPoint If true, then maximumDigits is the maximum
* fractional digits to be converted. If false, total digits.
*/
void
DigitList::set(double source, int32_t maximumDigits, UBool fixedPoint)
{
// for now, simple implementation; later, do proper IEEE stuff
char rep[MAX_DIGITS + 8]; // Extra space for '+', '.', e+NNN, and '\0' (actually +8 is enough)
// Write position inside this object's digit buffer.
char *digitPtr = fDigits;
// Read position inside rep; index 2 is where the digit run starts once the decimal has been overwritten below.
char *repPtr = rep + 2; // +2 to skip the sign and decimal
int32_t exponent = 0;
fIsPositive = !uprv_isNegative(source); // Allow +0 and -0
// Generate a representation of the form /[+-][0-9]+e[+-][0-9]+/
// (one digit, the decimal separator, then MAX_DBL_DIGITS - 1 more digits).
sprintf(rep, "%+1.*e", MAX_DBL_DIGITS - 1, source);
fDecimalAt = 0;
// Copy the leading digit over the decimal separator so that all the digits are contiguous from rep[2] on.
rep[2] = rep[1]; // remove decimal
while (*repPtr == kZero) {
repPtr++;
fDecimalAt--; // account for leading zeros
}
// Copy the remaining digits, up to the exponent marker, into fDigits.
while (*repPtr != 'e') {
*(digitPtr++) = *(repPtr++);
}
// MAX_DBL_DIGITS digits were printed; fDecimalAt is minus the number of leading zeros skipped.
fCount = MAX_DBL_DIGITS + fDecimalAt;
// Parse an exponent of the form /[eE][+-][0-9]+/
// The first ++repPtr steps from 'e' onto the exponent sign.
UBool negExp = (*(++repPtr) == '-');
// Accumulate the exponent digits up to the terminating NUL.
while (*(++repPtr) != 0) {
exponent = 10*exponent + *repPtr - kZero;
}
if (negExp) {
exponent = -exponent;
}
fDecimalAt += exponent + 1; // +1 for decimal removal
// The negative of the exponent represents the number of leading
// zeros between the decimal and the first non-zero digit, for
// a value < 0.1 (e.g., for 0.00123, -decimalAt == 2). If this
// is more than the maximum fraction digits, then we have an underflow
// for the printed representation.
if (fixedPoint && -fDecimalAt >= maximumDigits)
{
// If we round 0.0009 to 3 fractional digits, then we have to
// create a new one digit in the least significant location.
if (-fDecimalAt == maximumDigits && shouldRoundUp(0)) {
fCount = 1;
++fDecimalAt;
fDigits[0] = (char)'1';
} else {
// Handle an underflow to zero when we round something like
// 0.0009 to 2 fractional digits.
fCount = 0;
}
return;
}
// Eliminate digits beyond maximum digits to be displayed.
// Round up if appropriate. Do NOT round in the special
// case where maximumDigits == 0 and fixedPoint is FALSE.
if (fixedPoint || (0 < maximumDigits && maximumDigits < fCount)) {
// In fixed-point mode the limit counts fraction digits, so add the integer digits to get a total.
round(fixedPoint ? (maximumDigits + fDecimalAt) : maximumDigits);
}
else {
// Eliminate trailing zeros.
while (fCount > 1 && fDigits[fCount - 1] == kZero)
--fCount;
}
}
// -------------------------------------
/**
* Round the representation to the given number of digits.
* @param maximumDigits The maximum number of digits to be shown.
* Upon return, count will be less than or equal to maximumDigits.
*/
void
DigitList::round(int32_t maximumDigits)
{
// Eliminate digits beyond maximum digits to be displayed.
// Round up if appropriate.
if (maximumDigits >= 0 && maximumDigits < fCount)
{
if (shouldRoundUp(maximumDigits)) {
// Rounding up involved incrementing digits from LSD to MSD.
// In most cases this is simple, but in a worst case situation
// (9999..99) we have to adjust the decimalAt value.
while (--maximumDigits >= 0 && ++fDigits[maximumDigits] > '9')
;
if (maximumDigits < 0)
{
// We have all 9's, so we increment to a single digit
// of one and adjust the exponent.
fDigits[0] = (char) '1';
++fDecimalAt;
maximumDigits = 1; // Adjust the count
}
else
{
++maximumDigits; // Increment for use as count
}
}
fCount = maximumDigits;
}
// Eliminate trailing zeros.
while (fCount > 1 && fDigits[fCount-1] == kZero) {
--fCount;
}
}
/**
 * Return true if truncating the representation to the given number
 * of digits will result in an increment to the last digit. This
 * method implements half-even rounding, the default rounding mode.
 * [bnf]
 * @param maximumDigits the number of digits to keep, from 0 to
 * <code>count-1</code>. If 0, then all digits are rounded away, and
 * this method returns true if a one should be generated (e.g., formatting
 * 0.09 with "#.#"). A value outside that range means there is no digit
 * to round away, and the method returns false.
 * @return true if digit <code>maximumDigits-1</code> should be
 * incremented
 */
UBool DigitList::shouldRoundUp(int32_t maximumDigits) {
    // No valid digit exists at this position (for example, an empty list
    // produced for a zero value). Characters beyond fCount are stale and
    // must not influence the result.
    if (maximumDigits < 0 || maximumDigits >= fCount) {
        return FALSE;
    }

    // Implement IEEE half-even rounding
    const char firstDropped = fDigits[maximumDigits];
    if (firstDropped != '5') {
        return (firstDropped > '5');
    }

    // The first dropped digit is exactly 5: any non-zero digit after it
    // puts the value above the halfway point.
    for (int i = maximumDigits + 1; i < fCount; ++i) {
        if (fDigits[i] != kZero) {
            return TRUE;
        }
    }

    // Exactly halfway: round towards the even neighbour, i.e. increment
    // only when the last kept digit is odd. With no kept digit the even
    // neighbour is zero, so do not round up.
    return maximumDigits > 0 && (fDigits[maximumDigits - 1] % 2 != 0);
}
// -------------------------------------
// In the Java implementation, we need a separate set(long) because 64-bit longs
// have too much precision to fit into a 64-bit double. In C++, longs can just
// be passed to set(double) as long as they are 32 bits in size. We currently
// don't implement 64-bit longs in C++, although the code below would work for
// that with slight modifications. [LIU]
/*
void
DigitList::set(long source)
{
// handle the special case of zero using a standard exponent of 0.
// mathematically, the exponent can be any value.
if (source == 0)
{
fcount = 0;
fDecimalAt = 0;
return;
}
// we don't accept negative numbers, with the exception of long_min.
// long_min is treated specially by being represented as long_max+1,
// which is actually an impossible signed long value, so there is no
// ambiguity. we do this for convenience, so digitlist can easily
// represent the digits of a long.
bool islongmin = (source == long_min);
if (islongmin)
{
source = -(source + 1); // that is, long_max
islongmin = true;
}
sprintf(fdigits, "%d", source);
// now we need to compute the exponent. it's easy in this case; it's
// just the same as the count. e.g., 0.123 * 10^3 = 123.
fcount = strlen(fdigits);
fDecimalAt = fcount;
// here's how we represent long_max + 1. note that we always know
// that the last digit of long_max will not be 9, because long_max
// is of the form (2^n)-1.
if (islongmin)
++fdigits[fcount-1];
// finally, we trim off trailing zeros. we don't alter fDecimalAt,
// so this has no effect on the represented value. we know the first
// digit is non-zero (see code above), so we only have to check down
// to fdigits[1].
while (fcount > 1 && fdigits[fcount-1] == kzero)
--fcount;
}
*/
/**
 * Return true if this object represents the value zero. Anything with
 * no digits, or all zero digits, is zero, regardless of fDecimalAt.
 */
UBool
DigitList::isZero() const
{
    // Skip over zero digits; the value is zero when nothing else is found.
    int32_t idx = 0;
    while (idx < fCount && fDigits[idx] == kZero) {
        ++idx;
    }
    return idx >= fCount ? TRUE : FALSE;
}
/**
* We represent LONG_MIN internally as LONG_MAX + 1. This is actually an impossible
* value, for positive long integers, so we are safe in doing so.
*/
/* // This code is unused.
UBool
DigitList::isLONG_MIN() const
{
// initializeLONG_MIN_REP();
if (fCount != LONG_MIN_REP_LENGTH)
return FALSE;
for (int32_t i = 0; i < LONG_MIN_REP_LENGTH; ++i)
{
if (fDigits[i] != LONG_MIN_REP[i+1])
return FALSE;
}
return TRUE;
}
*/
// Initialize the LONG_MIN representation buffer. Note that LONG_MIN
// is stored as LONG_MAX+1 (LONG_MIN without the negative sign).
/*void
DigitList::initializeLONG_MIN_REP()
{
if (LONG_MIN_REP_LENGTH == 0)
{
char buf[LONG_DIGITS];
sprintf(buf, "%d", INT32_MIN);
LONG_MIN_REP_LENGTH = strlen(buf) - 1;
// assert(LONG_MIN_REP_LENGTH == LONG_DIGITS);
for (int32_t i=1; i<=LONG_MIN_REP_LENGTH; ++i)
LONG_MIN_REP[i-1] = buf[i];
}
}*/
U_NAMESPACE_END
//eof