ICU-5297 Charset detector confidence: use natural log, removing a gratuitous difference between the C++ and Java versions.

X-SVN-Rev: 29932
This commit is contained in:
Andy Heninger 2011-04-29 22:44:44 +00:00
parent aa67d1675a
commit e19f8e0591

View File

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (C) 2005-2008, International Business Machines
* Copyright (C) 2005-2011, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*/
@ -220,9 +220,9 @@ int32_t CharsetRecog_mbcs::match_mbcs(InputText *det, const uint16_t commonChars
// Frequency of occurrence statistics exist.
//
double maxVal = log10((double)doubleByteCharCount / 4); /*(float)?*/
double maxVal = log((double)doubleByteCharCount / 4); /*(float)?*/
double scaleFactor = 90.0 / maxVal;
confidence = (int32_t)(log10((double)commonCharCount+1) * scaleFactor + 10.0);
confidence = (int32_t)(log((double)commonCharCount+1) * scaleFactor + 10.0);
confidence = min(confidence, 100);
}