ICU-5297 Charset detector confidence: use the natural log, removing a gratuitous difference between the C++ and Java versions.
X-SVN-Rev: 29932
This commit is contained in:
parent
aa67d1675a
commit
e19f8e0591
@ -1,6 +1,6 @@
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2005-2008, International Business Machines
|
||||
* Copyright (C) 2005-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*/
|
||||
@ -220,9 +220,9 @@ int32_t CharsetRecog_mbcs::match_mbcs(InputText *det, const uint16_t commonChars
|
||||
// Frequency of occurrence statistics exist.
|
||||
//
|
||||
|
||||
double maxVal = log10((double)doubleByteCharCount / 4); /*(float)?*/
|
||||
double maxVal = log((double)doubleByteCharCount / 4); /*(float)?*/
|
||||
double scaleFactor = 90.0 / maxVal;
|
||||
confidence = (int32_t)(log10((double)commonCharCount+1) * scaleFactor + 10.0);
|
||||
confidence = (int32_t)(log((double)commonCharCount+1) * scaleFactor + 10.0);
|
||||
|
||||
confidence = min(confidence, 100);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user