2005-10-07 21:58:27 +00:00
|
|
|
/*
|
2006-02-06 18:03:11 +00:00
|
|
|
********************************************************************************
|
2016-02-23 10:40:09 +00:00
|
|
|
* Copyright (C) 2005-2016, International Business Machines
|
2006-02-06 18:03:11 +00:00
|
|
|
* Corporation and others. All Rights Reserved.
|
|
|
|
********************************************************************************
|
|
|
|
*/
|
2005-10-07 21:58:27 +00:00
|
|
|
|
|
|
|
#include "unicode/utypes.h"
|
2006-02-06 18:03:11 +00:00
|
|
|
|
2006-05-09 18:06:10 +00:00
|
|
|
#if !UCONFIG_NO_CONVERSION
|
2005-10-07 21:58:27 +00:00
|
|
|
#include "unicode/ucsdet.h"
|
2006-02-06 18:03:11 +00:00
|
|
|
#include "csdetect.h"
|
|
|
|
#include "csmatch.h"
|
2013-09-17 06:57:53 +00:00
|
|
|
#include "csrsbcs.h"
|
|
|
|
#include "csrmbcs.h"
|
|
|
|
#include "csrutf8.h"
|
|
|
|
#include "csrucode.h"
|
|
|
|
#include "csr2022.h"
|
2006-02-06 18:03:11 +00:00
|
|
|
|
|
|
|
#include "cmemory.h"
|
|
|
|
|
2006-09-04 16:36:21 +00:00
|
|
|
U_NAMESPACE_USE
|
|
|
|
|
2006-02-06 18:03:11 +00:00
|
|
|
/* Allocates room for `count` objects of `type` with uprv_malloc and casts the
 * result to `type *`. The size is computed as (count) * sizeof(type) with no
 * overflow check, and no constructors are run on the returned storage. */
#define NEW_ARRAY(type,count) (type *) uprv_malloc((count) * sizeof(type))
|
|
|
|
/* Releases `array` through uprv_free after casting it to void *. */
#define DELETE_ARRAY(array) uprv_free((void *) (array))
|
|
|
|
|
2006-02-06 20:45:30 +00:00
|
|
|
U_CDECL_BEGIN
|
2006-02-06 18:03:11 +00:00
|
|
|
|
2006-03-24 23:30:09 +00:00
|
|
|
/**
 * Creates a CharsetDetector and hands it back as an opaque C handle.
 *
 * @param status  In/out error code. Nothing is done when it already holds a
 *                failure on entry. Set to U_MEMORY_ALLOCATION_ERROR when the
 *                detector object could not be allocated.
 * @return        The new detector, or NULL when status reports a failure.
 *                A non-NULL result is released with ucsdet_close().
 */
U_CAPI UCharsetDetector * U_EXPORT2
ucsdet_open(UErrorCode *status)
{
    if (U_FAILURE(*status)) {
        return NULL;
    }

    CharsetDetector *csd = new CharsetDetector(*status);

    // NOTE(review): assumes CharsetDetector uses ICU's non-throwing allocation
    // (UMemory), so `new` can yield NULL without running the constructor and
    // without touching *status -- confirm in csdetect.h. Without this check a
    // NULL handle would be returned alongside a success code.
    if (csd == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }

    // The constructor reports its own failures through status; discard the
    // partially initialised object in that case.
    if (U_FAILURE(*status)) {
        delete csd;
        return NULL;
    }

    return (UCharsetDetector *) csd;
}
|
|
|
|
|
2006-03-24 23:30:09 +00:00
|
|
|
/**
 * Destroys a detector handle.
 *
 * The handle is cast back to CharsetDetector and deleted; deleting a NULL
 * pointer is a no-op in C++, so a NULL handle is accepted.
 *
 * @param ucsd  Detector handle to destroy.
 */
U_CAPI void U_EXPORT2
ucsdet_close(UCharsetDetector *ucsd)
{
    delete (CharsetDetector *) ucsd;
}
|
|
|
|
|
2006-03-24 23:30:09 +00:00
|
|
|
/**
 * Forwards the input bytes to CharsetDetector::setText.
 *
 * @param ucsd    Detector handle; dereferenced without a NULL check.
 * @param textIn  Input text, passed through unchanged.
 * @param len     Length argument, passed through unchanged.
 * @param status  In/out error code. The call is skipped when it already holds
 *                a failure; this wrapper never modifies it.
 */
U_CAPI void U_EXPORT2
ucsdet_setText(UCharsetDetector *ucsd, const char *textIn, int32_t len, UErrorCode *status)
{
    if (!U_FAILURE(*status)) {
        CharsetDetector *detector = (CharsetDetector *) ucsd;

        detector->setText(textIn, len);
    }
}
|
|
|
|
|
2006-03-24 23:30:09 +00:00
|
|
|
/**
 * Returns the value of CharsetMatch::getName for a match handle.
 *
 * @param ucsm    Match handle; dereferenced without a NULL check.
 * @param status  In/out error code. When it already holds a failure the match
 *                is not consulted.
 * @return        The name reported by the match, or NULL on a prior failure.
 */
U_CAPI const char * U_EXPORT2
ucsdet_getName(const UCharsetMatch *ucsm, UErrorCode *status)
{
    const char *name = NULL;

    if (!U_FAILURE(*status)) {
        CharsetMatch *match = (CharsetMatch *) ucsm;

        name = match->getName();
    }

    return name;
}
|
|
|
|
|
2006-03-24 23:30:09 +00:00
|
|
|
/**
 * Returns the value of CharsetMatch::getConfidence for a match handle.
 *
 * @param ucsm    Match handle; dereferenced without a NULL check.
 * @param status  In/out error code. When it already holds a failure the match
 *                is not consulted.
 * @return        The confidence reported by the match, or 0 on a prior failure.
 */
U_CAPI int32_t U_EXPORT2
ucsdet_getConfidence(const UCharsetMatch *ucsm, UErrorCode *status)
{
    int32_t confidence = 0;

    if (!U_FAILURE(*status)) {
        CharsetMatch *match = (CharsetMatch *) ucsm;

        confidence = match->getConfidence();
    }

    return confidence;
}
|
|
|
|
|
2006-03-24 23:30:09 +00:00
|
|
|
/**
 * Returns the value of CharsetMatch::getLanguage for a match handle.
 *
 * @param ucsm    Match handle; dereferenced without a NULL check.
 * @param status  In/out error code. When it already holds a failure the match
 *                is not consulted.
 * @return        The language reported by the match, or NULL on a prior failure.
 */
U_CAPI const char * U_EXPORT2
ucsdet_getLanguage(const UCharsetMatch *ucsm, UErrorCode *status)
{
    const char *language = NULL;

    if (!U_FAILURE(*status)) {
        CharsetMatch *match = (CharsetMatch *) ucsm;

        language = match->getLanguage();
    }

    return language;
}
|
|
|
|
|
2006-03-24 23:30:09 +00:00
|
|
|
/**
 * Runs CharsetDetector::detect and returns its result as an opaque match handle.
 *
 * @param ucsd    Detector handle; dereferenced without a NULL check.
 * @param status  In/out error code. Detection is skipped when it already holds
 *                a failure; otherwise it is passed on to detect().
 * @return        The pointer produced by detect(), cast to UCharsetMatch, or
 *                NULL on a prior failure.
 */
U_CAPI const UCharsetMatch * U_EXPORT2
ucsdet_detect(UCharsetDetector *ucsd, UErrorCode *status)
{
    const UCharsetMatch *result = NULL;

    if (!U_FAILURE(*status)) {
        CharsetDetector *detector = (CharsetDetector *) ucsd;

        result = (const UCharsetMatch *) detector->detect(*status);
    }

    return result;
}
|
|
|
|
|
2006-03-24 23:30:09 +00:00
|
|
|
/**
 * Forwards a declared encoding name to CharsetDetector::setDeclaredEncoding.
 *
 * @param ucsd      Detector handle; dereferenced without a NULL check.
 * @param encoding  Encoding name, passed through unchanged.
 * @param length    Length argument, passed through unchanged.
 * @param status    In/out error code. The call is skipped when it already
 *                  holds a failure; this wrapper never modifies it.
 */
U_CAPI void U_EXPORT2
ucsdet_setDeclaredEncoding(UCharsetDetector *ucsd, const char *encoding, int32_t length, UErrorCode *status)
{
    if (!U_FAILURE(*status)) {
        CharsetDetector *detector = (CharsetDetector *) ucsd;

        detector->setDeclaredEncoding(encoding, length);
    }
}
|
|
|
|
|
2006-03-24 23:30:09 +00:00
|
|
|
/**
 * Runs CharsetDetector::detectAll and returns its result as an array of
 * opaque match handles.
 *
 * @param ucsd             Detector handle; dereferenced without a NULL check.
 * @param maxMatchesFound  Out-parameter handed to detectAll(). Must not be
 *                         NULL; a NULL pointer is rejected with
 *                         U_ILLEGAL_ARGUMENT_ERROR instead of being
 *                         dereferenced.
 * @param status           In/out error code. Detection is skipped when it
 *                         already holds a failure; otherwise it is passed on
 *                         to detectAll().
 * @return                 The pointer produced by detectAll(), cast to
 *                         UCharsetMatch **, or NULL on failure.
 */
U_CAPI const UCharsetMatch ** U_EXPORT2
ucsdet_detectAll(UCharsetDetector *ucsd,
                 int32_t *maxMatchesFound, UErrorCode *status)
{
    if (U_FAILURE(*status)) {
        return NULL;
    }

    // *maxMatchesFound is evaluated below, so a NULL pointer would be
    // undefined behaviour; report it through status instead.
    if (maxMatchesFound == NULL) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    CharsetDetector *csd = (CharsetDetector *) ucsd;

    return (const UCharsetMatch **) csd->detectAll(*maxMatchesFound, *status);
}
|
|
|
|
|
2006-03-24 23:30:09 +00:00
|
|
|
// U_CAPI const char * U_EXPORT2
|
2006-02-06 18:03:11 +00:00
|
|
|
// ucsdet_getDetectableCharsetName(const UCharsetDetector *csd, int32_t index, UErrorCode *status)
|
|
|
|
// {
|
|
|
|
// if(U_FAILURE(*status)) {
|
2006-08-19 21:27:08 +00:00
|
|
|
// return 0;
|
2006-02-06 18:03:11 +00:00
|
|
|
// }
|
|
|
|
// return csd->getCharsetName(index,*status);
|
|
|
|
// }
|
|
|
|
|
2006-03-24 23:30:09 +00:00
|
|
|
// U_CAPI int32_t U_EXPORT2
|
2006-02-06 18:03:11 +00:00
|
|
|
// ucsdet_getDetectableCharsetsCount(const UCharsetDetector *csd, UErrorCode *status)
|
|
|
|
// {
|
|
|
|
// if(U_FAILURE(*status)) {
|
2006-08-19 21:27:08 +00:00
|
|
|
// return -1;
|
2006-02-06 18:03:11 +00:00
|
|
|
// }
|
|
|
|
// return UCharsetDetector::getDetectableCount();
|
|
|
|
// }
|
|
|
|
|
2006-03-24 23:30:09 +00:00
|
|
|
/**
 * Reports the detector's strip-tags flag.
 *
 * There is no status parameter, so a NULL handle cannot be reported as an
 * error; FALSE is returned for it instead.
 *
 * @param ucsd  Detector handle, may be NULL.
 * @return      The value of CharsetDetector::getStripTagsFlag, or FALSE when
 *              ucsd is NULL.
 */
U_CAPI UBool U_EXPORT2
ucsdet_isInputFilterEnabled(const UCharsetDetector *ucsd)
{
    UBool enabled = FALSE;

    if (ucsd != NULL) {
        CharsetDetector *detector = (CharsetDetector *) ucsd;

        enabled = detector->getStripTagsFlag();
    }

    return enabled;
}
|
|
|
|
|
2006-03-24 23:30:09 +00:00
|
|
|
/**
 * Sets the detector's strip-tags flag and returns the value it had before.
 *
 * There is no status parameter, so a NULL handle cannot be reported as an
 * error; FALSE is returned for it and nothing is changed.
 *
 * @param ucsd    Detector handle, may be NULL.
 * @param filter  New value passed to CharsetDetector::setStripTagsFlag.
 * @return        The flag value read before the update, or FALSE when ucsd
 *                is NULL.
 */
U_CAPI UBool U_EXPORT2
ucsdet_enableInputFilter(UCharsetDetector *ucsd, UBool filter)
{
    UBool previous = FALSE;

    if (ucsd != NULL) {
        CharsetDetector *detector = (CharsetDetector *) ucsd;

        // Read the old value first so it can be handed back to the caller.
        previous = detector->getStripTagsFlag();
        detector->setStripTagsFlag(filter);
    }

    return previous;
}
|
|
|
|
|
2006-03-24 23:30:09 +00:00
|
|
|
/**
 * Forwards to CharsetMatch::getUChars for a match handle.
 *
 * @param ucsm    Match handle; dereferenced without a NULL check.
 * @param buf     Output buffer, passed through unchanged.
 * @param cap     Capacity argument, passed through unchanged.
 * @param status  In/out error code. The call is skipped when it already holds
 *                a failure; otherwise the pointer itself is passed on to
 *                getUChars().
 * @return        The value returned by getUChars(), or 0 on a prior failure.
 */
U_CAPI int32_t U_EXPORT2
ucsdet_getUChars(const UCharsetMatch *ucsm,
                 UChar *buf, int32_t cap, UErrorCode *status)
{
    int32_t length = 0;

    if (!U_FAILURE(*status)) {
        CharsetMatch *match = (CharsetMatch *) ucsm;

        length = match->getUChars(buf, cap, status);
    }

    return length;
}
|
2013-09-17 06:57:53 +00:00
|
|
|
|
|
|
|
/**
 * Forwards to CharsetDetector::setDetectableCharset to enable or disable one
 * charset on a detector.
 *
 * @param ucsd      Detector handle; dereferenced without a NULL check.
 * @param encoding  Charset name, passed through unchanged.
 * @param enabled   Flag passed through unchanged.
 * @param status    In/out error code. Nothing is done when it already holds a
 *                  failure on entry, matching the other status-taking
 *                  functions in this file; otherwise it is passed on to
 *                  setDetectableCharset().
 */
U_CAPI void U_EXPORT2
ucsdet_setDetectableCharset(UCharsetDetector *ucsd, const char *encoding, UBool enabled, UErrorCode *status)
{
    if (U_FAILURE(*status)) {
        return;
    }

    ((CharsetDetector *) ucsd)->setDetectableCharset(encoding, enabled, *status);
}
|
|
|
|
|
|
|
|
/**
 * Returns the enumeration produced by the static
 * CharsetDetector::getAllDetectableCharsets.
 *
 * The detector handle is unused (its parameter name is commented out).
 *
 * @param status  In/out error code. Nothing is done when it already holds a
 *                failure on entry, matching the other status-taking functions
 *                in this file; otherwise it is passed on to
 *                getAllDetectableCharsets().
 * @return        The enumeration returned by the detector class, or NULL on a
 *                prior failure.
 */
U_CAPI UEnumeration * U_EXPORT2
ucsdet_getAllDetectableCharsets(const UCharsetDetector * /*ucsd*/, UErrorCode *status)
{
    if (U_FAILURE(*status)) {
        return NULL;
    }

    return CharsetDetector::getAllDetectableCharsets(*status);
}
|
|
|
|
|
|
|
|
/**
 * Returns the enumeration produced by CharsetDetector::getDetectableCharsets
 * for one detector.
 *
 * @param ucsd    Detector handle; dereferenced without a NULL check.
 * @param status  In/out error code. Nothing is done when it already holds a
 *                failure on entry, matching the other status-taking functions
 *                in this file; otherwise it is passed on to
 *                getDetectableCharsets().
 * @return        The enumeration returned by the detector, or NULL on a prior
 *                failure.
 */
U_DRAFT UEnumeration * U_EXPORT2
ucsdet_getDetectableCharsets(const UCharsetDetector *ucsd, UErrorCode *status)
{
    if (U_FAILURE(*status)) {
        return NULL;
    }

    return ((CharsetDetector *) ucsd)->getDetectableCharsets(*status);
}
|
|
|
|
|
2006-02-06 20:45:30 +00:00
|
|
|
U_CDECL_END
|
2005-10-07 21:58:27 +00:00
|
|
|
|
2013-09-17 06:57:53 +00:00
|
|
|
|
2006-05-09 18:06:10 +00:00
|
|
|
#endif
|