2016-06-15 18:58:17 +00:00
|
|
|
// Copyright (C) 2016 and later: Unicode, Inc. and others.
|
|
|
|
// License & terms of use: http://www.unicode.org/copyright.html
|
2006-02-06 18:03:11 +00:00
|
|
|
/*
|
|
|
|
**********************************************************************
|
2016-05-31 21:45:07 +00:00
|
|
|
* Copyright (C) 2005-2008, International Business Machines
|
|
|
|
* Corporation and others. All Rights Reserved.
|
2006-02-06 18:03:11 +00:00
|
|
|
**********************************************************************
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef __INPUTEXT_H
|
|
|
|
#define __INPUTEXT_H
|
|
|
|
|
2006-08-21 23:35:23 +00:00
|
|
|
/**
|
|
|
|
* \file
|
|
|
|
* \internal
|
|
|
|
*
|
|
|
|
* This is an internal header for the Character Set Detection code. The
|
|
|
|
* name is probably too generic...
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
2006-02-06 18:03:11 +00:00
|
|
|
#include "unicode/uobject.h"
|
|
|
|
|
2006-05-09 18:06:10 +00:00
|
|
|
#if !UCONFIG_NO_CONVERSION
|
|
|
|
|
2006-02-06 18:03:11 +00:00
|
|
|
U_NAMESPACE_BEGIN
|
|
|
|
|
|
|
|
class InputText : public UMemory
|
|
|
|
{
|
2008-02-08 09:10:22 +00:00
|
|
|
// Prevent copying
|
|
|
|
InputText(const InputText &);
|
2006-02-06 18:03:11 +00:00
|
|
|
public:
|
2008-02-08 09:10:22 +00:00
|
|
|
InputText(UErrorCode &status);
|
2006-02-06 18:03:11 +00:00
|
|
|
~InputText();
|
|
|
|
|
|
|
|
void setText(const char *in, int32_t len);
|
|
|
|
void setDeclaredEncoding(const char *encoding, int32_t len);
|
|
|
|
UBool isSet() const;
|
|
|
|
void MungeInput(UBool fStripTags);
|
|
|
|
|
|
|
|
// The text to be checked. Markup will have been
|
|
|
|
// removed if appropriate.
|
|
|
|
uint8_t *fInputBytes;
|
2006-08-21 23:35:23 +00:00
|
|
|
int32_t fInputLen; // Length of the byte data in fInputBytes.
|
2006-02-06 18:03:11 +00:00
|
|
|
// byte frequency statistics for the input text.
|
|
|
|
// Value is percent, not absolute.
|
|
|
|
// Value is rounded up, so zero really means zero occurences.
|
|
|
|
int16_t *fByteStats;
|
|
|
|
UBool fC1Bytes; // True if any bytes in the range 0x80 - 0x9F are in the input;false by default
|
|
|
|
char *fDeclaredEncoding;
|
|
|
|
|
|
|
|
const uint8_t *fRawInput; // Original, untouched input bytes.
|
|
|
|
// If user gave us a byte array, this is it.
|
|
|
|
// If user gave us a stream, it's read to a
|
|
|
|
// buffer here.
|
|
|
|
int32_t fRawLength; // Length of data in fRawInput array.
|
|
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
U_NAMESPACE_END
|
|
|
|
|
2006-05-09 18:06:10 +00:00
|
|
|
#endif
|
2006-02-06 18:03:11 +00:00
|
|
|
#endif /* __INPUTEXT_H */
|