scuffed-code/icu4c/source/i18n/inputext.h

// Copyright (C) 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 **********************************************************************
 *   Copyright (C) 2005-2008, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 */

#ifndef __INPUTEXT_H
#define __INPUTEXT_H

/**
 * \file
 * \internal
 *
 * This is an internal header for the Character Set Detection code. The
 * name is probably too generic...
 */


#include "unicode/uobject.h"

#if !UCONFIG_NO_CONVERSION

U_NAMESPACE_BEGIN 

/**
 * Holder for the byte data examined by the Character Set Detection code.
 *
 * The data members are public and are read directly by users of this
 * internal class. Instances cannot be copied: both copy construction and
 * copy assignment are private.
 */
class InputText : public UMemory
{
    // Prevent copying. The class stores raw pointers and declares a
    //   destructor, so a member-wise copy (by construction or by
    //   assignment) must not be available to client code.
    InputText(const InputText &);
    // Declared only, never defined: any accidental use fails at compile
    //   time (outside the class) or at link time (inside the class).
    InputText &operator=(const InputText &);
public:
    // status: in/out ICU error code.
    //   NOTE(review): presumably set on allocation failure - confirm in
    //   inputext.cpp.
    InputText(UErrorCode &status);
    ~InputText();

    // Supplies the input bytes and their length.
    //   NOTE(review): presumably recorded in fRawInput / fRawLength - confirm.
    void setText(const char *in, int32_t len);
    // Supplies the encoding name declared for the input, and its length.
    //   NOTE(review): presumably recorded in fDeclaredEncoding - confirm.
    void setDeclaredEncoding(const char *encoding, int32_t len);
    UBool isSet() const;
    // fStripTags: NOTE(review): presumably requests removal of markup
    //   before the text is checked - confirm in inputext.cpp.
    void MungeInput(UBool fStripTags);

    // The text to be checked.  Markup will have been
    //   removed if appropriate.
    uint8_t    *fInputBytes;
    int32_t     fInputLen;          // Length of the byte data in fInputBytes.
    // byte frequency statistics for the input text.
    //   Value is percent, not absolute.
    //   Value is rounded up, so zero really means zero occurrences.
    int16_t  *fByteStats;
    UBool     fC1Bytes;          // True if any bytes in the range 0x80 - 0x9F are in the input; false by default
    char     *fDeclaredEncoding;

    const uint8_t           *fRawInput;     // Original, untouched input bytes.
    //  If user gave us a byte array, this is it.
    //  If user gave us a stream, it's read to a
    //   buffer here.
    int32_t                  fRawLength;    // Length of data in fRawInput array.

};

U_NAMESPACE_END

#endif
#endif /* __INPUTEXT_H */
ICU-12564 Update copyright notice in trunk X-SVN-Rev: 38848 2016-06-15 18:58:17 +00:00			`// Copyright (C) 2016 and later: Unicode, Inc. and others.`
			`// License & terms of use: http://www.unicode.org/copyright.html`
ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00			`/*`
			`**********************************************************************`
ICU-12564 Reverted r38761 and r38762, because we want to prepend the Unicode copyright for existing source files, instead of replacing copyright comments. X-SVN-Rev: 38776 2016-05-31 21:45:07 +00:00			`* Copyright (C) 2005-2008, International Business Machines`
			`* Corporation and others. All Rights Reserved.`
ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00			`**********************************************************************`
			`*/`

			`#ifndef __INPUTEXT_H`
			`#define __INPUTEXT_H`

ICU-4639 code review comments. X-SVN-Rev: 20125 2006-08-21 23:35:23 +00:00			`/**`
			`* \file`
			`* \internal`
			`*`
			`* This is an internal header for the Character Set Detection code. The`
			`* name is probably too generic...`
			`*/`


ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00			`#include "unicode/uobject.h"`

ICU-5198 Disable charset detection when UCONFIG_NO_CONVERSION is 1. X-SVN-Rev: 19622 2006-05-09 18:06:10 +00:00			`#if !UCONFIG_NO_CONVERSION`

ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00			`U_NAMESPACE_BEGIN`

			`class InputText : public UMemory`
			`{`
ICU-6132 Recover from allocation errors more gracefully. X-SVN-Rev: 23399 2008-02-08 09:10:22 +00:00			`// Prevent copying`
			`InputText(const InputText &);`
ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00			`public:`
ICU-6132 Recover from allocation errors more gracefully. X-SVN-Rev: 23399 2008-02-08 09:10:22 +00:00			`InputText(UErrorCode &status);`
ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00			`~InputText();`

			`void setText(const char *in, int32_t len);`
			`void setDeclaredEncoding(const char *encoding, int32_t len);`
			`UBool isSet() const;`
			`void MungeInput(UBool fStripTags);`

			`// The text to be checked. Markup will have been`
			`// removed if appropriate.`
			`uint8_t *fInputBytes;`
ICU-4639 code review comments. X-SVN-Rev: 20125 2006-08-21 23:35:23 +00:00			`int32_t fInputLen; // Length of the byte data in fInputBytes.`
ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00			`// byte frequency statistics for the input text.`
			`// Value is percent, not absolute.`
			`// Value is rounded up, so zero really means zero occurences.`
			`int16_t *fByteStats;`
			`UBool fC1Bytes; // True if any bytes in the range 0x80 - 0x9F are in the input;false by default`
			`char *fDeclaredEncoding;`

			`const uint8_t *fRawInput; // Original, untouched input bytes.`
			`// If user gave us a byte array, this is it.`
			`// If user gave us a stream, it's read to a`
			`// buffer here.`
			`int32_t fRawLength; // Length of data in fRawInput array.`

			`};`

			`U_NAMESPACE_END`

ICU-5198 Disable charset detection when UCONFIG_NO_CONVERSION is 1. X-SVN-Rev: 19622 2006-05-09 18:06:10 +00:00			`#endif`
ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00			`#endif /* __INPUTEXT_H */`