v8/src/unicode-decoder.h
Clemens Hammacher 5f6510825a [cleanup] Fix remaining (D)CHECK macro usages
This CL fixes all occurrences that don't require special OWNER reviews,
or can be reviewed by Michi.

After this one, we should be able to reenable the readability/check
cpplint check.

R=mstarzinger@chromium.org

Bug: v8:6837, v8:6921
Cq-Include-Trybots: master.tryserver.chromium.linux:linux_chromium_rel_ng;master.tryserver.v8:v8_linux_noi18n_rel_ng
Change-Id: Ic81d68d5534eaa795b7197fed5c41ed158361d62
Reviewed-on: https://chromium-review.googlesource.com/721120
Commit-Queue: Clemens Hammacher <clemensh@chromium.org>
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Cr-Commit-Position: refs/heads/master@{#48670}
2017-10-18 10:12:31 +00:00

122 lines
3.8 KiB
C++

// Copyright 2014 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_UNICODE_DECODER_H_
#define V8_UNICODE_DECODER_H_
#include <sys/types.h>
#include "src/globals.h"
#include "src/utils.h"
namespace unibrow {
// Buffer-size-independent part of the UTF-8 -> UTF-16 decoder. It keeps the
// decoded length plus a pointer to the part of the input that is not cached;
// the cache buffer itself is supplied by the caller (see Utf8Decoder, which
// embeds one).
class V8_EXPORT_PRIVATE Utf8DecoderBase {
 public:
  // Creates an empty decoder; real initialization is left to the subclass.
  inline Utf8DecoderBase();
  // Decodes |stream| right away by forwarding all arguments to Reset().
  inline Utf8DecoderBase(uint16_t* buffer, size_t buffer_length,
                         const uint8_t* stream, size_t stream_length);

  // Returns the value recorded in utf16_length_.
  size_t Utf16Length() const { return utf16_length_; }

 protected:
  // Reads all characters of |stream| and sets utf16_length_. The first
  // |buffer_length| UTF-16 chars are cached in |buffer|.
  void Reset(uint16_t* buffer, size_t buffer_length, const uint8_t* stream,
             size_t stream_length);

  // Slow path used by Utf8Decoder::WriteUtf16 for everything that is not
  // served from the cache: it is handed the unbuffered input and the output
  // position/count that remain after the cached prefix was copied.
  static void WriteUtf16Slow(const uint8_t* stream, size_t stream_length,
                             uint16_t* data, size_t length);

  // Input bytes (start and size) passed to WriteUtf16Slow().
  const uint8_t* unbuffered_start_;
  size_t unbuffered_length_;
  // Total decoded length; WriteUtf16() never writes more than this.
  size_t utf16_length_;
  // When set, WriteUtf16() treats the cache as holding one element less than
  // its capacity.
  bool last_byte_of_buffer_unused_;

 private:
  DISALLOW_COPY_AND_ASSIGN(Utf8DecoderBase);
};
// Utf8DecoderBase bundled with its own cache of kBufferSize UTF-16 code
// units, so callers only have to provide the UTF-8 input.
template <size_t kBufferSize>
class Utf8Decoder : public Utf8DecoderBase {
 public:
  // Empty decoder; buffer_ is left uninitialized until Reset() is called.
  inline Utf8Decoder() {}
  // Decodes |length| bytes starting at |stream| immediately.
  inline Utf8Decoder(const char* stream, size_t length);

  // Re-targets the decoder at a new input, reusing the embedded cache.
  inline void Reset(const char* stream, size_t length);

  // Writes at most |length| UTF-16 code units to |data| and returns how many
  // were written.
  inline size_t WriteUtf16(uint16_t* data, size_t length) const;

 private:
  uint16_t buffer_[kBufferSize];
};
Utf8DecoderBase::Utf8DecoderBase()
: unbuffered_start_(nullptr),
unbuffered_length_(0),
utf16_length_(0),
last_byte_of_buffer_unused_(false) {}
// Constructs the decoder and decodes |stream| in one step. No member is
// initialized here; all state is expected to be established by Reset().
// NOTE(review): Reset() is defined outside this header - confirm it assigns
// every data member.
Utf8DecoderBase::Utf8DecoderBase(uint16_t* buffer, size_t buffer_length,
                                 const uint8_t* stream,
                                 size_t stream_length) {
  this->Reset(buffer, buffer_length, stream, stream_length);
}
// Hands the embedded cache and the input, viewed as raw bytes, to the base
// class constructor, which performs the decoding.
template <size_t kBufferSize>
Utf8Decoder<kBufferSize>::Utf8Decoder(const char* stream, size_t length)
    : Utf8DecoderBase(&buffer_[0], kBufferSize,
                      reinterpret_cast<const uint8_t*>(stream), length) {}
// Restarts decoding on a new input. The char-based stream is reinterpreted as
// bytes and forwarded, together with the embedded cache, to the base class.
template <size_t kBufferSize>
void Utf8Decoder<kBufferSize>::Reset(const char* stream, size_t length) {
  const uint8_t* bytes = reinterpret_cast<const uint8_t*>(stream);
  Utf8DecoderBase::Reset(buffer_, kBufferSize, bytes, length);
}
// Copies up to |length| UTF-16 code units of the decoded input into |data|.
// The request is clamped to utf16_length_, and the clamped count is returned.
// |length| must be greater than zero (checked in debug builds).
template <size_t kBufferSize>
size_t Utf8Decoder<kBufferSize>::WriteUtf16(uint16_t* data,
                                            size_t length) const {
  DCHECK_GT(length, 0);

  // Never produce more than what the input decodes to.
  const size_t wanted = length < utf16_length_ ? length : utf16_length_;

  // Number of cache entries that can be served with a plain copy; one less
  // than the capacity when the last slot was flagged as unused.
  size_t cached = kBufferSize;
  if (last_byte_of_buffer_unused_) cached -= 1;

  // Fast path: the whole request is covered by the cache.
  if (wanted <= cached) {
    v8::internal::MemCopy(data, buffer_, wanted * sizeof(uint16_t));
    return wanted;
  }

  // Otherwise copy the entire cached prefix ...
  v8::internal::MemCopy(data, buffer_, cached * sizeof(uint16_t));

  // ... and produce the remainder the slow way from the unbuffered input.
  DCHECK_NOT_NULL(unbuffered_start_);
  WriteUtf16Slow(unbuffered_start_, unbuffered_length_, data + cached,
                 wanted - cached);
  return wanted;
}
// Helpers for the Latin-1 range (code units 0x00..0xff).
class Latin1 {
 public:
  // Largest code unit that is still Latin-1.
  static const unsigned int kMaxChar = 0xff;

  // Maps a non-Latin-1 character to a single Latin-1 character. Returns 0 if
  // there is no such conversion, or if the character does not convert back
  // to Latin-1 via the inverse operation (upper to lower, etc).
  static inline uint16_t ConvertNonLatin1ToLatin1(uint16_t);
};
// Returns the Latin-1 counterpart of the non-Latin-1 character |c|, or 0 when
// none is known. |c| must lie above Latin1::kMaxChar (checked in debug
// builds).
uint16_t Latin1::ConvertNonLatin1ToLatin1(uint16_t c) {
  DCHECK_GT(c, Latin1::kMaxChar);

  // These are equivalent characters in Unicode: Greek capital/small mu
  // (U+039C, U+03BC) both map to the micro sign (U+00B5).
  if (c == 0x39c || c == 0x3bc) return 0xb5;

  // U+0178 is the uppercase of a Latin-1 character (U+00FF) but lies outside
  // of Latin-1 itself.
  if (c == 0x178) return 0xff;

  return 0;
}
} // namespace unibrow
#endif // V8_UNICODE_DECODER_H_