50 lines
1.5 KiB
C++
50 lines
1.5 KiB
C++
/***
    Copyright (C) 2022 J Reece Wilson (a/k/a "Reece"). All rights reserved.

    File: BOM.cpp
    Date: 2022-2-14
    Author: Reece
***/
|
|
#include <Source/RuntimeInternal.hpp>
|
|
#include "Encoding.hpp"
|
|
#include "BOM.hpp"
|
|
|
|
namespace Aurora::Locale::Encoding
|
|
{
|
|
#define ADD_PATTERN(str, code) {str, {ECodePage::code, AuArraySize(str) - 1}}
|
|
static const AuList<AuTuple<const char *, BOM>> gKnownBoms =
|
|
{
|
|
ADD_PATTERN("\xFF\xFE\x00\x00", eUTF32),
|
|
ADD_PATTERN("\x00\x00\xFE\xFF", eUTF32BE),
|
|
ADD_PATTERN("\x84\x31\x95\x33", e18030),
|
|
ADD_PATTERN("\xDD\x73\x66\x73", eEnumInvalid), // UTF-EBCDIC
|
|
ADD_PATTERN("\xEF\xBB\xBF", eUTF8),
|
|
ADD_PATTERN("\xF7\x64\x4C", eEnumInvalid), // UTF-1
|
|
ADD_PATTERN("\xFB\xEE\x28", eUTF7), // UTF-7
|
|
ADD_PATTERN("\x2B\x2F\x76", eEnumInvalid), // BOCU-1
|
|
ADD_PATTERN("\x0E\xFE\xFF", eEnumInvalid), // SCSU
|
|
ADD_PATTERN("\xFF\xFE", eUTF16), // UTF-16
|
|
ADD_PATTERN("\xFE\xFF", eUTF16BE) // UTF-16
|
|
};
|
|
#undef ADD_PATTERN
|
|
|
|
// Identifies a byte-order mark at the start of `binary`.
//
// Scans gKnownBoms in declaration order and returns the BOM of the first entry
// whose signature bytes are a prefix of the view. When no signature matches
// (including when the view is shorter than every signature), a
// default-constructed BOM is returned.
AUKN_SYM BOM DecodeBOM(const AuMemoryViewRead &binary)
{
    for (const auto &[pattern, candidate] : gKnownBoms)
    {
        // The length test comes first so the comparison never reads past the
        // end of the caller's view.
        const bool bMatches = (binary.length >= candidate.length) &&
                              (AuMemcmp(binary.ptr, pattern, candidate.length) == 0);

        if (bMatches)
        {
            return candidate;
        }
    }

    return {};
}
|
|
} |