/*** Copyright (C) 2022 J Reece Wilson (a/k/a "Reece"). All rights reserved. File: BOM.cpp Date: 2022-2-14 Author: Reece ***/ #include #include "Encoding.hpp" #include "BOM.hpp" namespace Aurora::Locale::Encoding { #define ADD_PATTERN(str, code) {str, {ECodePage::code, AuArraySize(str) - 1}} static const AuList> gKnownBoms = { ADD_PATTERN("\xFF\xFE\x00\x00", eUTF32), ADD_PATTERN("\x00\x00\xFE\xFF", eUTF32BE), ADD_PATTERN("\x84\x31\x95\x33", e18030), ADD_PATTERN("\xDD\x73\x66\x73", eEnumInvalid), // UTF-EBCDIC ADD_PATTERN("\xEF\xBB\xBF", eUTF8), ADD_PATTERN("\xF7\x64\x4C", eEnumInvalid), // UTF-1 ADD_PATTERN("\xFB\xEE\x28", eUTF7), // UTF-7 ADD_PATTERN("\x2B\x2F\x76", eEnumInvalid), // BOCU-1 ADD_PATTERN("\x0E\xFE\xFF", eEnumInvalid), // SCSU ADD_PATTERN("\xFF\xFE", eUTF16), // UTF-16 ADD_PATTERN("\xFE\xFF", eUTF16BE) // UTF-16 }; #undef ADD_PATTERN AUKN_SYM BOM DecodeBOM(const AuMemoryViewRead &binary) { for (const auto &[string, bom] : gKnownBoms) { if (binary.length < bom.length) { continue; } if (AuMemcmp(binary.ptr, string, bom.length) != 0) { continue; } return bom; } return {}; } }