AuROXTL/Include/auROXTL/auStringUtils.hpp

/***
    Copyright (C) 2022 J Reece Wilson (a/k/a "Reece"). All rights reserved.

    File: auStringUtils.hpp
    Date: 2022-2-1
    File: AuroraUtils.hpp
    File: auROXTLUtils.hpp
    Date: 2021-6-9
    Author: Reece
    Purpose: Introduces UTF-8 utilities and the historically missing c++ string methods.
             AuCodepoints**** supports the initial UTF-8 specifications with codepoints up to 2^31 combinations (illegal UTF-16)

    Implements: AuStringContains, AuEndsWith, AuStartsWith, AuReplaceAll, AuSplitString (views), AuSplitStringLegacy (returns an array of strings instead of views)
                AuToLower(char), AuToUpper(char), AuToLower(view), AuToUpper(view).
    Implements: AuCodepointsTransform, AuCodepointsTransformASCIIOp, AuCodepointsForEach, AuCodepointsToLower, AuCodepointsToUpper,
                AuCodepointsCount, AuCodepointsNextLength, AuCodepointsDecodeOne, AuCodepointsDecode, AuCodepointsEncodeInto,
                AuCodepointsGetByteOffset(CodepointOffset_t),AuCodepointsGetByteLength(CodepointOffset_t),
                AuCodepointsFindByteOffset[Unsafe], AuCodepointsFindCodepointOffset(view, CodepointOffset_t), AuCodepointsFindCodepointOffset(CodepointByteOffset_t),
                AuCodepointsContains,
                AuCodepointsReplaceAll, AuCodepointsSplitString (views),
                AuCodepointsFindPreviousValidByteOffsetFromOffset, AuCodepointsFindPreviousValidByteOffsetFromByteOffset,
                AuCodepointsIsEqualIgnoreCase, AuCodepointsStartsWithEqualIgnoreCase, AuCodepointsEndsWithEqualIgnoreCase,
                AuCodepointsReverseIterate, AuCodepointsReverseIterateSubStrPrefixView, AuCodepointsReverseIterateSubStrSuffixView

    Warning:   By codepoints, we mean UTF32.
               Aurora uses UTF8 strings everywhere by convention.

    Warning:   For translating between locales (including utf8-32), defer to AuLocale (Aurora::Locale) in the Aurora Runtime.
               Although, you can decode and encode UTF8 to 32 here (AuCodepointsDecodeOne, AuCodepointsDecode, AuCodepointsEncodeInto).
***/
#pragma once

// offset in bytes
/* using CodepointByteOffset_t = decltype(AuROString::npos); */

// offset in codepoints
/* using CodepointOffset_t = AuUInt; */

// Going by its name, the maximum number of bytes one UTF-8 encoded codepoint may span in this build.
// The value is fixed at preprocessing time:
//   AURORA_UTF8_I_SUCK_AND_WANT_MODERN_UTF8               -> 4
//   AURORA_UTF8_I_AM_REALLY_SPECIAL_AND_WANT_7_BYTE_UTF8  -> 7
//   neither of the above (default)                        -> 6
// When both macros are defined, the first one listed takes priority.
static const AuUInt8 kAuCodepointUTF8MaxBytes =
#if defined(AURORA_UTF8_I_SUCK_AND_WANT_MODERN_UTF8)
    4
#elif defined(AURORA_UTF8_I_AM_REALLY_SPECIAL_AND_WANT_7_BYTE_UTF8)
    7
#else
    // default:
    6
#endif
    ;

// none of these are defined by default
// Requesting AURORA_UTF8_I_SUCK_AND_WANT_MODERN_UTF8_WANT_THROW also switches on
// AURORA_UTF8_THROW_WHEN_STUPIDILY_ILLEGAL, unless the includer has already defined it.
#if defined(AURORA_UTF8_I_SUCK_AND_WANT_MODERN_UTF8_WANT_THROW) && !defined(AURORA_UTF8_THROW_WHEN_STUPIDILY_ILLEGAL)
    #define AURORA_UTF8_THROW_WHEN_STUPIDILY_ILLEGAL
#endif

// AURORA_UTF8_I_SUCK_AND_WANT_MODERN_UTF8_WANT_THROW | throws on 5-8 byte sequences, otherwise allows 5-6 byte sequences
//                                                                                    (We have use cases in ecosystem for non-utf16 strings, such as side-channel low-overhead text formatting)
//                                                                                    (Enable this if you're boring)
// AURORA_UTF8_THROW_WHEN_STUPIDILY_ILLEGAL           | throws on 7-8 byte sequences, otherwise tries to process 7 byte sequences, breaks early, returns 0, and/or returns empty (check impl)
// AURORA_UTF8_I_SUCK_AND_WANT_MODERN_UTF8            | disallows 5-8 byte sequence decodes. ...IsEqualIgnoreCase gets dumbed down to a memcmp, assuming the entire block of memory is to be compared;
//                                                                                           ...ForEach will break early with false (usually implies a user break early condition);
//                                                                                           ...Translate will return an empty container


#include <auROXTL/Strings/auStringUtils.hpp>
#include <auROXTL/Strings/auCodepointsUTF8.hpp>
#include <auROXTL/Strings/auCodepointsUTF8.ipp>

// Overridable hook naming the callable used for value-to-string conversion.
// Falls back to std::to_string when the includer has not supplied its own definition.
#ifndef AURORA_RUNTIME_TO_STRING
    #define AURORA_RUNTIME_TO_STRING std::to_string
#endif

/**
 * Converts a value to its textual representation.
 *
 * Which backend is used is decided at preprocessing time:
 *  - with _AUHAS_FMT defined, the value is formatted through fmt::format("{}", ...)
 *    and the result is wrapped in an AuString;
 *  - otherwise the call is forwarded to AURORA_RUNTIME_TO_STRING.
 *
 * @tparam T   type of the value; must be accepted by whichever backend is active
 * @param  obj value to convert
 * @return the string produced by the active backend
 */
template <class T>
static auline AuString AuToString(const T &obj)
{
#if !defined(_AUHAS_FMT)
    // Fallback path.
    // TODO: to_chars (locale independent)
    return AURORA_RUNTIME_TO_STRING(obj);
#else
    // Preferred path: locale independent and better optimized!
    return AuString(fmt::format("{}", obj));
#endif
}