winpty/agent/UnicodeEncoding.h

#ifndef UNICODE_ENCODING_H
#define UNICODE_ENCODING_H

// Encode the Unicode codepoint with UTF-8.  The buffer must be at least 4
// bytes in size.
static inline int encodeUtf8(char *out, unsigned int code) {
    if (code < 0x80) {
        out[0] = code;
        return 1;
    } else if (code < 0x800) {
        out[0] = ((code >> 6) & 0x1F) | 0xC0;
        out[1] = ((code >> 0) & 0x3F) | 0x80;
        return 2;
    } else if (code < 0x10000) {
        out[0] = ((code >> 12) & 0x0F) | 0xE0;
        out[1] = ((code >>  6) & 0x3F) | 0x80;
        out[2] = ((code >>  0) & 0x3F) | 0x80;
        return 3;
    } else if (code < 0x110000) {
        out[0] = ((code >> 18) & 0x07) | 0xF0;
        out[1] = ((code >> 12) & 0x3F) | 0x80;
        out[2] = ((code >>  6) & 0x3F) | 0x80;
        out[3] = ((code >>  0) & 0x3F) | 0x80;
        return 4;
    } else {
        // Encoding error
        return 0;
    }
}

// Encode the Unicode codepoint with UTF-8.  The buffer must be at least 2
// elements in size.
static inline int encodeUtf16(wchar_t *out, unsigned int code) {
    if (code < 0x10000) {
        out[0] = code;
        return 1;
    } else if (code < 0x110000) {
        code -= 0x10000;
        out[0] = 0xD800 | (code >> 10);
        out[1] = 0xDC00 | (code & 0x3FF);
        return 2;
    } else {
        // Encoding error
        return 0;
    }
}

static inline unsigned int decodeSurrogatePair(wchar_t ch1, wchar_t ch2) {
    return ((ch1 - 0xD800) << 10) + (ch2 - 0xDC00) + 0x10000;
}

#endif // UNICODE_ENCODING_H