cpp-terminal 1.0.0
Small C++ library for writing multiplatform terminal applications
Loading...
Searching...
No Matches
conversion.cpp
Go to the documentation of this file.
1/*
2* cpp-terminal
3* C++ library for writing multi-platform terminal applications.
4*
5* SPDX-FileCopyrightText: 2019-2024 cpp-terminal
6*
7* SPDX-License-Identifier: MIT
8*/
9
11
13
14#include <array>
15#include <string>
16
17namespace Term
18{
19namespace Private
20{
21
// Decoder state meaning "a complete code point has just been produced"; also the start state.
static constexpr std::uint8_t UTF8_ACCEPT = 0;
// Decoder state meaning "the byte sequence seen so far cannot be valid UTF-8".
static constexpr std::uint8_t UTF8_REJECT = 0xf;
24
25std::uint8_t utf8_decode_step(std::uint8_t state, std::uint8_t octet, std::uint32_t* cpp)
26{
27 static const constexpr std::array<std::uint32_t, 0x10> utf8ClassTab{0x88888888UL, 0x88888888UL, 0x99999999UL, 0x99999999UL, 0xaaaaaaaaUL, 0xaaaaaaaaUL, 0xaaaaaaaaUL, 0xaaaaaaaaUL, 0x222222ffUL, 0x22222222UL, 0x22222222UL, 0x22222222UL, 0x3333333bUL, 0x33433333UL, 0xfff5666cUL, 0xffffffffUL};
28
29 static const constexpr std::array<std::uint32_t, 0x10> utf8StateTab{0xfffffff0UL, 0xffffffffUL, 0xfffffff1UL, 0xfffffff3UL, 0xfffffff4UL, 0xfffffff7UL, 0xfffffff6UL, 0xffffffffUL, 0x33f11f0fUL, 0xf3311f0fUL, 0xf33f110fUL, 0xfffffff2UL, 0xfffffff5UL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL};
30
31 const std::uint8_t reject{static_cast<std::uint8_t>(state >> 3UL)};
32 const std::uint8_t nonAscii{static_cast<std::uint8_t>(octet >> 7UL)};
33 const std::uint8_t class_{static_cast<std::uint8_t>(!nonAscii ? 0 : (0xf & (utf8ClassTab[(octet >> 3) & 0xf] >> (4 * (octet & 7)))))};
34
35 *cpp = (state == UTF8_ACCEPT ? (octet & (0xffU >> class_)) : ((octet & 0x3fU) | (*cpp << 6)));
36
37 return (reject ? 0xf : (0xf & (utf8StateTab[class_] >> (4 * (state & 7)))));
38}
39
40std::u32string utf8_to_utf32(const std::string& str)
41{
42 std::uint32_t codepoint{0};
43 std::uint8_t state{UTF8_ACCEPT};
44 std::u32string ret;
45 for(char idx: str)
46 {
47 state = utf8_decode_step(state, static_cast<std::uint8_t>(idx), &codepoint);
48 if(state == UTF8_ACCEPT) { ret.push_back(codepoint); }
49 else if(state == UTF8_REJECT) { throw Term::Exception("Invalid byte in UTF8 encoded string"); }
50 }
51 if(state != UTF8_ACCEPT) { throw Term::Exception("Expected more bytes in UTF8 encoded string"); }
52 return ret;
53}
54
/**
 * Tell whether @p str is exactly one well-formed UTF-8 encoded code point.
 *
 * The length of @p str selects the expected form (1 to 4 bytes). Besides the lead and
 * continuation bit patterns, the lead byte and the second byte are range-checked so that
 * overlong encodings (e.g. C0 80, E0 80 80, F0 80 80 80), UTF-16 surrogates (ED A0..BF ..)
 * and values above U+10FFFF (F4 90.. and lead bytes F5..F7) are refused, matching what
 * utf8_decode_step rejects.
 *
 * @param str candidate byte sequence.
 * @return true if @p str holds a single well-formed sequence; false otherwise, including
 *         for an empty string or one longer than four bytes.
 */
bool is_valid_utf8_code_unit(const std::string& str)
{
  const auto octet  = [&str](const std::string::size_type pos) -> std::uint8_t { return static_cast<std::uint8_t>(str[pos]); };
  const auto within = [](const std::uint8_t value, const std::uint8_t low, const std::uint8_t high) -> bool { return (value >= low) && (value <= high); };

  switch(str.size())
  {
    case 1: return octet(0) < 0x80U;
    // C0 and C1 could only encode values below U+0080, i.e. overlong forms.
    case 2: return within(octet(0), 0xC2U, 0xDFU) && within(octet(1), 0x80U, 0xBFU);
    case 3:
    {
      const std::uint8_t lead{octet(0)};
      if(!within(lead, 0xE0U, 0xEFU)) { return false; }
      // E0 needs A0..BF (no overlong), ED needs 80..9F (no surrogates).
      const std::uint8_t low{static_cast<std::uint8_t>((lead == 0xE0U) ? 0xA0U : 0x80U)};
      const std::uint8_t high{static_cast<std::uint8_t>((lead == 0xEDU) ? 0x9FU : 0xBFU)};
      return within(octet(1), low, high) && within(octet(2), 0x80U, 0xBFU);
    }
    case 4:
    {
      const std::uint8_t lead{octet(0)};
      if(!within(lead, 0xF0U, 0xF4U)) { return false; }
      // F0 needs 90..BF (no overlong), F4 needs 80..8F (nothing above U+10FFFF).
      const std::uint8_t low{static_cast<std::uint8_t>((lead == 0xF0U) ? 0x90U : 0x80U)};
      const std::uint8_t high{static_cast<std::uint8_t>((lead == 0xF4U) ? 0x8FU : 0xBFU)};
      return within(octet(1), low, high) && within(octet(2), 0x80U, 0xBFU) && within(octet(3), 0x80U, 0xBFU);
    }
    default: return false;
  }
}
71
72} // namespace Private
73
74} // namespace Term
std::u32string utf8_to_utf32(const std::string &str)
std::uint8_t utf8_decode_step(std::uint8_t state, std::uint8_t octet, std::uint32_t *cpp)
bool is_valid_utf8_code_unit(const std::string &str)
Definition args.cpp:13