cpp-terminal 1.0.0
Small C++ library for writing multiplatform terminal applications
Loading...
Searching...
No Matches
unicode.cpp
Go to the documentation of this file.
1/*
2* cpp-terminal
3* C++ library for writing multi-platform terminal applications.
4*
5* SPDX-FileCopyrightText: 2019-2025 cpp-terminal
6*
7* SPDX-License-Identifier: MIT
8*/
9
11
13
14#if defined(_WIN32)
15 #include <limits>
16 #pragma warning(push)
17 #pragma warning(disable : 4668)
18 #define WIN32_LEAN_AND_MEAN
19 #include <windows.h>
20 #pragma warning(pop)
21#endif
22
23#include <array>
24
25#if defined(_WIN32)
26std::string Term::Private::to_narrow(const std::wstring& in)
27{
28 if(in.empty()) return std::string();
29 static constexpr DWORD flag{WC_ERR_INVALID_CHARS};
30 std::size_t in_size{in.size()};
31 if(in_size > static_cast<size_t>((std::numeric_limits<int>::max)())) throw Term::Exception("String size is to big " + std::to_string(in_size) + "/" + std::to_string((std::numeric_limits<int>::max)()));
32 const int ret_size{::WideCharToMultiByte(CP_UTF8, flag, in.data(), static_cast<int>(in_size), nullptr, 0, nullptr, nullptr)};
33 if(ret_size == 0) throw Term::Private::WindowsException(::GetLastError());
34 std::string ret(static_cast<std::size_t>(ret_size), '\0');
35 int ret_error{::WideCharToMultiByte(CP_UTF8, flag, in.data(), static_cast<int>(in_size), &ret[0], ret_size, nullptr, nullptr)};
36 if(ret_error == 0) throw Term::Private::WindowsException(::GetLastError());
37 return ret;
38}
39
40std::wstring Term::Private::to_wide(const std::string& in)
41{
42 if(in.empty()) return std::wstring();
43 static constexpr DWORD flag{MB_ERR_INVALID_CHARS};
44 std::size_t in_size{in.size()};
45 if(in_size > static_cast<size_t>((std::numeric_limits<int>::max)())) throw Term::Exception("String size is to big " + std::to_string(in_size) + "/" + std::to_string((std::numeric_limits<int>::max)()));
46 const int ret_size{::MultiByteToWideChar(CP_UTF8, flag, in.data(), static_cast<int>(in_size), nullptr, 0)};
47 if(ret_size == 0) throw Term::Private::WindowsException(::GetLastError());
48 std::wstring ret(static_cast<std::size_t>(ret_size), '\0');
49 int ret_error{::MultiByteToWideChar(CP_UTF8, flag, in.data(), static_cast<int>(in_size), &ret[0], ret_size)};
50 if(ret_error == 0) throw Term::Private::WindowsException(::GetLastError());
51 return ret;
52}
53#endif
54
55std::string Term::Private::utf32_to_utf8(const char32_t& codepoint, const bool& exception)
56{
57 static const constexpr std::array<std::uint32_t, 4> size{0x7F, 0x07FF, 0xFFFF, 0x10FFFF};
58 static const constexpr std::uint8_t mask{0x80};
59 static const constexpr std::uint8_t add{0x3F};
60 static const constexpr std::array<std::uint8_t, 3> mask_first{0x1F, 0x0F, 0x07};
61 static const constexpr std::array<std::uint8_t, 3> add_first{0xC0, 0xE0, 0xF0};
62 static const constexpr std::array<std::uint8_t, 4> shift{0, 6, 12, 18};
63 static const constexpr std::uint8_t max_size{4};
64 std::string ret;
65 ret.reserve(max_size);
66 if(codepoint <= size[0]) { ret = {static_cast<char>(codepoint)}; } // Plain ASCII
67 else if(codepoint <= size[1]) { ret = {static_cast<char>(((codepoint >> shift[1]) & mask_first[0]) | add_first[0]), static_cast<char>(((codepoint >> shift[0]) & add) | mask)}; }
68 else if(codepoint <= size[2]) { ret = {static_cast<char>(((codepoint >> shift[2]) & mask_first[1]) | add_first[1]), static_cast<char>(((codepoint >> shift[1]) & add) | mask), static_cast<char>(((codepoint >> shift[0]) & add) | mask)}; }
69 else if(codepoint <= size[3]) { ret = {static_cast<char>(((codepoint >> shift[3]) & mask_first[2]) | add_first[2]), static_cast<char>(((codepoint >> shift[2]) & add) | mask), static_cast<char>(((codepoint >> shift[1]) & add) | mask), static_cast<char>(((codepoint >> shift[0]) & add) | mask)}; }
70 else if(exception) { throw Term::Exception("Invalid UTF32 codepoint."); }
71 else { ret = "\xEF\xBF\xBD"; }
72 return ret;
73}
74
75std::string Term::Private::utf32_to_utf8(const std::u32string& str, const bool& exception)
76{
77 std::string ret;
78 for(const char32_t codepoint: str) { ret.append(utf32_to_utf8(codepoint, exception)); }
79 return ret;
80}
std::wstring to_wide(const std::string &str)
Definition unicode.cpp:40
std::string utf32_to_utf8(const char32_t &codepoint, const bool &exception=false)
Encode a codepoint as a UTF-8 std::string.
Definition unicode.cpp:55
std::string to_narrow(const std::wstring &wstr)
Definition unicode.cpp:26
InputFileHandler & in
Definition file.cpp:43