1 #pragma once // Source encoding: UTF-8 with BOM (π is a lowercase Greek "pi"). 17 CPPX_USE_STD( distance, invalid_argument, invoke, ref, runtime_error,
next );
21 int m_n_bad_groups = 0;
22 int m_n_noncanonical_groups = 0;
23 int m_incomplete_code_point = 0;
24 int m_n_completion_bytes = 0;
34 using runtime_error::runtime_error;
37 template<
class In_iterator >
41 auto& it = it_ref.get();
43 or
CPPX_FAIL_( invalid_argument,
"Called with an empty byte sequence `it == beyond`" );
45 const Byte byte_value = *it; ++it;
69 const int n_first_value_bits = (8 - (n_bytes + 1));
70 const uint32_t first_value_bits_mask = ((1 << n_first_value_bits) - 1);
72 uint32_t code_point = (byte_value & first_value_bits_mask);
73 for(
int i = 1; i < n_bytes; ++i )
77 m_incomplete_code_point = code_point;
78 m_n_completion_bytes = n_bytes - (i - 1);
82 const Byte continuation_byte = *it;
86 code_point = (code_point << 6) | (continuation_byte & 0x3F);
auto is_continuation_byte(const char ch) -> Truth
auto is_lead_byte(const char ch) -> Truth
auto hopefully(const Truth condition) -> Truth
auto n_bad_groups() const noexcept -> Size
auto n_noncanonical_groups() const noexcept -> Size
auto is_single_byte(const char ch) -> Truth
CPPX_USE_STD(distance, invalid_argument, invoke, ref, runtime_error, next)
auto group_size_for_lead_byte(const char lead_byte) -> int
auto next(P_< const char > p) -> P_< const char >
auto code_point_from_bytes(In_out_ref_< In_iterator > it_ref, const In_iterator beyond) -> uint32_t
unsigned char Byte
Default choice of byte type.
Signed_< size_t > Size
A Signed_ equivalent of size_t.
Macros for generating more concise and clear using statements, primarily $use_cppx and $use_std,...
auto is_valid_lead_byte(const char ch) -> Truth
Signed Size and Index, plus unsigned equivalents Unsigned_size and Unsigned_index.
#define CPPX_FAIL_(X,...)