1// <text_encoding> -*- C++ -*-
3// Copyright The GNU Toolchain Authors.
5// This file is part of the GNU ISO C++ Library. This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 3, or (at your option)
// any later version.
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23// <http://www.gnu.org/licenses/>.
25/** @file include/text_encoding
26 * This is a Standard C++ Library header.
 */
29#ifndef _GLIBCXX_TEXT_ENCODING
30#define _GLIBCXX_TEXT_ENCODING
33#pragma GCC system_header
36#include <bits/requires_hosted.h>
38#define __glibcxx_want_text_encoding
39#include <bits/version.h>
41#ifdef __cpp_lib_text_encoding
44#include <bits/functional_hash.h> // hash
45#include <bits/ranges_util.h> // view_interface
46#include <bits/unicode.h> // __charset_alias_match
47#include <ext/numeric_traits.h> // __int_traits
49namespace std _GLIBCXX_VISIBILITY(default)
51_GLIBCXX_BEGIN_NAMESPACE_VERSION
/**
54 * @brief An interface for accessing the IANA Character Sets registry.
 */
// Integer type used to store identifiers in the lookup table.  It is also
// the underlying type of the public `id` enumeration.
63 using id = __INT_LEAST32_TYPE__;
// Orders a table entry against a bare identifier value.  _S_find_id passes
// a value of this integer type to std::lower_bound over _S_reps, which
// needs exactly this comparison.
68 operator<(const _Rep& __r, id __m) noexcept
69 { return __r._M_id < __m; }
// Tests a table entry's name against a string_view.
// NOTE(review): the declaration of _M_name is not visible in this excerpt.
// If it is a const char*, it is converted to string_view here, making this
// an exact character-by-character comparison rather than a pointer
// comparison -- confirm.
72 operator==(const _Rep& __r, string_view __name) noexcept
73 { return __r._M_name == __name; }
// Maximum number of characters of an encoding name that an object stores.
// The constructors copy at most this many characters into _M_name, which is
// declared with max_name_length + 1 elements.
77 static constexpr size_t max_name_length = 63;
// Identifiers for the encodings known to the lookup table.  The underlying
// type is _Rep::id, so an enumerator converts directly to and from the
// integer stored in a table entry (see mib() and _S_find_id).
// NOTE(review): only part of the enumerator list is visible in this excerpt.
79 enum class id : _Rep::id
95 HalfWidthKatakana = 15,
98 EUCPkdFmtJapanese = 18,
99 EUCFixWidJapanese = 19,
100 ISO4UnitedKingdom = 20,
101 ISO11SwedishForNames = 21,
105 ISO60DanishNorwegian = 25,
108 ISO646basic1983 = 28,
110 ISO2IntlRefVersion = 30,
119 ISO13JISC6220jp = 41,
120 ISO14JISC6220ro = 42,
121 ISO16Portuguese = 43,
123 ISO19LatinGreek = 45,
125 ISO27LatinGreek1 = 47,
126 ISO5427Cyrillic = 48,
127 ISO42JISC62261978 = 49,
128 ISO47BSViewdata = 50,
131 ISO51INISCyrillic = 53,
136 ISO61Norwegian2 = 58,
137 ISO70VideotexSupp1 = 59,
138 ISO84Portuguese2 = 60,
145 ISO91JISC62291984a = 67,
// The digits read 6299 here but 6229 in the neighbouring enumerators.
// NOTE(review): this looks like it reproduces the registered spelling.
// Enumerator names are public interface, so check the C++ standard's list
// before "correcting" it.
146 ISO92JISC62991984b = 68,
147 ISO93JIS62291984badd = 69,
148 ISO94JIS62291984hand = 70,
149 ISO95JIS62291984handadd = 71,
150 ISO96JISC62291984kana = 72,
155 ISO111ECMACyrillic = 77,
156 ISO121Canadian1 = 78,
157 ISO122Canadian2 = 79,
158 ISO123CSAZ24341985gr = 80,
164 ISO139CSN369103 = 86,
165 ISO141JUSIB1002 = 87,
168 ISO147Macedonian = 90,
172 ISO153GOST1976874 = 94,
176 ISO159JISX02121990 = 98,
191 OSDEBCDICDF0415 = 115,
192 OSDEBCDICDF03IRV = 116,
193 OSDEBCDICDF041 = 117,
199 UnicodeLatin1 = 1003,
200 UnicodeJapanese = 1004,
201 UnicodeIBM1261 = 1005,
202 UnicodeIBM1268 = 1006,
203 UnicodeIBM1276 = 1007,
204 UnicodeIBM1264 = 1008,
205 UnicodeIBM1265 = 1009,
218 Windows30Latin1 = 2000,
219 Windows31Latin1 = 2001,
220 Windows31Latin2 = 2002,
221 Windows31Latin5 = 2003,
223 AdobeStandardEncoding = 2005,
225 VenturaInternational = 2007,
227 PC850Multilingual = 2009,
228 PC8DanishNorwegian = 2012,
229 PC862LatinHebrew = 2013,
239 MicrosoftPublishing = 2023,
// Declared out of numeric order (2011 after 2023).  Lookups go through the
// integer value, so declaration order has no effect on them.
260 PC8CodePage437 = 2011,
282 IBMEBCDICATDE = 2064,
// Default construction relies on the default member initializers: _M_rep
// points at _S_reps + 1 (the id::unknown entry) and _M_name is all zeros.
342 constexpr text_encoding() = default;
// Constructs from an encoding name.  _S_find_name maps the name to its
// table entry, or to the id::other entry when no table name matches.
345 text_encoding(string_view __enc) noexcept
346 : _M_rep(_S_find_name(__enc))
// Store the caller's spelling of the name.  copy() transfers at most
// max_name_length characters and _M_name has one more zero-initialized
// element than that, so the stored name stays null-terminated.
348 __enc.copy(_M_name, max_name_length);
// Constructs from an enumerator.  _S_find_id looks the value up in the
// table; if it is not found, that function asserts and falls back to the
// id::unknown entry.
351 // @pre i has the value of one of the enumerators of id.
353 text_encoding(id __i) noexcept
354 : _M_rep(_S_find_id(__i))
// Copy the entry's name only when it is non-empty.  The first two table
// entries have empty names, so for them _M_name keeps its all-zero default.
356 if (string_view __name(_M_rep->_M_name); !__name.empty())
357 __name.copy(_M_name, max_name_length);
// Returns the identifier of the table entry this object refers to,
// converted from the table's integer type to the id enumeration.
360 constexpr id mib() const noexcept { return id(_M_rep->_M_id); }
// Returns a pointer to the object's own name buffer.  The buffer is empty
// unless a constructor copied a name into it, and the pointer is only valid
// while this object is alive.
362 constexpr const char* name() const noexcept { return _M_name; }
// A view over the names stored in the lookup table for one encoding.  It
// derives from ranges::view_interface and holds a single pointer to the
// table entry where iteration starts.
// NOTE(review): braces and access specifiers are not visible in this excerpt.
364 struct aliases_view : ranges::view_interface<aliases_view>
// Empty tag type returned by end(); an iterator compares equal to it once
// it is no longer dereferenceable.
368 struct _Sentinel { };
// Only declared here; the definition follows the _Iterator class.
371 constexpr _Iterator begin() const noexcept;
// All sentinels are interchangeable, so a value-initialized one is returned.
372 constexpr _Sentinel end() const noexcept { return {}; }
// text_encoding::aliases() creates views through the constructor below.
375 friend struct text_encoding;
// __r is the table entry iteration starts from; a null pointer produces an
// empty view.
377 constexpr explicit aliases_view(const _Rep* __r) : _M_begin(__r) { }
379 const _Rep* _M_begin = nullptr;
// Returns the view of names recorded for this encoding.  If the current
// table entry has an empty name (as the first two entries do), the view is
// built from a null pointer; its begin() iterator is then not
// dereferenceable and so compares equal to end(), i.e. the view is empty.
382 constexpr aliases_view
383 aliases() const noexcept
385 return _M_rep->_M_name[0] ? aliases_view(_M_rep) : aliases_view{nullptr};
// Equality of two encodings.  Normally the identifiers decide.  When both
// identifiers are id::other they carry no information about which encoding
// is meant, so the stored names are compared through _S_comp instead.
// NOTE(review): the braces and the keyword separating the two returns are
// not visible in this excerpt.
388 friend constexpr bool
389 operator==(const text_encoding& __a,
390 const text_encoding& __b) noexcept
392 if (__a.mib() == id::other && __b.mib() == id::other) [[unlikely]]
393 return _S_comp(__a._M_name, __b._M_name);
395 return __a.mib() == __b.mib();
// Compares an encoding's identifier with an enumerator.  The stored name is
// never consulted, so any object whose identifier is id::other compares
// equal to id::other whatever its name.
398 friend constexpr bool
399 operator==(const text_encoding& __encoding, id __i) noexcept
400 { return __encoding.mib() == __i; }
// NOTE(review): the declarator line of this consteval function is not
// visible in this excerpt; the deleted declaration of literal() further
// down suggests that this is literal() -- confirm.
// Builds the result from a compiler-provided encoding name:
// __GNUC_EXECUTION_CHARSET_NAME when that macro is defined, otherwise
// __clang_literal_encoding__ when that one is.  The last return yields a
// default-constructed object; presumably it is the fallback branch for
// compilers that define neither macro (the directive introducing it is
// not visible here).
403 static consteval text_encoding
406#ifdef __GNUC_EXECUTION_CHARSET_NAME
407 return text_encoding(__GNUC_EXECUTION_CHARSET_NAME);
408#elif defined __clang_literal_encoding__
409 return text_encoding(__clang_literal_encoding__);
411 return text_encoding();
// Function body whose header is not visible in this excerpt (presumably the
// environment_is template, given the _Id parameter -- confirm).  It builds a
// text_encoding from the enumerator _Id and asks that object whether it is
// the environment's encoding.
421 { return text_encoding(_Id)._M_is_environment(); }
// Deleted counterparts of the three static functions: with these
// declarations in effect, any call to them is ill-formed.
// NOTE(review): the preprocessor condition that selects between the real
// definitions and these deleted declarations is not visible here.
423 static text_encoding literal() = delete;
424 static text_encoding environment() = delete;
425 template<id> static bool environment_is() = delete;
// Points into _S_reps at the entry describing this encoding.
429 const _Rep* _M_rep = _S_reps + 1; // id::unknown
// Name storage: room for max_name_length characters plus a terminating
// null.  Zero-initialized, so an object without a name reads as "".
430 char _M_name[max_name_length + 1] = {0};
// Declaration only; neither the definition nor the return-type line is
// visible in this excerpt.
433 _M_is_environment() const;
// Lookup table of (identifier, name) entries.  Layout relied on by the
// search functions:
//   [0]     identifier 1, empty name  -- returned for id::other
//   [1]     identifier 2, empty name  -- returned for id::unknown
//   [2...]  entries supplied by <bits/text_encoding-data.h>
//   [last]  sentinel entry, excluded from every search range
// _S_find_id runs std::lower_bound over the identifiers, so the entries
// have to be in ascending identifier order.  Consecutive entries with the
// same identifier are alternative names (aliases) of one encoding.
435 static inline constexpr _Rep _S_reps[] = {
436 { 1, "" }, { 2, "" },
// The macro requests the table rows from the data header.  If it is still
// defined after the include, the header did not do its job and the build is
// stopped (presumably the header undefines it after emitting the rows).
437#define _GLIBCXX_GET_ENCODING_DATA
438#include <bits/text_encoding-data.h>
439#ifdef _GLIBCXX_GET_ENCODING_DATA
440# error "Invalid text_encoding data"
442 { 9999, nullptr }, // sentinel
// Reports whether two encoding names match.  This is a thin wrapper around
// __unicode::__charset_alias_match; the matching rules themselves live in
// <bits/unicode.h> and are not visible here.
445 static constexpr bool
446 _S_comp(string_view __a, string_view __b)
447 { return __unicode::__charset_alias_match(__a, __b); }
// Maps an encoding name to a table entry.  Names are compared with _S_comp
// against every named entry in turn; when none matches, the first table
// element (id::other) is returned.
449 static constexpr const _Rep*
450 _S_find_name(string_view __name) noexcept
// Fast path, compiled in only when the offset macro is defined: an exact
// match for "UTF-8" is answered by indexing straight into the table.
452#ifdef _GLIBCXX_TEXT_ENCODING_UTF8_OFFSET
453 // Optimize the common UTF-8 case to avoid a linear search through all
454 // strings in the table using the _S_comp function.
455 if (__name == "UTF-8")
456 return _S_reps + 2 + _GLIBCXX_TEXT_ENCODING_UTF8_OFFSET;
459 // The first two array elements (other and unknown) don't have names.
460 // The last element is a sentinel that can never match anything.
461 const auto __first = _S_reps + 2, __end = std::end(_S_reps) - 1;
462 for (auto __r = __first; __r != __end; ++__r)
463 if (_S_comp(__r->_M_name, __name))
465 // Might have matched an alias. Find the first entry for this ID.
466 const auto __id = __r->_M_id;
// Walk back while the preceding entry still has the same identifier.
// NOTE(review): the loop body and the return of the found entry are not
// visible in this excerpt.
467 while (__r[-1]._M_id == __id)
471 return _S_reps; // id::other
// Maps an enumerator to its table entry by binary search on the integer
// identifier.  The search range stops before the sentinel element, so
// lower_bound returns at worst a pointer to the sentinel itself, which is
// still safe to dereference in the check below.
474 static constexpr const _Rep*
475 _S_find_id(id __id) noexcept
477 const auto __i = (_Rep::id)__id;
478 const auto __r = std::lower_bound(_S_reps, std::end(_S_reps) - 1, __i);
// lower_bound yields the first entry whose identifier is not less than __i,
// so an exact match still has to be verified.
// NOTE(review): the statement executed on a match is not visible in this
// excerpt (presumably it returns __r).
479 if (__r->_M_id == __i) [[likely]]
// No entry has this identifier: the precondition was violated.  Builds
// with assertions enabled stop here; otherwise fall back to id::unknown.
483 // Preconditions: i has the value of one of the enumerators of id.
484 __glibcxx_assert(__r->_M_id == __i);
485 return _S_reps + 1; // id::unknown
// Hash support for text_encoding.  Only the identifier is hashed.  This is
// consistent with operator==, because two objects that compare equal always
// have the same identifier.  Objects with identifier id::other all share one
// hash value even when their names differ.
491 struct hash<text_encoding>
494 operator()(const text_encoding& __enc) const noexcept
495 { return std::hash<text_encoding::id>()(__enc.mib()); }
// Iterator over the table entries that share one identifier, i.e. over the
// names stored for a single encoding.  It keeps a pointer into the table
// (_M_rep) and the identifier it was created for (_M_id).  A position is
// dereferenceable only while the entry it points at carries that identifier.
// NOTE(review): many lines of this class (braces, several signatures and
// branch bodies) are not visible in this excerpt.
498 class text_encoding::aliases_view::_Iterator
// Dereferencing yields the entry's name as a C string, returned by value.
501 using value_type = const char*;
502 using reference = const char*;
503 using difference_type = int;
// A default-constructed iterator has a null _M_rep, so it is not
// dereferenceable and compares equal to the sentinel.
505 constexpr _Iterator() = default;
// Body of a member whose signature is not visible (presumably the
// dereference operator): returns the current entry's name when the position
// is dereferenceable, and asserts otherwise.
510 if (_M_dereferenceable()) [[likely]]
511 return _M_rep->_M_name;
514 __glibcxx_assert(_M_dereferenceable());
// Same guard in another member whose signature and guarded statement are
// not visible (presumably increment).
522 if (_M_dereferenceable()) [[likely]]
526 __glibcxx_assert(_M_dereferenceable());
// Guard of a further member with no visible signature (presumably
// decrement): stepping back is permitted only when the preceding table
// entry has the same identifier.
535 const bool __decrementable
536 = _M_rep != nullptr && _M_rep[-1]._M_id == _M_id;
537 if (__decrementable) [[likely]]
541 __glibcxx_assert(__decrementable);
// Indexing is expressed through iterator addition followed by dereference.
564 operator[](difference_type __n) const
565 { return *(*this + __n); }
// Moves the iterator by __n entries in either direction.  Nothing is
// attempted on an iterator with a null _M_rep.
// NOTE(review): the tests on the sign of __n and the statements run when a
// range check passes or fails are not visible in this excerpt.
568 operator+=(difference_type __n)
570 if (_M_rep != nullptr)
// Forward check: the target must lie inside the table, and the entry just
// before the target must belong to this identifier.  Checking the entry
// before the target rather than the target itself allows landing one
// position past the last name.
574 if (__n < (std::end(_S_reps) - _M_rep)
575 && _M_rep[__n - 1]._M_id == _M_id) [[likely]]
// Backward check: the target must lie after the first table element and
// must itself belong to this identifier.
582 if (__n > (_S_reps - _M_rep)
583 && _M_rep[__n]._M_id == _M_id) [[likely]]
590 __glibcxx_assert(_M_rep != nullptr);
// Subtraction is addition of the negated offset.  The minimum value cannot
// be negated without overflow, so that case is forwarded with the maximum
// value instead.
// NOTE(review): presumably acceptable because an offset of that magnitude
// is out of range whatever its sign -- confirm.
595 operator-=(difference_type __n)
597 using _Traits = __gnu_cxx::__int_traits<difference_type>;
598 if (__n == _Traits::__min) [[unlikely]]
599 return operator+=(_Traits::__max);
600 return operator+=(-__n);
// Distance between two iterators created for the same identifier.  For
// iterators of different identifiers the assertion fires; builds without
// assertions get the maximum difference_type value back.
603 constexpr difference_type
604 operator-(const _Iterator& __i) const
606 if (_M_id == __i._M_id)
607 return _M_rep - __i._M_rep;
608 __glibcxx_assert(_M_id == __i._M_id);
609 return __gnu_cxx::__int_traits<difference_type>::__max;
// Defaulted, i.e. member-wise equality.
613 operator==(const _Iterator&) const = default;
// The end of the sequence is reached as soon as the position is no longer
// dereferenceable.
616 operator==(_Sentinel) const noexcept
617 { return !_M_dereferenceable(); }
// Orders two iterators by table position.  Both must have been created for
// the same identifier (asserted).
619 constexpr strong_ordering
620 operator<=>(const _Iterator& __i) const
622 __glibcxx_assert(_M_id == __i._M_id);
623 return _M_rep <=> __i._M_rep;
// Non-member arithmetic; the bodies are not visible in this excerpt.
626 friend constexpr _Iterator
627 operator+(_Iterator __i, difference_type __n)
633 friend constexpr _Iterator
634 operator+(difference_type __n, _Iterator __i)
640 friend constexpr _Iterator
641 operator-(_Iterator __i, difference_type __n)
648 friend struct text_encoding;
// Starts iteration at table entry __r and remembers that entry's
// identifier.  A null __r records identifier 0.
651 _Iterator(const _Rep* __r) noexcept
652 : _M_rep(__r), _M_id(__r ? __r->_M_id : 0)
// True while the iterator points at an entry that still belongs to the
// identifier it was created for.
656 _M_dereferenceable() const noexcept
657 { return _M_rep != nullptr && _M_rep->_M_id == _M_id; }
659 const _Rep* _M_rep = nullptr;
// Out-of-line definition of aliases_view::begin(): wraps the view's
// starting entry in an iterator.  When _M_begin is null the iterator is not
// dereferenceable and therefore already equal to end().
664 text_encoding::aliases_view::begin() const noexcept
666 { return _Iterator(_M_begin); }
// The view's iterators hold pointers into the static table _S_reps, not
// into the view object, so they remain usable after the view that produced
// them has been destroyed.
670 // Opt-in to borrowed_range concept
672 inline constexpr bool
673 enable_borrowed_range<std::text_encoding::aliases_view> = true;
676_GLIBCXX_END_NAMESPACE_VERSION
679#endif // __cpp_lib_text_encoding
680#endif // _GLIBCXX_TEXT_ENCODING