libstdc++
locale_conv.h
Go to the documentation of this file.
1// wstring_convert implementation -*- C++ -*-
2
3// Copyright (C) 2015-2025 Free Software Foundation, Inc.
4//
5// This file is part of the GNU ISO C++ Library. This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 3, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23// <http://www.gnu.org/licenses/>.
24
25/** @file bits/locale_conv.h
26 * This is an internal header file, included by other library headers.
27 * Do not attempt to use it directly. @headername{locale}
28 */
29
30#ifndef _LOCALE_CONV_H
31#define _LOCALE_CONV_H 1
32
33#if __cplusplus < 201103L
34# include <bits/c++0x_warning.h>
35#else
36
37#include <streambuf>
38#include <bits/stringfwd.h>
39#include <bits/allocator.h>
40#include <bits/codecvt.h>
41
42namespace std _GLIBCXX_VISIBILITY(default)
43{
44_GLIBCXX_BEGIN_NAMESPACE_VERSION
45
46 /**
47 * @addtogroup locales
48 * @{
49 */
50
// Convert the input range [__first, __last) by calling the facet member
// function __fn on __cvt (with conversion state __state) and store the
// converted characters in __outstr.
//
// __count receives the number of input elements consumed.  Returns false
// only when the facet reports codecvt_base::error; a conversion that stops
// with codecvt_base::partial still returns true, and __count tells the
// caller how far it got.
51 template<typename _OutStr, typename _InChar, typename _Codecvt,
52 typename _State, typename _Fn>
53 bool
54 __do_str_codecvt(const _InChar* __first, const _InChar* __last,
55 _OutStr& __outstr, const _Codecvt& __cvt, _State& __state,
56 size_t& __count, _Fn __fn)
57 {
// Empty input: nothing to convert, the result is an empty string.
58 if (__first == __last)
59 {
60 __outstr.clear();
61 __count = 0;
62 return true;
63 }
64
// Number of output characters written so far.
65 size_t __outchars = 0;
66 auto __next = __first;
// Output characters reserved per remaining input element on each pass.
67 const auto __maxlen = __cvt.max_length() + 1;
68
69 codecvt_base::result __result;
70 do
71 {
// Grow the output by room for all remaining input, then recompute the
// output pointers from the string, since they are taken after the resize.
72 __outstr.resize(__outstr.size() + (__last - __next) * __maxlen);
73 auto __outnext = &__outstr.front() + __outchars;
74 auto const __outlast = &__outstr.back() + 1;
75 __result = (__cvt.*__fn)(__state, __next, __last, __next,
76 __outnext, __outlast, __outnext);
77 __outchars = __outnext - &__outstr.front();
78 }
// Go round again only if the facet stopped short (partial) with input left
// and fewer than __maxlen unused output characters remain.
79 while (__result == codecvt_base::partial && __next != __last
80 && ptrdiff_t(__outstr.size() - __outchars) < __maxlen);
81
// On error report how many input elements were consumed and fail;
// __outstr is left as it is (not trimmed).
82 if (__result == codecvt_base::error)
83 {
84 __count = __next - __first;
85 return false;
86 }
87
88#pragma GCC diagnostic push
89#pragma GCC diagnostic ignored "-Wc++17-extensions" // if constexpr
90 // The codecvt facet will only return noconv when the types are
91 // the same, so avoid instantiating basic_string::assign otherwise
92 if constexpr (is_same<typename _Codecvt::intern_type,
93 typename _Codecvt::extern_type>::value)
94 if (__result == codecvt_base::noconv)
95 {
// No conversion was needed: the output is a copy of the whole input.
96 __outstr.assign(__first, __last);
97 __count = __last - __first;
98 return true;
99 }
100#pragma GCC diagnostic pop
101
// Drop the unused tail of the over-allocated output.
102 __outstr.resize(__outchars);
103 __count = __next - __first;
104 return true;
105 }
106
107 // Convert narrow character string to wide.
// Forwards to __do_str_codecvt using the facet's in() member; __count
// receives whatever __do_str_codecvt stores in it, and the return value is
// passed through unchanged.
// NOTE(review): the parameter lines declaring __outstr and __cvt (listing
// lines 111-112) are missing from this copy of the file.
108 template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
109 inline bool
110 __str_codecvt_in(const char* __first, const char* __last,
113 _State& __state, size_t& __count)
114 {
115 using _Codecvt = codecvt<_CharT, char, _State>;
// Naming the exact member-function-pointer type selects the public
// in() member with this signature.
116 using _ConvFn
117 = codecvt_base::result
118 (_Codecvt::*)(_State&, const char*, const char*, const char*&,
119 _CharT*, _CharT*, _CharT*&) const;
120 _ConvFn __fn = &codecvt<_CharT, char, _State>::in;
121 return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
122 __count, __fn);
123 }
124
125 // As above, but with no __count parameter
// Uses a value-initialized conversion state and discards the consumed count.
// NOTE(review): the parameter lines declaring __outstr and __cvt (listing
// lines 129-130) are missing from this copy of the file.
126 template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
127 inline bool
128 __str_codecvt_in(const char* __first, const char* __last,
131 {
132 _State __state = {};
133 size_t __n;
134 return __str_codecvt_in(__first, __last, __outstr, __cvt, __state, __n);
135 }
136
137 // As above, but returns false for partial conversion
// Succeeds only if the conversion succeeded and the consumed count equals
// the length of the input range.
// NOTE(review): the parameter lines declaring __outstr and __cvt (listing
// lines 141-142) are missing from this copy of the file.
138 template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
139 inline bool
140 __str_codecvt_in_all(const char* __first, const char* __last,
143 {
144 _State __state = {};
145 size_t __n;
146 return __str_codecvt_in(__first, __last, __outstr, __cvt, __state, __n)
147 && (__n == size_t(__last - __first));
148 }
149
150 // Convert wide character string to narrow.
// Forwards to __do_str_codecvt for a codecvt<_CharT, char, _State> facet;
// the return value and __count come straight from that call.
// NOTE(review): the parameter lines declaring __outstr and __cvt (listing
// lines 154-155) and the line declaring __fn (listing line 163) are missing
// from this copy of the file.
151 template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
152 inline bool
153 __str_codecvt_out(const _CharT* __first, const _CharT* __last,
156 _State& __state, size_t& __count)
157 {
158 using _Codecvt = codecvt<_CharT, char, _State>;
// Member-function-pointer type taking _CharT input and producing char output.
159 using _ConvFn
160 = codecvt_base::result
161 (_Codecvt::*)(_State&, const _CharT*, const _CharT*, const _CharT*&,
162 char*, char*, char*&) const;
164 return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
165 __count, __fn);
166 }
167
168 // As above, but with no __count parameter
// Uses a value-initialized conversion state and discards the consumed count.
// NOTE(review): the parameter lines declaring __outstr and __cvt (listing
// lines 172-173) are missing from this copy of the file.
169 template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
170 inline bool
171 __str_codecvt_out(const _CharT* __first, const _CharT* __last,
174 {
175 _State __state = {};
176 size_t __n;
177 return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n);
178 }
179
180 // As above, but returns false for partial conversions
// Succeeds only if the conversion succeeded and the consumed count equals
// the length of the input range.
// NOTE(review): the parameter lines declaring __outstr and __cvt (listing
// lines 184-185) are missing from this copy of the file.
181 template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
182 inline bool
183 __str_codecvt_out_all(const _CharT* __first, const _CharT* __last,
186 {
187 _State __state = {};
188 size_t __n;
189 return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n)
190 && (__n == size_t(__last - __first));
191 }
192
193#ifdef _GLIBCXX_USE_CHAR8_T
194
195 // Convert wide character string to narrow.
// char8_t variant: forwards to __do_str_codecvt for a
// codecvt<_CharT, char8_t, _State> facet, so the output elements are char8_t.
// NOTE(review): the parameter lines declaring __outstr and __cvt (listing
// lines 199-200) and the line declaring __fn (listing line 208) are missing
// from this copy of the file.
196 template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
197 inline bool
198 __str_codecvt_out(const _CharT* __first, const _CharT* __last,
201 _State& __state, size_t& __count)
202 {
203 using _Codecvt = codecvt<_CharT, char8_t, _State>;
// Member-function-pointer type taking _CharT input and producing char8_t output.
204 using _ConvFn
205 = codecvt_base::result
206 (_Codecvt::*)(_State&, const _CharT*, const _CharT*, const _CharT*&,
207 char8_t*, char8_t*, char8_t*&) const;
209 return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
210 __count, __fn);
211 }
212
// Same conversion as the overload taking __state and __count, but with a
// value-initialized conversion state and the consumed count discarded.
// NOTE(review): the parameter lines declaring __outstr and __cvt (listing
// lines 216-217) are missing from this copy of the file.
213 template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
214 inline bool
215 __str_codecvt_out(const _CharT* __first, const _CharT* __last,
218 {
219 _State __state = {};
220 size_t __n;
221 return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n);
222 }
223
224#endif // _GLIBCXX_USE_CHAR8_T
225
226 namespace __detail
227 {
// Minimal owning pointer: stores a raw pointer and deletes it in the
// destructor.  Copying is disabled.
228 template<typename _Tp>
229 struct _Scoped_ptr
230 {
// Construct from a pointer the caller guarantees is non-null.  The
// nonnull attribute names argument 2; presumably the implicit object
// parameter counts as argument 1 (see the GCC attribute documentation).
231 __attribute__((__nonnull__(2)))
232 explicit
233 _Scoped_ptr(_Tp* __ptr) noexcept
234 : _M_ptr(__ptr)
235 { }
236
// Construct from a pointer that may be null; a null pointer is reported
// by calling __throw_logic_error with __msg.
237 _Scoped_ptr(_Tp* __ptr, const char* __msg)
238 : _M_ptr(__ptr)
239 {
240 if (!__ptr)
241 __throw_logic_error(__msg);
242 }
243
244 ~_Scoped_ptr() { delete _M_ptr; }
245
246 _Scoped_ptr(const _Scoped_ptr&) = delete;
247 _Scoped_ptr& operator=(const _Scoped_ptr&) = delete;
248
// Member access; declared as never returning null.
249 __attribute__((__returns_nonnull__))
250 _Tp* operator->() const noexcept { return _M_ptr; }
251
252 _Tp& operator*() const noexcept { return *_M_ptr; }
253
254 private:
255 _Tp* _M_ptr;
256 };
257 }
258
259_GLIBCXX_BEGIN_NAMESPACE_CXX11
260
261 /// String conversions
// Converts between byte strings and wide strings with an owned _Codecvt
// facet.  The object records the number of input elements consumed by the
// last conversion and the conversion state it ended in.
262 template<typename _Codecvt, typename _Elem = wchar_t,
263 typename _Wide_alloc = allocator<_Elem>,
264 typename _Byte_alloc = allocator<char>>
265 class _GLIBCXX17_DEPRECATED wstring_convert
266 {
267 public:
268 typedef basic_string<char, char_traits<char>, _Byte_alloc> byte_string;
269 typedef basic_string<_Elem, char_traits<_Elem>, _Wide_alloc> wide_string;
270 typedef typename _Codecvt::state_type state_type;
271 typedef typename wide_string::traits_type::int_type int_type;
272
273 /// Default constructor.  Owns a newly allocated, default-constructed facet.
274 wstring_convert() : _M_cvt(new _Codecvt()) { }
275
276 /** Constructor.
277 *
278 * @param __pcvt The facet to use for conversions.
279 *
280 * Takes ownership of @p __pcvt and will delete it in the destructor.
 * A null @p __pcvt is rejected: the owning pointer's constructor calls
 * __throw_logic_error with the message passed here.
281 */
282 explicit
283 wstring_convert(_Codecvt* __pcvt) : _M_cvt(__pcvt, "wstring_convert")
284 { }
285
286 /** Construct with an initial conversion state.
287 *
288 * @param __pcvt The facet to use for conversions.
289 * @param __state Initial conversion state.
290 *
291 * Takes ownership of @p __pcvt and will delete it in the destructor.
292 * The object's conversion state will persist between conversions.
 * A null @p __pcvt is rejected in the same way as in the
 * single-argument constructor.
293 */
294 wstring_convert(_Codecvt* __pcvt, state_type __state)
295 : _M_cvt(__pcvt, "std::wstring_convert"),
296 _M_state(__state), _M_with_cvtstate(true)
297 { }
298
299 /** Construct with error strings.
300 *
301 * @param __byte_err A string to return on failed conversions.
302 * @param __wide_err A wide string to return on failed conversions.
 *
 * With this constructor a failed conversion returns the matching
 * error string instead of calling __throw_range_error.
303 */
304 explicit
305 wstring_convert(const byte_string& __byte_err,
306 const wide_string& __wide_err = wide_string())
307 : _M_cvt(new _Codecvt),
308 _M_byte_err_string(__byte_err), _M_wide_err_string(__wide_err),
309 _M_with_strings(true)
310 { }
311
312 ~wstring_convert() = default;
313
314 // _GLIBCXX_RESOLVE_LIB_DEFECTS
315 // 2176. Special members for wstring_convert and wbuffer_convert
316 wstring_convert(const wstring_convert&) = delete;
317 wstring_convert& operator=(const wstring_convert&) = delete;
318
319 /// @{ Convert from bytes.
// Single byte: converted as a one-element range.
320 wide_string
321 from_bytes(char __byte)
322 {
323 char __bytes[2] = { __byte };
324 return from_bytes(__bytes, __bytes+1);
325 }
326
// Null-terminated byte string; the length comes from char_traits<char>::length.
327 wide_string
328 from_bytes(const char* __ptr)
329 { return from_bytes(__ptr, __ptr+char_traits<char>::length(__ptr)); }
330
// Whole byte_string, using its data() and size().
331 wide_string
332 from_bytes(const byte_string& __str)
333 {
334 auto __ptr = __str.data();
335 return from_bytes(__ptr, __ptr + __str.size());
336 }
337
// Range overload that the other from_bytes overloads forward to.
338 wide_string
339 from_bytes(const char* __first, const char* __last)
340 {
// Start from a fresh state unless an initial state was supplied at
// construction, in which case the state carries over between calls.
341 if (!_M_with_cvtstate)
342 _M_state = state_type();
// The result uses the same allocator as the stored wide error string.
343 wide_string __out{ _M_wide_err_string.get_allocator() };
344 if (__str_codecvt_in(__first, __last, __out, *_M_cvt, _M_state,
345 _M_count))
346 return __out;
// Conversion failed: return the error string if one was supplied,
// otherwise report the failure through __throw_range_error.
347 if (_M_with_strings)
348 return _M_wide_err_string;
349 __throw_range_error("wstring_convert::from_bytes");
350 }
351 /// @}
352
353 /// @{ Convert to bytes.
// Single wide character: converted as a one-element range.
354 byte_string
355 to_bytes(_Elem __wchar)
356 {
357 _Elem __wchars[2] = { __wchar };
358 return to_bytes(__wchars, __wchars+1);
359 }
360
// Null-terminated wide string; the length comes from the wide traits' length().
361 byte_string
362 to_bytes(const _Elem* __ptr)
363 {
364 return to_bytes(__ptr, __ptr+wide_string::traits_type::length(__ptr));
365 }
366
// Whole wide_string, using its data() and size().
367 byte_string
368 to_bytes(const wide_string& __wstr)
369 {
370 auto __ptr = __wstr.data();
371 return to_bytes(__ptr, __ptr + __wstr.size());
372 }
373
// Range overload that the other to_bytes overloads forward to.
374 byte_string
375 to_bytes(const _Elem* __first, const _Elem* __last)
376 {
// Start from a fresh state unless an initial state was supplied at
// construction.
377 if (!_M_with_cvtstate)
378 _M_state = state_type();
// The result uses the same allocator as the stored byte error string.
379 byte_string __out{ _M_byte_err_string.get_allocator() };
380 if (__str_codecvt_out(__first, __last, __out, *_M_cvt, _M_state,
381 _M_count))
382 return __out;
// Conversion failed: return the error string if one was supplied,
// otherwise report the failure through __throw_range_error.
383 if (_M_with_strings)
384 return _M_byte_err_string;
385 __throw_range_error("wstring_convert::to_bytes");
386 }
387 /// @}
388
389 // _GLIBCXX_RESOLVE_LIB_DEFECTS
390 // 2174. wstring_convert::converted() should be noexcept
391 /// The number of elements successfully converted in the last conversion.
392 size_t converted() const noexcept { return _M_count; }
393
394 /// The final conversion state of the last conversion.
395 state_type state() const { return _M_state; }
396
397 private:
// Owned conversion facet; deleted when this object is destroyed.
398 __detail::_Scoped_ptr<_Codecvt> _M_cvt;
// Strings returned on failure when _M_with_strings is set.
399 byte_string _M_byte_err_string;
400 wide_string _M_wide_err_string;
401 state_type _M_state = state_type();
// Input elements consumed by the most recent conversion.
402 size_t _M_count = 0;
// True when an initial state was given, so the state is not reset per call.
403 bool _M_with_cvtstate = false;
// True when error strings were given, so failures return them.
404 bool _M_with_strings = false;
405 };
406
407_GLIBCXX_END_NAMESPACE_CXX11
408
409 /// Buffer conversions
410 template<typename _Codecvt, typename _Elem = wchar_t,
411 typename _Tr = char_traits<_Elem>>
412 class _GLIBCXX17_DEPRECATED wbuffer_convert
413 : public basic_streambuf<_Elem, _Tr>
414 {
415 typedef basic_streambuf<_Elem, _Tr> _Wide_streambuf;
416
417 public:
418 typedef typename _Codecvt::state_type state_type;
419
420 /// Default constructor.
422
423 /** Constructor.
424 *
425 * @param __bytebuf The underlying byte stream buffer.
426 * @param __pcvt The facet to use for conversions.
427 * @param __state Initial conversion state.
428 *
429 * Takes ownership of @p __pcvt and will delete it in the destructor.
 * A null @p __pcvt is rejected: the owning pointer's constructor calls
 * __throw_logic_error with the message passed here.
430 */
431 explicit
432 wbuffer_convert(streambuf* __bytebuf, _Codecvt* __pcvt = new _Codecvt,
433 state_type __state = state_type())
434 : _M_buf(__bytebuf), _M_cvt(__pcvt, "std::wbuffer_convert"),
435 _M_state(__state), _M_always_noconv(_M_cvt->always_noconv())
436 {
// The put and get areas are only set up when a byte buffer is attached.
437 if (_M_buf)
438 {
// The put area spans the whole of _M_put_area.
439 this->setp(_M_put_area, _M_put_area + _S_buffer_length);
// The get area starts out empty, positioned _S_putback_length
// elements into _M_get_area.
440 this->setg(_M_get_area + _S_putback_length,
441 _M_get_area + _S_putback_length,
442 _M_get_area + _S_putback_length);
443 }
444 }
445
446 ~wbuffer_convert() = default;
447
448 // _GLIBCXX_RESOLVE_LIB_DEFECTS
449 // 2176. Special members for wstring_convert and wbuffer_convert
450 wbuffer_convert(const wbuffer_convert&) = delete;
451 wbuffer_convert& operator=(const wbuffer_convert&) = delete;
452
/// The byte stream buffer currently attached (as stored in _M_buf).
453 streambuf* rdbuf() const noexcept { return _M_buf; }
454
/// Attach a different byte stream buffer and return the previous one.
// Only _M_buf is replaced here; the put and get areas and the conversion
// state are not touched.
455 streambuf*
456 rdbuf(streambuf *__bytebuf) noexcept
457 {
458 auto __prev = _M_buf;
459 _M_buf = __bytebuf;
460 return __prev;
461 }
462
463 /// The conversion state following the last conversion.
464 state_type state() const noexcept { return _M_state; }
465
466 protected:
// NOTE(review): the line carrying the function name (listing line 468,
// presumably "sync()") is missing from this copy of the file.
// Returns 0 only if a byte buffer is attached, _M_conv_put() succeeds, and
// the byte buffer's pubsync() returns 0; otherwise returns -1.
467 int
469 { return _M_buf && _M_conv_put() && !_M_buf->pubsync() ? 0 : -1; }
470
// NOTE(review): the line carrying the function name and the __out parameter
// (listing line 472, presumably "overflow(int_type __out)") is missing from
// this copy of the file.
// Flushes the put area through _M_conv_put(), then stores __out with sputc
// unless it is eof.  Returns eof when no byte buffer is attached or the
// flush fails.
471 typename _Wide_streambuf::int_type
473 {
474 if (!_M_buf || !_M_conv_put())
475 return _Tr::eof();
476 else if (!_Tr::eq_int_type(__out, _Tr::eof()))
477 return this->sputc(__out);
// __out was eof: report success with a value that is not eof.
478 return _Tr::not_eof(__out);
479 }
480
// NOTE(review): the line carrying the function name (listing line 482,
// presumably "underflow()") is missing from this copy of the file.
// Returns the character at gptr() without advancing, refilling the get area
// through _M_conv_get() when it is exhausted.  Returns eof when no byte
// buffer is attached or the refill fails.
481 typename _Wide_streambuf::int_type
483 {
484 if (!_M_buf)
485 return _Tr::eof();
486
487 if (this->gptr() < this->egptr() || (_M_buf && _M_conv_get()))
488 return _Tr::to_int_type(*this->gptr());
489 else
490 return _Tr::eof();
491 }
492
// NOTE(review): the line carrying the return type (listing line 493) is
// missing from this copy of the file.
// Writes up to __n characters from __s: copies as many as fit into the put
// area, flushes it with _M_conv_put(), and repeats until everything is
// copied or a flush fails.  Returns the number of characters copied into
// the put area, or 0 when no byte buffer is attached or __n is 0.
494 xsputn(const typename _Wide_streambuf::char_type* __s, streamsize __n)
495 {
496 if (!_M_buf || __n == 0)
497 return 0;
498 streamsize __done = 0;
499 do
500 {
// Copy no more than the free space left in the put area.
501 auto __nn = std::min<streamsize>(this->epptr() - this->pptr(),
502 __n - __done);
503 _Tr::copy(this->pptr(), __s + __done, __nn);
504 this->pbump(__nn);
505 __done += __nn;
506 } while (__done < __n && _M_conv_put());
507 return __done;
508 }
509
510 private:
511 // fill the get area from converted contents of the byte stream buffer
512 bool
513 _M_conv_get()
514 {
515 const streamsize __pb1 = this->gptr() - this->eback();
516 const streamsize __pb2 = _S_putback_length;
517 const streamsize __npb = std::min(__pb1, __pb2);
518
519 _Tr::move(_M_get_area + _S_putback_length - __npb,
520 this->gptr() - __npb, __npb);
521
522 streamsize __nbytes = sizeof(_M_get_buf) - _M_unconv;
523 __nbytes = std::min(__nbytes, _M_buf->in_avail());
524 if (__nbytes < 1)
525 __nbytes = 1;
526 __nbytes = _M_buf->sgetn(_M_get_buf + _M_unconv, __nbytes);
527 if (__nbytes < 1)
528 return false;
529 __nbytes += _M_unconv;
530
531 // convert _M_get_buf into _M_get_area
532
533 _Elem* __outbuf = _M_get_area + _S_putback_length;
534 _Elem* __outnext = __outbuf;
535 const char* __bnext = _M_get_buf;
536
537 codecvt_base::result __result;
538 if (_M_always_noconv)
539 __result = codecvt_base::noconv;
540 else
541 {
542 _Elem* __outend = _M_get_area + _S_buffer_length;
543
544 __result = _M_cvt->in(_M_state,
545 __bnext, __bnext + __nbytes, __bnext,
546 __outbuf, __outend, __outnext);
547 }
548
549 if (__result == codecvt_base::noconv)
550 {
551 // cast is safe because noconv means _Elem is same type as char
552 auto __get_buf = reinterpret_cast<const _Elem*>(_M_get_buf);
553 _Tr::copy(__outbuf, __get_buf, __nbytes);
554 _M_unconv = 0;
555 return true;
556 }
557
558 if ((_M_unconv = _M_get_buf + __nbytes - __bnext))
559 char_traits<char>::move(_M_get_buf, __bnext, _M_unconv);
560
561 this->setg(__outbuf, __outbuf, __outnext);
562
563 return __result != codecvt_base::error;
564 }
565
566 // unused
// Catch-all overload: selected by overload resolution only when the
// arguments do not match _M_put(const char*, streamsize), e.g. a pointer to
// a non-char _Elem.  Always returns false.
567 bool
568 _M_put(...)
569 { return false; }
570
// Write __n bytes starting at __p to the byte stream buffer.
// Returns false if sputn() accepted fewer than __n bytes, true otherwise.
571 bool
572 _M_put(const char* __p, streamsize __n)
573 {
574 if (_M_buf->sputn(__p, __n) < __n)
575 return false;
576 return true;
577 }
578
579 // convert the put area and write to the byte stream buffer
580 bool
581 _M_conv_put()
582 {
583 _Elem* const __first = this->pbase();
584 const _Elem* const __last = this->pptr();
585 const streamsize __pending = __last - __first;
586
587 if (_M_always_noconv)
588 return _M_put(__first, __pending);
589
590 char __outbuf[2 * _S_buffer_length];
591
592 const _Elem* __next = __first;
593 const _Elem* __start;
594 do
595 {
596 __start = __next;
597 char* __outnext = __outbuf;
598 char* const __outlast = __outbuf + sizeof(__outbuf);
599 auto __result = _M_cvt->out(_M_state, __next, __last, __next,
600 __outnext, __outlast, __outnext);
601 if (__result == codecvt_base::error)
602 return false;
603 else if (__result == codecvt_base::noconv)
604 return _M_put(__next, __pending);
605
606 if (!_M_put(__outbuf, __outnext - __outbuf))
607 return false;
608 }
609 while (__next != __last && __next != __start);
610
611 if (__next != __last)
612 _Tr::move(__first, __next, __last - __next);
613
614 this->pbump(__first - __next);
615 return __next != __first;
616 }
617
618 streambuf* _M_buf;
619 __detail::_Scoped_ptr<_Codecvt> _M_cvt;
620 state_type _M_state;
621
622 static const streamsize _S_buffer_length = 32;
623 static const streamsize _S_putback_length = 3;
624 _Elem _M_put_area[_S_buffer_length];
625 _Elem _M_get_area[_S_buffer_length];
626 streamsize _M_unconv = 0;
627 char _M_get_buf[_S_buffer_length-_S_putback_length];
628 bool _M_always_noconv;
629 };
630
631 /// @} group locales
632
633_GLIBCXX_END_NAMESPACE_VERSION
634} // namespace
635
636#endif // __cplusplus
637
638#endif /* _LOCALE_CONV_H */
basic_streambuf< char > streambuf
Base class for char buffers.
Definition iosfwd:139
constexpr const _Tp & min(const _Tp &, const _Tp &)
This does what you think it does.
ISO C++ entities toplevel namespace is std.
ptrdiff_t streamsize
Integral type for I/O operation counts and buffer sizes.
Definition postypes.h:73
Implementation details not part of the namespace std interface.
traits_type::int_type int_type
Definition streambuf:137
void setg(char_type *__gbeg, char_type *__gnext, char_type *__gend)
Setting the three read area pointers.
Definition streambuf:518
char_type * egptr() const
Access to the get area.
Definition streambuf:497
char_type * gptr() const
Access to the get area.
Definition streambuf:494
void setp(char_type *__pbeg, char_type *__pend)
Setting the three write area pointers.
Definition streambuf:564
int_type sputc(char_type __c)
Entry point for all single-character output functions.
Definition streambuf:433
basic_streambuf()
Base constructor.
Definition streambuf:472
The standard allocator, as per C++03 [20.4.1].
Definition allocator.h:134
Managing sequences of characters and character-like objects.
constexpr size_type size() const noexcept
Returns the number of characters in the string, not including any null-termination.
constexpr const _CharT * data() const noexcept
Return const pointer to contents.
result in(state_type &__state, const extern_type *__from, const extern_type *__from_end, const extern_type *&__from_next, intern_type *__to, intern_type *__to_end, intern_type *&__to_next) const
Convert from external to internal character set.
Definition codecvt.h:204
result out(state_type &__state, const intern_type *__from, const intern_type *__from_end, const intern_type *&__from_next, extern_type *__to, extern_type *__to_end, extern_type *&__to_next) const
Convert from internal to external character set.
Definition codecvt.h:124
Primary class template codecvt.
Definition codecvt.h:284
String conversions.
size_t converted() const noexcept
The number of elements successfully converted in the last conversion.
wstring_convert(_Codecvt *__pcvt)
wstring_convert()
Default constructor.
wide_string from_bytes(char __byte)
Convert from bytes.
byte_string to_bytes(_Elem __wchar)
Convert to bytes.
byte_string to_bytes(const _Elem *__ptr)
Convert to bytes.
byte_string to_bytes(const wide_string &__wstr)
Convert to bytes.
byte_string to_bytes(const _Elem *__first, const _Elem *__last)
Convert to bytes.
wstring_convert(const byte_string &__byte_err, const wide_string &__wide_err=wide_string())
wide_string from_bytes(const char *__first, const char *__last)
Convert from bytes.
state_type state() const
The final conversion state of the last conversion.
wide_string from_bytes(const char *__ptr)
Convert from bytes.
wstring_convert(_Codecvt *__pcvt, state_type __state)
wide_string from_bytes(const byte_string &__str)
Convert from bytes.
Buffer conversions.
_Wide_streambuf::int_type underflow()
Fetches more data from the controlled sequence.
state_type state() const noexcept
The conversion state following the last conversion.
wbuffer_convert(streambuf *__bytebuf, _Codecvt *__pcvt=new _Codecvt, state_type __state=state_type())
int sync()
Synchronizes the buffer arrays with the controlled sequences.
wbuffer_convert()
Default constructor.
_Wide_streambuf::int_type overflow(typename _Wide_streambuf::int_type __out)
Consumes data from the buffer; writes to the controlled sequence.