libstdc++
locale_conv.h
Go to the documentation of this file.
1// wstring_convert implementation -*- C++ -*-
2
3// Copyright (C) 2015-2026 Free Software Foundation, Inc.
4//
5// This file is part of the GNU ISO C++ Library. This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 3, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23// <http://www.gnu.org/licenses/>.
24
25/** @file bits/locale_conv.h
26 * This is an internal header file, included by other library headers.
27 * Do not attempt to use it directly. @headername{locale}
28 */
29
30#ifndef _LOCALE_CONV_H
31#define _LOCALE_CONV_H 1
32
33#if __cplusplus < 201103L
34# include <bits/c++0x_warning.h>
35#else
36
37#include <streambuf>
38#include <bits/stringfwd.h>
39#include <bits/allocator.h>
40#include <bits/codecvt.h>
41
42namespace std _GLIBCXX_VISIBILITY(default)
43{
44_GLIBCXX_BEGIN_NAMESPACE_VERSION
45
46 /**
47 * @addtogroup locales
48 * @{
49 */
50
51 /// @cond undocumented
52
// Shared implementation of the __str_codecvt_* helpers that follow.
//
// Converts [__first, __last) by calling the codecvt member function __fn
// on __cvt and stores the converted characters in __outstr.  __state is
// passed through to the facet and __count receives the number of input
// elements consumed.  Returns false only when the facet reports
// codecvt_base::error; a conversion that stops early with `partial` still
// returns true, so callers that need a complete conversion must compare
// __count with the input length.
53 template<typename _OutStr, typename _InChar, typename _Codecvt,
54 typename _State, typename _Fn>
55 bool
56 __do_str_codecvt(const _InChar* __first, const _InChar* __last,
57 _OutStr& __outstr, const _Codecvt& __cvt, _State& __state,
58 size_t& __count, _Fn __fn)
59 {
// Empty input: report an empty result without calling the facet.
60 if (__first == __last)
61 {
62 __outstr.clear();
63 __count = 0;
64 return true;
65 }
66
// Number of characters written to __outstr so far.
67 size_t __outchars = 0;
68 auto __next = __first;
// Output growth per remaining input element: max_length() + 1.
69 const auto __maxlen = __cvt.max_length() + 1;
70
71 codecvt_base::result __result;
72 do
73 {
// Grow the output, then recompute the output pointers from the string
// because resize() may have moved its storage.
74 __outstr.resize(__outstr.size() + (__last - __next) * __maxlen);
75 auto __outnext = &__outstr.front() + __outchars;
76 auto const __outlast = &__outstr.back() + 1;
77 __result = (__cvt.*__fn)(__state, __next, __last, __next,
78 __outnext, __outlast, __outnext);
79 __outchars = __outnext - &__outstr.front();
80 }
// Retry only if the facet stopped early with input left over and fewer
// than __maxlen unused output characters remain.
81 while (__result == codecvt_base::partial && __next != __last
82 && ptrdiff_t(__outstr.size() - __outchars) < __maxlen);
83
// On error, report how much input was consumed before the failure.
// __outstr is left at its enlarged size in this case.
84 if (__result == codecvt_base::error)
85 {
86 __count = __next - __first;
87 return false;
88 }
89
90#pragma GCC diagnostic push
91#pragma GCC diagnostic ignored "-Wc++17-extensions" // if constexpr
92 // The codecvt facet will only return noconv when the types are
93 // the same, so avoid instantiating basic_string::assign otherwise
94 if constexpr (is_same<typename _Codecvt::intern_type,
95 typename _Codecvt::extern_type>::value)
96 if (__result == codecvt_base::noconv)
97 {
// No conversion needed: the output is a copy of the whole input.
98 __outstr.assign(__first, __last);
99 __count = __last - __first;
100 return true;
101 }
102#pragma GCC diagnostic pop
103
// Drop the unused tail of the buffer and report the consumed input.
104 __outstr.resize(__outchars);
105 __count = __next - __first;
106 return true;
107 }
108
109 // Convert narrow character string to wide.
// Forwards to __do_str_codecvt with a pointer to the facet's in() member.
// __count receives the number of input chars consumed; the result is false
// if the facet reported an error.
// NOTE(review): this listing omits source lines 113-114; they presumably
// declare the __outstr and __cvt parameters used in the body.
110 template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
111 inline bool
112 __str_codecvt_in(const char* __first, const char* __last,
115 _State& __state, size_t& __count)
116 {
117 using _Codecvt = codecvt<_CharT, char, _State>;
// Type of codecvt::in for this facet: char input, _CharT output.
118 using _ConvFn
119 = codecvt_base::result
120 (_Codecvt::*)(_State&, const char*, const char*, const char*&,
121 _CharT*, _CharT*, _CharT*&) const;
122 _ConvFn __fn = &codecvt<_CharT, char, _State>::in;
123 return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
124 __count, __fn);
125 }
126
127 // As above, but with no __count parameter
// Starts from a value-initialized conversion state and discards the count
// of consumed input, so a partial conversion still yields true.
// NOTE(review): this listing omits source lines 131-132; they presumably
// declare the __outstr and __cvt parameters used in the body.
128 template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
129 inline bool
130 __str_codecvt_in(const char* __first, const char* __last,
133 {
134 _State __state = {};
135 size_t __n;
136 return __str_codecvt_in(__first, __last, __outstr, __cvt, __state, __n);
137 }
138
139 // As above, but returns false for partial conversion
// Succeeds only if the conversion did not fail and the number of consumed
// input chars equals the length of [__first, __last).
// NOTE(review): this listing omits source lines 143-144; they presumably
// declare the __outstr and __cvt parameters used in the body.
140 template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
141 inline bool
142 __str_codecvt_in_all(const char* __first, const char* __last,
145 {
146 _State __state = {};
147 size_t __n;
148 return __str_codecvt_in(__first, __last, __outstr, __cvt, __state, __n)
149 && (__n == size_t(__last - __first));
150 }
151
152 // Convert wide character string to narrow.
// Forwards to __do_str_codecvt for a codecvt<_CharT, char, _State> facet.
// __count receives the number of input elements consumed; the result is
// false if the facet reported an error.
// NOTE(review): this listing omits source lines 156-157 (presumably the
// __outstr and __cvt parameters) and line 165 (presumably the
// initialization of __fn, which is used in the call below).
153 template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
154 inline bool
155 __str_codecvt_out(const _CharT* __first, const _CharT* __last,
158 _State& __state, size_t& __count)
159 {
160 using _Codecvt = codecvt<_CharT, char, _State>;
// Member-function type with _CharT input and char output.
161 using _ConvFn
162 = codecvt_base::result
163 (_Codecvt::*)(_State&, const _CharT*, const _CharT*, const _CharT*&,
164 char*, char*, char*&) const;
166 return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
167 __count, __fn);
168 }
169
170 // As above, but with no __count parameter
// Starts from a value-initialized conversion state and discards the count
// of consumed input, so a partial conversion still yields true.
// NOTE(review): this listing omits source lines 174-175; they presumably
// declare the __outstr and __cvt parameters used in the body.
171 template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
172 inline bool
173 __str_codecvt_out(const _CharT* __first, const _CharT* __last,
176 {
177 _State __state = {};
178 size_t __n;
179 return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n);
180 }
181
182 // As above, but returns false for partial conversions
// Succeeds only if the conversion did not fail and the number of consumed
// input elements equals the length of [__first, __last).
// NOTE(review): this listing omits source lines 186-187; they presumably
// declare the __outstr and __cvt parameters used in the body.
183 template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
184 inline bool
185 __str_codecvt_out_all(const _CharT* __first, const _CharT* __last,
188 {
189 _State __state = {};
190 size_t __n;
191 return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n)
192 && (__n == size_t(__last - __first));
193 }
194
195#ifdef _GLIBCXX_USE_CHAR8_T
196
197 // Convert wide character string to narrow.
// char8_t variant: forwards to __do_str_codecvt for a
// codecvt<_CharT, char8_t, _State> facet.  Only compiled when
// _GLIBCXX_USE_CHAR8_T is defined.
// NOTE(review): this listing omits source lines 201-202 (presumably the
// __outstr and __cvt parameters) and line 210 (presumably the
// initialization of __fn, which is used in the call below).
198 template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
199 inline bool
200 __str_codecvt_out(const _CharT* __first, const _CharT* __last,
203 _State& __state, size_t& __count)
204 {
205 using _Codecvt = codecvt<_CharT, char8_t, _State>;
// Member-function type with _CharT input and char8_t output.
206 using _ConvFn
207 = codecvt_base::result
208 (_Codecvt::*)(_State&, const _CharT*, const _CharT*, const _CharT*&,
209 char8_t*, char8_t*, char8_t*&) const;
211 return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
212 __count, __fn);
213 }
214
// char8_t variant without a __count parameter: starts from a
// value-initialized conversion state and discards the count of consumed
// input.
// NOTE(review): this listing omits source lines 218-219; they presumably
// declare the __outstr and __cvt parameters used in the body.
215 template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
216 inline bool
217 __str_codecvt_out(const _CharT* __first, const _CharT* __last,
220 {
221 _State __state = {};
222 size_t __n;
223 return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n);
224 }
225
226#endif // _GLIBCXX_USE_CHAR8_T
227
228 namespace __detail
229 {
230 template<typename _Tp>
231 struct _Scoped_ptr
232 {
233 __attribute__((__nonnull__(2)))
234 explicit
235 _Scoped_ptr(_Tp* __ptr) noexcept
236 : _M_ptr(__ptr)
237 { }
238
239 _Scoped_ptr(_Tp* __ptr, const char* __msg)
240 : _M_ptr(__ptr)
241 {
242 if (!__ptr)
243 __throw_logic_error(__msg);
244 }
245
246 ~_Scoped_ptr() { delete _M_ptr; }
247
248 _Scoped_ptr(const _Scoped_ptr&) = delete;
249 _Scoped_ptr& operator=(const _Scoped_ptr&) = delete;
250
251 __attribute__((__returns_nonnull__))
252 _Tp* operator->() const noexcept { return _M_ptr; }
253
254 _Tp& operator*() const noexcept { return *_M_ptr; }
255
256 private:
257 _Tp* _M_ptr;
258 };
259 }
260 /// @endcond
261
262_GLIBCXX_BEGIN_NAMESPACE_CXX11
263
264 /// String conversions
/// Converts between byte strings and wide strings using a codecvt facet of
/// type _Codecvt that the object owns.
265 template<typename _Codecvt, typename _Elem = wchar_t,
266 typename _Wide_alloc = allocator<_Elem>,
267 typename _Byte_alloc = allocator<char>>
268 class _GLIBCXX17_DEPRECATED wstring_convert
269 {
270 public:
271 typedef basic_string<char, char_traits<char>, _Byte_alloc> byte_string;
272 typedef basic_string<_Elem, char_traits<_Elem>, _Wide_alloc> wide_string;
273 typedef typename _Codecvt::state_type state_type;
274 typedef typename wide_string::traits_type::int_type int_type;
275
276 /// Default constructor.
/// Allocates a default-constructed facet with new.
277 wstring_convert() : _M_cvt(new _Codecvt()) { }
278
279 /** Constructor.
280 *
281 * @param __pcvt The facet to use for conversions.
282 *
283 * Takes ownership of @p __pcvt and will delete it in the destructor.
 * Calls __throw_logic_error if @p __pcvt is null.
284 */
285 explicit
286 wstring_convert(_Codecvt* __pcvt) : _M_cvt(__pcvt, "wstring_convert")
287 { }
288
289 /** Construct with an initial conversion state.
290 *
291 * @param __pcvt The facet to use for conversions.
292 * @param __state Initial conversion state.
293 *
294 * Takes ownership of @p __pcvt and will delete it in the destructor.
295 * The object's conversion state will persist between conversions.
 * Calls __throw_logic_error if @p __pcvt is null.
296 */
297 wstring_convert(_Codecvt* __pcvt, state_type __state)
298 : _M_cvt(__pcvt, "std::wstring_convert"),
299 _M_state(__state), _M_with_cvtstate(true)
300 { }
301
302 /** Construct with error strings.
303 *
304 * @param __byte_err A string to return on failed conversions.
305 * @param __wide_err A wide string to return on failed conversions.
 *
 * A converter built this way returns these strings from to_bytes and
 * from_bytes on failure instead of calling __throw_range_error.
306 */
307 explicit
308 wstring_convert(const byte_string& __byte_err,
309 const wide_string& __wide_err = wide_string())
310 : _M_cvt(new _Codecvt),
311 _M_byte_err_string(__byte_err), _M_wide_err_string(__wide_err),
312 _M_with_strings(true)
313 { }
314
315 ~wstring_convert() = default;
316
317 // _GLIBCXX_RESOLVE_LIB_DEFECTS
318 // 2176. Special members for wstring_convert and wbuffer_convert
319 wstring_convert(const wstring_convert&) = delete;
320 wstring_convert& operator=(const wstring_convert&) = delete;
321
322 /// @{ Convert from bytes.
323 wide_string
324 from_bytes(char __byte)
325 {
// Two-element array: the byte followed by a zero; only the first element
// is converted.
326 char __bytes[2] = { __byte };
327 return from_bytes(__bytes, __bytes+1);
328 }
329
330 wide_string
331 from_bytes(const char* __ptr)
332 { return from_bytes(__ptr, __ptr+char_traits<char>::length(__ptr)); }
333
334 wide_string
335 from_bytes(const byte_string& __str)
336 {
337 auto __ptr = __str.data();
338 return from_bytes(__ptr, __ptr + __str.size());
339 }
340
341 wide_string
342 from_bytes(const char* __first, const char* __last)
343 {
// Start from a fresh state unless one was supplied at construction.
344 if (!_M_with_cvtstate)
345 _M_state = state_type();
346 wide_string __out{ _M_wide_err_string.get_allocator() };
347 if (__str_codecvt_in(__first, __last, __out, *_M_cvt, _M_state,
348 _M_count))
349 return __out;
// Conversion failed: return the error string if one was configured,
// otherwise report the failure through __throw_range_error.
350 if (_M_with_strings)
351 return _M_wide_err_string;
352 __throw_range_error("wstring_convert::from_bytes");
353 }
354 /// @}
355
356 /// @{ Convert to bytes.
357 byte_string
358 to_bytes(_Elem __wchar)
359 {
// Two-element array: the character followed by a zero; only the first
// element is converted.
360 _Elem __wchars[2] = { __wchar };
361 return to_bytes(__wchars, __wchars+1);
362 }
363
364 byte_string
365 to_bytes(const _Elem* __ptr)
366 {
367 return to_bytes(__ptr, __ptr+wide_string::traits_type::length(__ptr));
368 }
369
370 byte_string
371 to_bytes(const wide_string& __wstr)
372 {
373 auto __ptr = __wstr.data();
374 return to_bytes(__ptr, __ptr + __wstr.size());
375 }
376
377 byte_string
378 to_bytes(const _Elem* __first, const _Elem* __last)
379 {
// Start from a fresh state unless one was supplied at construction.
380 if (!_M_with_cvtstate)
381 _M_state = state_type();
382 byte_string __out{ _M_byte_err_string.get_allocator() };
383 if (__str_codecvt_out(__first, __last, __out, *_M_cvt, _M_state,
384 _M_count))
385 return __out;
// Conversion failed: return the error string if one was configured,
// otherwise report the failure through __throw_range_error.
386 if (_M_with_strings)
387 return _M_byte_err_string;
388 __throw_range_error("wstring_convert::to_bytes");
389 }
390 /// @}
391
392 // _GLIBCXX_RESOLVE_LIB_DEFECTS
393 // 2174. wstring_convert::converted() should be noexcept
394 /// The number of elements successfully converted in the last conversion.
395 size_t converted() const noexcept { return _M_count; }
396
397 /// The final conversion state of the last conversion.
398 state_type state() const { return _M_state; }
399
400 private:
// Owned conversion facet; deleted when the converter is destroyed.
401 __detail::_Scoped_ptr<_Codecvt> _M_cvt;
// Strings returned on failure when _M_with_strings is set.
402 byte_string _M_byte_err_string;
403 wide_string _M_wide_err_string;
// Conversion state handed to the facet on each conversion.
404 state_type _M_state = state_type();
// Count of input elements consumed by the last conversion.
405 size_t _M_count = 0;
// True if an initial state was supplied; the state is then not reset
// before each conversion.
406 bool _M_with_cvtstate = false;
// True if error strings were supplied at construction.
407 bool _M_with_strings = false;
408 };
409
410_GLIBCXX_END_NAMESPACE_CXX11
411
412 /// Buffer conversions
413 template<typename _Codecvt, typename _Elem = wchar_t,
414 typename _Tr = char_traits<_Elem>>
415 class _GLIBCXX17_DEPRECATED wbuffer_convert
416 : public basic_streambuf<_Elem, _Tr>
417 {
418 typedef basic_streambuf<_Elem, _Tr> _Wide_streambuf;
419
420 public:
421 typedef typename _Codecvt::state_type state_type;
422
423 /// Default constructor.
425
426 /** Constructor.
427 *
428 * @param __bytebuf The underlying byte stream buffer.
429 * @param __pcvt The facet to use for conversions.
430 * @param __state Initial conversion state.
431 *
432 * Takes ownership of @p __pcvt and will delete it in the destructor.
 * Calls __throw_logic_error if @p __pcvt is null.  The put and get
 * areas are only set up when @p __bytebuf is non-null.
433 */
434 explicit
435 wbuffer_convert(streambuf* __bytebuf, _Codecvt* __pcvt = new _Codecvt,
436 state_type __state = state_type())
437 : _M_buf(__bytebuf), _M_cvt(__pcvt, "std::wbuffer_convert"),
438 _M_state(__state), _M_always_noconv(_M_cvt->always_noconv())
439 {
440 if (_M_buf)
441 {
// The whole put buffer is writable; the get area starts out empty and
// positioned just past the space reserved for putback.
442 this->setp(_M_put_area, _M_put_area + _S_buffer_length);
443 this->setg(_M_get_area + _S_putback_length,
444 _M_get_area + _S_putback_length,
445 _M_get_area + _S_putback_length);
446 }
447 }
448
449 ~wbuffer_convert() = default;
450
451 // _GLIBCXX_RESOLVE_LIB_DEFECTS
452 // 2176. Special members for wstring_convert and wbuffer_convert
453 wbuffer_convert(const wbuffer_convert&) = delete;
454 wbuffer_convert& operator=(const wbuffer_convert&) = delete;
455
456 streambuf* rdbuf() const noexcept { return _M_buf; }
457
458 streambuf*
459 rdbuf(streambuf *__bytebuf) noexcept
460 {
461 auto __prev = _M_buf;
462 _M_buf = __bytebuf;
463 return __prev;
464 }
465
466 /// The conversion state following the last conversion.
467 state_type state() const noexcept { return _M_state; }
468
469 protected:
// NOTE(review): this listing omits source line 471, which holds the
// function name; the page's member index lists `int sync()`.
// Converts and writes the put area with _M_conv_put(), then syncs the byte
// stream buffer.  Returns 0 on success and -1 if there is no byte stream
// buffer or either step fails.
470 int
472 { return _M_buf && _M_conv_put() && !_M_buf->pubsync() ? 0 : -1; }
473
// NOTE(review): this listing omits source line 475, which holds the
// function name and parameter; the page's member index lists
// `overflow(typename _Wide_streambuf::int_type __out)`.
// Converts and writes the put area, then stores __out in the put area
// unless it is eof.  Returns eof if there is no byte stream buffer or
// _M_conv_put() fails.
474 typename _Wide_streambuf::int_type
476 {
477 if (!_M_buf || !_M_conv_put())
478 return _Tr::eof();
479 else if (!_Tr::eq_int_type(__out, _Tr::eof()))
480 return this->sputc(__out);
// __out is eof: nothing to store, report success with a non-eof value.
481 return _Tr::not_eof(__out);
482 }
483
// NOTE(review): this listing omits source line 485, which holds the
// function name; the page's member index lists `underflow()`.
// Returns the character at the current get position without consuming it,
// refilling the get area through _M_conv_get() when it is exhausted.
// Returns eof if there is no byte stream buffer or the refill fails.
484 typename _Wide_streambuf::int_type
486 {
487 if (!_M_buf)
488 return _Tr::eof();
489
490 if (this->gptr() < this->egptr() || (_M_buf && _M_conv_get()))
491 return _Tr::to_int_type(*this->gptr());
492 else
493 return _Tr::eof();
494 }
495
// NOTE(review): this listing omits source line 496, which presumably holds
// the return type.
// Copies up to __n characters from __s into the put area, in chunks no
// larger than the free space, and calls _M_conv_put() to flush while input
// remains.  Returns the number of characters copied: 0 if there is no byte
// stream buffer or __n is 0, and fewer than __n if a flush fails.
497 xsputn(const typename _Wide_streambuf::char_type* __s, streamsize __n)
498 {
499 if (!_M_buf || __n == 0)
500 return 0;
501 streamsize __done = 0;
502 do
503 {
// Chunk size: free space in the put area or the remaining input,
// whichever is smaller.
504 auto __nn = std::min<streamsize>(this->epptr() - this->pptr(),
505 __n - __done);
506 _Tr::copy(this->pptr(), __s + __done, __nn);
507 this->pbump(__nn);
508 __done += __nn;
509 } while (__done < __n && _M_conv_put());
510 return __done;
511 }
512
513 private:
514 // fill the get area from converted contents of the byte stream buffer
515 bool
516 _M_conv_get()
517 {
518 const streamsize __pb1 = this->gptr() - this->eback();
519 const streamsize __pb2 = _S_putback_length;
520 const streamsize __npb = std::min(__pb1, __pb2);
521
522 _Tr::move(_M_get_area + _S_putback_length - __npb,
523 this->gptr() - __npb, __npb);
524
525 streamsize __nbytes = sizeof(_M_get_buf) - _M_unconv;
526 __nbytes = std::min(__nbytes, _M_buf->in_avail());
527 if (__nbytes < 1)
528 __nbytes = 1;
529 __nbytes = _M_buf->sgetn(_M_get_buf + _M_unconv, __nbytes);
530 if (__nbytes < 1)
531 return false;
532 __nbytes += _M_unconv;
533
534 // convert _M_get_buf into _M_get_area
535
536 _Elem* __outbuf = _M_get_area + _S_putback_length;
537 _Elem* __outnext = __outbuf;
538 const char* __bnext = _M_get_buf;
539
540 codecvt_base::result __result;
541 if (_M_always_noconv)
542 __result = codecvt_base::noconv;
543 else
544 {
545 _Elem* __outend = _M_get_area + _S_buffer_length;
546
547 __result = _M_cvt->in(_M_state,
548 __bnext, __bnext + __nbytes, __bnext,
549 __outbuf, __outend, __outnext);
550 }
551
552 if (__result == codecvt_base::noconv)
553 {
554 // cast is safe because noconv means _Elem is same type as char
555 auto __get_buf = reinterpret_cast<const _Elem*>(_M_get_buf);
556 _Tr::copy(__outbuf, __get_buf, __nbytes);
557 _M_unconv = 0;
558 return true;
559 }
560
561 if ((_M_unconv = _M_get_buf + __nbytes - __bnext))
562 char_traits<char>::move(_M_get_buf, __bnext, _M_unconv);
563
564 this->setg(__outbuf, __outbuf, __outnext);
565
566 return __result != codecvt_base::error;
567 }
568
569 // unused
// Catch-all overload that always reports failure.  The
// _M_put(pointer, count) calls in _M_conv_put resolve to it when the pointer
// is not convertible to const char*.  Presumably marked unused because
// those calls are only reached for noconv facets, where _Elem is char.
570 bool
571 _M_put(...)
572 { return false; }
573
574 bool
575 _M_put(const char* __p, streamsize __n)
576 {
577 if (_M_buf->sputn(__p, __n) < __n)
578 return false;
579 return true;
580 }
581
582 // convert the put area and write to the byte stream buffer
583 bool
584 _M_conv_put()
585 {
586 _Elem* const __first = this->pbase();
587 const _Elem* const __last = this->pptr();
588 const streamsize __pending = __last - __first;
589
590 if (_M_always_noconv)
591 return _M_put(__first, __pending);
592
593 char __outbuf[2 * _S_buffer_length];
594
595 const _Elem* __next = __first;
596 const _Elem* __start;
597 do
598 {
599 __start = __next;
600 char* __outnext = __outbuf;
601 char* const __outlast = __outbuf + sizeof(__outbuf);
602 auto __result = _M_cvt->out(_M_state, __next, __last, __next,
603 __outnext, __outlast, __outnext);
604 if (__result == codecvt_base::error)
605 return false;
606 else if (__result == codecvt_base::noconv)
607 return _M_put(__next, __pending);
608
609 if (!_M_put(__outbuf, __outnext - __outbuf))
610 return false;
611 }
612 while (__next != __last && __next != __start);
613
614 if (__next != __last)
615 _Tr::move(__first, __next, __last - __next);
616
617 this->pbump(__first - __next);
618 return __next != __first;
619 }
620
// Underlying byte stream buffer; may be null, in which case the virtual
// functions report failure.
621 streambuf* _M_buf;
// Owned conversion facet; deleted when the buffer is destroyed.
622 __detail::_Scoped_ptr<_Codecvt> _M_cvt;
// Conversion state handed to the facet on every conversion.
623 state_type _M_state;
624
// Capacity of the put and get buffers, in _Elem.
625 static const streamsize _S_buffer_length = 32;
// Elements at the front of the get buffer reserved for putback.
626 static const streamsize _S_putback_length = 3;
627 _Elem _M_put_area[_S_buffer_length];
628 _Elem _M_get_area[_S_buffer_length];
// Number of bytes at the start of _M_get_buf left over from the last
// conversion.
629 streamsize _M_unconv = 0;
// Raw bytes read from _M_buf awaiting conversion.
630 char _M_get_buf[_S_buffer_length-_S_putback_length];
// Cached result of the facet's always_noconv(), set in the constructor.
631 bool _M_always_noconv;
632 };
633
634 /// @} group locales
635
636_GLIBCXX_END_NAMESPACE_VERSION
637} // namespace
638
639#endif // __cplusplus
640
641#endif /* _LOCALE_CONV_H */
constexpr complex< _Tp > operator*(const complex< _Tp > &__x, const complex< _Tp > &__y)
Return new complex value x times y.
Definition complex:434
basic_streambuf< char > streambuf
Base class for char buffers.
Definition iosfwd:139
constexpr const _Tp & min(const _Tp &, const _Tp &)
This does what you think it does.
ISO C++ entities toplevel namespace is std.
ptrdiff_t streamsize
Integral type for I/O operation counts and buffer sizes.
Definition postypes.h:73
Implementation details not part of the namespace std interface.
traits_type::int_type int_type
Definition streambuf:137
void setg(char_type *__gbeg, char_type *__gnext, char_type *__gend)
Setting the three read area pointers.
Definition streambuf:523
char_type * egptr() const
Access to the get area.
Definition streambuf:501
char_type * gptr() const
Access to the get area.
Definition streambuf:498
void setp(char_type *__pbeg, char_type *__pend)
Setting the three write area pointers.
Definition streambuf:569
int_type sputc(char_type __c)
Entry point for all single-character output functions.
Definition streambuf:433
basic_streambuf()
Base constructor.
Definition streambuf:472
The standard allocator, as per C++03 [20.4.1].
Definition allocator.h:134
Managing sequences of characters and character-like objects.
constexpr size_type size() const noexcept
Returns the number of characters in the string, not including any null-termination.
constexpr const _CharT * data() const noexcept
Return const pointer to contents.
result in(state_type &__state, const extern_type *__from, const extern_type *__from_end, const extern_type *&__from_next, intern_type *__to, intern_type *__to_end, intern_type *&__to_next) const
Convert from external to internal character set.
Definition codecvt.h:204
result out(state_type &__state, const intern_type *__from, const intern_type *__from_end, const intern_type *&__from_next, extern_type *__to, extern_type *__to_end, extern_type *&__to_next) const
Convert from internal to external character set.
Definition codecvt.h:124
Primary class template codecvt.
Definition codecvt.h:284
String conversions.
size_t converted() const noexcept
The number of elements successfully converted in the last conversion.
wstring_convert(_Codecvt *__pcvt)
wstring_convert()
Default constructor.
wide_string from_bytes(char __byte)
Convert from bytes.
byte_string to_bytes(_Elem __wchar)
Convert to bytes.
byte_string to_bytes(const _Elem *__ptr)
Convert to bytes.
byte_string to_bytes(const wide_string &__wstr)
Convert to bytes.
byte_string to_bytes(const _Elem *__first, const _Elem *__last)
Convert to bytes.
wstring_convert(const byte_string &__byte_err, const wide_string &__wide_err=wide_string())
wide_string from_bytes(const char *__first, const char *__last)
Convert from bytes.
state_type state() const
The final conversion state of the last conversion.
wide_string from_bytes(const char *__ptr)
Convert from bytes.
wstring_convert(_Codecvt *__pcvt, state_type __state)
wide_string from_bytes(const byte_string &__str)
Convert from bytes.
Buffer conversions.
_Wide_streambuf::int_type underflow()
Fetches more data from the controlled sequence.
state_type state() const noexcept
The conversion state following the last conversion.
wbuffer_convert(streambuf *__bytebuf, _Codecvt *__pcvt=new _Codecvt, state_type __state=state_type())
int sync()
Synchronizes the buffer arrays with the controlled sequences.
wbuffer_convert()
Default constructor.
_Wide_streambuf::int_type overflow(typename _Wide_streambuf::int_type __out)
Consumes data from the buffer; writes to the controlled sequence.