|
libstdc++
|
00001 // wstring_convert implementation -*- C++ -*- 00002 00003 // Copyright (C) 2015-2019 Free Software Foundation, Inc. 00004 // 00005 // This file is part of the GNU ISO C++ Library. This library is free 00006 // software; you can redistribute it and/or modify it under the 00007 // terms of the GNU General Public License as published by the 00008 // Free Software Foundation; either version 3, or (at your option) 00009 // any later version. 00010 00011 // This library is distributed in the hope that it will be useful, 00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 // GNU General Public License for more details. 00015 00016 // Under Section 7 of GPL version 3, you are granted additional 00017 // permissions described in the GCC Runtime Library Exception, version 00018 // 3.1, as published by the Free Software Foundation. 00019 00020 // You should have received a copy of the GNU General Public License and 00021 // a copy of the GCC Runtime Library Exception along with this program; 00022 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 00023 // <http://www.gnu.org/licenses/>. 00024 00025 /** @file bits/locale_conv.h 00026 * This is an internal header file, included by other library headers. 00027 * Do not attempt to use it directly. @headername{locale} 00028 */ 00029 00030 #ifndef _LOCALE_CONV_H 00031 #define _LOCALE_CONV_H 1 00032 00033 #if __cplusplus < 201103L 00034 # include <bits/c++0x_warning.h> 00035 #else 00036 00037 #include <streambuf> 00038 #include <bits/stringfwd.h> 00039 #include <bits/allocator.h> 00040 #include <bits/codecvt.h> 00041 #include <bits/unique_ptr.h> 00042 00043 namespace std _GLIBCXX_VISIBILITY(default) 00044 { 00045 _GLIBCXX_BEGIN_NAMESPACE_VERSION 00046 00047 /** 00048 * @addtogroup locales 00049 * @{ 00050 */ 00051 00052 template<typename _OutStr, typename _InChar, typename _Codecvt, 00053 typename _State, typename _Fn> 00054 bool 00055 __do_str_codecvt(const _InChar* __first, const _InChar* __last, 00056 _OutStr& __outstr, const _Codecvt& __cvt, _State& __state, 00057 size_t& __count, _Fn __fn) 00058 { 00059 if (__first == __last) 00060 { 00061 __outstr.clear(); 00062 __count = 0; 00063 return true; 00064 } 00065 00066 size_t __outchars = 0; 00067 auto __next = __first; 00068 const auto __maxlen = __cvt.max_length() + 1; 00069 00070 codecvt_base::result __result; 00071 do 00072 { 00073 __outstr.resize(__outstr.size() + (__last - __next) * __maxlen); 00074 auto __outnext = &__outstr.front() + __outchars; 00075 auto const __outlast = &__outstr.back() + 1; 00076 __result = (__cvt.*__fn)(__state, __next, __last, __next, 00077 __outnext, __outlast, __outnext); 00078 __outchars = __outnext - &__outstr.front(); 00079 } 00080 while (__result == codecvt_base::partial && __next != __last 00081 && (__outstr.size() - __outchars) < __maxlen); 00082 00083 if (__result == codecvt_base::error) 00084 { 00085 __count = __next - __first; 00086 return false; 00087 } 00088 00089 // The codecvt facet will only return noconv when the types are 00090 // the same, so avoid instantiating basic_string::assign otherwise 00091 if _GLIBCXX17_CONSTEXPR (is_same<typename _Codecvt::intern_type, 00092 typename _Codecvt::extern_type>()) 00093 if (__result == codecvt_base::noconv) 00094 { 00095 __outstr.assign(__first, __last); 00096 __count = __last - __first; 00097 return true; 00098 } 00099 00100 __outstr.resize(__outchars); 00101 __count = __next - __first; 00102 return true; 00103 } 00104 00105 // Convert narrow character string to wide. 00106 template<typename _CharT, typename _Traits, typename _Alloc, typename _State> 00107 inline bool 00108 __str_codecvt_in(const char* __first, const char* __last, 00109 basic_string<_CharT, _Traits, _Alloc>& __outstr, 00110 const codecvt<_CharT, char, _State>& __cvt, 00111 _State& __state, size_t& __count) 00112 { 00113 using _Codecvt = codecvt<_CharT, char, _State>; 00114 using _ConvFn 00115 = codecvt_base::result 00116 (_Codecvt::*)(_State&, const char*, const char*, const char*&, 00117 _CharT*, _CharT*, _CharT*&) const; 00118 _ConvFn __fn = &codecvt<_CharT, char, _State>::in; 00119 return __do_str_codecvt(__first, __last, __outstr, __cvt, __state, 00120 __count, __fn); 00121 } 00122 00123 // As above, but with no __count parameter 00124 template<typename _CharT, typename _Traits, typename _Alloc, typename _State> 00125 inline bool 00126 __str_codecvt_in(const char* __first, const char* __last, 00127 basic_string<_CharT, _Traits, _Alloc>& __outstr, 00128 const codecvt<_CharT, char, _State>& __cvt) 00129 { 00130 _State __state = {}; 00131 size_t __n; 00132 return __str_codecvt_in(__first, __last, __outstr, __cvt, __state, __n); 00133 } 00134 00135 // As above, but returns false for partial conversion 00136 template<typename _CharT, typename _Traits, typename _Alloc, typename _State> 00137 inline bool 00138 __str_codecvt_in_all(const char* __first, const char* __last, 00139 basic_string<_CharT, _Traits, _Alloc>& __outstr, 00140 const codecvt<_CharT, char, _State>& __cvt) 00141 { 00142 _State __state = {}; 00143 size_t __n; 00144 return __str_codecvt_in(__first, __last, __outstr, __cvt, __state, __n) 00145 && (__n == (__last - __first)); 00146 } 00147 00148 // Convert wide character string to narrow. 00149 template<typename _CharT, typename _Traits, typename _Alloc, typename _State> 00150 inline bool 00151 __str_codecvt_out(const _CharT* __first, const _CharT* __last, 00152 basic_string<char, _Traits, _Alloc>& __outstr, 00153 const codecvt<_CharT, char, _State>& __cvt, 00154 _State& __state, size_t& __count) 00155 { 00156 using _Codecvt = codecvt<_CharT, char, _State>; 00157 using _ConvFn 00158 = codecvt_base::result 00159 (_Codecvt::*)(_State&, const _CharT*, const _CharT*, const _CharT*&, 00160 char*, char*, char*&) const; 00161 _ConvFn __fn = &codecvt<_CharT, char, _State>::out; 00162 return __do_str_codecvt(__first, __last, __outstr, __cvt, __state, 00163 __count, __fn); 00164 } 00165 00166 // As above, but with no __count parameter 00167 template<typename _CharT, typename _Traits, typename _Alloc, typename _State> 00168 inline bool 00169 __str_codecvt_out(const _CharT* __first, const _CharT* __last, 00170 basic_string<char, _Traits, _Alloc>& __outstr, 00171 const codecvt<_CharT, char, _State>& __cvt) 00172 { 00173 _State __state = {}; 00174 size_t __n; 00175 return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n); 00176 } 00177 00178 // As above, but returns false for partial conversions 00179 template<typename _CharT, typename _Traits, typename _Alloc, typename _State> 00180 inline bool 00181 __str_codecvt_out_all(const _CharT* __first, const _CharT* __last, 00182 basic_string<char, _Traits, _Alloc>& __outstr, 00183 const codecvt<_CharT, char, _State>& __cvt) 00184 { 00185 _State __state = {}; 00186 size_t __n; 00187 return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n) 00188 && (__n == (__last - __first)); 00189 } 00190 00191 #ifdef _GLIBCXX_USE_CHAR8_T 00192 00193 // Convert wide character string to narrow. 00194 template<typename _CharT, typename _Traits, typename _Alloc, typename _State> 00195 inline bool 00196 __str_codecvt_out(const _CharT* __first, const _CharT* __last, 00197 basic_string<char8_t, _Traits, _Alloc>& __outstr, 00198 const codecvt<_CharT, char8_t, _State>& __cvt, 00199 _State& __state, size_t& __count) 00200 { 00201 using _Codecvt = codecvt<_CharT, char8_t, _State>; 00202 using _ConvFn 00203 = codecvt_base::result 00204 (_Codecvt::*)(_State&, const _CharT*, const _CharT*, const _CharT*&, 00205 char8_t*, char8_t*, char8_t*&) const; 00206 _ConvFn __fn = &codecvt<_CharT, char8_t, _State>::out; 00207 return __do_str_codecvt(__first, __last, __outstr, __cvt, __state, 00208 __count, __fn); 00209 } 00210 00211 template<typename _CharT, typename _Traits, typename _Alloc, typename _State> 00212 inline bool 00213 __str_codecvt_out(const _CharT* __first, const _CharT* __last, 00214 basic_string<char8_t, _Traits, _Alloc>& __outstr, 00215 const codecvt<_CharT, char8_t, _State>& __cvt) 00216 { 00217 _State __state = {}; 00218 size_t __n; 00219 return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n); 00220 } 00221 00222 #endif // _GLIBCXX_USE_CHAR8_T 00223 00224 #ifdef _GLIBCXX_USE_WCHAR_T 00225 00226 _GLIBCXX_BEGIN_NAMESPACE_CXX11 00227 00228 /// String conversions 00229 template<typename _Codecvt, typename _Elem = wchar_t, 00230 typename _Wide_alloc = allocator<_Elem>, 00231 typename _Byte_alloc = allocator<char>> 00232 class wstring_convert 00233 { 00234 public: 00235 typedef basic_string<char, char_traits<char>, _Byte_alloc> byte_string; 00236 typedef basic_string<_Elem, char_traits<_Elem>, _Wide_alloc> wide_string; 00237 typedef typename _Codecvt::state_type state_type; 00238 typedef typename wide_string::traits_type::int_type int_type; 00239 00240 /// Default constructor. 00241 wstring_convert() : _M_cvt(new _Codecvt()) { } 00242 00243 /** Constructor. 00244 * 00245 * @param __pcvt The facet to use for conversions. 00246 * 00247 * Takes ownership of @p __pcvt and will delete it in the destructor. 00248 */ 00249 explicit 00250 wstring_convert(_Codecvt* __pcvt) : _M_cvt(__pcvt) 00251 { 00252 if (!_M_cvt) 00253 __throw_logic_error("wstring_convert"); 00254 } 00255 00256 /** Construct with an initial converstion state. 00257 * 00258 * @param __pcvt The facet to use for conversions. 00259 * @param __state Initial conversion state. 00260 * 00261 * Takes ownership of @p __pcvt and will delete it in the destructor. 00262 * The object's conversion state will persist between conversions. 00263 */ 00264 wstring_convert(_Codecvt* __pcvt, state_type __state) 00265 : _M_cvt(__pcvt), _M_state(__state), _M_with_cvtstate(true) 00266 { 00267 if (!_M_cvt) 00268 __throw_logic_error("wstring_convert"); 00269 } 00270 00271 /** Construct with error strings. 00272 * 00273 * @param __byte_err A string to return on failed conversions. 00274 * @param __wide_err A wide string to return on failed conversions. 00275 */ 00276 explicit 00277 wstring_convert(const byte_string& __byte_err, 00278 const wide_string& __wide_err = wide_string()) 00279 : _M_cvt(new _Codecvt), 00280 _M_byte_err_string(__byte_err), _M_wide_err_string(__wide_err), 00281 _M_with_strings(true) 00282 { 00283 if (!_M_cvt) 00284 __throw_logic_error("wstring_convert"); 00285 } 00286 00287 ~wstring_convert() = default; 00288 00289 // _GLIBCXX_RESOLVE_LIB_DEFECTS 00290 // 2176. Special members for wstring_convert and wbuffer_convert 00291 wstring_convert(const wstring_convert&) = delete; 00292 wstring_convert& operator=(const wstring_convert&) = delete; 00293 00294 /// @{ Convert from bytes. 00295 wide_string 00296 from_bytes(char __byte) 00297 { 00298 char __bytes[2] = { __byte }; 00299 return from_bytes(__bytes, __bytes+1); 00300 } 00301 00302 wide_string 00303 from_bytes(const char* __ptr) 00304 { return from_bytes(__ptr, __ptr+char_traits<char>::length(__ptr)); } 00305 00306 wide_string 00307 from_bytes(const byte_string& __str) 00308 { 00309 auto __ptr = __str.data(); 00310 return from_bytes(__ptr, __ptr + __str.size()); 00311 } 00312 00313 wide_string 00314 from_bytes(const char* __first, const char* __last) 00315 { 00316 if (!_M_with_cvtstate) 00317 _M_state = state_type(); 00318 wide_string __out{ _M_wide_err_string.get_allocator() }; 00319 if (__str_codecvt_in(__first, __last, __out, *_M_cvt, _M_state, 00320 _M_count)) 00321 return __out; 00322 if (_M_with_strings) 00323 return _M_wide_err_string; 00324 __throw_range_error("wstring_convert::from_bytes"); 00325 } 00326 /// @} 00327 00328 /// @{ Convert to bytes. 00329 byte_string 00330 to_bytes(_Elem __wchar) 00331 { 00332 _Elem __wchars[2] = { __wchar }; 00333 return to_bytes(__wchars, __wchars+1); 00334 } 00335 00336 byte_string 00337 to_bytes(const _Elem* __ptr) 00338 { 00339 return to_bytes(__ptr, __ptr+wide_string::traits_type::length(__ptr)); 00340 } 00341 00342 byte_string 00343 to_bytes(const wide_string& __wstr) 00344 { 00345 auto __ptr = __wstr.data(); 00346 return to_bytes(__ptr, __ptr + __wstr.size()); 00347 } 00348 00349 byte_string 00350 to_bytes(const _Elem* __first, const _Elem* __last) 00351 { 00352 if (!_M_with_cvtstate) 00353 _M_state = state_type(); 00354 byte_string __out{ _M_byte_err_string.get_allocator() }; 00355 if (__str_codecvt_out(__first, __last, __out, *_M_cvt, _M_state, 00356 _M_count)) 00357 return __out; 00358 if (_M_with_strings) 00359 return _M_byte_err_string; 00360 __throw_range_error("wstring_convert::to_bytes"); 00361 } 00362 /// @} 00363 00364 // _GLIBCXX_RESOLVE_LIB_DEFECTS 00365 // 2174. wstring_convert::converted() should be noexcept 00366 /// The number of elements successfully converted in the last conversion. 00367 size_t converted() const noexcept { return _M_count; } 00368 00369 /// The final conversion state of the last conversion. 00370 state_type state() const { return _M_state; } 00371 00372 private: 00373 unique_ptr<_Codecvt> _M_cvt; 00374 byte_string _M_byte_err_string; 00375 wide_string _M_wide_err_string; 00376 state_type _M_state = state_type(); 00377 size_t _M_count = 0; 00378 bool _M_with_cvtstate = false; 00379 bool _M_with_strings = false; 00380 }; 00381 00382 _GLIBCXX_END_NAMESPACE_CXX11 00383 00384 /// Buffer conversions 00385 template<typename _Codecvt, typename _Elem = wchar_t, 00386 typename _Tr = char_traits<_Elem>> 00387 class wbuffer_convert : public basic_streambuf<_Elem, _Tr> 00388 { 00389 typedef basic_streambuf<_Elem, _Tr> _Wide_streambuf; 00390 00391 public: 00392 typedef typename _Codecvt::state_type state_type; 00393 00394 /// Default constructor. 00395 wbuffer_convert() : wbuffer_convert(nullptr) { } 00396 00397 /** Constructor. 00398 * 00399 * @param __bytebuf The underlying byte stream buffer. 00400 * @param __pcvt The facet to use for conversions. 00401 * @param __state Initial conversion state. 00402 * 00403 * Takes ownership of @p __pcvt and will delete it in the destructor. 00404 */ 00405 explicit 00406 wbuffer_convert(streambuf* __bytebuf, _Codecvt* __pcvt = new _Codecvt, 00407 state_type __state = state_type()) 00408 : _M_buf(__bytebuf), _M_cvt(__pcvt), _M_state(__state) 00409 { 00410 if (!_M_cvt) 00411 __throw_logic_error("wbuffer_convert"); 00412 00413 _M_always_noconv = _M_cvt->always_noconv(); 00414 00415 if (_M_buf) 00416 { 00417 this->setp(_M_put_area, _M_put_area + _S_buffer_length); 00418 this->setg(_M_get_area + _S_putback_length, 00419 _M_get_area + _S_putback_length, 00420 _M_get_area + _S_putback_length); 00421 } 00422 } 00423 00424 ~wbuffer_convert() = default; 00425 00426 // _GLIBCXX_RESOLVE_LIB_DEFECTS 00427 // 2176. Special members for wstring_convert and wbuffer_convert 00428 wbuffer_convert(const wbuffer_convert&) = delete; 00429 wbuffer_convert& operator=(const wbuffer_convert&) = delete; 00430 00431 streambuf* rdbuf() const noexcept { return _M_buf; } 00432 00433 streambuf* 00434 rdbuf(streambuf *__bytebuf) noexcept 00435 { 00436 auto __prev = _M_buf; 00437 _M_buf = __bytebuf; 00438 return __prev; 00439 } 00440 00441 /// The conversion state following the last conversion. 00442 state_type state() const noexcept { return _M_state; } 00443 00444 protected: 00445 int 00446 sync() 00447 { return _M_buf && _M_conv_put() && !_M_buf->pubsync() ? 0 : -1; } 00448 00449 typename _Wide_streambuf::int_type 00450 overflow(typename _Wide_streambuf::int_type __out) 00451 { 00452 if (!_M_buf || !_M_conv_put()) 00453 return _Tr::eof(); 00454 else if (!_Tr::eq_int_type(__out, _Tr::eof())) 00455 return this->sputc(__out); 00456 return _Tr::not_eof(__out); 00457 } 00458 00459 typename _Wide_streambuf::int_type 00460 underflow() 00461 { 00462 if (!_M_buf) 00463 return _Tr::eof(); 00464 00465 if (this->gptr() < this->egptr() || (_M_buf && _M_conv_get())) 00466 return _Tr::to_int_type(*this->gptr()); 00467 else 00468 return _Tr::eof(); 00469 } 00470 00471 streamsize 00472 xsputn(const typename _Wide_streambuf::char_type* __s, streamsize __n) 00473 { 00474 if (!_M_buf || __n == 0) 00475 return 0; 00476 streamsize __done = 0; 00477 do 00478 { 00479 auto __nn = std::min<streamsize>(this->epptr() - this->pptr(), 00480 __n - __done); 00481 _Tr::copy(this->pptr(), __s + __done, __nn); 00482 this->pbump(__nn); 00483 __done += __nn; 00484 } while (__done < __n && _M_conv_put()); 00485 return __done; 00486 } 00487 00488 private: 00489 // fill the get area from converted contents of the byte stream buffer 00490 bool 00491 _M_conv_get() 00492 { 00493 const streamsize __pb1 = this->gptr() - this->eback(); 00494 const streamsize __pb2 = _S_putback_length; 00495 const streamsize __npb = std::min(__pb1, __pb2); 00496 00497 _Tr::move(_M_get_area + _S_putback_length - __npb, 00498 this->gptr() - __npb, __npb); 00499 00500 streamsize __nbytes = sizeof(_M_get_buf) - _M_unconv; 00501 __nbytes = std::min(__nbytes, _M_buf->in_avail()); 00502 if (__nbytes < 1) 00503 __nbytes = 1; 00504 __nbytes = _M_buf->sgetn(_M_get_buf + _M_unconv, __nbytes); 00505 if (__nbytes < 1) 00506 return false; 00507 __nbytes += _M_unconv; 00508 00509 // convert _M_get_buf into _M_get_area 00510 00511 _Elem* __outbuf = _M_get_area + _S_putback_length; 00512 _Elem* __outnext = __outbuf; 00513 const char* __bnext = _M_get_buf; 00514 00515 codecvt_base::result __result; 00516 if (_M_always_noconv) 00517 __result = codecvt_base::noconv; 00518 else 00519 { 00520 _Elem* __outend = _M_get_area + _S_buffer_length; 00521 00522 __result = _M_cvt->in(_M_state, 00523 __bnext, __bnext + __nbytes, __bnext, 00524 __outbuf, __outend, __outnext); 00525 } 00526 00527 if (__result == codecvt_base::noconv) 00528 { 00529 // cast is safe because noconv means _Elem is same type as char 00530 auto __get_buf = reinterpret_cast<const _Elem*>(_M_get_buf); 00531 _Tr::copy(__outbuf, __get_buf, __nbytes); 00532 _M_unconv = 0; 00533 return true; 00534 } 00535 00536 if ((_M_unconv = _M_get_buf + __nbytes - __bnext)) 00537 char_traits<char>::move(_M_get_buf, __bnext, _M_unconv); 00538 00539 this->setg(__outbuf, __outbuf, __outnext); 00540 00541 return __result != codecvt_base::error; 00542 } 00543 00544 // unused 00545 bool 00546 _M_put(...) 00547 { return false; } 00548 00549 bool 00550 _M_put(const char* __p, streamsize __n) 00551 { 00552 if (_M_buf->sputn(__p, __n) < __n) 00553 return false; 00554 return true; 00555 } 00556 00557 // convert the put area and write to the byte stream buffer 00558 bool 00559 _M_conv_put() 00560 { 00561 _Elem* const __first = this->pbase(); 00562 const _Elem* const __last = this->pptr(); 00563 const streamsize __pending = __last - __first; 00564 00565 if (_M_always_noconv) 00566 return _M_put(__first, __pending); 00567 00568 char __outbuf[2 * _S_buffer_length]; 00569 00570 const _Elem* __next = __first; 00571 const _Elem* __start; 00572 do 00573 { 00574 __start = __next; 00575 char* __outnext = __outbuf; 00576 char* const __outlast = __outbuf + sizeof(__outbuf); 00577 auto __result = _M_cvt->out(_M_state, __next, __last, __next, 00578 __outnext, __outlast, __outnext); 00579 if (__result == codecvt_base::error) 00580 return false; 00581 else if (__result == codecvt_base::noconv) 00582 return _M_put(__next, __pending); 00583 00584 if (!_M_put(__outbuf, __outnext - __outbuf)) 00585 return false; 00586 } 00587 while (__next != __last && __next != __start); 00588 00589 if (__next != __last) 00590 _Tr::move(__first, __next, __last - __next); 00591 00592 this->pbump(__first - __next); 00593 return __next != __first; 00594 } 00595 00596 streambuf* _M_buf; 00597 unique_ptr<_Codecvt> _M_cvt; 00598 state_type _M_state; 00599 00600 static const streamsize _S_buffer_length = 32; 00601 static const streamsize _S_putback_length = 3; 00602 _Elem _M_put_area[_S_buffer_length]; 00603 _Elem _M_get_area[_S_buffer_length]; 00604 streamsize _M_unconv = 0; 00605 char _M_get_buf[_S_buffer_length-_S_putback_length]; 00606 bool _M_always_noconv; 00607 }; 00608 00609 #endif // _GLIBCXX_USE_WCHAR_T 00610 00611 /// @} group locales 00612 00613 _GLIBCXX_END_NAMESPACE_VERSION 00614 } // namespace 00615 00616 #endif // __cplusplus 00617 00618 #endif /* _LOCALE_CONV_H */