|
libstdc++
|
00001 // wstring_convert implementation -*- C++ -*- 00002 00003 // Copyright (C) 2012 Free Software Foundation, Inc. 00004 // 00005 // This file is part of the GNU ISO C++ Library. This library is free 00006 // software; you can redistribute it and/or modify it under the 00007 // terms of the GNU General Public License as published by the 00008 // Free Software Foundation; either version 3, or (at your option) 00009 // any later version. 00010 00011 // This library is distributed in the hope that it will be useful, 00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 // GNU General Public License for more details. 00015 00016 // Under Section 7 of GPL version 3, you are granted additional 00017 // permissions described in the GCC Runtime Library Exception, version 00018 // 3.1, as published by the Free Software Foundation. 00019 00020 // You should have received a copy of the GNU General Public License and 00021 // a copy of the GCC Runtime Library Exception along with this program; 00022 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 00023 // <http://www.gnu.org/licenses/>. 00024 00025 /** @file bits/locale_conv.h 00026 * This is an internal header file, included by other library headers. 00027 * Do not attempt to use it directly. @headername{locale} 00028 */ 00029 00030 #ifndef _LOCALE_CONV_H 00031 #define _LOCALE_CONV_H 1 00032 00033 #if __cplusplus < 201103L 00034 # include <bits/c++0x_warning.h> 00035 #else 00036 00037 #include <streambuf> 00038 #include "stringfwd.h" 00039 #include "allocator.h" 00040 #include "codecvt.h" 00041 #include "unique_ptr.h" 00042 00043 namespace std _GLIBCXX_VISIBILITY(default) 00044 { 00045 _GLIBCXX_BEGIN_NAMESPACE_VERSION 00046 00047 #ifdef _GLIBCXX_USE_WCHAR_T 00048 00049 /** 00050 * @addtogroup locales 00051 * @{ 00052 */ 00053 00054 /// String conversions 00055 template<typename _Codecvt, typename _Elem = wchar_t, 00056 typename _Wide_alloc = allocator<_Elem>, 00057 typename _Byte_alloc = allocator<char>> 00058 class wstring_convert 00059 { 00060 public: 00061 typedef basic_string<char, char_traits<char>, _Byte_alloc> byte_string; 00062 typedef basic_string<_Elem, char_traits<_Elem>, _Wide_alloc> wide_string; 00063 typedef typename _Codecvt::state_type state_type; 00064 typedef typename wide_string::traits_type::int_type int_type; 00065 00066 /** Default constructor. 00067 * 00068 * @param __pcvt The facet to use for conversions. 00069 * 00070 * Takes ownership of @p __pcvt and will delete it in the destructor. 00071 */ 00072 explicit 00073 wstring_convert(_Codecvt* __pcvt = new _Codecvt()) : _M_cvt(__pcvt) 00074 { 00075 if (!_M_cvt) 00076 __throw_logic_error("wstring_convert"); 00077 } 00078 00079 /** Construct with an initial converstion state. 00080 * 00081 * @param __pcvt The facet to use for conversions. 00082 * @param __state Initial conversion state. 00083 * 00084 * Takes ownership of @p __pcvt and will delete it in the destructor. 00085 * The object's conversion state will persist between conversions. 00086 */ 00087 wstring_convert(_Codecvt* __pcvt, state_type __state) 00088 : _M_cvt(__pcvt), _M_state(__state), _M_with_cvtstate(true) 00089 { 00090 if (!_M_cvt) 00091 __throw_logic_error("wstring_convert"); 00092 } 00093 00094 /** Construct with error strings. 00095 * 00096 * @param __byte_err A string to return on failed conversions. 00097 * @param __wide_err A wide string to return on failed conversions. 00098 */ 00099 explicit 00100 wstring_convert(const byte_string& __byte_err, 00101 const wide_string& __wide_err = wide_string()) 00102 : _M_cvt(new _Codecvt), 00103 _M_byte_err_string(__byte_err), _M_wide_err_string(__wide_err), 00104 _M_with_strings(true) 00105 { 00106 if (!_M_cvt) 00107 __throw_logic_error("wstring_convert"); 00108 } 00109 00110 ~wstring_convert() = default; 00111 00112 // _GLIBCXX_RESOLVE_LIB_DEFECTS 00113 // 2176. Special members for wstring_convert and wbuffer_convert 00114 wstring_convert(const wstring_convert&) = delete; 00115 wstring_convert& operator=(const wstring_convert&) = delete; 00116 00117 /// @{ Convert from bytes. 00118 wide_string 00119 from_bytes(char __byte) 00120 { 00121 char __bytes[2] = { __byte }; 00122 return from_bytes(__bytes, __bytes+1); 00123 } 00124 00125 wide_string 00126 from_bytes(const char* __ptr) 00127 { return from_bytes(__ptr, __ptr+char_traits<char>::length(__ptr)); } 00128 00129 wide_string 00130 from_bytes(const byte_string& __str) 00131 { 00132 auto __ptr = __str.data(); 00133 return from_bytes(__ptr, __ptr + __str.size()); 00134 } 00135 00136 wide_string 00137 from_bytes(const char* __first, const char* __last) 00138 { 00139 auto __errstr = _M_with_strings ? &_M_wide_err_string : nullptr; 00140 _ConvFn<char, _Elem> __fn = &_Codecvt::in; 00141 return _M_conv(__first, __last, __errstr, __fn); 00142 } 00143 /// @} 00144 00145 /// @{ Convert to bytes. 00146 byte_string 00147 to_bytes(_Elem __wchar) 00148 { 00149 _Elem __wchars[2] = { __wchar }; 00150 return to_bytes(__wchars, __wchars+1); 00151 } 00152 00153 byte_string 00154 to_bytes(const _Elem* __ptr) 00155 { 00156 return to_bytes(__ptr, __ptr+wide_string::traits_type::length(__ptr)); 00157 } 00158 00159 byte_string 00160 to_bytes(const wide_string& __wstr) 00161 { 00162 auto __ptr = __wstr.data(); 00163 return to_bytes(__ptr, __ptr + __wstr.size()); 00164 } 00165 00166 byte_string 00167 to_bytes(const _Elem* __first, const _Elem* __last) 00168 { 00169 auto __errstr = _M_with_strings ? &_M_byte_err_string : nullptr; 00170 _ConvFn<_Elem, char> __fn = &_Codecvt::out; 00171 return _M_conv(__first, __last, __errstr, __fn); 00172 } 00173 /// @} 00174 00175 // _GLIBCXX_RESOLVE_LIB_DEFECTS 00176 // 2174. wstring_convert::converted() should be noexcept 00177 /// The number of elements successfully converted in the last conversion. 00178 size_t converted() const noexcept { return _M_count; } 00179 00180 /// The final conversion state of the last conversion. 00181 state_type state() const { return _M_state; } 00182 00183 private: 00184 template<typename _InC, typename _OutC> 00185 using _ConvFn 00186 = codecvt_base::result 00187 (_Codecvt::*)(state_type&, const _InC*, const _InC*, const _InC*&, 00188 _OutC*, _OutC*, _OutC*&) const; 00189 00190 template<typename _InChar, typename _OutStr, typename _MemFn> 00191 _OutStr 00192 _M_conv(const _InChar* __first, const _InChar* __last, 00193 const _OutStr* __err, _MemFn __memfn) 00194 { 00195 if (!_M_with_cvtstate) 00196 _M_state = state_type(); 00197 00198 auto __outstr = __err ? _OutStr(__err->get_allocator()) : _OutStr(); 00199 size_t __outchars = 0; 00200 auto __next = __first; 00201 const auto __maxlen = _M_cvt->max_length(); 00202 00203 codecvt_base::result __result; 00204 do 00205 { 00206 __outstr.resize(__outstr.size() + (__last - __next) + __maxlen); 00207 auto __outnext = &__outstr.front() + __outchars; 00208 auto const __outlast = &__outstr.back() + 1; 00209 __result = ((*_M_cvt).*__memfn)(_M_state, __next, __last, __next, 00210 __outnext, __outlast, __outnext); 00211 __outchars = __outnext - &__outstr.front(); 00212 } 00213 while (__result == codecvt_base::partial && __next != __last 00214 && (__outstr.size() - __outchars) < __maxlen); 00215 00216 if (__result == codecvt_base::noconv) 00217 { 00218 __outstr.assign(__first, __last); 00219 _M_count = __outstr.size(); 00220 return __outstr; 00221 } 00222 00223 __outstr.resize(__outchars); 00224 _M_count = __next - __first; 00225 00226 if (__result != codecvt_base::error) 00227 return __outstr; 00228 else if (__err) 00229 return *__err; 00230 else 00231 __throw_range_error("wstring_convert"); 00232 } 00233 00234 unique_ptr<_Codecvt> _M_cvt; 00235 byte_string _M_byte_err_string; 00236 wide_string _M_wide_err_string; 00237 state_type _M_state = state_type(); 00238 size_t _M_count = 0; 00239 bool _M_with_cvtstate = false; 00240 bool _M_with_strings = false; 00241 }; 00242 00243 /// Buffer conversions 00244 template<typename _Codecvt, typename _Elem = wchar_t, 00245 typename _Tr = char_traits<_Elem>> 00246 class wbuffer_convert : public basic_streambuf<_Elem, _Tr> 00247 { 00248 typedef basic_streambuf<_Elem, _Tr> _Wide_streambuf; 00249 00250 public: 00251 typedef typename _Codecvt::state_type state_type; 00252 00253 /** Default constructor. 00254 * 00255 * @param __bytebuf The underlying byte stream buffer. 00256 * @param __pcvt The facet to use for conversions. 00257 * @param __state Initial conversion state. 00258 * 00259 * Takes ownership of @p __pcvt and will delete it in the destructor. 00260 */ 00261 explicit 00262 wbuffer_convert(streambuf* __bytebuf = 0, _Codecvt* __pcvt = new _Codecvt, 00263 state_type __state = state_type()) 00264 : _M_buf(__bytebuf), _M_cvt(__pcvt), _M_state(__state) 00265 { 00266 if (!_M_cvt) 00267 __throw_logic_error("wstring_convert"); 00268 00269 _M_always_noconv = _M_cvt->always_noconv(); 00270 00271 if (_M_buf) 00272 { 00273 this->setp(_M_put_area, _M_put_area + _S_buffer_length); 00274 this->setg(_M_get_area + _S_putback_length, 00275 _M_get_area + _S_putback_length, 00276 _M_get_area + _S_putback_length); 00277 } 00278 } 00279 00280 ~wbuffer_convert() = default; 00281 00282 // _GLIBCXX_RESOLVE_LIB_DEFECTS 00283 // 2176. Special members for wstring_convert and wbuffer_convert 00284 wbuffer_convert(const wbuffer_convert&) = delete; 00285 wbuffer_convert& operator=(const wbuffer_convert&) = delete; 00286 00287 streambuf* rdbuf() const noexcept { return _M_buf; } 00288 00289 streambuf* 00290 rdbuf(streambuf *__bytebuf) noexcept 00291 { 00292 auto __prev = _M_buf; 00293 _M_buf = __bytebuf; 00294 return __prev; 00295 } 00296 00297 /// The conversion state following the last conversion. 00298 state_type state() const noexcept { return _M_state; } 00299 00300 protected: 00301 int 00302 sync() 00303 { return _M_buf && _M_conv_put() && _M_buf->pubsync() ? 0 : -1; } 00304 00305 typename _Wide_streambuf::int_type 00306 overflow(typename _Wide_streambuf::int_type __out) 00307 { 00308 if (!_M_buf || !_M_conv_put()) 00309 return _Tr::eof(); 00310 else if (!_Tr::eq_int_type(__out, _Tr::eof())) 00311 return this->sputc(__out); 00312 return _Tr::not_eof(__out); 00313 } 00314 00315 typename _Wide_streambuf::int_type 00316 underflow() 00317 { 00318 if (!_M_buf) 00319 return _Tr::eof(); 00320 00321 if (this->gptr() < this->egptr() || (_M_buf && _M_conv_get())) 00322 return _Tr::to_int_type(*this->gptr()); 00323 else 00324 return _Tr::eof(); 00325 } 00326 00327 streamsize 00328 xsputn(const typename _Wide_streambuf::char_type* __s, streamsize __n) 00329 { 00330 if (!_M_buf || __n == 0) 00331 return 0; 00332 streamsize __done = 0; 00333 do 00334 { 00335 auto __nn = std::min<streamsize>(this->epptr() - this->pptr(), 00336 __n - __done); 00337 _Tr::copy(this->pptr(), __s + __done, __nn); 00338 this->pbump(__nn); 00339 __done += __nn; 00340 } while (__done < __n && _M_conv_put()); 00341 return __done; 00342 } 00343 00344 private: 00345 // fill the get area from converted contents of the byte stream buffer 00346 bool 00347 _M_conv_get() 00348 { 00349 const streamsize __pb1 = this->gptr() - this->eback(); 00350 const streamsize __pb2 = _S_putback_length; 00351 const streamsize __npb = std::min(__pb1, __pb2); 00352 00353 _Tr::move(_M_get_area + _S_putback_length - __npb, 00354 this->gptr() - __npb, __npb); 00355 00356 streamsize __nbytes = sizeof(_M_get_buf) - _M_unconv; 00357 __nbytes = std::min(__nbytes, _M_buf->in_avail()); 00358 if (__nbytes < 1) 00359 __nbytes == 1; 00360 __nbytes = _M_buf->sgetn(_M_get_buf + _M_unconv, __nbytes); 00361 if (__nbytes < 1) 00362 return false; 00363 __nbytes += _M_unconv; 00364 00365 // convert _M_get_buf into _M_get_area 00366 00367 _Elem* __outbuf = _M_get_area + _S_putback_length; 00368 _Elem* __outnext = __outbuf; 00369 const char* __bnext = _M_get_buf; 00370 00371 codecvt_base::result __result; 00372 if (_M_always_noconv) 00373 __result = codecvt_base::noconv; 00374 else 00375 { 00376 _Elem* __outend = _M_get_area + _S_buffer_length; 00377 00378 __result = _M_cvt->in(_M_state, 00379 __bnext, __bnext + __nbytes, __bnext, 00380 __outbuf, __outend, __outnext); 00381 } 00382 00383 if (__result == codecvt_base::noconv) 00384 { 00385 // cast is safe because noconv means _Elem is same type as char 00386 auto __get_buf = reinterpret_cast<const _Elem*>(_M_get_buf); 00387 _Tr::copy(__outbuf, __get_buf, __nbytes); 00388 _M_unconv = 0; 00389 return true; 00390 } 00391 00392 if ((_M_unconv = _M_get_buf + __nbytes - __bnext)) 00393 char_traits<char>::move(_M_get_buf, __bnext, _M_unconv); 00394 00395 this->setg(__outbuf, __outbuf, __outnext); 00396 00397 return __result != codecvt_base::error; 00398 } 00399 00400 // unused 00401 bool 00402 _M_put(...) 00403 { return false; } 00404 00405 bool 00406 _M_put(const char* __p, streamsize __n) 00407 { 00408 if (_M_buf->sputn(__p, __n) < __n) 00409 return false; 00410 } 00411 00412 // convert the put area and write to the byte stream buffer 00413 bool 00414 _M_conv_put() 00415 { 00416 _Elem* const __first = this->pbase(); 00417 const _Elem* const __last = this->pptr(); 00418 const streamsize __pending = __last - __first; 00419 00420 if (_M_always_noconv) 00421 return _M_put(__first, __pending); 00422 00423 char __outbuf[2 * _S_buffer_length]; 00424 00425 const _Elem* __next = __first; 00426 const _Elem* __start; 00427 do 00428 { 00429 __start = __next; 00430 char* __outnext = __outbuf; 00431 char* const __outlast = __outbuf + sizeof(__outbuf); 00432 auto __result = _M_cvt->out(_M_state, __next, __last, __next, 00433 __outnext, __outlast, __outnext); 00434 if (__result == codecvt_base::error) 00435 return false; 00436 else if (__result == codecvt_base::noconv) 00437 return _M_put(__next, __pending); 00438 00439 if (!_M_put(__outbuf, __outnext - __outbuf)) 00440 return false; 00441 } 00442 while (__next != __last && __next != __start); 00443 00444 if (__next != __last) 00445 _Tr::move(__first, __next, __last - __next); 00446 00447 this->pbump(__first - __next); 00448 return __next != __first; 00449 } 00450 00451 streambuf* _M_buf; 00452 unique_ptr<_Codecvt> _M_cvt; 00453 state_type _M_state; 00454 00455 static const streamsize _S_buffer_length = 32; 00456 static const streamsize _S_putback_length = 3; 00457 _Elem _M_put_area[_S_buffer_length]; 00458 _Elem _M_get_area[_S_buffer_length]; 00459 streamsize _M_unconv = 0; 00460 char _M_get_buf[_S_buffer_length-_S_putback_length]; 00461 bool _M_always_noconv; 00462 }; 00463 00464 /// @} group locales 00465 00466 #endif // _GLIBCXX_USE_WCHAR_T 00467 00468 _GLIBCXX_END_NAMESPACE_VERSION 00469 } // namespace 00470 00471 #endif // __cplusplus 00472 00473 #endif /* _LOCALE_CONV_H */