libstdc++
locale_conv.h
Go to the documentation of this file.
00001 // wstring_convert implementation -*- C++ -*-
00002 
00003 // Copyright (C) 2015-2019 Free Software Foundation, Inc.
00004 //
00005 // This file is part of the GNU ISO C++ Library.  This library is free
00006 // software; you can redistribute it and/or modify it under the
00007 // terms of the GNU General Public License as published by the
00008 // Free Software Foundation; either version 3, or (at your option)
00009 // any later version.
00010 
00011 // This library is distributed in the hope that it will be useful,
00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014 // GNU General Public License for more details.
00015 
00016 // Under Section 7 of GPL version 3, you are granted additional
00017 // permissions described in the GCC Runtime Library Exception, version
00018 // 3.1, as published by the Free Software Foundation.
00019 
00020 // You should have received a copy of the GNU General Public License and
00021 // a copy of the GCC Runtime Library Exception along with this program;
00022 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
00023 // <http://www.gnu.org/licenses/>.
00024 
00025 /** @file bits/locale_conv.h
00026  *  This is an internal header file, included by other library headers.
00027  *  Do not attempt to use it directly. @headername{locale}
00028  */
00029 
00030 #ifndef _LOCALE_CONV_H
00031 #define _LOCALE_CONV_H 1
00032 
00033 #if __cplusplus < 201103L
00034 # include <bits/c++0x_warning.h>
00035 #else
00036 
00037 #include <streambuf>
00038 #include <bits/stringfwd.h>
00039 #include <bits/allocator.h>
00040 #include <bits/codecvt.h>
00041 #include <bits/unique_ptr.h>
00042 
00043 namespace std _GLIBCXX_VISIBILITY(default)
00044 {
00045 _GLIBCXX_BEGIN_NAMESPACE_VERSION
00046 
00047   /**
00048    * @addtogroup locales
00049    * @{
00050    */
00051 
00052   template<typename _OutStr, typename _InChar, typename _Codecvt,
00053            typename _State, typename _Fn>
00054     bool
00055     __do_str_codecvt(const _InChar* __first, const _InChar* __last,
00056                      _OutStr& __outstr, const _Codecvt& __cvt, _State& __state,
00057                      size_t& __count, _Fn __fn)
00058     {
00059       if (__first == __last)
00060         {
00061           __outstr.clear();
00062           __count = 0;
00063           return true;
00064         }
00065 
00066       size_t __outchars = 0;
00067       auto __next = __first;
00068       const auto __maxlen = __cvt.max_length() + 1;
00069 
00070       codecvt_base::result __result;
00071       do
00072         {
00073           __outstr.resize(__outstr.size() + (__last - __next) * __maxlen);
00074           auto __outnext = &__outstr.front() + __outchars;
00075           auto const __outlast = &__outstr.back() + 1;
00076           __result = (__cvt.*__fn)(__state, __next, __last, __next,
00077                                         __outnext, __outlast, __outnext);
00078           __outchars = __outnext - &__outstr.front();
00079         }
00080       while (__result == codecvt_base::partial && __next != __last
00081              && (__outstr.size() - __outchars) < __maxlen);
00082 
00083       if (__result == codecvt_base::error)
00084         {
00085           __count = __next - __first;
00086           return false;
00087         }
00088 
00089       // The codecvt facet will only return noconv when the types are
00090       // the same, so avoid instantiating basic_string::assign otherwise
00091       if _GLIBCXX17_CONSTEXPR (is_same<typename _Codecvt::intern_type,
00092                                        typename _Codecvt::extern_type>())
00093         if (__result == codecvt_base::noconv)
00094           {
00095             __outstr.assign(__first, __last);
00096             __count = __last - __first;
00097             return true;
00098           }
00099 
00100       __outstr.resize(__outchars);
00101       __count = __next - __first;
00102       return true;
00103     }
00104 
00105   // Convert narrow character string to wide.
00106   template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
00107     inline bool
00108     __str_codecvt_in(const char* __first, const char* __last,
00109                      basic_string<_CharT, _Traits, _Alloc>& __outstr,
00110                      const codecvt<_CharT, char, _State>& __cvt,
00111                      _State& __state, size_t& __count)
00112     {
00113       using _Codecvt = codecvt<_CharT, char, _State>;
00114       using _ConvFn
00115         = codecvt_base::result
00116           (_Codecvt::*)(_State&, const char*, const char*, const char*&,
00117                         _CharT*, _CharT*, _CharT*&) const;
00118       _ConvFn __fn = &codecvt<_CharT, char, _State>::in;
00119       return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
00120                               __count, __fn);
00121     }
00122 
00123   // As above, but with no __count parameter
00124   template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
00125     inline bool
00126     __str_codecvt_in(const char* __first, const char* __last,
00127                      basic_string<_CharT, _Traits, _Alloc>& __outstr,
00128                      const codecvt<_CharT, char, _State>& __cvt)
00129     {
00130       _State __state = {};
00131       size_t __n;
00132       return __str_codecvt_in(__first, __last, __outstr, __cvt, __state, __n);
00133     }
00134 
00135   // As above, but returns false for partial conversion
00136   template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
00137     inline bool
00138     __str_codecvt_in_all(const char* __first, const char* __last,
00139                          basic_string<_CharT, _Traits, _Alloc>& __outstr,
00140                          const codecvt<_CharT, char, _State>& __cvt)
00141     {
00142       _State __state = {};
00143       size_t __n;
00144       return __str_codecvt_in(__first, __last, __outstr, __cvt, __state, __n)
00145         && (__n == (__last - __first));
00146     }
00147 
00148   // Convert wide character string to narrow.
00149   template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
00150     inline bool
00151     __str_codecvt_out(const _CharT* __first, const _CharT* __last,
00152                       basic_string<char, _Traits, _Alloc>& __outstr,
00153                       const codecvt<_CharT, char, _State>& __cvt,
00154                       _State& __state, size_t& __count)
00155     {
00156       using _Codecvt = codecvt<_CharT, char, _State>;
00157       using _ConvFn
00158         = codecvt_base::result
00159           (_Codecvt::*)(_State&, const _CharT*, const _CharT*, const _CharT*&,
00160                         char*, char*, char*&) const;
00161       _ConvFn __fn = &codecvt<_CharT, char, _State>::out;
00162       return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
00163                               __count, __fn);
00164     }
00165 
00166   // As above, but with no __count parameter
00167   template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
00168     inline bool
00169     __str_codecvt_out(const _CharT* __first, const _CharT* __last,
00170                       basic_string<char, _Traits, _Alloc>& __outstr,
00171                       const codecvt<_CharT, char, _State>& __cvt)
00172     {
00173       _State __state = {};
00174       size_t __n;
00175       return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n);
00176     }
00177 
00178   // As above, but returns false for partial conversions
00179   template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
00180     inline bool
00181     __str_codecvt_out_all(const _CharT* __first, const _CharT* __last,
00182                           basic_string<char, _Traits, _Alloc>& __outstr,
00183                           const codecvt<_CharT, char, _State>& __cvt)
00184     {
00185       _State __state = {};
00186       size_t __n;
00187       return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n)
00188         && (__n == (__last - __first));
00189     }
00190 
00191 #ifdef _GLIBCXX_USE_CHAR8_T
00192 
00193   // Convert wide character string to narrow.
00194   template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
00195     inline bool
00196     __str_codecvt_out(const _CharT* __first, const _CharT* __last,
00197                       basic_string<char8_t, _Traits, _Alloc>& __outstr,
00198                       const codecvt<_CharT, char8_t, _State>& __cvt,
00199                       _State& __state, size_t& __count)
00200     {
00201       using _Codecvt = codecvt<_CharT, char8_t, _State>;
00202       using _ConvFn
00203         = codecvt_base::result
00204           (_Codecvt::*)(_State&, const _CharT*, const _CharT*, const _CharT*&,
00205                         char8_t*, char8_t*, char8_t*&) const;
00206       _ConvFn __fn = &codecvt<_CharT, char8_t, _State>::out;
00207       return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
00208                               __count, __fn);
00209     }
00210 
00211   template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
00212     inline bool
00213     __str_codecvt_out(const _CharT* __first, const _CharT* __last,
00214                       basic_string<char8_t, _Traits, _Alloc>& __outstr,
00215                       const codecvt<_CharT, char8_t, _State>& __cvt)
00216     {
00217       _State __state = {};
00218       size_t __n;
00219       return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n);
00220     }
00221 
00222 #endif  // _GLIBCXX_USE_CHAR8_T
00223 
00224 #ifdef _GLIBCXX_USE_WCHAR_T
00225 
00226 _GLIBCXX_BEGIN_NAMESPACE_CXX11
00227 
00228   /// String conversions
00229   template<typename _Codecvt, typename _Elem = wchar_t,
00230            typename _Wide_alloc = allocator<_Elem>,
00231            typename _Byte_alloc = allocator<char>>
00232     class wstring_convert
00233     {
00234     public:
00235       typedef basic_string<char, char_traits<char>, _Byte_alloc>   byte_string;
00236       typedef basic_string<_Elem, char_traits<_Elem>, _Wide_alloc> wide_string;
00237       typedef typename _Codecvt::state_type                        state_type;
00238       typedef typename wide_string::traits_type::int_type          int_type;
00239 
00240       /// Default constructor.
00241       wstring_convert() : _M_cvt(new _Codecvt()) { }
00242 
00243       /** Constructor.
00244        *
00245        * @param  __pcvt The facet to use for conversions.
00246        *
00247        * Takes ownership of @p __pcvt and will delete it in the destructor.
00248        */
00249       explicit
00250       wstring_convert(_Codecvt* __pcvt) : _M_cvt(__pcvt)
00251       {
00252         if (!_M_cvt)
00253           __throw_logic_error("wstring_convert");
00254       }
00255 
00256       /** Construct with an initial converstion state.
00257        *
00258        * @param  __pcvt The facet to use for conversions.
00259        * @param  __state Initial conversion state.
00260        *
00261        * Takes ownership of @p __pcvt and will delete it in the destructor.
00262        * The object's conversion state will persist between conversions.
00263        */
00264       wstring_convert(_Codecvt* __pcvt, state_type __state)
00265       : _M_cvt(__pcvt), _M_state(__state), _M_with_cvtstate(true)
00266       {
00267         if (!_M_cvt)
00268           __throw_logic_error("wstring_convert");
00269       }
00270 
00271       /** Construct with error strings.
00272        *
00273        * @param  __byte_err A string to return on failed conversions.
00274        * @param  __wide_err A wide string to return on failed conversions.
00275        */
00276       explicit
00277       wstring_convert(const byte_string& __byte_err,
00278                       const wide_string& __wide_err = wide_string())
00279       : _M_cvt(new _Codecvt),
00280         _M_byte_err_string(__byte_err), _M_wide_err_string(__wide_err),
00281         _M_with_strings(true)
00282       {
00283         if (!_M_cvt)
00284           __throw_logic_error("wstring_convert");
00285       }
00286 
00287       ~wstring_convert() = default;
00288 
00289       // _GLIBCXX_RESOLVE_LIB_DEFECTS
00290       // 2176. Special members for wstring_convert and wbuffer_convert
00291       wstring_convert(const wstring_convert&) = delete;
00292       wstring_convert& operator=(const wstring_convert&) = delete;
00293 
00294       /// @{ Convert from bytes.
00295       wide_string
00296       from_bytes(char __byte)
00297       {
00298         char __bytes[2] = { __byte };
00299         return from_bytes(__bytes, __bytes+1);
00300       }
00301 
00302       wide_string
00303       from_bytes(const char* __ptr)
00304       { return from_bytes(__ptr, __ptr+char_traits<char>::length(__ptr)); }
00305 
00306       wide_string
00307       from_bytes(const byte_string& __str)
00308       {
00309         auto __ptr = __str.data();
00310         return from_bytes(__ptr, __ptr + __str.size());
00311       }
00312 
00313       wide_string
00314       from_bytes(const char* __first, const char* __last)
00315       {
00316         if (!_M_with_cvtstate)
00317           _M_state = state_type();
00318         wide_string __out{ _M_wide_err_string.get_allocator() };
00319         if (__str_codecvt_in(__first, __last, __out, *_M_cvt, _M_state,
00320                              _M_count))
00321           return __out;
00322         if (_M_with_strings)
00323           return _M_wide_err_string;
00324         __throw_range_error("wstring_convert::from_bytes");
00325       }
00326       /// @}
00327 
00328       /// @{ Convert to bytes.
00329       byte_string
00330       to_bytes(_Elem __wchar)
00331       {
00332         _Elem __wchars[2] = { __wchar };
00333         return to_bytes(__wchars, __wchars+1);
00334       }
00335 
00336       byte_string
00337       to_bytes(const _Elem* __ptr)
00338       {
00339         return to_bytes(__ptr, __ptr+wide_string::traits_type::length(__ptr));
00340       }
00341 
00342       byte_string
00343       to_bytes(const wide_string& __wstr)
00344       {
00345         auto __ptr = __wstr.data();
00346         return to_bytes(__ptr, __ptr + __wstr.size());
00347       }
00348 
00349       byte_string
00350       to_bytes(const _Elem* __first, const _Elem* __last)
00351       {
00352         if (!_M_with_cvtstate)
00353           _M_state = state_type();
00354         byte_string __out{ _M_byte_err_string.get_allocator() };
00355         if (__str_codecvt_out(__first, __last, __out, *_M_cvt, _M_state,
00356                               _M_count))
00357           return __out;
00358         if (_M_with_strings)
00359           return _M_byte_err_string;
00360         __throw_range_error("wstring_convert::to_bytes");
00361       }
00362       /// @}
00363 
00364       // _GLIBCXX_RESOLVE_LIB_DEFECTS
00365       // 2174. wstring_convert::converted() should be noexcept
00366       /// The number of elements successfully converted in the last conversion.
00367       size_t converted() const noexcept { return _M_count; }
00368 
00369       /// The final conversion state of the last conversion.
00370       state_type state() const { return _M_state; }
00371 
00372     private:
00373       unique_ptr<_Codecvt>      _M_cvt;
00374       byte_string               _M_byte_err_string;
00375       wide_string               _M_wide_err_string;
00376       state_type                _M_state = state_type();
00377       size_t                    _M_count = 0;
00378       bool                      _M_with_cvtstate = false;
00379       bool                      _M_with_strings = false;
00380     };
00381 
00382 _GLIBCXX_END_NAMESPACE_CXX11
00383 
00384   /// Buffer conversions
00385   template<typename _Codecvt, typename _Elem = wchar_t,
00386            typename _Tr = char_traits<_Elem>>
00387     class wbuffer_convert : public basic_streambuf<_Elem, _Tr>
00388     {
00389       typedef basic_streambuf<_Elem, _Tr> _Wide_streambuf;
00390 
00391     public:
00392       typedef typename _Codecvt::state_type state_type;
00393 
00394       /// Default constructor.
00395       wbuffer_convert() : wbuffer_convert(nullptr) { }
00396 
00397       /** Constructor.
00398        *
00399        * @param  __bytebuf The underlying byte stream buffer.
00400        * @param  __pcvt    The facet to use for conversions.
00401        * @param  __state   Initial conversion state.
00402        *
00403        * Takes ownership of @p __pcvt and will delete it in the destructor.
00404        */
00405       explicit
00406       wbuffer_convert(streambuf* __bytebuf, _Codecvt* __pcvt = new _Codecvt,
00407                       state_type __state = state_type())
00408       : _M_buf(__bytebuf), _M_cvt(__pcvt), _M_state(__state)
00409       {
00410         if (!_M_cvt)
00411           __throw_logic_error("wbuffer_convert");
00412 
00413         _M_always_noconv = _M_cvt->always_noconv();
00414 
00415         if (_M_buf)
00416           {
00417             this->setp(_M_put_area, _M_put_area + _S_buffer_length);
00418             this->setg(_M_get_area + _S_putback_length,
00419                        _M_get_area + _S_putback_length,
00420                        _M_get_area + _S_putback_length);
00421           }
00422       }
00423 
00424       ~wbuffer_convert() = default;
00425 
00426       // _GLIBCXX_RESOLVE_LIB_DEFECTS
00427       // 2176. Special members for wstring_convert and wbuffer_convert
00428       wbuffer_convert(const wbuffer_convert&) = delete;
00429       wbuffer_convert& operator=(const wbuffer_convert&) = delete;
00430 
00431       streambuf* rdbuf() const noexcept { return _M_buf; }
00432 
00433       streambuf*
00434       rdbuf(streambuf *__bytebuf) noexcept
00435       {
00436         auto __prev = _M_buf;
00437         _M_buf = __bytebuf;
00438         return __prev;
00439       }
00440 
00441       /// The conversion state following the last conversion.
00442       state_type state() const noexcept { return _M_state; }
00443 
00444     protected:
00445       int
00446       sync()
00447       { return _M_buf && _M_conv_put() && !_M_buf->pubsync() ? 0 : -1; }
00448 
00449       typename _Wide_streambuf::int_type
00450       overflow(typename _Wide_streambuf::int_type __out)
00451       {
00452         if (!_M_buf || !_M_conv_put())
00453           return _Tr::eof();
00454         else if (!_Tr::eq_int_type(__out, _Tr::eof()))
00455           return this->sputc(__out);
00456         return _Tr::not_eof(__out);
00457       }
00458 
00459       typename _Wide_streambuf::int_type
00460       underflow()
00461       {
00462         if (!_M_buf)
00463           return _Tr::eof();
00464 
00465         if (this->gptr() < this->egptr() || (_M_buf && _M_conv_get()))
00466           return _Tr::to_int_type(*this->gptr());
00467         else
00468           return _Tr::eof();
00469       }
00470 
00471       streamsize
00472       xsputn(const typename _Wide_streambuf::char_type* __s, streamsize __n)
00473       {
00474         if (!_M_buf || __n == 0)
00475           return 0;
00476         streamsize __done = 0;
00477         do
00478         {
00479           auto __nn = std::min<streamsize>(this->epptr() - this->pptr(),
00480                                            __n - __done);
00481           _Tr::copy(this->pptr(), __s + __done, __nn);
00482           this->pbump(__nn);
00483           __done += __nn;
00484         } while (__done < __n && _M_conv_put());
00485         return __done;
00486       }
00487 
00488     private:
00489       // fill the get area from converted contents of the byte stream buffer
00490       bool
00491       _M_conv_get()
00492       {
00493         const streamsize __pb1 = this->gptr() - this->eback();
00494         const streamsize __pb2 = _S_putback_length;
00495         const streamsize __npb = std::min(__pb1, __pb2);
00496 
00497         _Tr::move(_M_get_area + _S_putback_length - __npb,
00498                   this->gptr() - __npb, __npb);
00499 
00500         streamsize __nbytes = sizeof(_M_get_buf) - _M_unconv;
00501         __nbytes = std::min(__nbytes, _M_buf->in_avail());
00502         if (__nbytes < 1)
00503           __nbytes = 1;
00504         __nbytes = _M_buf->sgetn(_M_get_buf + _M_unconv, __nbytes);
00505         if (__nbytes < 1)
00506           return false;
00507         __nbytes += _M_unconv;
00508 
00509         // convert _M_get_buf into _M_get_area
00510 
00511         _Elem* __outbuf = _M_get_area + _S_putback_length;
00512         _Elem* __outnext = __outbuf;
00513         const char* __bnext = _M_get_buf;
00514 
00515         codecvt_base::result __result;
00516         if (_M_always_noconv)
00517           __result = codecvt_base::noconv;
00518         else
00519           {
00520             _Elem* __outend = _M_get_area + _S_buffer_length;
00521 
00522             __result = _M_cvt->in(_M_state,
00523                                   __bnext, __bnext + __nbytes, __bnext,
00524                                   __outbuf, __outend, __outnext);
00525           }
00526 
00527         if (__result == codecvt_base::noconv)
00528           {
00529             // cast is safe because noconv means _Elem is same type as char
00530             auto __get_buf = reinterpret_cast<const _Elem*>(_M_get_buf);
00531             _Tr::copy(__outbuf, __get_buf, __nbytes);
00532             _M_unconv = 0;
00533             return true;
00534           }
00535 
00536         if ((_M_unconv = _M_get_buf + __nbytes - __bnext))
00537           char_traits<char>::move(_M_get_buf, __bnext, _M_unconv);
00538 
00539         this->setg(__outbuf, __outbuf, __outnext);
00540 
00541         return __result != codecvt_base::error;
00542       }
00543 
00544       // unused
00545       bool
00546       _M_put(...)
00547       { return false; }
00548 
00549       bool
00550       _M_put(const char* __p, streamsize __n)
00551       {
00552         if (_M_buf->sputn(__p, __n) < __n)
00553           return false;
00554         return true;
00555       }
00556 
00557       // convert the put area and write to the byte stream buffer
00558       bool
00559       _M_conv_put()
00560       {
00561         _Elem* const __first = this->pbase();
00562         const _Elem* const __last = this->pptr();
00563         const streamsize __pending = __last - __first;
00564 
00565         if (_M_always_noconv)
00566           return _M_put(__first, __pending);
00567 
00568         char __outbuf[2 * _S_buffer_length];
00569 
00570         const _Elem* __next = __first;
00571         const _Elem* __start;
00572         do
00573           {
00574             __start = __next;
00575             char* __outnext = __outbuf;
00576             char* const __outlast = __outbuf + sizeof(__outbuf);
00577             auto __result = _M_cvt->out(_M_state, __next, __last, __next,
00578                                         __outnext, __outlast, __outnext);
00579             if (__result == codecvt_base::error)
00580               return false;
00581             else if (__result == codecvt_base::noconv)
00582               return _M_put(__next, __pending);
00583 
00584             if (!_M_put(__outbuf, __outnext - __outbuf))
00585               return false;
00586           }
00587         while (__next != __last && __next != __start);
00588 
00589         if (__next != __last)
00590           _Tr::move(__first, __next, __last - __next);
00591 
00592         this->pbump(__first - __next);
00593         return __next != __first;
00594       }
00595 
00596       streambuf*                _M_buf;
00597       unique_ptr<_Codecvt>      _M_cvt;
00598       state_type                _M_state;
00599 
00600       static const streamsize   _S_buffer_length = 32;
00601       static const streamsize   _S_putback_length = 3;
00602       _Elem                     _M_put_area[_S_buffer_length];
00603       _Elem                     _M_get_area[_S_buffer_length];
00604       streamsize                _M_unconv = 0;
00605       char                      _M_get_buf[_S_buffer_length-_S_putback_length];
00606       bool                      _M_always_noconv;
00607     };
00608 
00609 #endif  // _GLIBCXX_USE_WCHAR_T
00610 
00611   /// @} group locales
00612 
00613 _GLIBCXX_END_NAMESPACE_VERSION
00614 } // namespace
00615 
00616 #endif // __cplusplus
00617 
00618 #endif /* _LOCALE_CONV_H */