libstdc++

locale_conv.h

Go to the documentation of this file.
00001 // wstring_convert implementation -*- C++ -*-
00002 
00003 // Copyright (C) 2012 Free Software Foundation, Inc.
00004 //
00005 // This file is part of the GNU ISO C++ Library.  This library is free
00006 // software; you can redistribute it and/or modify it under the
00007 // terms of the GNU General Public License as published by the
00008 // Free Software Foundation; either version 3, or (at your option)
00009 // any later version.
00010 
00011 // This library is distributed in the hope that it will be useful,
00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014 // GNU General Public License for more details.
00015 
00016 // Under Section 7 of GPL version 3, you are granted additional
00017 // permissions described in the GCC Runtime Library Exception, version
00018 // 3.1, as published by the Free Software Foundation.
00019 
00020 // You should have received a copy of the GNU General Public License and
00021 // a copy of the GCC Runtime Library Exception along with this program;
00022 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
00023 // <http://www.gnu.org/licenses/>.
00024 
00025 /** @file bits/locale_conv.h
00026  *  This is an internal header file, included by other library headers.
00027  *  Do not attempt to use it directly. @headername{locale}
00028  */
00029 
00030 #ifndef _LOCALE_CONV_H
00031 #define _LOCALE_CONV_H 1
00032 
00033 #if __cplusplus < 201103L
00034 # include <bits/c++0x_warning.h>
00035 #else
00036 
00037 #include <streambuf>
00038 #include "stringfwd.h"
00039 #include "allocator.h"
00040 #include "codecvt.h"
00041 #include "unique_ptr.h"
00042 
00043 namespace std _GLIBCXX_VISIBILITY(default)
00044 {
00045 _GLIBCXX_BEGIN_NAMESPACE_VERSION
00046 
00047 #ifdef _GLIBCXX_USE_WCHAR_T
00048 
00049   /**
00050    * @addtogroup locales
00051    * @{
00052    */
00053 
00054   /// String conversions
00055   template<typename _Codecvt, typename _Elem = wchar_t,
00056            typename _Wide_alloc = allocator<_Elem>,
00057            typename _Byte_alloc = allocator<char>>
00058     class wstring_convert
00059     {
00060     public:
00061       typedef basic_string<char, char_traits<char>, _Byte_alloc>   byte_string;
00062       typedef basic_string<_Elem, char_traits<_Elem>, _Wide_alloc> wide_string;
00063       typedef typename _Codecvt::state_type                        state_type;
00064       typedef typename wide_string::traits_type::int_type          int_type;
00065 
00066       /** Default constructor.
00067        *
00068        * @param  __pcvt The facet to use for conversions.
00069        *
00070        * Takes ownership of @p __pcvt and will delete it in the destructor.
00071        */
00072       explicit
00073       wstring_convert(_Codecvt* __pcvt = new _Codecvt()) : _M_cvt(__pcvt)
00074       {
00075         if (!_M_cvt)
00076           __throw_logic_error("wstring_convert");
00077       }
00078 
00079       /** Construct with an initial converstion state.
00080        *
00081        * @param  __pcvt The facet to use for conversions.
00082        * @param  __state Initial conversion state.
00083        *
00084        * Takes ownership of @p __pcvt and will delete it in the destructor.
00085        * The object's conversion state will persist between conversions.
00086        */
00087       wstring_convert(_Codecvt* __pcvt, state_type __state)
00088       : _M_cvt(__pcvt), _M_state(__state), _M_with_cvtstate(true)
00089       {
00090         if (!_M_cvt)
00091           __throw_logic_error("wstring_convert");
00092       }
00093 
00094       /** Construct with error strings.
00095        *
00096        * @param  __byte_err A string to return on failed conversions.
00097        * @param  __wide_err A wide string to return on failed conversions.
00098        */
00099       explicit
00100       wstring_convert(const byte_string& __byte_err,
00101                       const wide_string& __wide_err = wide_string())
00102       : _M_cvt(new _Codecvt),
00103         _M_byte_err_string(__byte_err), _M_wide_err_string(__wide_err),
00104         _M_with_strings(true)
00105       {
00106         if (!_M_cvt)
00107           __throw_logic_error("wstring_convert");
00108       }
00109 
00110       ~wstring_convert() = default;
00111 
00112       // _GLIBCXX_RESOLVE_LIB_DEFECTS
00113       // 2176. Special members for wstring_convert and wbuffer_convert
00114       wstring_convert(const wstring_convert&) = delete;
00115       wstring_convert& operator=(const wstring_convert&) = delete;
00116 
00117       /// @{ Convert from bytes.
00118       wide_string
00119       from_bytes(char __byte)
00120       {
00121         char __bytes[2] = { __byte };
00122         return from_bytes(__bytes, __bytes+1);
00123       }
00124 
00125       wide_string
00126       from_bytes(const char* __ptr)
00127       { return from_bytes(__ptr, __ptr+char_traits<char>::length(__ptr)); }
00128 
00129       wide_string
00130       from_bytes(const byte_string& __str)
00131       {
00132         auto __ptr = __str.data();
00133         return from_bytes(__ptr, __ptr + __str.size());
00134       }
00135 
00136       wide_string
00137       from_bytes(const char* __first, const char* __last)
00138       {
00139         auto __errstr = _M_with_strings ? &_M_wide_err_string : nullptr;
00140         _ConvFn<char, _Elem> __fn = &_Codecvt::in;
00141         return _M_conv(__first, __last, __errstr, __fn);
00142       }
00143       /// @}
00144 
00145       /// @{ Convert to bytes.
00146       byte_string
00147       to_bytes(_Elem __wchar)
00148       {
00149         _Elem __wchars[2] = { __wchar };
00150         return to_bytes(__wchars, __wchars+1);
00151       }
00152 
00153       byte_string
00154       to_bytes(const _Elem* __ptr)
00155       {
00156         return to_bytes(__ptr, __ptr+wide_string::traits_type::length(__ptr));
00157       }
00158 
00159       byte_string
00160       to_bytes(const wide_string& __wstr)
00161       {
00162         auto __ptr = __wstr.data();
00163         return to_bytes(__ptr, __ptr + __wstr.size());
00164       }
00165 
00166       byte_string
00167       to_bytes(const _Elem* __first, const _Elem* __last)
00168       {
00169         auto __errstr = _M_with_strings ? &_M_byte_err_string : nullptr;
00170         _ConvFn<_Elem, char> __fn = &_Codecvt::out;
00171         return _M_conv(__first, __last, __errstr, __fn);
00172       }
00173       /// @}
00174 
00175       // _GLIBCXX_RESOLVE_LIB_DEFECTS
00176       // 2174. wstring_convert::converted() should be noexcept
00177       /// The number of elements successfully converted in the last conversion.
00178       size_t converted() const noexcept { return _M_count; }
00179 
00180       /// The final conversion state of the last conversion.
00181       state_type state() const { return _M_state; }
00182 
00183     private:
00184       template<typename _InC, typename _OutC>
00185         using _ConvFn
00186           = codecvt_base::result
00187             (_Codecvt::*)(state_type&, const _InC*, const _InC*, const _InC*&,
00188                           _OutC*, _OutC*, _OutC*&) const;
00189 
00190       template<typename _InChar, typename _OutStr, typename _MemFn>
00191         _OutStr
00192         _M_conv(const _InChar* __first, const _InChar* __last,
00193                 const _OutStr* __err, _MemFn __memfn)
00194         {
00195           if (!_M_with_cvtstate)
00196             _M_state = state_type();
00197 
00198           auto __outstr = __err ? _OutStr(__err->get_allocator()) : _OutStr();
00199           size_t __outchars = 0;
00200           auto __next = __first;
00201           const auto __maxlen = _M_cvt->max_length();
00202 
00203           codecvt_base::result __result;
00204           do
00205             {
00206               __outstr.resize(__outstr.size() + (__last - __next) + __maxlen);
00207               auto __outnext = &__outstr.front() + __outchars;
00208               auto const __outlast = &__outstr.back() + 1;
00209               __result = ((*_M_cvt).*__memfn)(_M_state, __next, __last, __next,
00210                                             __outnext, __outlast, __outnext);
00211               __outchars = __outnext - &__outstr.front();
00212             }
00213           while (__result == codecvt_base::partial && __next != __last
00214                  && (__outstr.size() - __outchars) < __maxlen);
00215 
00216           if (__result == codecvt_base::noconv)
00217             {
00218               __outstr.assign(__first, __last);
00219               _M_count = __outstr.size();
00220               return __outstr;
00221             }
00222 
00223           __outstr.resize(__outchars);
00224           _M_count = __next - __first;
00225 
00226           if (__result != codecvt_base::error)
00227             return __outstr;
00228           else if (__err)
00229             return *__err;
00230           else
00231             __throw_range_error("wstring_convert");
00232         }
00233 
00234       unique_ptr<_Codecvt>      _M_cvt;
00235       byte_string               _M_byte_err_string;
00236       wide_string               _M_wide_err_string;
00237       state_type                _M_state = state_type();
00238       size_t                    _M_count = 0;
00239       bool                      _M_with_cvtstate = false;
00240       bool                      _M_with_strings = false;
00241     };
00242 
00243   /// Buffer conversions
00244   template<typename _Codecvt, typename _Elem = wchar_t,
00245            typename _Tr = char_traits<_Elem>>
00246     class wbuffer_convert : public basic_streambuf<_Elem, _Tr>
00247     {
00248       typedef basic_streambuf<_Elem, _Tr> _Wide_streambuf;
00249 
00250     public:
00251       typedef typename _Codecvt::state_type state_type;
00252 
00253       /** Default constructor.
00254        *
00255        * @param  __bytebuf The underlying byte stream buffer.
00256        * @param  __pcvt    The facet to use for conversions.
00257        * @param  __state   Initial conversion state.
00258        *
00259        * Takes ownership of @p __pcvt and will delete it in the destructor.
00260        */
00261       explicit
00262       wbuffer_convert(streambuf* __bytebuf = 0, _Codecvt* __pcvt = new _Codecvt,
00263                       state_type __state = state_type())
00264       : _M_buf(__bytebuf), _M_cvt(__pcvt), _M_state(__state)
00265       {
00266         if (!_M_cvt)
00267           __throw_logic_error("wstring_convert");
00268 
00269         _M_always_noconv = _M_cvt->always_noconv();
00270 
00271         if (_M_buf)
00272           {
00273             this->setp(_M_put_area, _M_put_area + _S_buffer_length);
00274             this->setg(_M_get_area + _S_putback_length,
00275                        _M_get_area + _S_putback_length,
00276                        _M_get_area + _S_putback_length);
00277           }
00278       }
00279 
00280       ~wbuffer_convert() = default;
00281 
00282       // _GLIBCXX_RESOLVE_LIB_DEFECTS
00283       // 2176. Special members for wstring_convert and wbuffer_convert
00284       wbuffer_convert(const wbuffer_convert&) = delete;
00285       wbuffer_convert& operator=(const wbuffer_convert&) = delete;
00286 
00287       streambuf* rdbuf() const noexcept { return _M_buf; }
00288 
00289       streambuf*
00290       rdbuf(streambuf *__bytebuf) noexcept
00291       {
00292         auto __prev = _M_buf;
00293         _M_buf = __bytebuf;
00294         return __prev;
00295       }
00296 
00297       /// The conversion state following the last conversion.
00298       state_type state() const noexcept { return _M_state; }
00299 
00300     protected:
00301       int
00302       sync()
00303       { return _M_buf && _M_conv_put() && _M_buf->pubsync() ? 0 : -1; }
00304 
00305       typename _Wide_streambuf::int_type
00306       overflow(typename _Wide_streambuf::int_type __out)
00307       {
00308         if (!_M_buf || !_M_conv_put())
00309           return _Tr::eof();
00310         else if (!_Tr::eq_int_type(__out, _Tr::eof()))
00311           return this->sputc(__out);
00312         return _Tr::not_eof(__out);
00313       }
00314 
00315       typename _Wide_streambuf::int_type
00316       underflow()
00317       {
00318         if (!_M_buf)
00319           return _Tr::eof();
00320 
00321         if (this->gptr() < this->egptr() || (_M_buf && _M_conv_get()))
00322           return _Tr::to_int_type(*this->gptr());
00323         else
00324           return _Tr::eof();
00325       }
00326 
00327       streamsize
00328       xsputn(const typename _Wide_streambuf::char_type* __s, streamsize __n)
00329       {
00330         if (!_M_buf || __n == 0)
00331           return 0;
00332         streamsize __done = 0;
00333         do
00334         {
00335           auto __nn = std::min<streamsize>(this->epptr() - this->pptr(),
00336                                            __n - __done);
00337           _Tr::copy(this->pptr(), __s + __done, __nn);
00338           this->pbump(__nn);
00339           __done += __nn;
00340         } while (__done < __n && _M_conv_put());
00341         return __done;
00342       }
00343 
00344     private:
00345       // fill the get area from converted contents of the byte stream buffer
00346       bool
00347       _M_conv_get()
00348       {
00349         const streamsize __pb1 = this->gptr() - this->eback();
00350         const streamsize __pb2 = _S_putback_length;
00351         const streamsize __npb = std::min(__pb1, __pb2);
00352 
00353         _Tr::move(_M_get_area + _S_putback_length - __npb,
00354                   this->gptr() - __npb, __npb);
00355 
00356         streamsize __nbytes = sizeof(_M_get_buf) - _M_unconv;
00357         __nbytes = std::min(__nbytes, _M_buf->in_avail());
00358         if (__nbytes < 1)
00359           __nbytes == 1;
00360         __nbytes = _M_buf->sgetn(_M_get_buf + _M_unconv, __nbytes);
00361         if (__nbytes < 1)
00362           return false;
00363         __nbytes += _M_unconv;
00364 
00365         // convert _M_get_buf into _M_get_area
00366 
00367         _Elem* __outbuf = _M_get_area + _S_putback_length;
00368         _Elem* __outnext = __outbuf;
00369         const char* __bnext = _M_get_buf;
00370 
00371         codecvt_base::result __result;
00372         if (_M_always_noconv)
00373           __result = codecvt_base::noconv;
00374         else
00375           {
00376             _Elem* __outend = _M_get_area + _S_buffer_length;
00377 
00378             __result = _M_cvt->in(_M_state,
00379                                   __bnext, __bnext + __nbytes, __bnext,
00380                                   __outbuf, __outend, __outnext);
00381           }
00382 
00383         if (__result == codecvt_base::noconv)
00384           {
00385             // cast is safe because noconv means _Elem is same type as char
00386             auto __get_buf = reinterpret_cast<const _Elem*>(_M_get_buf);
00387             _Tr::copy(__outbuf, __get_buf, __nbytes);
00388             _M_unconv = 0;
00389             return true;
00390           }
00391 
00392         if ((_M_unconv = _M_get_buf + __nbytes - __bnext))
00393           char_traits<char>::move(_M_get_buf, __bnext, _M_unconv);
00394 
00395         this->setg(__outbuf, __outbuf, __outnext);
00396 
00397         return __result != codecvt_base::error;
00398       }
00399 
00400       // unused
00401       bool
00402       _M_put(...)
00403       { return false; }
00404 
00405       bool
00406       _M_put(const char* __p, streamsize __n)
00407       {
00408         if (_M_buf->sputn(__p, __n) < __n)
00409           return false;
00410       }
00411 
00412       // convert the put area and write to the byte stream buffer
00413       bool
00414       _M_conv_put()
00415       {
00416         _Elem* const __first = this->pbase();
00417         const _Elem* const __last = this->pptr();
00418         const streamsize __pending = __last - __first;
00419 
00420         if (_M_always_noconv)
00421           return _M_put(__first, __pending);
00422 
00423         char __outbuf[2 * _S_buffer_length];
00424 
00425         const _Elem* __next = __first;
00426         const _Elem* __start;
00427         do
00428           {
00429             __start = __next;
00430             char* __outnext = __outbuf;
00431             char* const __outlast = __outbuf + sizeof(__outbuf);
00432             auto __result = _M_cvt->out(_M_state, __next, __last, __next,
00433                                         __outnext, __outlast, __outnext);
00434             if (__result == codecvt_base::error)
00435               return false;
00436             else if (__result == codecvt_base::noconv)
00437               return _M_put(__next, __pending);
00438 
00439             if (!_M_put(__outbuf, __outnext - __outbuf))
00440               return false;
00441           }
00442         while (__next != __last && __next != __start);
00443 
00444         if (__next != __last)
00445           _Tr::move(__first, __next, __last - __next);
00446 
00447         this->pbump(__first - __next);
00448         return __next != __first;
00449       }
00450 
00451       streambuf*                _M_buf;
00452       unique_ptr<_Codecvt>      _M_cvt;
00453       state_type                _M_state;
00454 
00455       static const streamsize   _S_buffer_length = 32;
00456       static const streamsize   _S_putback_length = 3;
00457       _Elem                     _M_put_area[_S_buffer_length];
00458       _Elem                     _M_get_area[_S_buffer_length];
00459       streamsize                _M_unconv = 0;
00460       char                      _M_get_buf[_S_buffer_length-_S_putback_length];
00461       bool                      _M_always_noconv;
00462     };
00463 
00464   /// @} group locales
00465 
00466 #endif  // _GLIBCXX_USE_WCHAR_T
00467 
00468 _GLIBCXX_END_NAMESPACE_VERSION
00469 } // namespace
00470 
00471 #endif // __cplusplus
00472 
00473 #endif /* _LOCALE_CONV_H */