libstdc++
codecvt_specializations.h
Go to the documentation of this file.
1 // Locale support (codecvt) -*- C++ -*-
2 
3 // Copyright (C) 2000-2014 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 //
26 // ISO C++ 14882: 22.2.1.5 Template class codecvt
27 //
28 
29 // Written by Benjamin Kosnik <bkoz@redhat.com>
30 
31 /** @file ext/codecvt_specializations.h
32  * This file is a GNU extension to the Standard C++ Library.
33  */
34 
35 #ifndef _EXT_CODECVT_SPECIALIZATIONS_H
36 #define _EXT_CODECVT_SPECIALIZATIONS_H 1
37 
38 #include <bits/c++config.h>
39 #include <locale>
40 #include <iconv.h>
41 
42 namespace __gnu_cxx _GLIBCXX_VISIBILITY(default)
43 {
44 _GLIBCXX_BEGIN_NAMESPACE_VERSION
45 
/**
 *  @brief  Extension to use iconv for dealing with character encodings.
 *
 *  This includes conversions and comparisons between various character
 *  sets.  This object encapsulates data that may need to be shared between
 *  char_traits, codecvt and ctype.
 *
 *  An object holds the names of an internal and an external encoding plus
 *  two iconv conversion descriptors, one per direction.  The descriptors
 *  are opened by init() once both names are non-empty and are closed by
 *  destroy(), which the destructor calls.
 */
class encoding_state
{
public:
  // Types:
  // NB: A conversion descriptor subsumes and enhances the
  // functionality of a simple state type such as mbstate_t.
  typedef iconv_t descriptor_type;

protected:
  // Name of internal character set encoding.
  std::string _M_int_enc;

  // Name of external character set encoding.
  std::string _M_ext_enc;

  // Conversion descriptor from external encoding to internal encoding.
  descriptor_type _M_in_desc;

  // Conversion descriptor from internal encoding to external encoding.
  descriptor_type _M_out_desc;

  // The byte-order marker for the external encoding, if necessary.
  int _M_ext_bom;

  // The byte-order marker for the internal encoding, if necessary.
  int _M_int_bom;

  // Number of external bytes needed to construct one complete
  // character in the internal encoding.
  // NB: -1 indicates variable, or stateful, encodings.
  int _M_bytes;

public:
  /// Creates a state with no encodings and no open descriptors;
  /// good() is false for such an object.
  explicit
  encoding_state()
  : _M_in_desc(0), _M_out_desc(0), _M_ext_bom(0), _M_int_bom(0), _M_bytes(0)
  { }

  /**
   *  @brief  Creates a state for the given pair of encodings.
   *  @param  __int    Name of the internal encoding.
   *  @param  __ext    Name of the external encoding.
   *  @param  __ibom   Byte-order marker for the internal encoding, or 0.
   *  @param  __ebom   Byte-order marker for the external encoding, or 0.
   *  @param  __bytes  External bytes per internal character.
   *
   *  Opens both descriptors through init(); a failure to open either one
   *  is reported through std::__throw_runtime_error.
   */
  explicit
  encoding_state(const char* __int, const char* __ext,
                 int __ibom = 0, int __ebom = 0, int __bytes = 1)
  : _M_int_enc(__int), _M_ext_enc(__ext), _M_in_desc(0), _M_out_desc(0),
    _M_ext_bom(__ebom), _M_int_bom(__ibom), _M_bytes(__bytes)
  { init(); }

  // 21.1.2 traits typedefs
  // p4
  // typedef STATE_T state_type
  // requires: state_type shall meet the requirements of
  // CopyConstructible types (20.1.3)
  // NB: This does not preserve the actual state of the conversion
  // descriptor member, but it does duplicate the encoding
  // information.
  encoding_state(const encoding_state& __obj) : _M_in_desc(0), _M_out_desc(0)
  { construct(__obj); }

  // Need assignment operator as well.  Like the copy constructor it
  // duplicates the encoding information and opens fresh descriptors.
  encoding_state&
  operator=(const encoding_state& __obj)
  {
    construct(__obj);
    return *this;
  }

  ~encoding_state()
  { destroy(); }

  /// True when both descriptors are open, i.e. neither is null nor the
  /// iconv error value (iconv_t)(-1).
  bool
  good() const throw()
  {
    const descriptor_type __err = (iconv_t)(-1);
    bool __test = _M_in_desc && _M_in_desc != __err;
    __test &= _M_out_desc && _M_out_desc != __err;
    return __test;
  }

  int
  character_ratio() const
  { return _M_bytes; }

  const std::string
  internal_encoding() const
  { return _M_int_enc; }

  int
  internal_bom() const
  { return _M_int_bom; }

  const std::string
  external_encoding() const
  { return _M_ext_enc; }

  int
  external_bom() const
  { return _M_ext_bom; }

  /// Descriptor converting external -> internal.
  const descriptor_type&
  in_descriptor() const
  { return _M_in_desc; }

  /// Descriptor converting internal -> external.
  const descriptor_type&
  out_descriptor() const
  { return _M_out_desc; }

protected:
  // Opens whichever descriptor is still null, provided both encoding
  // names are non-empty.  On failure every descriptor this object holds
  // is released before the error is thrown: when called from a
  // constructor the destructor will not run, so nothing else would
  // close an input descriptor that had already been opened.
  void
  init()
  {
    const descriptor_type __err = (iconv_t)(-1);
    const bool __have_encodings = _M_int_enc.size() && _M_ext_enc.size();
    if (!_M_in_desc && __have_encodings)
      {
        _M_in_desc = iconv_open(_M_int_enc.c_str(), _M_ext_enc.c_str());
        if (_M_in_desc == __err)
          {
            destroy();
            std::__throw_runtime_error(__N("encoding_state::_M_init "
                                "creating iconv input descriptor failed"));
          }
      }
    if (!_M_out_desc && __have_encodings)
      {
        _M_out_desc = iconv_open(_M_ext_enc.c_str(), _M_int_enc.c_str());
        if (_M_out_desc == __err)
          {
            destroy();
            std::__throw_runtime_error(__N("encoding_state::_M_init "
                                "creating iconv output descriptor failed"));
          }
      }
  }

  // Makes this object a fresh copy of __obj: releases the current
  // descriptors, copies the encoding data, then opens new descriptors.
  void
  construct(const encoding_state& __obj)
  {
    destroy();
    _M_int_enc = __obj._M_int_enc;
    _M_ext_enc = __obj._M_ext_enc;
    _M_ext_bom = __obj._M_ext_bom;
    _M_int_bom = __obj._M_int_bom;
    _M_bytes = __obj._M_bytes;
    init();
  }

  // Closes any open descriptor and resets both members to null.  The
  // reset is unconditional so that a descriptor left at the error value
  // by a failed iconv_open does not stop a later init() from retrying.
  void
  destroy() throw()
  {
    const descriptor_type __err = (iconv_t)(-1);
    if (_M_in_desc && _M_in_desc != __err)
      iconv_close(_M_in_desc);
    _M_in_desc = 0;
    if (_M_out_desc && _M_out_desc != __err)
      iconv_close(_M_out_desc);
    _M_out_desc = 0;
  }
};
204 
205  /// encoding_char_traits
206  // Custom traits type with encoding_state for the state type, and the
207  // associated fpos<encoding_state> for the position type, all other
208  // bits equivalent to the required char_traits instantiations.
209  template<typename _CharT>
210  struct encoding_char_traits : public std::char_traits<_CharT>
211  {
212  typedef encoding_state state_type;
213  typedef typename std::fpos<state_type> pos_type;
214  };
215 
216 _GLIBCXX_END_NAMESPACE_VERSION
217 } // namespace
218 
219 
220 namespace std _GLIBCXX_VISIBILITY(default)
221 {
222 _GLIBCXX_BEGIN_NAMESPACE_VERSION
223 
225 
226  /// codecvt<InternT, _ExternT, encoding_state> specialization.
227  // This partial specialization takes advantage of iconv to provide
228  // code conversions between a large number of character encodings.
229  template<typename _InternT, typename _ExternT>
230  class codecvt<_InternT, _ExternT, encoding_state>
231  : public __codecvt_abstract_base<_InternT, _ExternT, encoding_state>
232  {
233  public:
234  // Types:
235  typedef codecvt_base::result result;
236  typedef _InternT intern_type;
237  typedef _ExternT extern_type;
238  typedef __gnu_cxx::encoding_state state_type;
239  typedef state_type::descriptor_type descriptor_type;
240 
241  // Data Members:
242  static locale::id id;
243 
244  explicit
245  codecvt(size_t __refs = 0)
247  { }
248 
249  explicit
250  codecvt(state_type& __enc, size_t __refs = 0)
252  { }
253 
254  protected:
255  virtual
256  ~codecvt() { }
257 
258  virtual result
259  do_out(state_type& __state, const intern_type* __from,
260  const intern_type* __from_end, const intern_type*& __from_next,
261  extern_type* __to, extern_type* __to_end,
262  extern_type*& __to_next) const;
263 
264  virtual result
265  do_unshift(state_type& __state, extern_type* __to,
266  extern_type* __to_end, extern_type*& __to_next) const;
267 
268  virtual result
269  do_in(state_type& __state, const extern_type* __from,
270  const extern_type* __from_end, const extern_type*& __from_next,
271  intern_type* __to, intern_type* __to_end,
272  intern_type*& __to_next) const;
273 
274  virtual int
275  do_encoding() const throw();
276 
277  virtual bool
278  do_always_noconv() const throw();
279 
280  virtual int
281  do_length(state_type&, const extern_type* __from,
282  const extern_type* __end, size_t __max) const;
283 
284  virtual int
285  do_max_length() const throw();
286  };
287 
288  template<typename _InternT, typename _ExternT>
289  locale::id
291 
// iconv() is declared with two different types for its second
// argument: SUSv2 and others use 'const char**', while glibc 2.2 uses
// 'char**', which matches the POSIX 1003.1-2001 standard.  This
// adaptor deduces the type actually in use (_Tp) from the function
// pointer and casts the input-buffer argument to it, so callers can
// always pass 'char**'.
template<typename _Tp>
  inline size_t
  __iconv_adaptor(size_t(*__func)(iconv_t, _Tp, size_t*, char**, size_t*),
                  iconv_t __cd, char** __inbuf, size_t* __inbytes,
                  char** __outbuf, size_t* __outbytes)
  {
    _Tp __typed_inbuf = (_Tp)__inbuf;
    const size_t __result
      = __func(__cd, __typed_inbuf, __inbytes, __outbuf, __outbytes);
    return __result;
  }
302 
303  template<typename _InternT, typename _ExternT>
304  codecvt_base::result
306  do_out(state_type& __state, const intern_type* __from,
307  const intern_type* __from_end, const intern_type*& __from_next,
308  extern_type* __to, extern_type* __to_end,
309  extern_type*& __to_next) const
310  {
311  result __ret = codecvt_base::error;
312  if (__state.good())
313  {
314  const descriptor_type& __desc = __state.out_descriptor();
315  const size_t __fmultiple = sizeof(intern_type);
316  size_t __fbytes = __fmultiple * (__from_end - __from);
317  const size_t __tmultiple = sizeof(extern_type);
318  size_t __tbytes = __tmultiple * (__to_end - __to);
319 
320  // Argument list for iconv specifies a byte sequence. Thus,
321  // all to/from arrays must be brutally casted to char*.
322  char* __cto = reinterpret_cast<char*>(__to);
323  char* __cfrom;
324  size_t __conv;
325 
326  // Some encodings need a byte order marker as the first item
327  // in the byte stream, to designate endian-ness. The default
328  // value for the byte order marker is NULL, so if this is
329  // the case, it's not necessary and we can just go on our
330  // merry way.
331  int __int_bom = __state.internal_bom();
332  if (__int_bom)
333  {
334  size_t __size = __from_end - __from;
335  intern_type* __cfixed = static_cast<intern_type*>
336  (__builtin_alloca(sizeof(intern_type) * (__size + 1)));
337  __cfixed[0] = static_cast<intern_type>(__int_bom);
338  char_traits<intern_type>::copy(__cfixed + 1, __from, __size);
339  __cfrom = reinterpret_cast<char*>(__cfixed);
340  __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
341  &__fbytes, &__cto, &__tbytes);
342  }
343  else
344  {
345  intern_type* __cfixed = const_cast<intern_type*>(__from);
346  __cfrom = reinterpret_cast<char*>(__cfixed);
347  __conv = __iconv_adaptor(iconv, __desc, &__cfrom, &__fbytes,
348  &__cto, &__tbytes);
349  }
350 
351  if (__conv != size_t(-1))
352  {
353  __from_next = reinterpret_cast<const intern_type*>(__cfrom);
354  __to_next = reinterpret_cast<extern_type*>(__cto);
355  __ret = codecvt_base::ok;
356  }
357  else
358  {
359  if (__fbytes < __fmultiple * (__from_end - __from))
360  {
361  __from_next = reinterpret_cast<const intern_type*>(__cfrom);
362  __to_next = reinterpret_cast<extern_type*>(__cto);
363  __ret = codecvt_base::partial;
364  }
365  else
366  __ret = codecvt_base::error;
367  }
368  }
369  return __ret;
370  }
371 
372  template<typename _InternT, typename _ExternT>
373  codecvt_base::result
375  do_unshift(state_type& __state, extern_type* __to,
376  extern_type* __to_end, extern_type*& __to_next) const
377  {
378  result __ret = codecvt_base::error;
379  if (__state.good())
380  {
381  const descriptor_type& __desc = __state.in_descriptor();
382  const size_t __tmultiple = sizeof(intern_type);
383  size_t __tlen = __tmultiple * (__to_end - __to);
384 
385  // Argument list for iconv specifies a byte sequence. Thus,
386  // all to/from arrays must be brutally casted to char*.
387  char* __cto = reinterpret_cast<char*>(__to);
388  size_t __conv = __iconv_adaptor(iconv,__desc, 0, 0,
389  &__cto, &__tlen);
390 
391  if (__conv != size_t(-1))
392  {
393  __to_next = reinterpret_cast<extern_type*>(__cto);
394  if (__tlen == __tmultiple * (__to_end - __to))
395  __ret = codecvt_base::noconv;
396  else if (__tlen == 0)
397  __ret = codecvt_base::ok;
398  else
399  __ret = codecvt_base::partial;
400  }
401  else
402  __ret = codecvt_base::error;
403  }
404  return __ret;
405  }
406 
407  template<typename _InternT, typename _ExternT>
408  codecvt_base::result
409  codecvt<_InternT, _ExternT, encoding_state>::
410  do_in(state_type& __state, const extern_type* __from,
411  const extern_type* __from_end, const extern_type*& __from_next,
412  intern_type* __to, intern_type* __to_end,
413  intern_type*& __to_next) const
414  {
415  result __ret = codecvt_base::error;
416  if (__state.good())
417  {
418  const descriptor_type& __desc = __state.in_descriptor();
419  const size_t __fmultiple = sizeof(extern_type);
420  size_t __flen = __fmultiple * (__from_end - __from);
421  const size_t __tmultiple = sizeof(intern_type);
422  size_t __tlen = __tmultiple * (__to_end - __to);
423 
424  // Argument list for iconv specifies a byte sequence. Thus,
425  // all to/from arrays must be brutally casted to char*.
426  char* __cto = reinterpret_cast<char*>(__to);
427  char* __cfrom;
428  size_t __conv;
429 
430  // Some encodings need a byte order marker as the first item
431  // in the byte stream, to designate endian-ness. The default
432  // value for the byte order marker is NULL, so if this is
433  // the case, it's not necessary and we can just go on our
434  // merry way.
435  int __ext_bom = __state.external_bom();
436  if (__ext_bom)
437  {
438  size_t __size = __from_end - __from;
439  extern_type* __cfixed = static_cast<extern_type*>
440  (__builtin_alloca(sizeof(extern_type) * (__size + 1)));
441  __cfixed[0] = static_cast<extern_type>(__ext_bom);
442  char_traits<extern_type>::copy(__cfixed + 1, __from, __size);
443  __cfrom = reinterpret_cast<char*>(__cfixed);
444  __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
445  &__flen, &__cto, &__tlen);
446  }
447  else
448  {
449  extern_type* __cfixed = const_cast<extern_type*>(__from);
450  __cfrom = reinterpret_cast<char*>(__cfixed);
451  __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
452  &__flen, &__cto, &__tlen);
453  }
454 
455 
456  if (__conv != size_t(-1))
457  {
458  __from_next = reinterpret_cast<const extern_type*>(__cfrom);
459  __to_next = reinterpret_cast<intern_type*>(__cto);
460  __ret = codecvt_base::ok;
461  }
462  else
463  {
464  if (__flen < static_cast<size_t>(__from_end - __from))
465  {
466  __from_next = reinterpret_cast<const extern_type*>(__cfrom);
467  __to_next = reinterpret_cast<intern_type*>(__cto);
468  __ret = codecvt_base::partial;
469  }
470  else
471  __ret = codecvt_base::error;
472  }
473  }
474  return __ret;
475  }
476 
477  template<typename _InternT, typename _ExternT>
478  int
479  codecvt<_InternT, _ExternT, encoding_state>::
480  do_encoding() const throw()
481  {
482  int __ret = 0;
483  if (sizeof(_ExternT) <= sizeof(_InternT))
484  __ret = sizeof(_InternT) / sizeof(_ExternT);
485  return __ret;
486  }
487 
488  template<typename _InternT, typename _ExternT>
489  bool
490  codecvt<_InternT, _ExternT, encoding_state>::
491  do_always_noconv() const throw()
492  { return false; }
493 
494  template<typename _InternT, typename _ExternT>
495  int
496  codecvt<_InternT, _ExternT, encoding_state>::
497  do_length(state_type&, const extern_type* __from,
498  const extern_type* __end, size_t __max) const
499  { return std::min(__max, static_cast<size_t>(__end - __from)); }
500 
501  // _GLIBCXX_RESOLVE_LIB_DEFECTS
502  // 74. Garbled text for codecvt::do_max_length
503  template<typename _InternT, typename _ExternT>
504  int
505  codecvt<_InternT, _ExternT, encoding_state>::
506  do_max_length() const throw()
507  { return 1; }
508 
509 _GLIBCXX_END_NAMESPACE_VERSION
510 } // namespace
511 
512 #endif
Primary class template codecvt. NB: Generic, mostly useless implementation.
Definition: codecvt.h:276
GNU extensions for public use.
size_type size() const noexcept
Returns the number of characters in the string, not including any null-termination.
Definition: basic_string.h:724
const _CharT * c_str() const noexcept
Return const pointer to null-terminated contents.
Facet ID class. The ID class provides facets with an index used to identify them. Every facet class must...
ISO C++ entities toplevel namespace is std.
const _Tp & min(const _Tp &, const _Tp &)
This does what you think it does.
Definition: stl_algobase.h:194
Basis for explicit traits specializations.
Definition: char_traits.h:227
Class representing stream positions.
Definition: postypes.h:112
Extension to use iconv for dealing with character encodings.
Common base for codecvt functions.
Definition: codecvt.h:68
virtual result do_out(state_type &__state, const intern_type *__from, const intern_type *__from_end, const intern_type *&__from_next, extern_type *__to, extern_type *__to_end, extern_type *&__to_next) const
Convert from internal to external character set.