Qore Programming Language  0.8.3
include/qore/QoreEncoding.h (4311)
Go to the documentation of this file.
00001 /* -*- mode: c++; indent-tabs-mode: nil -*- */
00002 /*
00003   QoreEncoding.h
00004 
00005   Qore Programming Language
00006 
00007   Copyright 2003 - 2011 David Nichols
00008 
00009   This library is free software; you can redistribute it and/or
00010   modify it under the terms of the GNU Lesser General Public
00011   License as published by the Free Software Foundation; either
00012   version 2.1 of the License, or (at your option) any later version.
00013 
00014   This library is distributed in the hope that it will be useful,
00015   but WITHOUT ANY WARRANTY; without even the implied warranty of
00016   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017   Lesser General Public License for more details.
00018 
00019   You should have received a copy of the GNU Lesser General Public
00020   License along with this library; if not, write to the Free Software
00021   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
00022 */
00023 
00024 #ifndef _QORE_CHARSET_H
00025 
00026 #define _QORE_CHARSET_H
00027 
00033 #include <qore/common.h>
00034 #include <qore/QoreThreadLock.h>
00035 
00036 #include <strings.h>
00037 #include <string.h>
00038 
00039 #include <map>
00040 
00041 #include <string>
00042 
00044 typedef qore_size_t (*mbcs_length_t)(const char *str, const char *end, bool &invalid);
00045 
00047 typedef qore_size_t (*mbcs_end_t)(const char *str, const char *end, qore_size_t num_chars, bool &invalid);
00048 
00050 typedef qore_size_t (*mbcs_pos_t)(const char *str, const char *ptr, bool &invalid);
00051 
00053 
00057 typedef qore_size_t (*mbcs_charlen_t)(const char *str, qore_size_t valid_len);
00058 
00059 class ExceptionSink;
00060 
00062 
00072 class QoreEncoding {
00073 private:
00074       std::string code;
00075       std::string desc;
00076       mbcs_length_t flength;
00077       mbcs_end_t fend;
00078       mbcs_pos_t fpos;
00079       mbcs_charlen_t fcharlen;
00080       unsigned char maxwidth;
00081 
00082 public:
00083       DLLLOCAL QoreEncoding(const char *n_code, const char *n_desc = 0, unsigned char n_maxwidth = 1, mbcs_length_t l = 0, mbcs_end_t e = 0, mbcs_pos_t p = 0, mbcs_charlen_t c = 0) : code(n_code), desc(n_desc ? n_desc : ""), flength(l), fend(e), fpos(p), fcharlen(c), maxwidth(n_maxwidth) {
00084       }
00085 
00086       DLLLOCAL ~QoreEncoding() {
00087       }
00088 
00090 
00095       DLLLOCAL qore_size_t getLength(const char *p, const char *end, bool &invalid) const {
00096          return flength ? flength(p, end, invalid) : strlen(p);
00097       }
00098 
00100 
00105       DLLLOCAL qore_size_t getLength(const char *p, const char *end, ExceptionSink *xsink) const;
00106 
00108 
00114       DLLLOCAL qore_size_t getByteLen(const char *p, const char *end, qore_size_t c, bool &invalid) const {
00115          return fend ? fend(p, end, c, invalid) : c;
00116       }
00117 
00119 
00125       DLLLOCAL qore_size_t getByteLen(const char *p, const char *end, qore_size_t c, ExceptionSink *xsink) const;
00126 
00128 
00133       DLLLOCAL qore_size_t getCharPos(const char *p, const char *end, bool &invalid) const {
00134          return fpos ? fpos(p, end, invalid) : end - p;
00135       }
00136 
00138 
00143       DLLLOCAL qore_size_t getCharPos(const char *p, const char *end, ExceptionSink *xsink) const;
00144 
00146 
00151       DLLLOCAL qore_size_t getCharLen(const char *p, qore_size_t valid_len) const {
00152          return fcharlen ? fcharlen(p, valid_len) : 1;
00153       }
00154       
00156       DLLLOCAL bool isMultiByte() const {
00157          return (bool)flength;
00158       }
00159 
00161       DLLLOCAL const char *getCode() const {
00162          return code.c_str();
00163       }
00164 
00166       DLLLOCAL const char *getDesc() const {
00167          return desc.empty() ? "<no description available>" : desc.c_str();
00168       }
00169 
00171       DLLLOCAL int getMaxCharWidth() const {
00172           return maxwidth;
00173       }
00174 };
00175 
00176 // case-insensitive maps for encodings
00177 typedef std::map<const char *, QoreEncoding *, class ltcstrcase> encoding_map_t;
00178 typedef std::map<const char *, const QoreEncoding *, class ltcstrcase> const_encoding_map_t;
00179 
00180 class QoreString;
00181 
00183 
00185 class QoreEncodingManager
00186 {
00187    private:
00188       DLLLOCAL static encoding_map_t emap;
00189       DLLLOCAL static const_encoding_map_t amap;
00190       DLLLOCAL static class QoreThreadLock mutex;
00191    
00192       DLLLOCAL static const QoreEncoding *addUnlocked(const char *code, const char *desc, unsigned char maxwidth = 1, mbcs_length_t l = 0, mbcs_end_t e = 0, mbcs_pos_t p = 0, mbcs_charlen_t = 0);
00193       DLLLOCAL static const QoreEncoding *findUnlocked(const char *name);
00194 
00195    public:
00197       DLLEXPORT static void addAlias(const QoreEncoding *qcs, const char *alias);
00198 
00200       DLLEXPORT static const QoreEncoding *findCreate(const char *name);
00201 
00203       DLLEXPORT static const QoreEncoding *findCreate(const QoreString *str);
00204 
00206       DLLEXPORT static void showEncodings();
00207 
00209       DLLEXPORT static void showAliases();
00210 
00212       DLLEXPORT static const QoreEncoding *add(const char *code, const char *desc = 0, unsigned char maxwidth = 1, mbcs_length_t l = 0, mbcs_end_t e = 0, mbcs_pos_t p = 0, mbcs_charlen_t = 0);
00213 
00214       DLLLOCAL static void init(const char *def);
00215       DLLLOCAL QoreEncodingManager();
00216       DLLLOCAL ~QoreEncodingManager();
00217 };
00218 
00220 DLLEXPORT extern QoreEncodingManager QEM;
00221 
00222 // builtin character encodings
00223 DLLEXPORT extern const QoreEncoding *QCS_DEFAULT, 
00224    *QCS_USASCII,                                  
00225    *QCS_UTF8,                                     
00226    *QCS_ISO_8859_1,                               
00227    *QCS_ISO_8859_2,                               
00228    *QCS_ISO_8859_3,                               
00229    *QCS_ISO_8859_4,                               
00230    *QCS_ISO_8859_5,                               
00231    *QCS_ISO_8859_6,                               
00232    *QCS_ISO_8859_7,                               
00233    *QCS_ISO_8859_8,                               
00234    *QCS_ISO_8859_9,                               
00235    *QCS_ISO_8859_10,                              
00236    *QCS_ISO_8859_11,                              
00237    *QCS_ISO_8859_13,                              
00238    *QCS_ISO_8859_14,                              
00239    *QCS_ISO_8859_15,                              
00240    *QCS_ISO_8859_16,                              
00241    *QCS_KOI8_R,                                   
00242    *QCS_KOI8_U,                                   
00243    *QCS_KOI7;                                     
00244 
00245 #endif // _QORE_CHARSET_H
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Defines