|
Qore Programming Language
0.8.3
|
00001 /* -*- mode: c++; indent-tabs-mode: nil -*- */ 00002 /* 00003 QoreEncoding.h 00004 00005 Qore Programming Language 00006 00007 Copyright 2003 - 2011 David Nichols 00008 00009 This library is free software; you can redistribute it and/or 00010 modify it under the terms of the GNU Lesser General Public 00011 License as published by the Free Software Foundation; either 00012 version 2.1 of the License, or (at your option) any later version. 00013 00014 This library is distributed in the hope that it will be useful, 00015 but WITHOUT ANY WARRANTY; without even the implied warranty of 00016 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00017 Lesser General Public License for more details. 00018 00019 You should have received a copy of the GNU Lesser General Public 00020 License along with this library; if not, write to the Free Software 00021 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 00022 */ 00023 00024 #ifndef _QORE_CHARSET_H 00025 00026 #define _QORE_CHARSET_H 00027 00033 #include <qore/common.h> 00034 #include <qore/QoreThreadLock.h> 00035 00036 #include <strings.h> 00037 #include <string.h> 00038 00039 #include <map> 00040 00041 #include <string> 00042 00044 typedef qore_size_t (*mbcs_length_t)(const char *str, const char *end, bool &invalid); 00045 00047 typedef qore_size_t (*mbcs_end_t)(const char *str, const char *end, qore_size_t num_chars, bool &invalid); 00048 00050 typedef qore_size_t (*mbcs_pos_t)(const char *str, const char *ptr, bool &invalid); 00051 00053 00057 typedef qore_size_t (*mbcs_charlen_t)(const char *str, qore_size_t valid_len); 00058 00059 class ExceptionSink; 00060 00062 00072 class QoreEncoding { 00073 private: 00074 std::string code; 00075 std::string desc; 00076 mbcs_length_t flength; 00077 mbcs_end_t fend; 00078 mbcs_pos_t fpos; 00079 mbcs_charlen_t fcharlen; 00080 unsigned char maxwidth; 00081 00082 public: 00083 DLLLOCAL QoreEncoding(const char *n_code, const char *n_desc = 0, unsigned char n_maxwidth = 1, mbcs_length_t l = 0, mbcs_end_t e = 0, mbcs_pos_t p = 0, mbcs_charlen_t c = 0) : code(n_code), desc(n_desc ? n_desc : ""), flength(l), fend(e), fpos(p), fcharlen(c), maxwidth(n_maxwidth) { 00084 } 00085 00086 DLLLOCAL ~QoreEncoding() { 00087 } 00088 00090 00095 DLLLOCAL qore_size_t getLength(const char *p, const char *end, bool &invalid) const { 00096 return flength ? flength(p, end, invalid) : strlen(p); 00097 } 00098 00100 00105 DLLLOCAL qore_size_t getLength(const char *p, const char *end, ExceptionSink *xsink) const; 00106 00108 00114 DLLLOCAL qore_size_t getByteLen(const char *p, const char *end, qore_size_t c, bool &invalid) const { 00115 return fend ? fend(p, end, c, invalid) : c; 00116 } 00117 00119 00125 DLLLOCAL qore_size_t getByteLen(const char *p, const char *end, qore_size_t c, ExceptionSink *xsink) const; 00126 00128 00133 DLLLOCAL qore_size_t getCharPos(const char *p, const char *end, bool &invalid) const { 00134 return fpos ? fpos(p, end, invalid) : end - p; 00135 } 00136 00138 00143 DLLLOCAL qore_size_t getCharPos(const char *p, const char *end, ExceptionSink *xsink) const; 00144 00146 00151 DLLLOCAL qore_size_t getCharLen(const char *p, qore_size_t valid_len) const { 00152 return fcharlen ? fcharlen(p, valid_len) : 1; 00153 } 00154 00156 DLLLOCAL bool isMultiByte() const { 00157 return (bool)flength; 00158 } 00159 00161 DLLLOCAL const char *getCode() const { 00162 return code.c_str(); 00163 } 00164 00166 DLLLOCAL const char *getDesc() const { 00167 return desc.empty() ? "<no description available>" : desc.c_str(); 00168 } 00169 00171 DLLLOCAL int getMaxCharWidth() const { 00172 return maxwidth; 00173 } 00174 }; 00175 00176 // case-insensitive maps for encodings 00177 typedef std::map<const char *, QoreEncoding *, class ltcstrcase> encoding_map_t; 00178 typedef std::map<const char *, const QoreEncoding *, class ltcstrcase> const_encoding_map_t; 00179 00180 class QoreString; 00181 00183 00185 class QoreEncodingManager 00186 { 00187 private: 00188 DLLLOCAL static encoding_map_t emap; 00189 DLLLOCAL static const_encoding_map_t amap; 00190 DLLLOCAL static class QoreThreadLock mutex; 00191 00192 DLLLOCAL static const QoreEncoding *addUnlocked(const char *code, const char *desc, unsigned char maxwidth = 1, mbcs_length_t l = 0, mbcs_end_t e = 0, mbcs_pos_t p = 0, mbcs_charlen_t = 0); 00193 DLLLOCAL static const QoreEncoding *findUnlocked(const char *name); 00194 00195 public: 00197 DLLEXPORT static void addAlias(const QoreEncoding *qcs, const char *alias); 00198 00200 DLLEXPORT static const QoreEncoding *findCreate(const char *name); 00201 00203 DLLEXPORT static const QoreEncoding *findCreate(const QoreString *str); 00204 00206 DLLEXPORT static void showEncodings(); 00207 00209 DLLEXPORT static void showAliases(); 00210 00212 DLLEXPORT static const QoreEncoding *add(const char *code, const char *desc = 0, unsigned char maxwidth = 1, mbcs_length_t l = 0, mbcs_end_t e = 0, mbcs_pos_t p = 0, mbcs_charlen_t = 0); 00213 00214 DLLLOCAL static void init(const char *def); 00215 DLLLOCAL QoreEncodingManager(); 00216 DLLLOCAL ~QoreEncodingManager(); 00217 }; 00218 00220 DLLEXPORT extern QoreEncodingManager QEM; 00221 00222 // builtin character encodings 00223 DLLEXPORT extern const QoreEncoding *QCS_DEFAULT, 00224 *QCS_USASCII, 00225 *QCS_UTF8, 00226 *QCS_ISO_8859_1, 00227 *QCS_ISO_8859_2, 00228 *QCS_ISO_8859_3, 00229 *QCS_ISO_8859_4, 00230 *QCS_ISO_8859_5, 00231 *QCS_ISO_8859_6, 00232 *QCS_ISO_8859_7, 00233 *QCS_ISO_8859_8, 00234 *QCS_ISO_8859_9, 00235 *QCS_ISO_8859_10, 00236 *QCS_ISO_8859_11, 00237 *QCS_ISO_8859_13, 00238 *QCS_ISO_8859_14, 00239 *QCS_ISO_8859_15, 00240 *QCS_ISO_8859_16, 00241 *QCS_KOI8_R, 00242 *QCS_KOI8_U, 00243 *QCS_KOI7; 00244 00245 #endif // _QORE_CHARSET_H