00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #ifndef _IAIUNICODESTRING_H_
00024 #define _IAIUNICODESTRING_H_
00025
00026 #include "AITypes.h"
00027 #include "AICharacterEncoding.h"
00028 #include "IAIAutoBuffer.h"
00029
00030 #include <string>
00031
00032 #if defined(MAC_ENV)
00033 #import <CoreFoundation/CFString.h>
00034 #if defined _UTF16_BASIC_STRING_EXPORT_H_
00035 #include _UTF16_BASIC_STRING_EXPORT_H_
00036 #endif
00037 #endif // defined(MAC_ENV)
00038
00039
00042 class CAIUnicodeStringImpl;
00043
/** Error code for an out-of-range index or offset. The inline insert() and
	replace() definitions further down in this header throw it wrapped in
	ai::Error. */
00047 #define kUnicodeStringBadIndex 'US!I'
00048
/** Error code; judging by the name it reports a length problem. It is not
	used anywhere in this header -- TODO confirm against the implementation. */
00051 #define kUnicodeStringLengthError 'US#L'
00052
/** Error code; judging by the name it reports malformed input data. It is not
	used anywhere in this header -- TODO confirm against the implementation. */
00055 #define kUnicodeStringMalformedError 'US!F'
00056
00057
00058 namespace ai {
00059
/** Read-only, non-owning wrapper around an unsigned char pointer (by its name,
	a Pascal-style string). The class stores the pointer only: it never copies,
	frees, or inspects the bytes, so the caller must keep them alive. */
class const_PStr
{
public:
	/** Wraps the given pointer. Explicit, so a raw pointer never converts silently.
			@param pascalString Pointer to the first byte of the string data.
		*/
	explicit const_PStr(const unsigned char* pascalString)
		: fConstStr(pascalString)
	{
	}

	/** Virtual destructor; nothing is released because nothing is owned. */
	virtual ~const_PStr()
	{
	}

	/** Copy constructor: the copy refers to the same bytes as the original.
			@param p The wrapper to copy.
		*/
	const_PStr(const const_PStr& p)
		: fConstStr(p.fConstStr)
	{
	}

	/** Retrieves the wrapped pointer.
			@return The pointer supplied at construction.
		*/
	const unsigned char* get() const
	{
		return fConstStr;
	}

	/** Read access to one byte. No bounds checking is performed.
			@param i 0-based byte index from the wrapped pointer.
			@return Reference to the byte at that index.
		*/
	const unsigned char& operator[] (size_t i) const
	{
		return *(fConstStr + i);
	}

protected:
	/** The wrapped, unowned pointer. */
	const unsigned char* fConstStr;

	/** Declared but not defined in this header, and not public, so outside
		code cannot assign one wrapper to another. */
	const_PStr& operator=(const const_PStr&);

};
00087
00089 class PStr : public const_PStr
00090 {
00091 public:
00095 explicit PStr(unsigned char* pascalString) : const_PStr(pascalString) {};
00099 PStr(const PStr& p) : const_PStr(p) {};
00102 unsigned char* get() const
00103 { return const_cast<unsigned char*>(fConstStr); }
00106 unsigned char& operator[] (size_t i) const
00107 { return get()[i]; }
00109
00110
00111 };
00112
00113
00114 #if defined(WIN_ENV)
00115
00116 class UnicodeString;
00117
00127 class WCHARStr
00128 {
00129 public:
00131 typedef wchar_t WCHAR;
00133 typedef const WCHAR* LPCWSTR;
00135 typedef WCHAR* LPWSTR;
00136
00137 public:
00141 WCHARStr () : fConstStr() {}
00146 WCHARStr (const ai::UnicodeString& string);
00147
00148 #if defined(_NATIVE_WCHAR_T_DEFINED)
00149
00150
00156 WCHARStr (LPCWSTR wcharString);
00157 #endif // defined(_NATIVE_WCHAR_T_DEFINED)
00158
00164 WCHARStr (const ASUnicode* string);
00165
00169 WCHARStr (const WCHARStr& p) : fConstStr(p.fConstStr) {}
00170
00172 virtual ~WCHARStr() {}
00173
00174
00176 WCHARStr& operator= (const WCHARStr& rhs)
00177 {
00178 fConstStr = rhs.fConstStr;
00179 return *this;
00180 }
00181
00186 LPCWSTR as_LPCWSTR () const
00187 {
00188 return fConstStr.c_str();
00189 }
00190
00195 const ASUnicode* as_ASUnicode () const;
00196
00200 size_t length() const;
00201
00202
00203 #if defined(_NATIVE_WCHAR_T_DEFINED)
00204
00205 operator LPCWSTR () const
00206 {
00207 return as_LPCWSTR();
00208 }
00209 #endif //defined(_NATIVE_WCHAR_T_DEFINED)
00210
00211 operator const ASUnicode* () const
00212 {
00213 return as_ASUnicode();
00214 }
00215
00216 protected:
00217 std::basic_string<WCHAR> fConstStr;
00218 };
00219
00220 #endif
00221
00231 class UnicodeString {
00232 public:
00234 typedef ai::sizediff_t offset_type;
00236 typedef size_t size_type;
00238 static const size_type npos;
00240 static const ai::UnicodeString kEmptyString;
00242 typedef ASUInt32 UTF32TextChar;
00244 typedef ASUnicode UTF16Char;
00245
00248 enum NormalizedForm {
00250 kForm_NFD = 0,
00252 kForm_NFC,
00254 kForm_NFKD,
00256 kForm_NFKC,
00257
00259 kForm_DummyValue = 0xFFFFFFFF
00260 };
00261
00263 class Collator;
00264
00265 public:
00266
00268
00270
00272 explicit UnicodeString (void) AINOTHROW;
00279 explicit UnicodeString (const char* string, offset_type srcByteLen,
00280 AICharacterEncoding encoding = kAIPlatformCharacterEncoding);
00281
00286 explicit UnicodeString (size_type count, UTF32TextChar ch);
00287
00292 explicit UnicodeString (const char* string, AICharacterEncoding encoding = kAIPlatformCharacterEncoding);
00293
00298 explicit UnicodeString (const std::string& string, AICharacterEncoding encoding = kAIPlatformCharacterEncoding);
00299
00304 explicit UnicodeString (const ASUnicode* string);
00305
00309 explicit UnicodeString (const ZRef zStringKey);
00310
00316 explicit UnicodeString (const ASUnicode* string, size_type srcUTF16Count);
00317
00322 explicit UnicodeString (const std::basic_string<ASUnicode>& string);
00323
00327 UnicodeString (const UnicodeString& s);
00328
00330 ~UnicodeString (void);
00332
00333
00335
00337
00343 static UnicodeString FromRoman (const char* string, size_type count);
00344
00350 static UnicodeString FromRoman (const char* string);
00351
00357 static UnicodeString FromRoman (const std::string& string);
00358
00364 static UnicodeString FromRoman (const const_PStr& pascalString);
00365
00372 static UnicodeString FromPlatform (const char* string, size_type count);
00373
00379 static UnicodeString FromPlatform (const char* string);
00380
00386 static UnicodeString FromPlatform (const std::string& string);
00387
00393 static UnicodeString FromPlatform (const const_PStr& pascalString);
00394
00400 static UnicodeString FromUTF8 (const char* string);
00401
00407 static UnicodeString FromUTF8 (const std::string& string);
00408
00414 static UnicodeString FromUTF8 (const const_PStr& pascalString);
00415
00417
00418
00419
00424 UnicodeString& append (const UnicodeString& str);
00425
00432 UnicodeString& append (const UnicodeString& str, size_type startOffset,
00433 size_type count);
00434
00440 UnicodeString& append (size_type count, UTF32TextChar ch)
00441 { return append(UnicodeString(count, ch)); }
00442
00447 UnicodeString& assign (const UnicodeString& str);
00448
00455 UnicodeString& assign (const UnicodeString& str, size_type offset,
00456 size_type count)
00457 { return assign(str.substr(offset, count)); }
00458
00464 UTF32TextChar at (size_type offset) const;
00465
00471 void clear ();
00472
00482 ai::int32 compare (const UnicodeString& str) const;
00483
00495 ai::int32 compare (size_type pos, size_type num, const UnicodeString& str) const;
00496
00510 ai::int32 compare (size_type pos, size_type num, const UnicodeString& str,
00511 size_type startOffset, size_type count) const;
00512
00516 size_type length () const;
00517
00521 bool empty () const;
00522
00528 UnicodeString& erase (size_type pos=0, size_type count = npos);
00529
00537 size_type find (UTF32TextChar ch, size_type startOffset = 0 ) const;
00538
00547 size_type find (const UnicodeString& target, size_type startOffset = 0) const;
00548
00557 size_type find (const UnicodeString& target, size_type startOffset, size_type count) const;
00558
00565 size_type caseFind (UTF32TextChar ch, size_type startOffset = 0 ) const
00566 { return caseFind(ai::UnicodeString(1, ch), startOffset, 1); }
00567
00575 size_type caseFind (const UnicodeString& target, size_type startOffset = 0) const
00576 { return caseFind(target, startOffset, target.length()); }
00577
00586 size_type caseFind (const UnicodeString& target, size_type startOffset, size_type count) const;
00587
00595 size_type rfind (UTF32TextChar ch, size_type startOffset = npos ) const;
00596
00605 size_type rfind (const UnicodeString& target, size_type startOffset = npos) const;
00606
00616 size_type rfind (const UnicodeString& target, size_type startOffset, size_type count) const;
00617
00625 size_type find_first_of (const UnicodeString& target, size_type startOffset = 0) const
00626 { return find_first_of(target, startOffset, npos); }
00627
00636 size_type find_first_of (const UnicodeString& target, size_type startOffset, size_type count) const;
00637
00645 size_type find_last_of (const UnicodeString& target, size_type startOffset = npos) const
00646 { return find_last_of(target, startOffset, npos); }
00647
00656 size_type find_last_of (const UnicodeString& target, size_type startOffset, size_type count) const;
00657
00665 size_type find_first_not_of (const UnicodeString& target, size_type startOffset = 0) const
00666 { return find_first_not_of(target, startOffset, npos); }
00667
00676 size_type find_first_not_of (const UnicodeString& target, size_type startOffset, size_type count) const;
00677
00685 size_type find_last_not_of (const UnicodeString& target, size_type startOffset = npos) const
00686 { return find_last_not_of (target, startOffset, npos); }
00687
00696 size_type find_last_not_of (const UnicodeString& target, size_type startOffset, size_type count) const;
00697
00704 UnicodeString& replace (size_type pos, size_type num, const UnicodeString& str)
00705 { return replace(pos, num, str, 0, npos); }
00706
00714 UnicodeString& replace (size_type pos, size_type num, const UnicodeString& str, size_type count)
00715 { return replace(pos, num, str, 0, count); }
00716
00725 UnicodeString& replace (size_type pos, size_type num, const UnicodeString& str,
00726 size_type startOffset, size_type count);
00727
00734 UnicodeString& insert (size_type insertOffset, const UnicodeString& str)
00735 { return insert(insertOffset, str, 0, npos); }
00736
00746 UnicodeString& insert (size_type insertOffset, const UnicodeString& str,
00747 size_type offset, size_type count);
00748
00756 UnicodeString& insert (size_type insertOffset, size_type count,
00757 UTF32TextChar ch)
00758 { return insert(insertOffset, UnicodeString(count, ch), 0, count); }
00759
00764 void push_back(UTF32TextChar ch)
00765 { (void) append(1, ch); }
00766
00773 void resize (size_type count, UTF32TextChar ch = UTF32TextChar());
00774
00779 ai::UnicodeString::size_type size (void) const
00780 { return length(); }
00781
00788 UnicodeString substr (size_type offset = 0, size_type count = npos) const;
00789
00794 void swap (UnicodeString& str);
00795
00796
00797
00802 UnicodeString& operator= (const UnicodeString& rhs);
00803
00808 UnicodeString& operator+= (UTF32TextChar ch)
00809 { return append(1, ch); }
00810
00815 UnicodeString& operator+= (const UnicodeString& rhs)
00816 { return append(rhs); }
00817
00827 UTF32TextChar operator[] (size_type offset) const;
00828
00834 bool operator== (const UnicodeString& rhs) const
00835 { return compare(rhs) == 0; }
00836
00842 bool operator!= (const UnicodeString& rhs) const
00843 { return !(operator==(rhs)); }
00844
00850 bool operator< (const UnicodeString& rhs) const
00851 { return compare(rhs) < 0; }
00852
00853
00854
00859 UnicodeString& toLower();
00860
00866 UnicodeString& toUpper();
00867
00878 ai::int32 caseCompare (const UnicodeString& str) const
00879 { return caseCompare(0, npos, str, 0, npos); }
00880
00893 ai::int32 caseCompare (size_type pos, size_type num, const UnicodeString& str) const
00894 { return caseCompare(pos, num, str, 0, npos); }
00895
00912 ai::int32 caseCompare (size_type pos, size_type num, const UnicodeString& str,
00913 size_type startOffset, size_type count) const;
00914
00930 ai::int32 canonicalCompare (const UnicodeString& str) const;
00931
00947 ai::int32 canonicalCaseCompare (const UnicodeString& str) const;
00948
00956 UnicodeString& normalize (NormalizedForm form);
00957
00961 bool hasSurrogates () const;
00962
00973 size_type utf_16 (const UTF16Char*& buffer) const;
00974
00979 std::basic_string<ASUnicode> as_ASUnicode ( ) const;
00980
00984 std::string as_UTF8 ( ) const;
00985
00988 std::string as_Platform () const;
00989
00992 std::string as_Roman () const;
00993
01003 size_type getToBuffer ( char* buffer, size_type bufferMax, AICharacterEncoding encoding ) const;
01004
01013 size_type getToBuffer ( const PStr& pascalString, size_type bufferMax, AICharacterEncoding encoding ) const;
01014
01023 size_type as_ASUnicode (ASUnicode* buffer, size_type bufferMax ) const;
01024
01032 size_type as_Platform ( const PStr& pascalString, size_type bufferMax ) const;
01033
01041 size_type as_Platform ( char* buffer, size_type bufferMax ) const;
01042
01050 size_type as_Roman ( char* buffer, size_type bufferMax ) const;
01051
01059 size_type as_Roman ( const PStr& pascalString, size_type bufferMax ) const;
01060
01066 std::string getInStdString (AICharacterEncoding encoding) const;
01067
01074 size_type getAs (AICharacterEncoding encoding, ai::AutoBuffer<char>& b) const;
01075
01076 #if defined(MAC_ENV)
01077
01078
01080
01082
01085 explicit UnicodeString (const CFStringRef& cfString);
01086
01092 CFStringRef as_CFString (CFAllocatorRef alloc) const;
01094 #endif // defined(MAC_ENV)
01095
01096 #if defined(WIN_ENV)
01097
01098
01100
01102
01105 explicit UnicodeString (const WCHARStr& string);
01106
01110 WCHARStr as_WCHARStr () const;
01111
01120 size_type as_WCHARStr (WCHARStr::LPWSTR buffer, size_type bufferMax ) const;
01121
01123 #endif // defined(WIN_ENV)
01124
01125
01126 public:
01127 void deleteImpl();
01128
01129 protected:
01130 explicit UnicodeString(class CAIUnicodeStringImpl* impl);
01131
01132 private:
01133 CAIUnicodeStringImpl* fImpl;
01134 };
01135
01137
01138
01139
01140
01142
01144
01145 inline UnicodeString& UnicodeString::insert (size_type insertOffset, const UnicodeString& str,
01146 size_type offset, size_type count)
01147 {
01148 if ( insertOffset > length() || offset > str.length() )
01149 throw ai::Error(kUnicodeStringBadIndex);
01150 UnicodeString result = substr(0, insertOffset);
01151 result.append(str, offset, count);
01152 result.append(substr(insertOffset));
01153
01154 *this = result;
01155 return *this;
01156 }
01157
01158 inline UnicodeString& UnicodeString::replace (size_type pos, size_type num, const UnicodeString& str,
01159 size_type startOffset, size_type count)
01160 {
01161 if ( pos > length() || startOffset > str.length() )
01162 throw ai::Error(kUnicodeStringBadIndex);
01163 erase(pos, num);
01164 insert(pos, str, startOffset, count);
01165
01166 return *this;
01167 }
01168
01169 inline std::string UnicodeString::as_UTF8 ( ) const
01170 {
01171 return getInStdString(kAIUTF8CharacterEncoding);
01172 }
01173
01174 inline std::string UnicodeString::as_Platform () const
01175 {
01176 return getInStdString(kAIPlatformCharacterEncoding);
01177 }
01178
01179 inline std::string UnicodeString::as_Roman () const
01180 {
01181 return getInStdString(kAIRomanCharacterEncoding);
01182 }
01183
01184 inline UnicodeString::size_type UnicodeString::as_ASUnicode ( ASUnicode* buffer, size_type bufferMax ) const
01185 {
01186 const UTF16Char* bufPtr = 0;
01187 const size_type kThisUTF16Len = utf_16(bufPtr) + 1;
01188 const size_type kCopyMax = (bufferMax < kThisUTF16Len ? bufferMax : kThisUTF16Len) - 1;
01189 memcpy(buffer, bufPtr, kCopyMax*sizeof(UTF16Char));
01190 buffer[kCopyMax] = 0;
01191
01192 return kThisUTF16Len;
01193 }
01194
01195 #if defined(WIN_ENV)
01196
01197 inline UnicodeString::size_type UnicodeString::as_WCHARStr ( WCHARStr::LPWSTR buffer, size_type bufferMax ) const
01198 {
01199 AI_STATIC_CHECK(sizeof(WCHARStr::WCHAR) == sizeof(ai::UnicodeString::UTF16Char), WCHAR_size_does_not_match_unsigned_short_size);
01200 return as_ASUnicode( reinterpret_cast<ASUnicode*>(buffer), bufferMax );
01201 }
01202
01203 #endif // defined(WIN_ENV)
01204
01205 inline UnicodeString::size_type UnicodeString::as_Platform ( char* buffer, size_type bufferMax ) const
01206 {
01207 return getToBuffer( buffer, bufferMax, kAIPlatformCharacterEncoding);
01208 }
01209
01210 inline UnicodeString::size_type UnicodeString::as_Roman ( char* buffer, size_type bufferMax ) const
01211 {
01212 return getToBuffer( buffer, bufferMax, kAIRomanCharacterEncoding);
01213 }
01214
01215 inline UnicodeString::size_type UnicodeString::as_Platform ( const ai::PStr& pascalString, size_type bufferMax ) const
01216 {
01217 return getToBuffer( pascalString, bufferMax, kAIPlatformCharacterEncoding );
01218 }
01219
01220 inline UnicodeString::size_type UnicodeString::as_Roman ( const ai::PStr& pascalString, size_type bufferMax ) const
01221 {
01222 return getToBuffer( pascalString, bufferMax, kAIRomanCharacterEncoding );
01223 }
01224
01225
01226
01227 #if defined(WIN_ENV)
01228
01230
01232
01233 inline WCHARStr::WCHARStr (const UnicodeString& string) : fConstStr()
01234 {
01235 AI_STATIC_CHECK(sizeof(WCHAR) == sizeof(ai::UnicodeString::UTF16Char), WCHAR_size_does_not_match_unsigned_short_size);
01236
01237 const std::basic_string<ASUnicode>& cInput = string.as_ASUnicode();
01238 const size_t cLen = cInput.length();
01239
01240 if ( cLen > 0 )
01241 fConstStr = std::basic_string<WCHAR>(reinterpret_cast<LPCWSTR>(cInput.data()), cLen);
01242 };
01243
01244 #if defined(_NATIVE_WCHAR_T_DEFINED)
01245
01246 inline WCHARStr::WCHARStr (WCHARStr::LPCWSTR wcharString) : fConstStr( wcharString )
01247 {
01248 }
01249 #endif // defined(_NATIVE_WCHAR_T_DEFINED)
01250
01251 inline WCHARStr::WCHARStr (const ASUnicode* string)
01252 {
01253 if ( string && *string )
01254 {
01255 AI_STATIC_CHECK(sizeof(WCHARStr::WCHAR) == sizeof(ai::UnicodeString::UTF16Char), WCHAR_size_does_not_match_unsigned_short_size);
01256 fConstStr.assign(reinterpret_cast<LPCWSTR>(string));
01257 }
01258 }
01259
01260 inline const ASUnicode* WCHARStr::as_ASUnicode () const
01261 {
01262 AI_STATIC_CHECK(sizeof(WCHARStr::WCHAR) == sizeof(ai::UnicodeString::UTF16Char), WCHAR_size_does_not_match_unsigned_short_size);
01263 return reinterpret_cast<const ASUnicode*>(this->as_LPCWSTR());
01264 }
01265
01266 inline size_t WCHARStr::length() const
01267 {
01268 return fConstStr.length();
01269 }
01270
01271 #endif // defined(WIN_ENV)
01272
01273 }
01274
01275 #endif // _IAIUNICODESTRING_H_