Adobe.com
Contents Suites Classes Class Index Member Index

IAIUnicodeString.h

Go to the documentation of this file.
00001 /*
00002  *        Name: IAIUnicodeString.h
00003  *   $Revision: 1 $
00004  *      Author:
00005  *        Date:
00006  *     Purpose: Interface to the wrapper class for AIUnicodeStringSuite & the
00007  *                              primary interface for the AI core to
00008  *                              the ai::UnicodeString objects.
00009  *
00010  * ADOBE SYSTEMS INCORPORATED
00011  * Copyright 2004-2015 Adobe Systems Incorporated.
00012  * All rights reserved.
00013  *
00014  * NOTICE:  Adobe permits you to use, modify, and distribute this file 
00015  * in accordance with the terms of the Adobe license agreement 
00016  * accompanying it. If you have received this file from a source other 
00017  * than Adobe, then your use, modification, or distribution of it 
00018  * requires the prior written permission of Adobe.
00019  *
00020  */
00021 
00022 
00023 #ifndef _IAIUNICODESTRING_H_
00024 #define _IAIUNICODESTRING_H_
00025 
00026 #include "AITypes.h"
00027 #include "AICharacterEncoding.h"
00028 #include "IAIAutoBuffer.h"
00029 
00030 #include <string>
00031 
00032 #if defined(MAC_ENV)
00033         #import <CoreFoundation/CFString.h>
00034     #if defined _UTF16_BASIC_STRING_EXPORT_H_
00035         #include _UTF16_BASIC_STRING_EXPORT_H_
00036     #endif
00037 #endif // defined(MAC_ENV)
00038 
00039 
00042 class CAIUnicodeStringImpl;
00043 
/** Error code for an out-of-range index or offset. The inline
    UnicodeString::insert() and UnicodeString::replace() in this header
    throw ai::Error with this code. */
#define kUnicodeStringBadIndex          'US!I'

/** Error code for a length problem. NOTE(review): no use of this code is
    visible in this header - confirm the exact conditions with the
    implementation. */
#define kUnicodeStringLengthError       'US#L'

/** Error code for malformed string data. NOTE(review): no use of this code
    is visible in this header - confirm the exact conditions with the
    implementation. */
#define kUnicodeStringMalformedError    'US!F'
00056 
00057 
00058 namespace ai {
00059 
/**
 * Read-only, non-owning wrapper around an unsigned-char buffer that the
 * caller identifies as a Pascal string.
 *
 * The class only stores the pointer: it never copies, frees or inspects the
 * bytes. NOTE(review): the buffer is presumably length-prefixed, but nothing
 * here reads or checks a length byte.
 */
class const_PStr
{
public:
    /**
     * Wraps an existing buffer.
     * @param pascalString Pointer that is stored as-is (not copied).
     */
    explicit const_PStr(const unsigned char* pascalString)
        : fConstStr(pascalString)
    {
    }

    /** Copy constructor: the copy refers to the same buffer as @a p. */
    const_PStr(const const_PStr& p)
        : fConstStr(p.fConstStr)
    {
    }

    /** Virtual destructor; the wrapped buffer is not released. */
    virtual ~const_PStr()
    {
    }

    /** @return The wrapped pointer. */
    const unsigned char* get() const
    {
        return fConstStr;
    }

    /**
     * Unchecked element access.
     * @param i Byte index into the wrapped buffer; no bounds check is made.
     * @return Reference to the byte at index @a i.
     */
    const unsigned char& operator[] (size_t i) const
    {
        return *(fConstStr + i);
    }

protected:
    /** The wrapped buffer (not owned). */
    const unsigned char* fConstStr;

    /** Declared but not defined in this header, and not public, so client
        code cannot assign one wrapper to another. */
    const_PStr& operator=(const const_PStr&);
};
00087 
00089 class PStr : public const_PStr
00090 {
00091 public:
00095         explicit PStr(unsigned char* pascalString) : const_PStr(pascalString) {};
00099         PStr(const PStr& p) : const_PStr(p) {};
00102         unsigned char* get() const
00103         { return const_cast<unsigned char*>(fConstStr); }
00106         unsigned char& operator[] (size_t i) const
00107         { return get()[i]; }
00109         //operator const const_PStr& ()
00110         //{ return *this; }
00111 };
00112 
00113 
00114 #if defined(WIN_ENV)
00115 
00116 class UnicodeString;
00117 
00127 class WCHARStr 
00128 {
00129 public:
00131         typedef wchar_t WCHAR;
00133         typedef const WCHAR* LPCWSTR;
00135         typedef WCHAR* LPWSTR;
00136 
00137 public:
00141         WCHARStr () : fConstStr() {}
00146         WCHARStr (const ai::UnicodeString& string);
00147 
00148 #if defined(_NATIVE_WCHAR_T_DEFINED)
00149         // This ctor is redundant if wchar_t is not an intrinsic.  It cannot be distinguished from the const ASUnicode* ctor. 
00150 
00156         WCHARStr (LPCWSTR wcharString);
00157 #endif // defined(_NATIVE_WCHAR_T_DEFINED)
00158 
00164         WCHARStr (const ASUnicode* string);
00165 
00169         WCHARStr (const WCHARStr& p) : fConstStr(p.fConstStr) {}
00170 
00172         virtual ~WCHARStr() {}
00173 
00174 
00176         WCHARStr& operator= (const WCHARStr& rhs)
00177         {
00178                 fConstStr = rhs.fConstStr;
00179                 return *this;
00180         }
00181 
00186         LPCWSTR as_LPCWSTR () const
00187         {
00188                 return fConstStr.c_str();
00189         }
00190         
00195         const ASUnicode* as_ASUnicode () const;
00196 
00200         size_t length() const;
00201 
00202         /* Convenience operators */
00203 #if defined(_NATIVE_WCHAR_T_DEFINED)
00204         // This operator is redundant if wchar_t is not an intrinsic.  It cannot be distinguished from the const ASUnicode* operator. 
00205         operator LPCWSTR () const 
00206         {
00207                 return as_LPCWSTR();
00208         }
00209 #endif //defined(_NATIVE_WCHAR_T_DEFINED)
00210 
00211         operator const ASUnicode* () const
00212         {
00213                 return as_ASUnicode();
00214         }
00215 
00216 protected:
00217         std::basic_string<WCHAR> fConstStr;
00218 };
00219 
00220 #endif /* WIN_ENV */
00221 
00231 class UnicodeString {
00232 public:
00234         typedef ai::sizediff_t offset_type;
00236         typedef size_t size_type;
00238         static const size_type npos;
00240         static const ai::UnicodeString kEmptyString;
00242         typedef ASUInt32 UTF32TextChar;
00244         typedef ASUnicode UTF16Char;
00245 
00248         enum NormalizedForm {
00250                 kForm_NFD               = 0,
00252                 kForm_NFC,
00254                 kForm_NFKD,
00256                 kForm_NFKC,
00257 
00259                 kForm_DummyValue = 0xFFFFFFFF
00260         };
00261         
00263         class Collator;
00264 
00265 public:
00266         //----------------------------------------------------------------------
00268         //----------------------------------------------------------------------
00270 
00272         explicit UnicodeString (void) AINOTHROW;
00279         explicit UnicodeString (const char* string, offset_type srcByteLen,
00280                 AICharacterEncoding encoding = kAIPlatformCharacterEncoding);
00281 
00286         explicit UnicodeString (size_type count, UTF32TextChar ch);
00287 
00292         explicit UnicodeString (const char* string, AICharacterEncoding encoding = kAIPlatformCharacterEncoding);
00293 
00298         explicit UnicodeString (const std::string& string, AICharacterEncoding encoding = kAIPlatformCharacterEncoding);
00299 
00304         explicit UnicodeString (const ASUnicode* string);
00305 
00309         explicit UnicodeString (const ZRef zStringKey);
00310 
00316         explicit UnicodeString (const ASUnicode* string, size_type srcUTF16Count);
00317 
00322         explicit UnicodeString (const std::basic_string<ASUnicode>& string);
00323 
00327         UnicodeString (const UnicodeString& s);
00328 
00330         ~UnicodeString (void);
00332 
00333         //----------------------------------------------------------------------
00335         //----------------------------------------------------------------------
00337 
00343         static UnicodeString FromRoman (const char* string, size_type count);
00344 
00350         static UnicodeString FromRoman (const char* string);
00351 
00357         static UnicodeString FromRoman (const std::string& string);
00358 
00364         static UnicodeString FromRoman (const const_PStr& pascalString);
00365 
00372         static UnicodeString FromPlatform (const char* string, size_type count);
00373 
00379         static UnicodeString FromPlatform (const char* string);
00380 
00386         static UnicodeString FromPlatform (const std::string& string);
00387 
00393         static UnicodeString FromPlatform (const const_PStr&  pascalString);
00394 
00400         static UnicodeString FromUTF8 (const char* string);
00401 
00407         static UnicodeString FromUTF8 (const std::string& string);
00408 
00414         static UnicodeString FromUTF8 (const const_PStr&  pascalString);
00415 
00417 
00418         /* Basic operations (as in std::basic_string) */
00419 
00424         UnicodeString& append (const UnicodeString& str);
00425 
00432         UnicodeString& append (const UnicodeString& str, size_type startOffset,
00433                 size_type count);
00434 
00440         UnicodeString& append (size_type count, UTF32TextChar ch)
00441         { return append(UnicodeString(count, ch)); }
00442 
00447         UnicodeString& assign (const UnicodeString& str);
00448 
00455         UnicodeString& assign (const UnicodeString& str, size_type offset,
00456                 size_type count)
00457         { return assign(str.substr(offset, count)); }
00458 
00464         UTF32TextChar at (size_type offset) const;
00465 
00471         void clear ();
00472 
00482         ai::int32 compare (const UnicodeString& str) const;
00483 
00495         ai::int32 compare (size_type pos, size_type num, const UnicodeString& str) const;
00496 
00510         ai::int32 compare (size_type pos, size_type num, const UnicodeString& str,
00511                 size_type startOffset, size_type count) const;
00512 
00516         size_type length () const;
00517 
00521         bool empty () const;
00522 
00528         UnicodeString&  erase (size_type pos=0, size_type count = npos);
00529 
00537         size_type find (UTF32TextChar ch, size_type startOffset = 0 ) const;
00538 
00547         size_type find (const UnicodeString& target, size_type startOffset = 0) const;
00548 
00557         size_type find (const UnicodeString& target, size_type startOffset, size_type count) const;
00558 
00565         size_type caseFind (UTF32TextChar ch, size_type startOffset = 0 ) const
00566         { return caseFind(ai::UnicodeString(1, ch), startOffset, 1); }
00567 
00575         size_type caseFind (const UnicodeString& target, size_type startOffset = 0) const
00576         { return caseFind(target, startOffset, target.length()); }
00577 
00586         size_type caseFind (const UnicodeString& target, size_type startOffset, size_type count) const;
00587 
00595         size_type rfind (UTF32TextChar ch, size_type startOffset = npos ) const;
00596 
00605         size_type rfind (const UnicodeString& target, size_type startOffset = npos) const;
00606 
00616         size_type rfind (const UnicodeString& target, size_type startOffset, size_type count) const;
00617 
00625         size_type find_first_of (const UnicodeString& target, size_type startOffset = 0) const
00626         { return find_first_of(target, startOffset, npos); }
00627 
00636         size_type find_first_of (const UnicodeString& target, size_type startOffset, size_type count) const;
00637 
00645         size_type find_last_of (const UnicodeString& target, size_type startOffset = npos) const
00646         { return find_last_of(target, startOffset, npos); }
00647 
00656         size_type find_last_of (const UnicodeString& target, size_type startOffset, size_type count) const;
00657 
00665         size_type find_first_not_of (const UnicodeString& target, size_type startOffset = 0) const
00666         { return find_first_not_of(target, startOffset, npos); }
00667 
00676         size_type find_first_not_of (const UnicodeString& target, size_type startOffset, size_type count) const;
00677 
00685         size_type find_last_not_of (const UnicodeString& target, size_type startOffset = npos) const
00686         { return find_last_not_of (target, startOffset, npos); }
00687 
00696         size_type find_last_not_of (const UnicodeString& target, size_type startOffset, size_type count) const;
00697 
00704         UnicodeString& replace (size_type pos, size_type num, const UnicodeString& str)
00705         { return replace(pos, num, str, 0, npos); }
00706 
00714         UnicodeString& replace (size_type pos, size_type num, const UnicodeString& str, size_type count)
00715         { return replace(pos, num, str, 0, count); }
00716 
00725         UnicodeString& replace (size_type pos, size_type num, const UnicodeString& str,
00726                 size_type startOffset, size_type count);
00727 
00734         UnicodeString& insert (size_type insertOffset, const UnicodeString& str)
00735         { return insert(insertOffset, str, 0, npos); }
00736 
00746         UnicodeString& insert (size_type insertOffset, const UnicodeString& str,
00747                 size_type offset, size_type count);
00748 
00756         UnicodeString& insert (size_type insertOffset, size_type count,
00757                 UTF32TextChar ch)
00758         { return insert(insertOffset, UnicodeString(count, ch), 0, count); }
00759 
00764         void push_back(UTF32TextChar ch)
00765         { (void) append(1, ch); }
00766 
00773         void resize (size_type count, UTF32TextChar ch = UTF32TextChar());
00774 
00779         ai::UnicodeString::size_type size (void) const
00780         { return length(); }
00781 
00788         UnicodeString substr (size_type offset = 0, size_type count = npos) const;
00789 
00794         void swap (UnicodeString& str);
00795 
00796         /* Operators */
00797 
00802         UnicodeString& operator= (const UnicodeString& rhs);
00803 
00808         UnicodeString& operator+= (UTF32TextChar ch)
00809         { return append(1, ch); }
00810 
00815         UnicodeString& operator+= (const UnicodeString& rhs)
00816         { return append(rhs); }
00817 
00827         UTF32TextChar operator[] (size_type offset) const;
00828 
00834         bool operator== (const UnicodeString& rhs) const
00835         { return compare(rhs) == 0; }
00836 
00842         bool operator!= (const UnicodeString& rhs) const
00843         { return !(operator==(rhs)); }
00844 
00850         bool operator< (const UnicodeString& rhs) const
00851         { return compare(rhs) < 0; }
00852 
00853         /* non-std::basic_string based functionality */
00854 
00859         UnicodeString& toLower();
00860 
00866         UnicodeString& toUpper();
00867 
00878         ai::int32 caseCompare (const UnicodeString& str) const
00879         { return caseCompare(0, npos, str, 0, npos); }
00880 
00893         ai::int32 caseCompare (size_type pos, size_type num, const UnicodeString& str) const
00894         { return caseCompare(pos, num, str, 0, npos); }
00895 
00912         ai::int32 caseCompare (size_type pos, size_type num, const UnicodeString& str,
00913                 size_type startOffset, size_type count) const;
00914 
00930         ai::int32 canonicalCompare (const UnicodeString& str) const;
00931 
00947         ai::int32 canonicalCaseCompare (const UnicodeString& str) const;
00948 
00956         UnicodeString& normalize (NormalizedForm form);
00957 
00961         bool hasSurrogates () const;
00962 
00973         size_type utf_16 (const UTF16Char*& buffer) const;
00974 
00979         std::basic_string<ASUnicode> as_ASUnicode ( ) const;
00980 
00984         std::string as_UTF8 ( ) const;
00985 
00988         std::string as_Platform () const;
00989 
00992         std::string as_Roman () const;
00993 
01003         size_type getToBuffer ( char* buffer, size_type bufferMax, AICharacterEncoding encoding ) const;
01004 
01013         size_type getToBuffer ( const PStr&  pascalString, size_type bufferMax, AICharacterEncoding encoding ) const;
01014 
01023         size_type as_ASUnicode (ASUnicode* buffer, size_type bufferMax ) const;
01024 
01032         size_type as_Platform ( const PStr&  pascalString, size_type bufferMax ) const;
01033 
01041         size_type as_Platform ( char* buffer, size_type bufferMax ) const;
01042 
01050         size_type as_Roman ( char* buffer, size_type bufferMax ) const;
01051 
01059         size_type as_Roman ( const PStr&  pascalString, size_type bufferMax ) const;
01060 
01066         std::string getInStdString (AICharacterEncoding encoding) const;
01067 
01074         size_type getAs (AICharacterEncoding encoding, ai::AutoBuffer<char>& b) const;
01075 
01076 #if defined(MAC_ENV)
01077 
01078         //----------------------------------------------------------------------
01080         //----------------------------------------------------------------------
01082 
01085         explicit UnicodeString (const CFStringRef& cfString);
01086 
01092         CFStringRef as_CFString (CFAllocatorRef alloc) const;
01094 #endif // defined(MAC_ENV)
01095         
01096 #if defined(WIN_ENV)
01097 
01098         //----------------------------------------------------------------------
01100         //----------------------------------------------------------------------
01102 
01105         explicit UnicodeString (const WCHARStr& string);
01106 
01110         WCHARStr as_WCHARStr () const;
01111 
01120         size_type as_WCHARStr (WCHARStr::LPWSTR buffer, size_type bufferMax ) const;
01121 
01123 #endif // defined(WIN_ENV)
01124 
01125 
01126 public: // internal use public interface
01127         void deleteImpl();
01128 
01129 protected:
01130         explicit UnicodeString(class CAIUnicodeStringImpl* impl);
01131 
01132 private:
01133         CAIUnicodeStringImpl* fImpl;
01134 };
01135 
01137 // Inline implementations  - Yes, some of these could cause code bloat.  
01138 // These could be moved to the IAIUnicodeString.inl file.
01139 //
01140 
01142 // UnicodeString inlines
01144 
01145 inline UnicodeString& UnicodeString::insert (size_type insertOffset, const UnicodeString& str,
01146                 size_type offset, size_type count)
01147 {
01148         if ( insertOffset > length() || offset > str.length() )
01149                 throw ai::Error(kUnicodeStringBadIndex);
01150         UnicodeString result = substr(0, insertOffset);
01151         result.append(str, offset, count);
01152         result.append(substr(insertOffset));
01153 
01154         *this = result;
01155         return *this;
01156 }
01157 
01158 inline UnicodeString& UnicodeString::replace (size_type pos, size_type num, const UnicodeString& str,
01159                                                 size_type startOffset, size_type count)
01160 {
01161         if ( pos > length() || startOffset > str.length() )
01162                 throw ai::Error(kUnicodeStringBadIndex);
01163         erase(pos, num);
01164         insert(pos, str, startOffset, count);
01165 
01166         return *this;
01167 }
01168 
01169 inline std::string UnicodeString::as_UTF8 ( ) const
01170 {
01171         return getInStdString(kAIUTF8CharacterEncoding);
01172 }
01173 
01174 inline std::string UnicodeString::as_Platform () const
01175 {
01176         return getInStdString(kAIPlatformCharacterEncoding);
01177 }
01178 
01179 inline std::string UnicodeString::as_Roman () const
01180 {
01181         return getInStdString(kAIRomanCharacterEncoding);
01182 }
01183 
01184 inline UnicodeString::size_type UnicodeString::as_ASUnicode ( ASUnicode* buffer, size_type bufferMax ) const
01185 {
01186         const UTF16Char* bufPtr = 0;
01187         const size_type kThisUTF16Len = utf_16(bufPtr) + 1;
01188         const size_type kCopyMax = (bufferMax < kThisUTF16Len ? bufferMax : kThisUTF16Len) - 1;
01189         memcpy(buffer, bufPtr, kCopyMax*sizeof(UTF16Char));
01190         buffer[kCopyMax] = 0;
01191 
01192         return kThisUTF16Len;
01193 }
01194 
01195 #if defined(WIN_ENV)
01196 
01197 inline UnicodeString::size_type UnicodeString::as_WCHARStr ( WCHARStr::LPWSTR buffer, size_type bufferMax ) const
01198 {
01199         AI_STATIC_CHECK(sizeof(WCHARStr::WCHAR) == sizeof(ai::UnicodeString::UTF16Char), WCHAR_size_does_not_match_unsigned_short_size); //-V503
01200         return as_ASUnicode( reinterpret_cast<ASUnicode*>(buffer), bufferMax );
01201 }
01202 
01203 #endif // defined(WIN_ENV)
01204 
01205 inline UnicodeString::size_type UnicodeString::as_Platform ( char* buffer, size_type bufferMax ) const
01206 {
01207         return getToBuffer( buffer, bufferMax, kAIPlatformCharacterEncoding);
01208 }
01209 
01210 inline UnicodeString::size_type UnicodeString::as_Roman ( char* buffer, size_type bufferMax ) const
01211 {
01212         return getToBuffer( buffer, bufferMax, kAIRomanCharacterEncoding);
01213 }
01214 
01215 inline UnicodeString::size_type UnicodeString::as_Platform ( const ai::PStr&  pascalString, size_type bufferMax ) const
01216 {
01217         return getToBuffer( pascalString, bufferMax, kAIPlatformCharacterEncoding );
01218 }
01219 
01220 inline UnicodeString::size_type UnicodeString::as_Roman ( const ai::PStr&  pascalString, size_type bufferMax ) const
01221 {
01222         return getToBuffer( pascalString, bufferMax, kAIRomanCharacterEncoding );
01223 }
01224 
01225 
01226 
01227 #if defined(WIN_ENV)
01228 
01230 // WCHARStr inlines
01232 
01233 inline WCHARStr::WCHARStr (const UnicodeString& string) : fConstStr()
01234 {
01235         AI_STATIC_CHECK(sizeof(WCHAR) == sizeof(ai::UnicodeString::UTF16Char), WCHAR_size_does_not_match_unsigned_short_size); //-V503
01236 
01237         const std::basic_string<ASUnicode>& cInput = string.as_ASUnicode();
01238         const size_t cLen = cInput.length();
01239 
01240         if ( cLen > 0 )
01241                 fConstStr = std::basic_string<WCHAR>(reinterpret_cast<LPCWSTR>(cInput.data()), cLen);
01242 };
01243 
01244 #if defined(_NATIVE_WCHAR_T_DEFINED)
01245         // This ctor is redundant if wchar_t is not an intrinsic.  This is the same as the const ASUnicode* ctor. 
01246 inline WCHARStr::WCHARStr (WCHARStr::LPCWSTR wcharString) : fConstStr( wcharString )
01247 {
01248 }
01249 #endif // defined(_NATIVE_WCHAR_T_DEFINED)
01250 
01251 inline WCHARStr::WCHARStr (const ASUnicode* string)
01252 {
01253         if ( string && *string )
01254         {
01255                 AI_STATIC_CHECK(sizeof(WCHARStr::WCHAR) == sizeof(ai::UnicodeString::UTF16Char), WCHAR_size_does_not_match_unsigned_short_size); //-V503
01256                 fConstStr.assign(reinterpret_cast<LPCWSTR>(string));
01257         }
01258 }
01259 
01260 inline const ASUnicode* WCHARStr::as_ASUnicode () const
01261 {
01262         AI_STATIC_CHECK(sizeof(WCHARStr::WCHAR) == sizeof(ai::UnicodeString::UTF16Char), WCHAR_size_does_not_match_unsigned_short_size); //-V503
01263         return reinterpret_cast<const ASUnicode*>(this->as_LPCWSTR());
01264 }
01265 
01266 inline size_t WCHARStr::length() const
01267 {
01268         return fConstStr.length();
01269 }
01270 
01271 #endif // defined(WIN_ENV)
01272 
01273 } // end of namespace ai
01274 
01275 #endif  // _IAIUNICODESTRING_H_


Contents Suites Classes Class Index Member Index
Adobe Solutions Network
 
Copyright © 2016 Adobe Systems Incorporated. All rights reserved.
Terms of Use Online Privacy Policy Adobe and accessibility Avoid software piracy Permissions and Trademarks