12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
7377837793780378137823783378437853786378737883789379037913792379337943795379637973798379938003801380238033804380538063807380838093810381138123813381438153816381738183819382038213822382338243825382638273828382938303831383238333834383538363837383838393840384138423843384438453846384738483849385038513852385338543855385638573858385938603861386238633864386538663867386838693870387138723873387438753876387738783879388038813882388338843885388638873888388938903891 |
- /*
- Basic Unicode string class for Irrlicht.
- Copyright (c) 2009-2011 John Norman
- This software is provided 'as-is', without any express or implied
- warranty. In no event will the authors be held liable for any
- damages arising from the use of this software.
- Permission is granted to anyone to use this software for any
- purpose, including commercial applications, and to alter it and
- redistribute it freely, subject to the following restrictions:
- 1. The origin of this software must not be misrepresented; you
- must not claim that you wrote the original software. If you use
- this software in a product, an acknowledgment in the product
- documentation would be appreciated but is not required.
- 2. Altered source versions must be plainly marked as such, and
- must not be misrepresented as being the original software.
- 3. This notice may not be removed or altered from any source
- distribution.
- The original version of this class can be located at:
- http://irrlicht.suckerfreegames.com/
- John Norman
- john@suckerfreegames.com
- */
- #pragma once
- #if (__cplusplus > 199711L) || (_MSC_VER >= 1600) || defined(__GXX_EXPERIMENTAL_CXX0X__)
- # define USTRING_CPP0X
- # if defined(__GXX_EXPERIMENTAL_CXX0X__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 5)))
- # define USTRING_CPP0X_NEWLITERALS
- # endif
- #endif
- #include <stdio.h>
- #include <string.h>
- #include <stdlib.h>
- #include <cstddef>
- #ifdef _WIN32
- #define __BYTE_ORDER 0
- #define __LITTLE_ENDIAN 0
- #define __BIG_ENDIAN 1
- #elif defined(__MACH__) && defined(__APPLE__)
- #include <machine/endian.h>
- #elif defined(__FreeBSD__) || defined(__DragonFly__)
- #include <sys/endian.h>
- #else
- #include <endian.h>
- #endif
- #ifdef USTRING_CPP0X
- # include <utility>
- #endif
- #ifndef USTRING_NO_STL
- # include <string>
- # include <iterator>
- # include <ostream>
- #endif
- #include "irrTypes.h"
- #include "irrAllocator.h"
- #include "irrArray.h"
- #include "irrMath.h"
- #include "irrString.h"
- #include "path.h"
- //! UTF-16 surrogate start values.
- static const irr::u16 UTF16_HI_SURROGATE = 0xD800;
- static const irr::u16 UTF16_LO_SURROGATE = 0xDC00;
- //! Is a UTF-16 code point a surrogate?
- #define UTF16_IS_SURROGATE(c) (((c) & 0xF800) == 0xD800)
- #define UTF16_IS_SURROGATE_HI(c) (((c) & 0xFC00) == 0xD800)
- #define UTF16_IS_SURROGATE_LO(c) (((c) & 0xFC00) == 0xDC00)
- namespace irr
- {
- // Define our character types.
- #ifdef USTRING_CPP0X_NEWLITERALS // C++0x
- typedef char32_t uchar32_t;
- typedef char16_t uchar16_t;
- typedef char uchar8_t;
- #else
- typedef u32 uchar32_t;
- typedef u16 uchar16_t;
- typedef u8 uchar8_t;
- #endif
- namespace core
- {
- namespace unicode
- {
- //! The unicode replacement character. Used to replace invalid characters.
- const irr::u16 UTF_REPLACEMENT_CHARACTER = 0xFFFD;
- //! Convert a UTF-16 surrogate pair into a UTF-32 character.
- //! \param high The high value of the pair.
- //! \param low The low value of the pair.
- //! \return The UTF-32 character expressed by the surrogate pair.
- inline uchar32_t toUTF32(uchar16_t high, uchar16_t low)
- {
- // Convert the surrogate pair into a single UTF-32 character.
- uchar32_t x = ((high & ((1 << 6) -1)) << 10) | (low & ((1 << 10) -1));
- uchar32_t wu = ((high >> 6) & ((1 << 5) - 1)) + 1;
- return (wu << 16) | x;
- }
- //! Swaps the endianness of a 16-bit value.
- //! \return The new value.
- inline uchar16_t swapEndian16(const uchar16_t& c)
- {
- return ((c >> 8) & 0x00FF) | ((c << 8) & 0xFF00);
- }
- //! Swaps the endianness of a 32-bit value.
- //! \return The new value.
- inline uchar32_t swapEndian32(const uchar32_t& c)
- {
- return ((c >> 24) & 0x000000FF) |
- ((c >> 8) & 0x0000FF00) |
- ((c << 8) & 0x00FF0000) |
- ((c << 24) & 0xFF000000);
- }
- //! The Unicode byte order mark.
- const u16 BOM = 0xFEFF;
- //! The size of the Unicode byte order mark in terms of the Unicode character size.
- const u8 BOM_UTF8_LEN = 3;
- const u8 BOM_UTF16_LEN = 1;
- const u8 BOM_UTF32_LEN = 1;
- //! Unicode byte order marks for file operations.
- const u8 BOM_ENCODE_UTF8[3] = { 0xEF, 0xBB, 0xBF };
- const u8 BOM_ENCODE_UTF16_BE[2] = { 0xFE, 0xFF };
- const u8 BOM_ENCODE_UTF16_LE[2] = { 0xFF, 0xFE };
- const u8 BOM_ENCODE_UTF32_BE[4] = { 0x00, 0x00, 0xFE, 0xFF };
- const u8 BOM_ENCODE_UTF32_LE[4] = { 0xFF, 0xFE, 0x00, 0x00 };
- //! The size in bytes of the Unicode byte marks for file operations.
- const u8 BOM_ENCODE_UTF8_LEN = 3;
- const u8 BOM_ENCODE_UTF16_LEN = 2;
- const u8 BOM_ENCODE_UTF32_LEN = 4;
//! Unicode encoding type.
enum EUTF_ENCODE
{
	EUTFE_NONE = 0,		//!< No (or unknown) encoding.
	EUTFE_UTF8 = 1,		//!< UTF-8.
	EUTFE_UTF16 = 2,	//!< UTF-16 in the native byte order.
	EUTFE_UTF16_LE = 3,	//!< UTF-16, little endian.
	EUTFE_UTF16_BE = 4,	//!< UTF-16, big endian.
	EUTFE_UTF32 = 5,	//!< UTF-32 in the native byte order.
	EUTFE_UTF32_LE = 6,	//!< UTF-32, little endian.
	EUTFE_UTF32_BE = 7	//!< UTF-32, big endian.
};

//! Unicode endianness.
enum EUTF_ENDIAN
{
	EUTFEE_NATIVE = 0,	//!< Byte order of the running system.
	EUTFEE_LITTLE = 1,	//!< Little endian.
	EUTFEE_BIG = 2		//!< Big endian.
};
- //! Returns the specified unicode byte order mark in a byte array.
- //! The byte order mark is the first few bytes in a text file that signifies its encoding.
- /** \param mode The Unicode encoding method that we want to get the byte order mark for.
- If EUTFE_UTF16 or EUTFE_UTF32 is passed, it uses the native system endianness. **/
- //! \return An array that contains a byte order mark.
- inline core::array<u8> getUnicodeBOM(EUTF_ENCODE mode)
- {
- #define COPY_ARRAY(source, size) \
- memcpy(ret.pointer(), source, size); \
- ret.set_used(size)
- core::array<u8> ret(4);
- switch (mode)
- {
- case EUTFE_UTF8:
- COPY_ARRAY(BOM_ENCODE_UTF8, BOM_ENCODE_UTF8_LEN);
- break;
- case EUTFE_UTF16:
- #ifdef __BIG_ENDIAN__
- COPY_ARRAY(BOM_ENCODE_UTF16_BE, BOM_ENCODE_UTF16_LEN);
- #else
- COPY_ARRAY(BOM_ENCODE_UTF16_LE, BOM_ENCODE_UTF16_LEN);
- #endif
- break;
- case EUTFE_UTF16_BE:
- COPY_ARRAY(BOM_ENCODE_UTF16_BE, BOM_ENCODE_UTF16_LEN);
- break;
- case EUTFE_UTF16_LE:
- COPY_ARRAY(BOM_ENCODE_UTF16_LE, BOM_ENCODE_UTF16_LEN);
- break;
- case EUTFE_UTF32:
- #ifdef __BIG_ENDIAN__
- COPY_ARRAY(BOM_ENCODE_UTF32_BE, BOM_ENCODE_UTF32_LEN);
- #else
- COPY_ARRAY(BOM_ENCODE_UTF32_LE, BOM_ENCODE_UTF32_LEN);
- #endif
- break;
- case EUTFE_UTF32_BE:
- COPY_ARRAY(BOM_ENCODE_UTF32_BE, BOM_ENCODE_UTF32_LEN);
- break;
- case EUTFE_UTF32_LE:
- COPY_ARRAY(BOM_ENCODE_UTF32_LE, BOM_ENCODE_UTF32_LEN);
- break;
- case EUTFE_NONE:
- // TODO sapier: fixed warning only,
- // don't know if something needs to be done here
- break;
- }
- return ret;
- #undef COPY_ARRAY
- }
- //! Detects if the given data stream starts with a unicode BOM.
- //! \param data The data stream to check.
- //! \return The unicode BOM associated with the data stream, or EUTFE_NONE if none was found.
- inline EUTF_ENCODE determineUnicodeBOM(const char* data)
- {
- if (memcmp(data, BOM_ENCODE_UTF8, 3) == 0) return EUTFE_UTF8;
- if (memcmp(data, BOM_ENCODE_UTF16_BE, 2) == 0) return EUTFE_UTF16_BE;
- if (memcmp(data, BOM_ENCODE_UTF16_LE, 2) == 0) return EUTFE_UTF16_LE;
- if (memcmp(data, BOM_ENCODE_UTF32_BE, 4) == 0) return EUTFE_UTF32_BE;
- if (memcmp(data, BOM_ENCODE_UTF32_LE, 4) == 0) return EUTFE_UTF32_LE;
- return EUTFE_NONE;
- }
- } // end namespace unicode
- //! UTF-16 string class.
- template <typename TAlloc = irrAllocator<uchar16_t> >
- class ustring16
- {
- public:
- ///------------------///
- /// iterator classes ///
- ///------------------///
- //! Access an element in a unicode string, allowing one to change it.
- class _ustring16_iterator_access
- {
- public:
- _ustring16_iterator_access(const ustring16<TAlloc>* s, u32 p) : ref(s), pos(p) {}
- //! Allow the class to be interpreted as a single UTF-32 character.
- operator uchar32_t() const
- {
- return _get();
- }
- //! Allow one to change the character in the unicode string.
- //! \param c The new character to use.
- //! \return Myself.
- _ustring16_iterator_access& operator=(const uchar32_t c)
- {
- _set(c);
- return *this;
- }
- //! Increments the value by 1.
- //! \return Myself.
- _ustring16_iterator_access& operator++()
- {
- _set(_get() + 1);
- return *this;
- }
- //! Increments the value by 1, returning the old value.
- //! \return A unicode character.
- uchar32_t operator++(int)
- {
- uchar32_t old = _get();
- _set(old + 1);
- return old;
- }
- //! Decrements the value by 1.
- //! \return Myself.
- _ustring16_iterator_access& operator--()
- {
- _set(_get() - 1);
- return *this;
- }
- //! Decrements the value by 1, returning the old value.
- //! \return A unicode character.
- uchar32_t operator--(int)
- {
- uchar32_t old = _get();
- _set(old - 1);
- return old;
- }
- //! Adds to the value by a specified amount.
- //! \param val The amount to add to this character.
- //! \return Myself.
- _ustring16_iterator_access& operator+=(int val)
- {
- _set(_get() + val);
- return *this;
- }
- //! Subtracts from the value by a specified amount.
- //! \param val The amount to subtract from this character.
- //! \return Myself.
- _ustring16_iterator_access& operator-=(int val)
- {
- _set(_get() - val);
- return *this;
- }
- //! Multiples the value by a specified amount.
- //! \param val The amount to multiply this character by.
- //! \return Myself.
- _ustring16_iterator_access& operator*=(int val)
- {
- _set(_get() * val);
- return *this;
- }
- //! Divides the value by a specified amount.
- //! \param val The amount to divide this character by.
- //! \return Myself.
- _ustring16_iterator_access& operator/=(int val)
- {
- _set(_get() / val);
- return *this;
- }
- //! Modulos the value by a specified amount.
- //! \param val The amount to modulo this character by.
- //! \return Myself.
- _ustring16_iterator_access& operator%=(int val)
- {
- _set(_get() % val);
- return *this;
- }
- //! Adds to the value by a specified amount.
- //! \param val The amount to add to this character.
- //! \return A unicode character.
- uchar32_t operator+(int val) const
- {
- return _get() + val;
- }
- //! Subtracts from the value by a specified amount.
- //! \param val The amount to subtract from this character.
- //! \return A unicode character.
- uchar32_t operator-(int val) const
- {
- return _get() - val;
- }
- //! Multiplies the value by a specified amount.
- //! \param val The amount to multiply this character by.
- //! \return A unicode character.
- uchar32_t operator*(int val) const
- {
- return _get() * val;
- }
- //! Divides the value by a specified amount.
- //! \param val The amount to divide this character by.
- //! \return A unicode character.
- uchar32_t operator/(int val) const
- {
- return _get() / val;
- }
- //! Modulos the value by a specified amount.
- //! \param val The amount to modulo this character by.
- //! \return A unicode character.
- uchar32_t operator%(int val) const
- {
- return _get() % val;
- }
- private:
- //! Gets a uchar32_t from our current position.
- uchar32_t _get() const
- {
- const uchar16_t* a = ref->c_str();
- if (!UTF16_IS_SURROGATE(a[pos]))
- return static_cast<uchar32_t>(a[pos]);
- else
- {
- if (pos + 1 >= ref->size_raw())
- return 0;
- return unicode::toUTF32(a[pos], a[pos + 1]);
- }
- }
- //! Sets a uchar32_t at our current position.
- void _set(uchar32_t c)
- {
- ustring16<TAlloc>* ref2 = const_cast<ustring16<TAlloc>*>(ref);
- const uchar16_t* a = ref2->c_str();
- if (c > 0xFFFF)
- {
- // c will be multibyte, so split it up into the high and low surrogate pairs.
- uchar16_t x = static_cast<uchar16_t>(c);
- uchar16_t vh = UTF16_HI_SURROGATE | ((((c >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
- uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
- // If the previous position was a surrogate pair, just replace them. Else, insert the low pair.
- if (UTF16_IS_SURROGATE_HI(a[pos]) && pos + 1 != ref2->size_raw())
- ref2->replace_raw(vl, static_cast<u32>(pos) + 1);
- else ref2->insert_raw(vl, static_cast<u32>(pos) + 1);
- ref2->replace_raw(vh, static_cast<u32>(pos));
- }
- else
- {
- // c will be a single byte.
- uchar16_t vh = static_cast<uchar16_t>(c);
- // If the previous position was a surrogate pair, remove the extra byte.
- if (UTF16_IS_SURROGATE_HI(a[pos]))
- ref2->erase_raw(static_cast<u32>(pos) + 1);
- ref2->replace_raw(vh, static_cast<u32>(pos));
- }
- }
- const ustring16<TAlloc>* ref;
- u32 pos;
- };
- typedef typename ustring16<TAlloc>::_ustring16_iterator_access access;
- //! Iterator to iterate through a UTF-16 string.
- #ifndef USTRING_NO_STL
- class _ustring16_const_iterator : public std::iterator<
- std::bidirectional_iterator_tag, // iterator_category
- access, // value_type
- ptrdiff_t, // difference_type
- const access, // pointer
- const access // reference
- >
- #else
- class _ustring16_const_iterator
- #endif
- {
- public:
- typedef _ustring16_const_iterator _Iter;
- typedef std::iterator<std::bidirectional_iterator_tag, access, ptrdiff_t, const access, const access> _Base;
- typedef const access const_pointer;
- typedef const access const_reference;
- #ifndef USTRING_NO_STL
- typedef typename _Base::value_type value_type;
- typedef typename _Base::difference_type difference_type;
- typedef typename _Base::difference_type distance_type;
- typedef typename _Base::pointer pointer;
- typedef const_reference reference;
- #else
- typedef access value_type;
- typedef u32 difference_type;
- typedef u32 distance_type;
- typedef const_pointer pointer;
- typedef const_reference reference;
- #endif
- //! Constructors.
- _ustring16_const_iterator(const _Iter& i) : ref(i.ref), pos(i.pos) {}
- _ustring16_const_iterator(const ustring16<TAlloc>& s) : ref(&s), pos(0) {}
- _ustring16_const_iterator(const ustring16<TAlloc>& s, const u32 p) : ref(&s), pos(0)
- {
- if (ref->size_raw() == 0 || p == 0)
- return;
- // Go to the appropriate position.
- u32 i = p;
- u32 sr = ref->size_raw();
- const uchar16_t* a = ref->c_str();
- while (i != 0 && pos < sr)
- {
- if (UTF16_IS_SURROGATE_HI(a[pos]))
- pos += 2;
- else ++pos;
- --i;
- }
- }
- //! Test for equalness.
- bool operator==(const _Iter& iter) const
- {
- if (ref == iter.ref && pos == iter.pos)
- return true;
- return false;
- }
- //! Test for unequalness.
- bool operator!=(const _Iter& iter) const
- {
- if (ref != iter.ref || pos != iter.pos)
- return true;
- return false;
- }
- //! Switch to the next full character in the string.
- _Iter& operator++()
- { // ++iterator
- if (pos == ref->size_raw()) return *this;
- const uchar16_t* a = ref->c_str();
- if (UTF16_IS_SURROGATE_HI(a[pos]))
- pos += 2; // TODO: check for valid low surrogate?
- else ++pos;
- if (pos > ref->size_raw()) pos = ref->size_raw();
- return *this;
- }
- //! Switch to the next full character in the string, returning the previous position.
- _Iter operator++(int)
- { // iterator++
- _Iter _tmp(*this);
- ++*this;
- return _tmp;
- }
- //! Switch to the previous full character in the string.
- _Iter& operator--()
- { // --iterator
- if (pos == 0) return *this;
- const uchar16_t* a = ref->c_str();
- --pos;
- if (UTF16_IS_SURROGATE_LO(a[pos]) && pos != 0) // low surrogate, go back one more.
- --pos;
- return *this;
- }
- //! Switch to the previous full character in the string, returning the previous position.
- _Iter operator--(int)
- { // iterator--
- _Iter _tmp(*this);
- --*this;
- return _tmp;
- }
- //! Advance a specified number of full characters in the string.
- //! \return Myself.
- _Iter& operator+=(const difference_type v)
- {
- if (v == 0) return *this;
- if (v < 0) return operator-=(v * -1);
- if (pos >= ref->size_raw())
- return *this;
- // Go to the appropriate position.
- // TODO: Don't force u32 on an x64 OS. Make it agnostic.
- u32 i = (u32)v;
- u32 sr = ref->size_raw();
- const uchar16_t* a = ref->c_str();
- while (i != 0 && pos < sr)
- {
- if (UTF16_IS_SURROGATE_HI(a[pos]))
- pos += 2;
- else ++pos;
- --i;
- }
- if (pos > sr)
- pos = sr;
- return *this;
- }
- //! Go back a specified number of full characters in the string.
- //! \return Myself.
- _Iter& operator-=(const difference_type v)
- {
- if (v == 0) return *this;
- if (v > 0) return operator+=(v * -1);
- if (pos == 0)
- return *this;
- // Go to the appropriate position.
- // TODO: Don't force u32 on an x64 OS. Make it agnostic.
- u32 i = (u32)v;
- const uchar16_t* a = ref->c_str();
- while (i != 0 && pos != 0)
- {
- --pos;
- if (UTF16_IS_SURROGATE_LO(a[pos]) != 0 && pos != 0)
- --pos;
- --i;
- }
- return *this;
- }
- //! Return a new iterator that is a variable number of full characters forward from the current position.
- _Iter operator+(const difference_type v) const
- {
- _Iter ret(*this);
- ret += v;
- return ret;
- }
- //! Return a new iterator that is a variable number of full characters backward from the current position.
- _Iter operator-(const difference_type v) const
- {
- _Iter ret(*this);
- ret -= v;
- return ret;
- }
- //! Returns the distance between two iterators.
- difference_type operator-(const _Iter& iter) const
- {
- // Make sure we reference the same object!
- if (ref != iter.ref)
- return difference_type();
- _Iter i = iter;
- difference_type ret;
- // Walk up.
- if (pos > i.pos)
- {
- while (pos > i.pos)
- {
- ++i;
- ++ret;
- }
- return ret;
- }
- // Walk down.
- while (pos < i.pos)
- {
- --i;
- --ret;
- }
- return ret;
- }
- //! Accesses the full character at the iterator's position.
- const_reference operator*() const
- {
- if (pos >= ref->size_raw())
- {
- const uchar16_t* a = ref->c_str();
- u32 p = ref->size_raw();
- if (UTF16_IS_SURROGATE_LO(a[p]))
- --p;
- reference ret(ref, p);
- return ret;
- }
- const_reference ret(ref, pos);
- return ret;
- }
- //! Accesses the full character at the iterator's position.
- //! When the position is at or past the raw length, the access is built at the raw
- //! length instead (one unit earlier if a low surrogate is found there).
- reference operator*()
- {
-     const u32 rawLen = ref->size_raw();
-     if (pos < rawLen)
-     {
-         reference current(ref, pos);
-         return current;
-     }
-     const uchar16_t* units = ref->c_str();
-     u32 last = rawLen;
-     if (UTF16_IS_SURROGATE_LO(units[last]))
-         --last;
-     reference clamped(ref, last);
-     return clamped;
- }
- //! Accesses the full character at the iterator's position.
- //! \return The result of dereferencing this (const) iterator.
- const_pointer operator->() const
- {
-     return **this;
- }
- //! Accesses the full character at the iterator's position.
- //! \return The result of dereferencing this iterator.
- pointer operator->()
- {
-     return **this;
- }
- //! Is the iterator at the start of the string?
- //! \return True when the raw position is zero.
- bool atStart() const
- {
-     return 0 == pos;
- }
- //! Is the iterator at the end of the string?
- //! \return True when the raw position (plus one, if the unit at the position is a
- //! surrogate) has reached the string's raw size.
- bool atEnd() const
- {
-     const uchar16_t* units = ref->c_str();
-     // A surrogate at the current position is compared using the unit after it.
-     const u32 probe = (UTF16_IS_SURROGATE(units[pos])) ? (pos + 1) : pos;
-     return probe >= ref->size_raw();
- }
- //! Moves the iterator to the start of the string (raw position zero).
- void toStart()
- {
-     this->pos = 0;
- }
- //! Moves the iterator to the end of the string (the string's raw size).
- void toEnd()
- {
-     this->pos = ref->size_raw();
- }
- //! Returns the iterator's position.
- //! \return The stored raw position.
- u32 getPos() const
- {
-     return this->pos;
- }
- protected:
- const ustring16<TAlloc>* ref;
- u32 pos;
- };
- //! Iterator to iterate through a UTF-16 string.
- //! Derives from the const iterator; dereferencing yields an `access` object.
- class _ustring16_iterator : public _ustring16_const_iterator
- {
- public:
-     typedef _ustring16_iterator _Iter;
-     typedef _ustring16_const_iterator _Base;
-     typedef typename _Base::const_pointer const_pointer;
-     typedef typename _Base::const_reference const_reference;
-     typedef typename _Base::value_type value_type;
-     typedef typename _Base::difference_type difference_type;
-     typedef typename _Base::distance_type distance_type;
-     typedef access pointer;
-     typedef access reference;
-     using _Base::pos;
-     using _Base::ref;
-
-     //! Copy constructor.
-     _ustring16_iterator(const _Iter& i) : _ustring16_const_iterator(i) {}
-     //! Constructs an iterator on a string; arguments are passed to the base class.
-     _ustring16_iterator(const ustring16<TAlloc>& s) : _ustring16_const_iterator(s) {}
-     //! Constructs an iterator on a string at a position; arguments are passed to the base class.
-     _ustring16_iterator(const ustring16<TAlloc>& s, const u32 p) : _ustring16_const_iterator(s, p) {}
-
-     //! Accesses the full character at the iterator's position.
-     //! A position at or past the raw length is redirected to the raw length
-     //! (one unit earlier if a low surrogate is found there).
-     reference operator*() const
-     {
-         const u32 rawLen = ref->size_raw();
-         if (pos < rawLen)
-         {
-             reference current(ref, pos);
-             return current;
-         }
-         const uchar16_t* units = ref->c_str();
-         u32 last = rawLen;
-         if (UTF16_IS_SURROGATE_LO(units[last]))
-             --last;
-         reference clamped(ref, last);
-         return clamped;
-     }
-
-     //! Accesses the full character at the iterator's position.
-     //! Same lookup as the const overload.
-     reference operator*()
-     {
-         return static_cast<const _Iter&>(*this).operator*();
-     }
-
-     //! Accesses the full character at the iterator's position.
-     pointer operator->() const
-     {
-         return **this;
-     }
-
-     //! Accesses the full character at the iterator's position.
-     pointer operator->()
-     {
-         return **this;
-     }
- };
- typedef typename ustring16<TAlloc>::_ustring16_iterator iterator;
- typedef typename ustring16<TAlloc>::_ustring16_const_iterator const_iterator;
- ///----------------------///
- /// end iterator classes ///
- ///----------------------///
- //! Default constructor.
- //! Allocates a single code unit and stores the terminator in it.
- ustring16()
-     : array(0),
-       allocated(1),
-       used(0)
- {
-     // Encoding tag follows the __BYTE_ORDER macro.
- #if __BYTE_ORDER != __BIG_ENDIAN
-     encoding = unicode::EUTFE_UTF16_LE;
- #else
-     encoding = unicode::EUTFE_UTF16_BE;
- #endif
-     array = allocator.allocate(1); // new u16[1];
-     array[0] = 0x0;
- }
- //! Copy constructor.
- //! Starts from an unallocated state and delegates the copy to operator=.
- ustring16(const ustring16<TAlloc>& other)
-     : array(0),
-       allocated(0),
-       used(0)
- {
-     // Encoding tag follows the __BYTE_ORDER macro.
- #if __BYTE_ORDER != __BIG_ENDIAN
-     encoding = unicode::EUTFE_UTF16_LE;
- #else
-     encoding = unicode::EUTFE_UTF16_BE;
- #endif
-     this->operator=(other);
- }
- //! Constructor from other string types.
- //! Starts from an unallocated state and delegates to the matching operator=.
- template <class B, class A>
- ustring16(const string<B, A>& other)
-     : array(0),
-       allocated(0),
-       used(0)
- {
-     // Encoding tag follows the __BYTE_ORDER macro.
- #if __BYTE_ORDER != __BIG_ENDIAN
-     encoding = unicode::EUTFE_UTF16_LE;
- #else
-     encoding = unicode::EUTFE_UTF16_BE;
- #endif
-     (*this) = other;
- }
- #ifndef USTRING_NO_STL
- //! Constructor from std::string.
- //! Assigns from the C string returned by other.c_str().
- template <class B, class A, typename Alloc>
- ustring16(const std::basic_string<B, A, Alloc>& other)
-     : array(0),
-       allocated(0),
-       used(0)
- {
-     // Encoding tag follows the __BYTE_ORDER macro.
- #if __BYTE_ORDER != __BIG_ENDIAN
-     encoding = unicode::EUTFE_UTF16_LE;
- #else
-     encoding = unicode::EUTFE_UTF16_BE;
- #endif
-     (*this) = other.c_str();
- }
- //! Constructor from iterator.
- //! Reserves room for std::distance(first, last) entries, then appends every element
- //! of the range after casting it to uchar32_t.
- template <typename Itr>
- ustring16(Itr first, Itr last)
-     : array(0),
-       allocated(0),
-       used(0)
- {
-     // Encoding tag follows the __BYTE_ORDER macro.
- #if __BYTE_ORDER != __BIG_ENDIAN
-     encoding = unicode::EUTFE_UTF16_LE;
- #else
-     encoding = unicode::EUTFE_UTF16_BE;
- #endif
-     reserve(std::distance(first, last));
-     array[used] = 0;
-     while (first != last)
-     {
-         append((uchar32_t)*first);
-         ++first;
-     }
- }
- #endif
- #ifndef USTRING_CPP0X_NEWLITERALS
- //! Constructor for copying a character string from a pointer.
- //! The string's strlen() bytes are handed to loadDataStream().
- ustring16(const char* const c)
-     : array(0),
-       allocated(0),
-       used(0)
- {
-     // Encoding tag follows the __BYTE_ORDER macro.
- #if __BYTE_ORDER != __BIG_ENDIAN
-     encoding = unicode::EUTFE_UTF16_LE;
- #else
-     encoding = unicode::EUTFE_UTF16_BE;
- #endif
-     const size_t byteCount = strlen(c);
-     loadDataStream(c, byteCount);
- }
- //! Constructor for copying a character string from a pointer with a given length.
- //! Pointer and length are handed to loadDataStream().
- ustring16(const char* const c, u32 length)
-     : array(0),
-       allocated(0),
-       used(0)
- {
-     // Encoding tag follows the __BYTE_ORDER macro.
- #if __BYTE_ORDER != __BIG_ENDIAN
-     encoding = unicode::EUTFE_UTF16_LE;
- #else
-     encoding = unicode::EUTFE_UTF16_BE;
- #endif
-     this->loadDataStream(c, length);
- }
- #endif
- //! Constructor for copying a UTF-8 string from a pointer.
- //! The data is added through the UTF-8 append() overload.
- ustring16(const uchar8_t* const c)
-     : array(0),
-       allocated(0),
-       used(0)
- {
-     // Encoding tag follows the __BYTE_ORDER macro.
- #if __BYTE_ORDER != __BIG_ENDIAN
-     encoding = unicode::EUTFE_UTF16_LE;
- #else
-     encoding = unicode::EUTFE_UTF16_BE;
- #endif
-     this->append(c);
- }
- //! Constructor for copying a UTF-8 string from a single char.
- //! The char is cast to uchar32_t and appended as one character.
- ustring16(const char c)
-     : array(0),
-       allocated(0),
-       used(0)
- {
-     // Encoding tag follows the __BYTE_ORDER macro.
- #if __BYTE_ORDER != __BIG_ENDIAN
-     encoding = unicode::EUTFE_UTF16_LE;
- #else
-     encoding = unicode::EUTFE_UTF16_BE;
- #endif
-     const uchar32_t asCodePoint = (uchar32_t)c;
-     append(asCodePoint);
- }
- //! Constructor for copying a UTF-8 string from a pointer with a given length.
- //! The data is added through the UTF-8 append() overload.
- ustring16(const uchar8_t* const c, u32 length)
-     : array(0),
-       allocated(0),
-       used(0)
- {
-     // Encoding tag follows the __BYTE_ORDER macro.
- #if __BYTE_ORDER != __BIG_ENDIAN
-     encoding = unicode::EUTFE_UTF16_LE;
- #else
-     encoding = unicode::EUTFE_UTF16_BE;
- #endif
-     this->append(c, length);
- }
- //! Constructor for copying a UTF-16 string from a pointer.
- //! The data is added through the UTF-16 append() overload.
- ustring16(const uchar16_t* const c)
-     : array(0),
-       allocated(0),
-       used(0)
- {
-     // Encoding tag follows the __BYTE_ORDER macro.
- #if __BYTE_ORDER != __BIG_ENDIAN
-     encoding = unicode::EUTFE_UTF16_LE;
- #else
-     encoding = unicode::EUTFE_UTF16_BE;
- #endif
-     this->append(c);
- }
- //! Constructor for copying a UTF-16 string from a pointer with a given length.
- //! The data is added through the UTF-16 append() overload.
- ustring16(const uchar16_t* const c, u32 length)
-     : array(0),
-       allocated(0),
-       used(0)
- {
-     // Encoding tag follows the __BYTE_ORDER macro.
- #if __BYTE_ORDER != __BIG_ENDIAN
-     encoding = unicode::EUTFE_UTF16_LE;
- #else
-     encoding = unicode::EUTFE_UTF16_BE;
- #endif
-     this->append(c, length);
- }
- //! Constructor for copying a UTF-32 string from a pointer.
- //! The data is added through the UTF-32 append() overload.
- ustring16(const uchar32_t* const c)
-     : array(0),
-       allocated(0),
-       used(0)
- {
-     // Encoding tag follows the __BYTE_ORDER macro.
- #if __BYTE_ORDER != __BIG_ENDIAN
-     encoding = unicode::EUTFE_UTF16_LE;
- #else
-     encoding = unicode::EUTFE_UTF16_BE;
- #endif
-     this->append(c);
- }
- //! Constructor for copying a UTF-32 string from a pointer with a given length.
- //! The data is added through the UTF-32 append() overload.
- ustring16(const uchar32_t* const c, u32 length)
-     : array(0),
-       allocated(0),
-       used(0)
- {
-     // Encoding tag follows the __BYTE_ORDER macro.
- #if __BYTE_ORDER != __BIG_ENDIAN
-     encoding = unicode::EUTFE_UTF16_LE;
- #else
-     encoding = unicode::EUTFE_UTF16_BE;
- #endif
-     this->append(c, length);
- }
- //! Constructor for copying a wchar_t string from a pointer.
- //! The pointer is reinterpreted as UTF-32, UTF-16 or UTF-8 data depending on
- //! sizeof(wchar_t); any other size appends nothing.
- ustring16(const wchar_t* const c)
-     : array(0),
-       allocated(0),
-       used(0)
- {
-     // Encoding tag follows the __BYTE_ORDER macro.
- #if __BYTE_ORDER != __BIG_ENDIAN
-     encoding = unicode::EUTFE_UTF16_LE;
- #else
-     encoding = unicode::EUTFE_UTF16_BE;
- #endif
-     switch (sizeof(wchar_t))
-     {
-     case 4:
-         append(reinterpret_cast<const uchar32_t* const>(c));
-         break;
-     case 2:
-         append(reinterpret_cast<const uchar16_t* const>(c));
-         break;
-     case 1:
-         append(reinterpret_cast<const uchar8_t* const>(c));
-         break;
-     default:
-         break;
-     }
- }
- //! Constructor for copying a wchar_t string from a pointer with a given length.
- //! The pointer is reinterpreted as UTF-32, UTF-16 or UTF-8 data depending on
- //! sizeof(wchar_t); any other size appends nothing.
- ustring16(const wchar_t* const c, u32 length)
-     : array(0),
-       allocated(0),
-       used(0)
- {
-     // Encoding tag follows the __BYTE_ORDER macro.
- #if __BYTE_ORDER != __BIG_ENDIAN
-     encoding = unicode::EUTFE_UTF16_LE;
- #else
-     encoding = unicode::EUTFE_UTF16_BE;
- #endif
-     switch (sizeof(wchar_t))
-     {
-     case 4:
-         append(reinterpret_cast<const uchar32_t* const>(c), length);
-         break;
-     case 2:
-         append(reinterpret_cast<const uchar16_t* const>(c), length);
-         break;
-     case 1:
-         append(reinterpret_cast<const uchar8_t* const>(c), length);
-         break;
-     default:
-         break;
-     }
- }
- #ifdef USTRING_CPP0X
- //! Constructor for moving a ustring16.
- //! Takes over the other string's buffer and bookkeeping, then leaves the source
- //! with a null buffer and zero sizes.
- ustring16(ustring16<TAlloc>&& other)
-     : array(other.array),
-       encoding(other.encoding),
-       allocated(other.allocated),
-       used(other.used)
- {
-     other.used = 0;
-     other.allocated = 0;
-     other.array = 0;
- }
- #endif
- //! Destructor.
- //! Hands the character buffer back to the allocator.
- ~ustring16()
- {
-     allocator.deallocate(this->array); // delete [] array;
- }
- //! Assignment operator.
- //! Copies the other string's raw code units (re-allocating when the current buffer
- //! cannot hold them plus a terminator) and then calls validate().
- //! \param other The string to copy from. Self-assignment is a no-op.
- //! \return A reference to our current string.
- ustring16& operator=(const ustring16<TAlloc>& other)
- {
-     if (this != &other)
-     {
-         used = other.size_raw();
-         if (used >= allocated)
-         {
-             allocator.deallocate(array); // delete [] array;
-             allocated = used + 1;
-             array = allocator.allocate(allocated); //new u16[used];
-         }
-         const uchar16_t* src = other.c_str();
-         for (u32 n = 0; n <= used; ++n)
-             array[n] = src[n];
-         array[used] = 0;
-         // Validate our new UTF-16 string.
-         validate();
-     }
-     return *this;
- }
- #ifdef USTRING_CPP0X
- //! Move assignment operator.
- //! Releases the current buffer, takes over the other string's buffer and
- //! bookkeeping, and leaves the source with a null buffer and zero sizes.
- //! \param other The string to move from. Self-assignment is a no-op.
- //! \return A reference to our current string.
- ustring16& operator=(ustring16<TAlloc>&& other)
- {
-     if (this != &other)
-     {
-         //std::cout << "MOVE operator=" << std::endl;
-         allocator.deallocate(array);
-         array = other.array;
-         allocated = other.allocated;
-         encoding = other.encoding;
-         used = other.used;
-         other.array = 0;
-         // Reset the capacity too (as the move constructor does): a moved-from string
-         // must not claim capacity while its buffer pointer is null.
-         other.allocated = 0;
-         other.used = 0;
-     }
-     return *this;
- }
- #endif
- //! Assignment operator for other string types.
- //! Assigns from the C string returned by other.c_str().
- //! \return A reference to our current string.
- template <class B, class A>
- ustring16<TAlloc>& operator=(const string<B, A>& other)
- {
-     (*this) = other.c_str();
-     return *this;
- }
- //! Assignment operator for UTF-8 strings.
- //! Clears the string (allocating one code unit if there is no buffer yet) and
- //! appends the given data; a null pointer leaves the string empty.
- //! \return A reference to our current string.
- ustring16<TAlloc>& operator=(const uchar8_t* const c)
- {
-     if (array == 0)
-     {
-         array = allocator.allocate(1); //new u16[1];
-         allocated = 1;
-     }
-     used = 0;
-     array[0] = 0x0;
-     //! Append our string now.
-     if (c != 0)
-         append(c);
-     return *this;
- }
- //! Assignment operator for UTF-16 strings.
- //! Clears the string (allocating one code unit if there is no buffer yet) and
- //! appends the given data; a null pointer leaves the string empty.
- //! \return A reference to our current string.
- ustring16<TAlloc>& operator=(const uchar16_t* const c)
- {
-     if (array == 0)
-     {
-         array = allocator.allocate(1); //new u16[1];
-         allocated = 1;
-     }
-     used = 0;
-     array[0] = 0x0;
-     //! Append our string now.
-     if (c != 0)
-         append(c);
-     return *this;
- }
- //! Assignment operator for UTF-32 strings.
- //! Clears the string (allocating one code unit if there is no buffer yet) and
- //! appends the given data; a null pointer leaves the string empty.
- //! \return A reference to our current string.
- ustring16<TAlloc>& operator=(const uchar32_t* const c)
- {
-     if (array == 0)
-     {
-         array = allocator.allocate(1); //new u16[1];
-         allocated = 1;
-     }
-     used = 0;
-     array[0] = 0x0;
-     //! Append our string now.
-     if (c != 0)
-         append(c);
-     return *this;
- }
- //! Assignment operator for wchar_t strings.
- /** Note that this assumes that a correct unicode string is stored in the wchar_t string.
- Since wchar_t changes depending on its platform, it could either be a UTF-8, -16, or -32 string.
- The pointer is reinterpreted according to sizeof(wchar_t); any other size assigns nothing. **/
- ustring16<TAlloc>& operator=(const wchar_t* const c)
- {
-     switch (sizeof(wchar_t))
-     {
-     case 4:
-         *this = reinterpret_cast<const uchar32_t* const>(c);
-         break;
-     case 2:
-         *this = reinterpret_cast<const uchar16_t* const>(c);
-         break;
-     case 1:
-         *this = reinterpret_cast<const uchar8_t* const>(c);
-         break;
-     default:
-         break;
-     }
-     return *this;
- }
- //! Assignment operator for other strings.
- /** Note that this assumes that a correct unicode string is stored in the string.
- The pointer is reinterpreted as UTF-32, UTF-16 or UTF-8 data according to sizeof(B);
- any other element size assigns nothing. **/
- template <class B>
- ustring16<TAlloc>& operator=(const B* const c)
- {
-     switch (sizeof(B))
-     {
-     case 4:
-         *this = reinterpret_cast<const uchar32_t* const>(c);
-         break;
-     case 2:
-         *this = reinterpret_cast<const uchar16_t* const>(c);
-         break;
-     case 1:
-         *this = reinterpret_cast<const uchar8_t* const>(c);
-         break;
-     default:
-         break;
-     }
-     return *this;
- }
- //! Direct access operator.
- //! \param index Position handed to the iterator constructor; must be below size().
- //! \return The access object obtained by dereferencing an iterator at that position.
- access operator [](const u32 index)
- {
-     _IRR_DEBUG_BREAK_IF(index>=size()) // bad index
-     iterator cursor(*this, index);
-     return *cursor;
- }
- //! Direct access operator (const string).
- //! \param index Position handed to the iterator constructor; must be below size().
- //! \return The access object obtained by dereferencing a const_iterator at that position.
- const access operator [](const u32 index) const
- {
-     _IRR_DEBUG_BREAK_IF(index>=size()) // bad index
-     const_iterator cursor(*this, index);
-     return *cursor;
- }
- //! Equality operator.
- //! Compares code unit by code unit against a NUL-terminated UTF-16 array.
- //! \param str The raw string to compare with; a null pointer never compares equal.
- //! \return True if both strings end at the same index with identical units.
- bool operator ==(const uchar16_t* const str) const
- {
-     if (str == 0)
-         return false;
-     u32 idx = 0;
-     while (array[idx] && str[idx])
-     {
-         if (array[idx] != str[idx])
-             return false;
-         ++idx;
-     }
-     return array[idx] == 0 && str[idx] == 0;
- }
- //! Equality operator.
- //! Compares the raw code units up to the first terminator in either string, then
- //! requires both strings to have the same raw length.
- bool operator ==(const ustring16<TAlloc>& other) const
- {
-     u32 idx = 0;
-     while (array[idx] && other.array[idx])
-     {
-         if (array[idx] != other.array[idx])
-             return false;
-         ++idx;
-     }
-     return used == other.used;
- }
- //! Is smaller comparator.
- //! Orders by the first differing raw code unit; if none differs before a terminator
- //! is reached, the string with the smaller raw length is the smaller one.
- bool operator <(const ustring16<TAlloc>& other) const
- {
-     u32 idx = 0;
-     while (array[idx] && other.array[idx])
-     {
-         if (array[idx] != other.array[idx])
-             return array[idx] < other.array[idx];
-         ++idx;
-     }
-     return used < other.used;
- }
- //! Inequality operator.
- //! \return The negation of operator== for the same raw string.
- bool operator !=(const uchar16_t* const str) const
- {
-     const bool same = (*this == str);
-     return !same;
- }
- //! Inequality operator.
- //! \return The negation of operator== for the same string.
- bool operator !=(const ustring16<TAlloc>& other) const
- {
-     const bool same = (*this == other);
-     return !same;
- }
- //! Returns the length of a ustring16 in full characters.
- //! The count is obtained by stepping a const_iterator from the start until atEnd().
- //! \return Length of a ustring16 in full characters.
- u32 size() const
- {
-     u32 count = 0;
-     for (const_iterator it(*this, 0); !it.atEnd(); ++it)
-         ++count;
-     return count;
- }
- //! Informs if the ustring is empty or not.
- //! \return True if the raw size is zero, false if not.
- bool empty() const
- {
-     return 0 == size_raw();
- }
- //! Returns a pointer to the raw UTF-16 string data.
- //! \return The internal array of UTF-16 code units.
- const uchar16_t* c_str() const
- {
-     return this->array;
- }
- //! Compares the first n raw code units of this string with another.
- //! \param other Other string to compare to.
- //! \param n Number of code units to compare.
- //! \return True if the first n units match, or, when a terminator is reached first,
- //! if both strings have the same raw length.
- bool equalsn(const ustring16<TAlloc>& other, u32 n) const
- {
-     const uchar16_t* theirs = other.c_str();
-     u32 idx = 0;
-     while (idx < n && array[idx] && theirs[idx])
-     {
-         if (array[idx] != theirs[idx])
-             return false;
-         ++idx;
-     }
-     // Stopped early on a terminator: equal only if the raw lengths agree.
-     if (idx == n)
-         return true;
-     return used == other.used;
- }
- //! Compares the first n raw code units of this string with a raw UTF-16 array.
- //! \param str Other string to compare to; a null pointer never compares equal.
- //! \param n Number of code units to compare.
- //! \return True if the first n units match, or, when a terminator is reached first,
- //! if both strings end at that same index.
- bool equalsn(const uchar16_t* const str, u32 n) const
- {
-     if (str == 0)
-         return false;
-     u32 idx = 0;
-     while (idx < n && array[idx] && str[idx])
-     {
-         if (array[idx] != str[idx])
-             return false;
-         ++idx;
-     }
-     // Stopped early on a terminator: equal only if both strings end here.
-     if (idx == n)
-         return true;
-     return array[idx] == 0 && str[idx] == 0;
- }
- //! Appends a character to this ustring16.
- //! Values above 0xFFFF are stored as a surrogate pair (two code units); everything
- //! else is stored as a single code unit.
- //! \param character The character to append.
- //! \return A reference to our current string.
- ustring16<TAlloc>& append(uchar32_t character)
- {
-     if (used + 2 >= allocated)
-         reallocate(used + 2);
-     if (character <= 0xFFFF)
-     {
-         array[used] = character;
-         ++used;
-     }
-     else
-     {
-         // character will be multibyte, so split it up into a surrogate pair.
-         const uchar16_t low16 = static_cast<uchar16_t>(character);
-         const uchar16_t hi = UTF16_HI_SURROGATE | ((((character >> 16) & ((1 << 5) - 1)) - 1) << 6) | (low16 >> 10);
-         const uchar16_t lo = UTF16_LO_SURROGATE | (low16 & ((1 << 10) - 1));
-         array[used] = hi;
-         array[used + 1] = lo;
-         used += 2;
-     }
-     array[used] = 0;
-     return *this;
- }
- //! Appends a UTF-8 string to this ustring16
- //! A leading UTF-8 BOM is skipped. Malformed input (stray continuation bytes,
- //! 0xC0/0xC1, truncated or broken multi-byte sequences, bytes that match no lead-byte
- //! pattern) is replaced with unicode::UTF_REPLACEMENT_CHARACTER.
- //! \param other The UTF-8 string to append. A null pointer leaves the string unchanged.
- //! \param length The length of the string to append.
- //! \return A reference to our current string.
- ustring16<TAlloc>& append(const uchar8_t* const other, u32 length=0xffffffff)
- {
-     if (!other)
-         return *this;
-     // Determine if the string is long enough for a BOM.
-     u32 len = 0;
-     const uchar8_t* p = other;
-     do
-     {
-         ++len;
-     } while (*p++ && len < unicode::BOM_ENCODE_UTF8_LEN);
-     // Check for BOM.
-     unicode::EUTF_ENCODE c_bom = unicode::EUTFE_NONE;
-     if (len == unicode::BOM_ENCODE_UTF8_LEN)
-     {
-         if (memcmp(other, unicode::BOM_ENCODE_UTF8, unicode::BOM_ENCODE_UTF8_LEN) == 0)
-             c_bom = unicode::EUTFE_UTF8;
-     }
-     // If a BOM was found, don't include it in the string.
-     const uchar8_t* c2 = other;
-     if (c_bom != unicode::EUTFE_NONE)
-     {
-         c2 = other + unicode::BOM_UTF8_LEN;
-         length -= unicode::BOM_UTF8_LEN;
-     }
-     // Calculate the size of the string to read in.
-     // The do/while counts the terminating NUL as well when it is reached before `length`.
-     len = 0;
-     p = c2;
-     do
-     {
-         ++len;
-     } while(*p++ && len < length);
-     if (len > length)
-         len = length;
-     // If we need to grow the array, do it now.
-     if (used + len >= allocated)
-         reallocate(used + (len * 2));
-     u32 start = used;
-     // Convert UTF-8 to UTF-16.
-     // `l` indexes the source bytes, `pos` the destination code units.
-     u32 pos = start;
-     for (u32 l = 0; l<len;)
-     {
-         ++used;
-         if (((c2[l] >> 6) & 0x03) == 0x02)
-         { // Invalid continuation byte.
-             array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
-             ++l;
-         }
-         else if (c2[l] == 0xC0 || c2[l] == 0xC1)
-         { // Invalid byte - overlong encoding.
-             array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
-             ++l;
-         }
-         else if ((c2[l] & 0xF8) == 0xF0)
-         { // 4 bytes UTF-8, 2 bytes UTF-16.
-             // Check for a full string.
-             if ((l + 3) >= len)
-             {
-                 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
-                 l += 3;
-                 break;
-             }
-             // Validate: l2 counts the leading run of well-formed continuation bytes.
-             bool valid = true;
-             u8 l2 = 0;
-             if (valid && (((c2[l+1] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
-             if (valid && (((c2[l+2] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
-             if (valid && (((c2[l+3] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
-             if (!valid)
-             {
-                 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
-                 // Skip the lead byte plus the continuation bytes that did belong to it.
-                 // Advancing by l2 alone would not move at all when l2 == 0, so the
-                 // loop would spin forever and keep writing past the buffer.
-                 l += l2 + 1;
-                 continue;
-             }
-             // Decode.
-             uchar8_t b1 = ((c2[l] & 0x7) << 2) | ((c2[l+1] >> 4) & 0x3);
-             uchar8_t b2 = ((c2[l+1] & 0xF) << 4) | ((c2[l+2] >> 2) & 0xF);
-             uchar8_t b3 = ((c2[l+2] & 0x3) << 6) | (c2[l+3] & 0x3F);
-             uchar32_t v = b3 | ((uchar32_t)b2 << 8) | ((uchar32_t)b1 << 16);
-             // Split v up into a surrogate pair.
-             uchar16_t x = static_cast<uchar16_t>(v);
-             uchar16_t vh = UTF16_HI_SURROGATE | ((((v >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
-             uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
-             array[pos++] = vh;
-             array[pos++] = vl;
-             l += 4;
-             ++used; // Using two shorts this time, so increase used by 1.
-         }
-         else if ((c2[l] & 0xF0) == 0xE0)
-         { // 3 bytes UTF-8, 1 byte UTF-16.
-             // Check for a full string.
-             if ((l + 2) >= len)
-             {
-                 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
-                 l += 2;
-                 break;
-             }
-             // Validate: l2 counts the leading run of well-formed continuation bytes.
-             bool valid = true;
-             u8 l2 = 0;
-             if (valid && (((c2[l+1] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
-             if (valid && (((c2[l+2] >> 6) & 0x03) == 0x02)) ++l2; else valid = false;
-             if (!valid)
-             {
-                 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
-                 // Same as the 4-byte case: always consume the lead byte so the loop advances.
-                 l += l2 + 1;
-                 continue;
-             }
-             // Decode.
-             uchar8_t b1 = ((c2[l] & 0xF) << 4) | ((c2[l+1] >> 2) & 0xF);
-             uchar8_t b2 = ((c2[l+1] & 0x3) << 6) | (c2[l+2] & 0x3F);
-             uchar16_t ch = b2 | ((uchar16_t)b1 << 8);
-             array[pos++] = ch;
-             l += 3;
-         }
-         else if ((c2[l] & 0xE0) == 0xC0)
-         { // 2 bytes UTF-8, 1 byte UTF-16.
-             // Check for a full string.
-             if ((l + 1) >= len)
-             {
-                 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
-                 l += 1;
-                 break;
-             }
-             // Validate.
-             if (((c2[l+1] >> 6) & 0x03) != 0x02)
-             {
-                 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
-                 ++l;
-                 continue;
-             }
-             // Decode.
-             uchar8_t b1 = (c2[l] >> 2) & 0x7;
-             uchar8_t b2 = ((c2[l] & 0x3) << 6) | (c2[l+1] & 0x3F);
-             uchar16_t ch = b2 | ((uchar16_t)b1 << 8);
-             array[pos++] = ch;
-             l += 2;
-         }
-         else
-         { // 1 byte UTF-8, 1 byte UTF-16.
-             // Validate.
-             if (c2[l] > 0x7F)
-             { // Values above 0xF4 are restricted and aren't used. By now, anything above 0x7F is invalid.
-                 array[pos++] = unicode::UTF_REPLACEMENT_CHARACTER;
-             }
-             else array[pos++] = static_cast<uchar16_t>(c2[l]);
-             ++l;
-         }
-     }
-     array[used] = 0;
-     // Validate our new UTF-16 string.
-     validate();
-     return *this;
- }
- //! Appends a UTF-16 string to this ustring16
- //! A leading BOM selects the source byte order and is not copied; code units are
- //! byte-swapped when that order differs from getEndianness().
- //! \param other The UTF-16 string to append. A null pointer leaves the string unchanged.
- //! \param length The length of the string to append.
- //! \return A reference to our current string.
- ustring16<TAlloc>& append(const uchar16_t* const other, u32 length=0xffffffff)
- {
-     if (!other)
-         return *this;
-     // Check for the BOM to determine the string's endianness.
-     unicode::EUTF_ENDIAN c_end = unicode::EUTFEE_NATIVE;
-     if (memcmp(other, unicode::BOM_ENCODE_UTF16_LE, unicode::BOM_ENCODE_UTF16_LEN) == 0)
-         c_end = unicode::EUTFEE_LITTLE;
-     else if (memcmp(other, unicode::BOM_ENCODE_UTF16_BE, unicode::BOM_ENCODE_UTF16_LEN) == 0)
-         c_end = unicode::EUTFEE_BIG;
-     // If a BOM was found, don't include it in the string.
-     const uchar16_t* c2 = other;
-     if (c_end != unicode::EUTFEE_NATIVE)
-     {
-         c2 = other + unicode::BOM_UTF16_LEN;
-         length -= unicode::BOM_UTF16_LEN;
-     }
-     // Calculate the size of the string to read in.
-     // The do/while counts the terminating NUL as well when it is reached before `length`.
-     u32 len = 0;
-     const uchar16_t* p = c2;
-     do
-     {
-         ++len;
-     } while(*p++ && len < length);
-     if (len > length)
-         len = length;
-     // If we need to grow the size of the array, do it now.
-     if (used + len >= allocated)
-         reallocate(used + (len * 2));
-     u32 start = used;
-     used += len;
-     // Copy the string now.
-     unicode::EUTF_ENDIAN m_end = getEndianness();
-     for (u32 l = start; l < start + len; ++l)
-     {
-         // `l` is a destination index; the source always starts at c2[0]. Indexing the
-         // source with `l` read the wrong units whenever this string was not empty.
-         array[l] = (uchar16_t)c2[l - start];
-         if (c_end != unicode::EUTFEE_NATIVE && c_end != m_end)
-             array[l] = unicode::swapEndian16(array[l]);
-     }
-     array[used] = 0;
-     // Validate our new UTF-16 string.
-     validate();
-     return *this;
- }
- //! Appends a UTF-32 string to this ustring16
- //! A leading BOM selects the source byte order and is not copied. Values above
- //! 0xFFFF become surrogate pairs; values in 0xD800..0xDFFF are replaced with
- //! unicode::UTF_REPLACEMENT_CHARACTER.
- //! \param other The UTF-32 string to append. A null pointer leaves the string unchanged.
- //! \param length The length of the string to append.
- //! \return A reference to our current string.
- ustring16<TAlloc>& append(const uchar32_t* const other, u32 length=0xffffffff)
- {
-     if (other == 0)
-         return *this;
-     // Check for the BOM to determine the string's endianness.
-     unicode::EUTF_ENDIAN srcEndian = unicode::EUTFEE_NATIVE;
-     if (memcmp(other, unicode::BOM_ENCODE_UTF32_LE, unicode::BOM_ENCODE_UTF32_LEN) == 0)
-         srcEndian = unicode::EUTFEE_LITTLE;
-     else if (memcmp(other, unicode::BOM_ENCODE_UTF32_BE, unicode::BOM_ENCODE_UTF32_LEN) == 0)
-         srcEndian = unicode::EUTFEE_BIG;
-     const bool hasBom = (srcEndian != unicode::EUTFEE_NATIVE);
-     // If a BOM was found, don't include it in the string.
-     const uchar32_t* src = other;
-     if (hasBom)
-     {
-         src = other + unicode::BOM_UTF32_LEN;
-         length -= unicode::BOM_UTF32_LEN;
-     }
-     // Calculate the size of the string to read in.
-     u32 count = 0;
-     const uchar32_t* scan = src;
-     do
-     {
-         ++count;
-     } while(*scan++ && count < length);
-     if (count > length)
-         count = length;
-     // If we need to grow the size of the array, do it now.
-     // In case all of the UTF-32 string is split into surrogate pairs, do count * 2.
-     if (used + (count * 2) >= allocated)
-         reallocate(used + ((count * 2) * 2));
-     // Convert UTF-32 to UTF-16, writing at `used` and advancing it per code unit.
-     const unicode::EUTF_ENDIAN myEndian = getEndianness();
-     const bool swapBytes = hasBom && (srcEndian != myEndian);
-     for (u32 n = 0; n < count; ++n)
-     {
-         uchar32_t ch = src[n];
-         if (swapBytes)
-             ch = unicode::swapEndian32(ch);
-         if (ch > 0xFFFF)
-         {
-             // Split ch up into a surrogate pair as it is over 16 bits long.
-             const uchar16_t low16 = static_cast<uchar16_t>(ch);
-             const uchar16_t hi = UTF16_HI_SURROGATE | ((((ch >> 16) & ((1 << 5) - 1)) - 1) << 6) | (low16 >> 10);
-             const uchar16_t lo = UTF16_LO_SURROGATE | (low16 & ((1 << 10) - 1));
-             array[used++] = hi;
-             array[used++] = lo;
-         }
-         else if (ch >= 0xD800 && ch <= 0xDFFF)
-         {
-             // Between possible UTF-16 surrogates (invalid!)
-             array[used++] = unicode::UTF_REPLACEMENT_CHARACTER;
-         }
-         else
-         {
-             array[used++] = static_cast<uchar16_t>(ch);
-         }
-     }
-     array[used] = 0;
-     // Validate our new UTF-16 string.
-     validate();
-     return *this;
- }
- //! Appends a ustring16 to this ustring16
- //! Copies the other string's raw code units behind the current content.
- //! \param other The string to append to this one (may be this string itself).
- //! \return A reference to our current string.
- ustring16<TAlloc>& append(const ustring16<TAlloc>& other)
- {
-     u32 len = other.size_raw();
-     if (used + len >= allocated)
-         reallocate(used + len);
-     // Fetch the source pointer only after any reallocation: when `other` is this
-     // string, a pointer taken earlier would refer to the buffer that was just replaced.
-     const uchar16_t* oa = other.c_str();
-     for (u32 l=0; l<len; ++l)
-         array[used+l] = oa[l];
-     used += len;
-     array[used] = 0;
-     return *this;
- }
- //! Appends a certain amount of characters of a ustring16 to this ustring16.
- //! If the other string has fewer than \p length characters it is appended whole.
- //! \param other The string to append to this one.
- //! \param length How many characters of the other string to add to this one.
- //! \return A reference to our current string.
- ustring16<TAlloc>& append(const ustring16<TAlloc>& other, u32 length)
- {
-     const u32 available = other.size();
-     if (available == 0)
-         return *this;
-     if (available < length)
-     {
-         append(other);
-         return *this;
-     }
-     // Two code units per character covers the case where every one is a surrogate pair.
-     if (used + length * 2 >= allocated)
-         reallocate(used + length * 2);
-     const_iterator src(other, 0);
-     for (u32 remaining = length; remaining && !src.atEnd(); --remaining, ++src)
-     {
-         uchar32_t ch = *src;
-         append(ch);
-     }
-     return *this;
- }
- //! Reserves some memory.
- //! Does nothing when the current allocation is already larger than \p count.
- //! \param count The amount of characters to reserve.
- void reserve(u32 count)
- {
-     if (count >= allocated)
-         reallocate(count);
- }
- //! Finds first occurrence of character.
- //! \param c The character to search for.
- //! \return Position where the character has been found, or -1 if not found.
- s32 findFirst(uchar32_t c) const
- {
-     s32 index = 0;
-     for (const_iterator it(*this, 0); !it.atEnd(); ++it, ++index)
-     {
-         const uchar32_t current = *it;
-         if (current == c)
-             return index;
-     }
-     return -1;
- }
- //! Finds first occurrence of a character of a list.
- //! \param c A list of characters to find. For example if the method should find the first occurrence of 'a' or 'b', this parameter should be "ab".
- //! \param count The amount of characters in the list. Usually, this should be strlen(c).
- //! \return Position where one of the characters has been found, or -1 if not found
- //! (also -1 for a null or empty list).
- s32 findFirstChar(const uchar32_t* const c, u32 count=1) const
- {
-     if (c == 0 || count == 0)
-         return -1;
-     s32 index = 0;
-     for (const_iterator it(*this, 0); !it.atEnd(); ++it, ++index)
-     {
-         const uchar32_t current = *it;
-         for (u32 k = 0; k < count; ++k)
-         {
-             if (c[k] == current)
-                 return index;
-         }
-     }
-     return -1;
- }
- //! Finds first position of a character not in a given list.
- //! \param c A list of characters to NOT find. For example if the method should find the first occurrence of a character not 'a' or 'b', this parameter should be "ab".
- //! \param count The amount of characters in the list. Usually, this should be strlen(c).
- //! \return Position where the character has been found, or -1 if not found
- //! (also -1 for a null or empty list).
- s32 findFirstCharNotInList(const uchar32_t* const c, u32 count=1) const
- {
-     if (c == 0 || count == 0)
-         return -1;
-     s32 index = 0;
-     for (const_iterator it(*this, 0); !it.atEnd(); ++it, ++index)
-     {
-         const uchar32_t current = *it;
-         bool listed = false;
-         for (u32 k = 0; k < count && !listed; ++k)
-             listed = (current == c[k]);
-         if (!listed)
-             return index;
-     }
-     return -1;
- }
- //! Finds last position of a character not in a given list.
- //! \param c A list of characters to NOT find. For example if the method should find the last occurrence of a character not 'a' or 'b', this parameter should be "ab".
- //! \param count The amount of characters in the list. Usually, this should be strlen(c).
- //! \return Position where the character has been found, or -1 if not found
- //! (also -1 for a null or empty list, or an empty string).
- s32 findLastCharNotInList(const uchar32_t* const c, u32 count=1) const
- {
-     if (!c || !count)
-         return -1;
-     // Nothing to inspect; also avoids dereferencing an iterator on an empty string.
-     if (empty())
-         return -1;
-     const_iterator i(end());
-     --i;
-     s32 pos = size() - 1;
-     for (;;)
-     {
-         uchar32_t t = *i;
-         u32 j;
-         for (j=0; j<count; ++j)
-             if (t == c[j])
-                 break;
-         if (j==count)
-             return pos;
-         // Stop only after the character at the start has been tested; checking
-         // atStart() first would skip the first character of the string.
-         if (i.atStart())
-             break;
-         --pos;
-         --i;
-     }
-     return -1;
- }
- //! Finds next occurrence of character.
- //! \param c The character to search for.
- //! \param startPos The position in the string to start searching.
- //! \return Position where the character has been found, or -1 if not found.
- s32 findNext(uchar32_t c, u32 startPos) const
- {
-     s32 index = startPos;
-     for (const_iterator it(*this, startPos); !it.atEnd(); ++it, ++index)
-     {
-         const uchar32_t current = *it;
-         if (current == c)
-             return index;
-     }
-     return -1;
- }
- //! Finds last occurrence of character.
- //! \param c The character to search for.
- //! \param start The start position of the reverse search ( default = -1, on end ).
- //! The value is clamped to [0, size()] and the search begins one character before it.
- //! \return Position where the character has been found, or -1 if not found.
- s32 findLast(uchar32_t c, s32 start = -1) const
- {
-     u32 s = size();
-     start = core::clamp ( start < 0 ? (s32)s : start, 0, (s32)s ) - 1;
-     // Empty string or start == 0: there is no character to look at, and an iterator
-     // must not be built from a negative position.
-     if (start < 0)
-         return -1;
-     const_iterator i(*this, start);
-     s32 pos = start;
-     for (;;)
-     {
-         uchar32_t t = *i;
-         if (t == c)
-             return pos;
-         // Stop only after the character at the start has been tested.
-         if (i.atStart())
-             break;
-         --pos;
-         --i;
-     }
-     return -1;
- }
- //! Finds last occurrence of a character in a list.
- //! \param c A list of characters to find. For example if the method should find the last occurrence of 'a' or 'b', this parameter should be "ab".
- //! \param count The amount of characters in the list. Usually, this should be strlen(c).
- //! \return Position where one of the characters has been found, or -1 if not found
- //! (also -1 for a null or empty list, or an empty string).
- s32 findLastChar(const uchar32_t* const c, u32 count=1) const
- {
-     if (!c || !count)
-         return -1;
-     // Nothing to inspect; also avoids dereferencing an iterator on an empty string.
-     if (empty())
-         return -1;
-     const_iterator i(end());
-     --i;
-     // The iterator now sits on the last character, whose index is size() - 1.
-     s32 pos = size() - 1;
-     for (;;)
-     {
-         uchar32_t t = *i;
-         for (u32 j=0; j<count; ++j)
-             if (t == c[j])
-                 return pos;
-         // Stop only after the character at the start has been tested.
-         if (i.atStart())
-             break;
-         --pos;
-         --i;
-     }
-     return -1;
- }
- //! Finds another ustring16 in this ustring16.
- //! Comparison is done full character by full character.
- //! \param str The string to find.
- //! \param start The start position of the search.
- //! \return Positions where the ustring16 has been found, or -1 if not found
- //! (also -1 for an empty search string or a start beyond the end).
- s32 find(const ustring16<TAlloc>& str, const u32 start = 0) const
- {
-     u32 my_size = size();
-     u32 their_size = str.size();
-     // Test start > my_size first: the unsigned subtraction below would wrap around
-     // and let an out-of-range start slip through.
-     if (their_size == 0 || start > my_size || my_size - start < their_size)
-         return -1;
-     const_iterator i(*this, start);
-     s32 pos = start;
-     while (!i.atEnd())
-     {
-         const_iterator i2(i);
-         const_iterator j(str, 0);
-         uchar32_t t1 = (uchar32_t)*i2;
-         uchar32_t t2 = (uchar32_t)*j;
-         while (t1 == t2)
-         {
-             ++i2;
-             ++j;
-             // Every character of str matched.
-             if (j.atEnd())
-                 return pos;
-             t1 = (uchar32_t)*i2;
-             t2 = (uchar32_t)*j;
-         }
-         ++i;
-         ++pos;
-     }
-     return -1;
- }
- //! Finds another ustring16 in this ustring16, comparing raw code units.
- //! \param str The string to find.
- //! \param start The raw index at which the search starts.
- //! \return Raw index where the string has been found, or -1 if not found
- //! (also -1 for an empty search string).
- s32 find_raw(const ustring16<TAlloc>& str, const u32 start = 0) const
- {
-     const uchar16_t* needle = str.c_str();
-     if (!needle || !*needle)
-         return -1;
-     u32 needleLen = 0;
-     while (needle[needleLen])
-         ++needleLen;
-     if (needleLen > used)
-         return -1;
-     const u32 lastStart = used - needleLen;
-     for (u32 at = start; at <= lastStart; ++at)
-     {
-         u32 matched = 0;
-         while (needle[matched] && array[at + matched] == needle[matched])
-             ++matched;
-         if (needle[matched] == 0)
-             return at;
-     }
-     return -1;
- }
- //! Returns a substring.
- //! \param begin: Start of substring, in full characters.
- //! \param length: Length of substring, in full characters; clamped to the end of the string.
- //! \return A new string holding the requested characters, or an empty string when
- //! length is not positive or begin is at or past the end.
- ustring16<TAlloc> subString(u32 begin, s32 length) const
- {
-     const u32 total = size();
-     // if start after ustring16
-     // or no proper substring length
-     if (length <= 0 || begin >= total)
-         return ustring16<TAlloc>("");
-     // clamp length to maximal value
-     if ((length+begin) > total)
-         length = total-begin;
-     ustring16<TAlloc> result;
-     result.reserve((length+1) * 2);
-     for (const_iterator it(*this, begin); length && !it.atEnd(); ++it, --length)
-         result.append(*it);
-     return result;
- }
- //! Appends a character to this ustring16.
- //! \param c Character to append; it is cast to uchar32_t first.
- //! \return A reference to our current string.
- ustring16<TAlloc>& operator += (char c)
- {
-     const uchar32_t asCodePoint = (uchar32_t)c;
-     append(asCodePoint);
-     return *this;
- }
- //! Appends one character, given as a uchar32_t value, to this ustring16.
- //! \param c Character to append.
- //! \return A reference to our current string.
- ustring16<TAlloc>& operator += (uchar32_t c)
- {
-     this->append(c);
-     return *this;
- }
- //! Appends a short to this ustring16, converted through core::stringc.
- //! \param c Number to append.
- //! \return A reference to our current string.
- ustring16<TAlloc>& operator += (short c)
- {
-     const core::stringc digits(c);
-     this->append(digits);
-     return *this;
- }
- //! Appends an unsigned short to this ustring16, converted through core::stringc.
- //! \param c Number to append.
- //! \return A reference to our current string.
- ustring16<TAlloc>& operator += (unsigned short c)
- {
-     const core::stringc digits(c);
-     this->append(digits);
-     return *this;
- }
- #ifdef USTRING_CPP0X_NEWLITERALS
- //! Appends an int to this ustring16, converted through core::stringc.
- //! Only compiled when USTRING_CPP0X_NEWLITERALS is defined.
- //! \param c Number to append.
- //! \return A reference to our current string.
- ustring16<TAlloc>& operator += (int c)
- {
-     const core::stringc digits(c);
-     this->append(digits);
-     return *this;
- }
- //! Appends an unsigned int to this ustring16, converted through core::stringc.
- //! Only compiled when USTRING_CPP0X_NEWLITERALS is defined.
- //! \param c Number to append.
- //! \return A reference to our current string.
- ustring16<TAlloc>& operator += (unsigned int c)
- {
-     const core::stringc digits(c);
-     this->append(digits);
-     return *this;
- }
- #endif
- //! Appends a long to this ustring16, converted through core::stringc.
- //! \param c Number to append.
- //! \return A reference to our current string.
- ustring16<TAlloc>& operator += (long c)
- {
-     const core::stringc digits(c);
-     this->append(digits);
-     return *this;
- }
- //! Appends an unsigned long to this ustring16, converted through core::stringc.
- //! \param c Number to append.
- //! \return A reference to our current string.
- ustring16<TAlloc>& operator += (unsigned long c)
- {
-     const core::stringc digits(c);
-     this->append(digits);
-     return *this;
- }
- //! Appends a double to this ustring16, converted through core::stringc.
- //! \param c Number to append.
- //! \return A reference to our current string.
- ustring16<TAlloc>& operator += (double c)
- {
-     const core::stringc digits(c);
-     this->append(digits);
-     return *this;
- }
- //! Appends a UTF-16 character array to this ustring16.
- //! \param c Pointer to the uchar16_t data to append.
- //! \return A reference to our current string.
- ustring16<TAlloc>& operator += (const uchar16_t* const c)
- {
-     this->append(c);
-     return *this;
- }
- //! Appends another ustring16 to the end of this one.
- //! \param other The string whose contents are appended.
- //! \return A reference to our current string.
- ustring16<TAlloc>& operator += (const ustring16<TAlloc>& other)
- {
-     this->append(other);
-     return *this;
- }
- //! Substitutes every occurrence of one character with another.
- //! \param toReplace Character to look for.
- //! \param replaceWith Character written in its place.
- //! \return A reference to our current string.
- ustring16<TAlloc>& replace(uchar32_t toReplace, uchar32_t replaceWith)
- {
-     for (iterator it(*this, 0); !it.atEnd(); ++it)
-     {
-         // The iterator yields an `access` object; the replacement is made by assigning to it.
-         typename ustring16<TAlloc>::access slot = *it;
-         if ((uchar32_t)slot == toReplace)
-             slot = replaceWith;
-     }
-     return *this;
- }
- //! Replaces all instances of a string with another one.
- //! Matching is done on raw UTF-16 code units, left to right and non-overlapping;
- //! text written as a replacement is never searched again.
- //! \param toReplace The string to replace. An empty string leaves this string unchanged.
- //! \param replaceWith The string replacing the old one.
- //! \return A reference to our current string.
- //! \note NOTE(review): both arguments are assumed not to alias this string, because
- //! their buffers are read while this string's buffer is rewritten.
- ustring16<TAlloc>& replace(const ustring16<TAlloc>& toReplace, const ustring16<TAlloc>& replaceWith)
- {
-     if (toReplace.size() == 0)
-         return *this;
-     const uchar16_t* other = toReplace.c_str();
-     const uchar16_t* replacement = replaceWith.c_str();
-     const u32 other_size = toReplace.size_raw();
-     const u32 replace_size = replaceWith.size_raw();
-     // Determine the delta. The algorithm will change depending on the delta.
-     const s32 delta = replace_size - other_size;
-     // A unit-for-unit replace. The string will not shrink or grow.
-     if (delta == 0)
-     {
-         s32 pos = 0;
-         while ((pos = find_raw(toReplace, pos)) != -1)
-         {
-             for (u32 i = 0; i < replace_size; ++i)
-                 array[pos + i] = replacement[i];
-             // Resume after the text just written so it cannot be matched again,
-             // which is how the shrinking and growing branches behave too.
-             pos += replace_size;
-         }
-         return *this;
-     }
-     // We are going to be removing some units. The string will shrink, so it can be
-     // compacted in place: `out` (write index) never overtakes `in` (read index).
-     if (delta < 0)
-     {
-         u32 out = 0;
-         u32 in = 0;
-         while (in < used)
-         {
-             // Count how many units of toReplace match at `in`; only attempted when
-             // the whole pattern still fits, so nothing past the string is read.
-             u32 j = 0;
-             if (other_size <= used - in)
-             {
-                 while (j < other_size && array[in + j] == other[j])
-                     ++j;
-             }
-             if (j == other_size)
-             {
-                 // Full match: emit the replacement and skip the matched units.
-                 for (j = 0; j < replace_size; ++j)
-                     array[out + j] = replacement[j];
-                 out += replace_size;
-                 in += other_size;
-             }
-             else
-             {
-                 // No match here: keep this unit.
-                 array[out++] = array[in++];
-             }
-         }
-         array[out] = 0;
-         used = out;
-         return *this;
-     }
-     // We are going to be adding units, so the string size will increase.
-     // Count the non-overlapping matches first so the buffer can be sized once.
-     u32 find_count = 0;
-     s32 pos = 0;
-     while ((pos = find_raw(toReplace, pos)) != -1)
-     {
-         ++find_count;
-         pos += other_size;
-     }
-     if (find_count == 0)
-         return *this;
-     // Re-allocate the string now, if needed.
-     const u32 len = delta * find_count;
-     if (used + len >= allocated)
-         reallocate(used + len);
-     // Start replacing.
-     pos = 0;
-     while ((pos = find_raw(toReplace, pos)) != -1)
-     {
-         const u32 at = static_cast<u32>(pos);
-         // Shift everything after the match (terminator included) up by delta,
-         // working backwards so no unit is overwritten before it has been moved.
-         for (u32 src = used + 1; src > at + other_size; )
-         {
-             --src;
-             array[src + delta] = array[src];
-         }
-         // Add the new string now.
-         for (u32 i = 0; i < replace_size; ++i)
-             array[at + i] = replacement[i];
-         pos += replace_size;
-         used += delta;
-     }
-     // Terminate the string and return ourself.
-     array[used] = 0;
-     return *this;
- }
- //! Removes every occurrence of a character from the ustring16.
- //! \param c The character to remove.
- //! \return A reference to our current string.
- ustring16<TAlloc>& remove(uchar32_t c)
- {
-     u32 pos = 0; // Write index of the compacted string.
-     // Only the stored units are visited; the terminator at array[used] is not a
-     // character of the string and must never be matched or counted.
-     for (u32 i = 0; i < used; ++i)
-     {
-         // Decode the character starting at i and note how many units it spans.
-         uchar32_t uc32 = array[i];
-         u32 units = 1;
-         if (UTF16_IS_SURROGATE_HI(array[i]) && i + 1 < used)
-         {
-             // Convert the surrogate pair into a single UTF-32 character.
-             uc32 = unicode::toUTF32(array[i], array[i + 1]);
-             units = (uc32 > 0xFFFF ? 2 : 1);
-         }
-         // Keep every unit of a character that does not match.
-         if (uc32 != c)
-         {
-             for (u32 k = 0; k < units; ++k)
-                 array[pos++] = array[i + k];
-         }
-         // Step over the remaining unit of a pair, whether it was kept or removed,
-         // so a removed pair never leaves its low surrogate behind.
-         i += units - 1;
-     }
-     used = pos;
-     array[used] = 0;
-     return *this;
- }
- //! Deletes every occurrence of a string from this ustring16.
- //! Comparison is done on raw UTF-16 code units.
- //! \param toRemove The string to remove; an empty string changes nothing.
- //! \return A reference to our current string.
- ustring16<TAlloc>& remove(const ustring16<TAlloc>& toRemove)
- {
-     const u32 pattern_len = toRemove.size_raw();
-     if (pattern_len == 0)
-         return *this;
-     const uchar16_t* pattern = toRemove.c_str();
-     u32 write = 0;
-     u32 removed = 0;
-     u32 read = 0;
-     // The scan includes index `used`, so the terminator is carried along with the kept units.
-     while (read <= used)
-     {
-         u32 matched = 0;
-         while (matched < pattern_len && array[read + matched] == pattern[matched])
-             ++matched;
-         if (matched == pattern_len)
-         {
-             // Whole pattern found: drop it.
-             removed += pattern_len;
-             read += pattern_len;
-         }
-         else
-         {
-             array[write++] = array[read];
-             ++read;
-         }
-     }
-     used -= removed;
-     array[used] = 0;
-     return *this;
- }
- //! Removes every character that appears in a given set from the ustring16.
- //! \param characters The characters to remove; an empty string changes nothing.
- //! \return A reference to our current string.
- ustring16<TAlloc>& removeChars(const ustring16<TAlloc>& characters)
- {
-     if (characters.size_raw() == 0)
-         return *this;
-     u32 pos = 0; // Write index of the compacted string.
-     const_iterator iter(characters);
-     // Only the stored units are visited; the terminator is handled after the loop.
-     for (u32 i = 0; i < used; ++i)
-     {
-         // Decode the character starting at i and note how many units it spans.
-         uchar32_t uc32 = array[i];
-         u32 units = 1;
-         if (UTF16_IS_SURROGATE_HI(array[i]) && i + 1 < used)
-         {
-             // Convert the surrogate pair into a single UTF-32 character.
-             uc32 = unicode::toUTF32(array[i], array[i + 1]);
-             units = (uc32 > 0xFFFF ? 2 : 1);
-         }
-         // Is this character one of those to be removed?
-         bool drop = false;
-         iter.toStart();
-         while (!iter.atEnd())
-         {
-             const uchar32_t c = *iter;
-             if (uc32 == c)
-             {
-                 drop = true;
-                 break;
-             }
-             ++iter;
-         }
-         if (!drop)
-         {
-             for (u32 k = 0; k < units; ++k)
-                 array[pos++] = array[i + k];
-         }
-         // Advance by exactly the width of the character just handled. Skipping a
-         // fixed two units after a match would swallow the character that follows
-         // a removed single-unit character.
-         i += units - 1;
-     }
-     used = pos;
-     array[used] = 0;
-     return *this;
- }
- //! Trims the ustring16.
- //! Removes the specified characters (by default space, tab, newline and carriage return)
- //! from the beginning and the end of the ustring16.
- //! \param whitespace The characters that are to be considered as whitespace.
- //! \return A reference to our current string.
- ustring16<TAlloc>& trim(const ustring16<TAlloc>& whitespace = " \t\n\r")
- {
-     core::array<uchar32_t> utf32white = whitespace.toUTF32();
-     // Pass the number of elements actually present in the UTF-32 array. The raw
-     // UTF-16 length (whitespace.used + 1) is larger than that whenever the list
-     // contains surrogate pairs, which would read past the end of the array.
-     const u32 count = utf32white.size();
-     // find start and end of the substring without the specified characters
-     const s32 begin = findFirstCharNotInList(utf32white.const_pointer(), count);
-     if (begin == -1)
-         return (*this="");
-     const s32 end = findLastCharNotInList(utf32white.const_pointer(), count);
-     return (*this = subString(begin, (end +1) - begin));
- }
- //! Erases a character from the ustring16.
- //! May be slow, because all elements following after the erased element have to be copied.
- //! \param index Index of element to be erased. An index at or past the end erases nothing.
- //! \return A reference to our current string.
- ustring16<TAlloc>& erase(u32 index)
- {
-     _IRR_DEBUG_BREAK_IF(index>used) // access violation
-     iterator i(*this, index);
-     // With the iterator at the end there is no character to erase. Continuing would
-     // still subtract from `used`, which wraps around on an empty string.
-     if (i.atEnd())
-         return *this;
-     const uchar32_t t = *i;
-     // A character above U+FFFF occupies two UTF-16 units, all others one.
-     const u32 len = (t > 0xFFFF ? 2 : 1);
-     // Move the tail (terminator included) down over the erased units.
-     for (u32 j = static_cast<u32>(i.getPos()) + len; j <= used; ++j)
-         array[j - len] = array[j];
-     used -= len;
-     array[used] = 0;
-     return *this;
- }
- //! Validate the existing ustring16, checking for valid surrogate pairs and checking for proper termination.
- //! Unpaired surrogates and units in the range 0xFDD0-0xFDEF are overwritten with
- //! unicode::UTF_REPLACEMENT_CHARACTER. `used` is set to the index of the first 0 found;
- //! if the buffer holds none, its last element is turned into the terminator.
- //! \return A reference to our current string.
- ustring16<TAlloc>& validate()
- {
-     // Validate all unicode characters.
-     for (u32 i=0; i<allocated; ++i)
-     {
-         // Terminate on existing null.
-         if (array[i] == 0)
-         {
-             used = i;
-             return *this;
-         }
-         if (UTF16_IS_SURROGATE(array[i]))
-         {
-             // A surrogate is only valid as a high surrogate immediately followed,
-             // inside the buffer, by a low surrogate.
-             const bool paired = UTF16_IS_SURROGATE_HI(array[i])
-                 && (i+1) < allocated
-                 && UTF16_IS_SURROGATE_LO(array[i+1]);
-             if (paired)
-                 ++i; // Step over the low surrogate that completes the pair.
-             else
-                 array[i] = unicode::UTF_REPLACEMENT_CHARACTER;
-             // After a replacement the next unit is NOT skipped: it may be the
-             // terminator or the start of another character and must be examined.
-             continue;
-         }
-         if (array[i] >= 0xFDD0 && array[i] <= 0xFDEF)
-             array[i] = unicode::UTF_REPLACEMENT_CHARACTER;
-     }
-     // terminate
-     used = 0;
-     if (allocated > 0)
-     {
-         used = allocated - 1;
-         array[used] = 0;
-     }
-     return *this;
- }
- //! Returns the final character of the ustring16.
- //! \return The last character, or 0 if the string is empty or ends in a low
- //! surrogate that is not preceded by a high surrogate.
- uchar32_t lastChar() const
- {
-     if (used < 1)
-         return 0;
-     const uchar16_t tail = array[used-1];
-     // Anything but a low surrogate is returned as it is.
-     if (!UTF16_IS_SURROGATE_LO(tail))
-         return tail;
-     // A low surrogate needs a high surrogate directly before it.
-     if (used < 2)
-         return 0;
-     if (!UTF16_IS_SURROGATE_HI(array[used-2]))
-         return 0;
-     // Convert the surrogate pair into a single UTF-32 character.
-     return unicode::toUTF32(array[used-2], tail);
- }
- //! Split the ustring16 into parts.
- /** This method will split a ustring16 at certain delimiter characters
- into the container passed in as reference. The type of the container
- has to be given as template parameter. It must provide a push_back and
- a size method.
- \param ret The result container; new substrings are appended to it.
- \param c Array of delimiter characters (uchar32_t values). If null, nothing is done and 0 is returned.
- \param count Number of delimiter characters in c
- \param ignoreEmptyTokens Flag to avoid empty substrings in the result
- container. If two delimiters occur without a character in between, an
- empty substring would be placed in the result. If this flag is set,
- only non-empty strings are stored.
- \param keepSeparators Flag which allows to add the separator to the
- result ustring16. If this flag is true, the concatenation of the
- substrings results in the original ustring16. Otherwise, only the
- characters between the delimiters are returned.
- \return The number of resulting substrings, i.e. how much ret.size() grew.
- \note A token is only stored at a separator when the previous character
- was not itself a separator, whatever ignoreEmptyTokens says.
- \note After the scan a final substring is appended whenever
- size() + 1 > lastpos, with length argument size() + 1 - lastpos.
- NOTE(review): whether that length is meant to include the terminator
- depends on the pointer+length constructor, which is not visible here.
- NOTE(review): a separator is assumed to occupy one UTF-16 unit when
- the raw start of the next token is computed (getPos() + 1) - confirm.
- */
- template<class container>
- u32 split(container& ret, const uchar32_t* const c, u32 count=1, bool ignoreEmptyTokens=true, bool keepSeparators=false) const
- {
- if (!c)
- return 0;
- const_iterator i(*this);
- const u32 oldSize=ret.size(); // remembered to compute the number of added substrings
- u32 pos = 0; // index of the current character
- u32 lastpos = 0; // character index where the current token starts
- u32 lastpospos = 0; // raw array offset where the current token starts
- bool lastWasSeparator = false;
- while (!i.atEnd())
- {
- uchar32_t ch = *i;
- bool foundSeparator = false;
- for (u32 j=0; j<count; ++j)
- {
- if (ch == c[j])
- {
- if ((!ignoreEmptyTokens || pos - lastpos != 0) &&
- !lastWasSeparator)
- ret.push_back(ustring16<TAlloc>(&array[lastpospos], pos - lastpos));
- foundSeparator = true;
- lastpos = (keepSeparators ? pos : pos + 1); // next token starts at or just after the separator
- lastpospos = (keepSeparators ? i.getPos() : i.getPos() + 1);
- break;
- }
- }
- lastWasSeparator = foundSeparator;
- ++pos;
- ++i;
- }
- u32 s = size() + 1;
- if (s > lastpos)
- ret.push_back(ustring16<TAlloc>(&array[lastpospos], s - lastpos)); // remainder after the last separator
- return ret.size()-oldSize;
- }
- //! Split the ustring16 into parts, taking the delimiters from another ustring16.
- /** The delimiter string is converted with toUTF32() and the work is handed
- to the pointer-based split() overload. The container type has to be
- given as template parameter and must provide a push_back and a size
- method.
- \param ret The result container
- \param c A unicode string of delimiter characters
- \param ignoreEmptyTokens Flag to avoid empty substrings in the result
- container; forwarded unchanged.
- \param keepSeparators Flag which allows to add the separator to the
- result ustring16; forwarded unchanged.
- \return The number of resulting substrings
- \note The delimiter count passed on is the size of the array returned by
- toUTF32(), whatever that array contains.
- */
- template<class container>
- u32 split(container& ret, const ustring16<TAlloc>& c, bool ignoreEmptyTokens=true, bool keepSeparators=false) const
- {
-     core::array<uchar32_t> delimiters = c.toUTF32();
-     const uchar32_t* const first = delimiters.pointer();
-     const u32 how_many = delimiters.size();
-     return split(ret, first, how_many, ignoreEmptyTokens, keepSeparators);
- }
- //! Reports how many elements the string's buffer currently has room for.
- //! \return The value of the `allocated` member.
- u32 capacity() const
- {
-     return this->allocated;
- }
- //! Returns the raw number of UTF-16 code units in the string, counting each surrogate individually.
- //! \return The raw number of UTF-16 code units, excluding the trailing NUL.
- u32 size_raw() const
- {
-     return this->used;
- }
- //! Inserts a character into the string.
- //! \param c The character to insert.
- //! \param pos The position (character index) to insert the character.
- //! \return A reference to our current string.
- ustring16<TAlloc>& insert(uchar32_t c, u32 pos)
- {
-     // A character above U+FFFF needs a surrogate pair, i.e. two UTF-16 units.
-     const u8 len = (c > 0xFFFF ? 2 : 1);
-     // Translate the character index into a raw offset while the string is still
-     // in a consistent state (before `used` and the buffer change).
-     u32 raw;
-     {
-         iterator iter(*this, pos);
-         raw = static_cast<u32>(iter.getPos());
-     }
-     if (raw > used)
-         raw = used;
-     if (used + len >= allocated)
-         reallocate(used + len);
-     used += len;
-     // Shift the tail up by `len`, starting from the back so nothing is overwritten
-     // before it has been moved. Inside the loop i runs from used-1 down to raw+len;
-     // the pre-decrement form keeps the unsigned index from wrapping.
-     for (u32 i = used; i > raw + len; )
-     {
-         --i;
-         array[i] = array[i - len];
-     }
-     if (c > 0xFFFF)
-     {
-         // c will be multibyte, so split it up into a surrogate pair.
-         uchar16_t x = static_cast<uchar16_t>(c);
-         uchar16_t vh = UTF16_HI_SURROGATE | ((((c >> 16) & ((1 << 5) - 1)) - 1) << 6) | (x >> 10);
-         uchar16_t vl = UTF16_LO_SURROGATE | (x & ((1 << 10) - 1));
-         array[raw] = vh;
-         array[raw + 1] = vl;
-     }
-     else
-     {
-         array[raw] = static_cast<uchar16_t>(c);
-     }
-     array[used] = 0;
-     return *this;
- }
- //! Inserts a string into the string.
- //! \param c The string to insert; an empty string changes nothing.
- //! \param pos The position (character index) to insert the string.
- //! \return A reference to our current string.
- ustring16<TAlloc>& insert(const ustring16<TAlloc>& c, u32 pos)
- {
-     const u32 len = c.size_raw();
-     if (len == 0) return *this;
-     // Inserting a string into itself would read from the buffer that is being
-     // shifted; work from a copy in that case.
-     if (&c == this)
-     {
-         const ustring16<TAlloc> copy(c);
-         return insert(copy, pos);
-     }
-     // Translate the character index into a raw offset before anything changes.
-     u32 raw;
-     {
-         iterator iter(*this, pos);
-         raw = static_cast<u32>(iter.getPos());
-     }
-     if (raw > used)
-         raw = used;
-     if (used + len >= allocated)
-         reallocate(used + len);
-     used += len;
-     // Shift the tail up by `len`, from the back; inside the loop i runs from
-     // used-1 down to raw+len.
-     for (u32 i = used; i > raw + len; )
-     {
-         --i;
-         array[i] = array[i - len];
-     }
-     // Copy the new units into the gap, at the raw offset rather than the character index.
-     const uchar16_t* s = c.c_str();
-     for (u32 i = 0; i < len; ++i)
-         array[raw + i] = s[i];
-     array[used] = 0;
-     return *this;
- }
- //! Inserts a single UTF-16 code unit at a raw position.
- //! \param c The code unit to insert.
- //! \param pos Raw index (in UTF-16 code units) at which to insert; it is not range-checked.
- //! \return A reference to our current string.
- ustring16<TAlloc>& insert_raw(uchar16_t c, u32 pos)
- {
-     if (used + 1 >= allocated)
-         reallocate(used + 1);
-     ++used;
-     // Move the units behind the insertion point one slot up, last one first.
-     u32 slot = used - 1;
-     while (slot > pos)
-     {
-         array[slot] = array[slot - 1];
-         --slot;
-     }
-     array[pos] = c;
-     array[used] = 0;
-     return *this;
- }
- //! Removes a single UTF-16 code unit from the string.
- //! \param pos Raw index (in UTF-16 code units) of the unit to remove. A position at or
- //! past the end removes nothing.
- //! \return A reference to our current string.
- ustring16<TAlloc>& erase_raw(u32 pos)
- {
-     // Out-of-range positions are ignored; this also protects `used` from
-     // wrapping around when the string is empty.
-     if (pos >= used)
-         return *this;
-     // Move the tail down by one. The last iteration copies the terminator from
-     // array[used], so array[used + 1] is never read.
-     for (u32 i=pos; i<used; ++i)
-     {
-         array[i] = array[i + 1];
-     }
-     --used;
-     array[used] = 0;
-     return *this;
- }
- //! Overwrites a single UTF-16 code unit in the string.
- //! \param c The new code unit.
- //! \param pos Raw index (in UTF-16 code units) of the unit to overwrite; it is not range-checked.
- //! \return A reference to our current string.
- ustring16<TAlloc>& replace_raw(uchar16_t c, u32 pos)
- {
-     *(array + pos) = c;
-     return *this;
- }
- //! Creates an iterator positioned at the first character.
- //! \return An iterator to the beginning of the string.
- iterator begin()
- {
-     return iterator(*this, 0);
- }
- //! Creates a read-only iterator positioned at the first character.
- //! \return A const_iterator to the beginning of the string.
- const_iterator begin() const
- {
-     return const_iterator(*this, 0);
- }
- //! Creates a read-only iterator positioned at the first character.
- //! \return A const_iterator to the beginning of the string.
- const_iterator cbegin() const
- {
-     return const_iterator(*this, 0);
- }
- //! Creates an iterator and moves it to the end of the string with toEnd().
- //! \return An iterator to the end of the string.
- iterator end()
- {
-     iterator last(*this, 0);
-     last.toEnd();
-     return last;
- }
- //! Creates a read-only iterator and moves it to the end of the string with toEnd().
- //! \return A const_iterator to the end of the string.
- const_iterator end() const
- {
-     const_iterator last(*this, 0);
-     last.toEnd();
-     return last;
- }
- //! Creates a read-only iterator and moves it to the end of the string with toEnd().
- //! \return A const_iterator to the end of the string.
- const_iterator cend() const
- {
-     const_iterator last(*this, 0);
-     last.toEnd();
-     return last;
- }
- //! Encodes the string as UTF-8 and returns it as a core::string.
- //! \param addBOM If true, the UTF-8 byte-order mark is written before the text.
- //! \return A string containing the UTF-8 encoded text.
- core::string<uchar8_t> toUTF8_s(const bool addBOM = false) const
- {
-     core::string<uchar8_t> ret;
-     // Up to four bytes per stored unit, plus the optional BOM and a terminator.
-     ret.reserve(used * 4 + (addBOM ? unicode::BOM_UTF8_LEN : 0) + 1);
-     // Add the byte order mark if the user wants it.
-     if (addBOM)
-     {
-         for (u32 b = 0; b < 3; ++b)
-             ret.append(unicode::BOM_ENCODE_UTF8[b]);
-     }
-     for (const_iterator iter(*this, 0); !iter.atEnd(); ++iter)
-     {
-         const uchar32_t c = *iter;
-         if (c <= 0x7F)
-         { // 1 byte: 0xxxxxxx
-             ret.append(static_cast<uchar8_t>(c));
-         }
-         else if (c <= 0x7FF)
-         { // 2 bytes: 110xxxxx 10xxxxxx
-             const uchar8_t b1 = 0xC0 | ((c >> 6) & 0x1F);
-             const uchar8_t b2 = 0x80 | (c & 0x3F);
-             ret.append(b1);
-             ret.append(b2);
-         }
-         else if (c <= 0xFFFF)
-         { // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
-             const uchar8_t b1 = 0xE0 | ((c >> 12) & 0xF);
-             const uchar8_t b2 = 0x80 | ((c >> 6) & 0x3F);
-             const uchar8_t b3 = 0x80 | (c & 0x3F);
-             ret.append(b1);
-             ret.append(b2);
-             ret.append(b3);
-         }
-         else
-         { // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
-             const uchar8_t b1 = 0xF0 | ((c >> 18) & 0x7);
-             const uchar8_t b2 = 0x80 | ((c >> 12) & 0x3F);
-             const uchar8_t b3 = 0x80 | ((c >> 6) & 0x3F);
-             const uchar8_t b4 = 0x80 | (c & 0x3F);
-             ret.append(b1);
-             ret.append(b2);
-             ret.append(b3);
-             ret.append(b4);
-         }
-     }
-     return ret;
- }
- //! Encodes the string as UTF-8 into a byte array.
- //! \param addBOM If true, the UTF-8 byte-order mark is written before the text.
- //! \return An array holding the UTF-8 bytes followed by a terminating 0.
- core::array<uchar8_t> toUTF8(const bool addBOM = false) const
- {
-     // Up to four bytes per stored unit, plus the optional BOM and the terminator.
-     core::array<uchar8_t> ret(used * 4 + (addBOM ? unicode::BOM_UTF8_LEN : 0) + 1);
-     // Add the byte order mark if the user wants it.
-     if (addBOM)
-     {
-         for (u32 b = 0; b < 3; ++b)
-             ret.push_back(unicode::BOM_ENCODE_UTF8[b]);
-     }
-     for (const_iterator iter(*this, 0); !iter.atEnd(); ++iter)
-     {
-         const uchar32_t c = *iter;
-         if (c <= 0x7F)
-         { // 1 byte: 0xxxxxxx
-             ret.push_back(static_cast<uchar8_t>(c));
-         }
-         else if (c <= 0x7FF)
-         { // 2 bytes: 110xxxxx 10xxxxxx
-             const uchar8_t b1 = 0xC0 | ((c >> 6) & 0x1F);
-             const uchar8_t b2 = 0x80 | (c & 0x3F);
-             ret.push_back(b1);
-             ret.push_back(b2);
-         }
-         else if (c <= 0xFFFF)
-         { // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
-             const uchar8_t b1 = 0xE0 | ((c >> 12) & 0xF);
-             const uchar8_t b2 = 0x80 | ((c >> 6) & 0x3F);
-             const uchar8_t b3 = 0x80 | (c & 0x3F);
-             ret.push_back(b1);
-             ret.push_back(b2);
-             ret.push_back(b3);
-         }
-         else
-         { // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
-             const uchar8_t b1 = 0xF0 | ((c >> 18) & 0x7);
-             const uchar8_t b2 = 0x80 | ((c >> 12) & 0x3F);
-             const uchar8_t b3 = 0x80 | ((c >> 6) & 0x3F);
-             const uchar8_t b4 = 0x80 | (c & 0x3F);
-             ret.push_back(b1);
-             ret.push_back(b2);
-             ret.push_back(b3);
-             ret.push_back(b4);
-         }
-     }
-     ret.push_back(0);
-     return ret;
- }
- #ifdef USTRING_CPP0X_NEWLITERALS // C++0x
- //! Converts the string to a UTF-16 encoded string.
- //! \param endian The desired endianness of the string.
- //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
- //! \return A string containing the UTF-16 encoded string.
- core::string<char16_t> toUTF16_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
- {
-     core::string<char16_t> ret;
-     ret.reserve(used + (addBOM ? unicode::BOM_UTF16_LEN : 0) + 1);
-     // Add the BOM if specified. It is appended as a real character: writing it
-     // through ret[0] on a still-empty string would not make it part of the string,
-     // and the following appends would overwrite it.
-     if (addBOM)
-     {
-         char16_t bom = unicode::BOM;
-         if (endian != unicode::EUTFEE_NATIVE)
-         {
-             // Lay the mark out byte by byte in the requested order.
-             uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(&bom);
-             if (endian == unicode::EUTFEE_LITTLE)
-             {
-                 ptr8[0] = unicode::BOM_ENCODE_UTF16_LE[0];
-                 ptr8[1] = unicode::BOM_ENCODE_UTF16_LE[1];
-             }
-             else
-             {
-                 ptr8[0] = unicode::BOM_ENCODE_UTF16_BE[0];
-                 ptr8[1] = unicode::BOM_ENCODE_UTF16_BE[1];
-             }
-         }
-         ret.append(bom);
-     }
-     // Append the text unit by unit, swapping bytes when the requested byte order
-     // differs from the stored one. The BOM above is already in its final order
-     // and is not swapped, as in toUTF16().
-     const bool swapBytes = (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian);
-     for (u32 i = 0; i < used; ++i)
-     {
-         const uchar16_t unit = swapBytes ? unicode::swapEndian16(array[i]) : array[i];
-         ret.append(static_cast<char16_t>(unit));
-     }
-     return ret;
- }
- #endif
- //! Converts the string to a UTF-16 encoded string array.
- //! \param endian The desired endianness of the string.
- //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
- //! \return An array containing the UTF-16 encoded string followed by a terminating 0.
- core::array<uchar16_t> toUTF16(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
- {
-     core::array<uchar16_t> ret(used + (addBOM ? unicode::BOM_UTF16_LEN : 0) + 1);
-     uchar16_t* ptr = ret.pointer();
-     // Add the BOM if specified.
-     if (addBOM)
-     {
-         if (endian == unicode::EUTFEE_NATIVE)
-             *ptr = unicode::BOM;
-         else if (endian == unicode::EUTFEE_LITTLE)
-         {
-             uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ptr);
-             *ptr8++ = unicode::BOM_ENCODE_UTF16_LE[0];
-             *ptr8 = unicode::BOM_ENCODE_UTF16_LE[1];
-         }
-         else
-         {
-             uchar8_t* ptr8 = reinterpret_cast<uchar8_t*>(ptr);
-             *ptr8++ = unicode::BOM_ENCODE_UTF16_BE[0];
-             *ptr8 = unicode::BOM_ENCODE_UTF16_BE[1];
-         }
-         // From here on ptr addresses the first text unit, so the BOM is not byte-swapped below.
-         ++ptr;
-     }
-     memcpy((void*)ptr, (void*)array, used * sizeof(uchar16_t));
-     if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
-     {
-         // Swap exactly the `used` units that were copied. ptr[used] has not been
-         // written yet (the terminator is pushed below), so it must not be read.
-         for (u32 i = 0; i < used; ++i)
-             ptr[i] = unicode::swapEndian16(ptr[i]);
-     }
-     // The data was written through the raw pointer; tell the array how much is in use.
-     ret.set_used(used + (addBOM ? unicode::BOM_UTF16_LEN : 0));
-     ret.push_back(0);
-     return ret;
- }
- #ifdef USTRING_CPP0X_NEWLITERALS // C++0x
- //! Produces a UTF-32 encoded core::string from this string.
- //! \param endian The desired endianness of the result.
- //! \param addBOM If true, a byte-order mark matching `endian` is placed before the text.
- //! \return A string containing the UTF-32 encoded text.
- core::string<char32_t> toUTF32_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
- {
-     core::string<char32_t> ret;
-     ret.reserve(size() + 1 + (addBOM ? unicode::BOM_UTF32_LEN : 0));
-     // Add the BOM if specified.
-     if (addBOM)
-     {
-         if (endian == unicode::EUTFEE_NATIVE)
-         {
-             ret.append(unicode::BOM);
-         }
-         else
-         {
-             // Assemble the mark byte by byte in the requested order.
-             union
-             {
-                 uchar32_t full;
-                 u8 chunk[4];
-             } t;
-             const bool little = (endian == unicode::EUTFEE_LITTLE);
-             for (u32 k = 0; k < 4; ++k)
-                 t.chunk[k] = little ? unicode::BOM_ENCODE_UTF32_LE[k] : unicode::BOM_ENCODE_UTF32_BE[k];
-             ret.append(t.full);
-         }
-     }
-     // Bytes are swapped only when a specific order is requested and it differs from ours.
-     const bool swapBytes = (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian);
-     for (const_iterator iter(*this, 0); !iter.atEnd(); ++iter)
-     {
-         uchar32_t c = *iter;
-         if (swapBytes)
-             c = unicode::swapEndian32(c);
-         ret.append(c);
-     }
-     return ret;
- }
- #endif
- //! Converts the string to a UTF-32 encoded string array.
- //! \param endian The desired endianness of the string.
- //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
- //! \return An array containing the UTF-32 encoded string followed by a terminating 0.
- core::array<uchar32_t> toUTF32(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
- {
-     core::array<uchar32_t> ret(size() + (addBOM ? unicode::BOM_UTF32_LEN : 0) + 1);
-     const_iterator iter(*this, 0);
-     // Add the BOM if specified.
-     if (addBOM)
-     {
-         if (endian == unicode::EUTFEE_NATIVE)
-             ret.push_back(unicode::BOM);
-         else
-         {
-             // Assemble the mark byte by byte in the requested order.
-             union
-             {
-                 uchar32_t full;
-                 u8 chunk[4];
-             } t;
-             if (endian == unicode::EUTFEE_LITTLE)
-             {
-                 t.chunk[0] = unicode::BOM_ENCODE_UTF32_LE[0];
-                 t.chunk[1] = unicode::BOM_ENCODE_UTF32_LE[1];
-                 t.chunk[2] = unicode::BOM_ENCODE_UTF32_LE[2];
-                 t.chunk[3] = unicode::BOM_ENCODE_UTF32_LE[3];
-             }
-             else
-             {
-                 t.chunk[0] = unicode::BOM_ENCODE_UTF32_BE[0];
-                 t.chunk[1] = unicode::BOM_ENCODE_UTF32_BE[1];
-                 t.chunk[2] = unicode::BOM_ENCODE_UTF32_BE[2];
-                 t.chunk[3] = unicode::BOM_ENCODE_UTF32_BE[3];
-             }
-             ret.push_back(t.full);
-         }
-     }
-     while (!iter.atEnd())
-     {
-         uchar32_t c = *iter;
-         if (endian != unicode::EUTFEE_NATIVE && getEndianness() != endian)
-             c = unicode::swapEndian32(c);
-         ret.push_back(c);
-         ++iter;
-     }
-     // The terminator goes after the text, as in toUTF8() and toUTF16(). Placing it
-     // before the characters makes the array look empty to code that reads
-     // pointer() as a 0-terminated string (toWCHAR_s() does) and leaves the real
-     // end unterminated.
-     ret.push_back(0);
-     return ret;
- }
- //! Builds a wchar_t string from this string.
- /** The conversion used depends on sizeof(wchar_t): 4 selects toUTF32(), 2 selects
- toUTF16() (or the internal buffer directly when native order without BOM is
- requested), 1 selects toUTF8(). **/
- //! \param endian The desired endianness of the string.
- //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
- //! \return A string containing the wchar_t encoded string; empty for any other wchar_t size.
- core::string<wchar_t> toWCHAR_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
- {
-     if (sizeof(wchar_t) == 4)
-     {
-         core::array<uchar32_t> a(toUTF32(endian, addBOM));
-         return core::stringw(a.pointer());
-     }
-     if (sizeof(wchar_t) == 2)
-     {
-         // Native order and no BOM: the internal buffer can be used as it is.
-         if (endian == unicode::EUTFEE_NATIVE && addBOM == false)
-             return core::stringw(array);
-         core::array<uchar16_t> a(toUTF16(endian, addBOM));
-         return core::stringw(a.pointer());
-     }
-     if (sizeof(wchar_t) == 1)
-     {
-         core::array<uchar8_t> a(toUTF8(addBOM));
-         return core::stringw(a.pointer());
-     }
-     // Shouldn't happen.
-     return core::stringw();
- }
- //! Converts the string to a wchar_t encoded string array.
- /** The size of a wchar_t changes depending on the platform. This function will store a
- correct UTF-8, -16, or -32 encoded string depending on the size of a wchar_t. **/
- //! \param endian The desired endianness of the string.
- //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
- //! \return An array containing the wchar_t encoded string, including a terminating 0.
- core::array<wchar_t> toWCHAR(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
- {
-     if (sizeof(wchar_t) == 4)
-     {
-         core::array<uchar32_t> a(toUTF32(endian, addBOM));
-         core::array<wchar_t> ret(a.size());
-         ret.set_used(a.size());
-         memcpy((void*)ret.pointer(), (void*)a.pointer(), a.size() * sizeof(uchar32_t));
-         return ret;
-     }
-     if (sizeof(wchar_t) == 2)
-     {
-         if (endian == unicode::EUTFEE_NATIVE && addBOM == false)
-         {
-             // Copy straight from the internal buffer and add the terminator, so this
-             // shortcut returns the same 0-terminated layout as the toUTF16() path below.
-             core::array<wchar_t> ret(used + 1);
-             ret.set_used(used + 1);
-             memcpy((void*)ret.pointer(), (void*)array, used * sizeof(uchar16_t));
-             ret.pointer()[used] = 0;
-             return ret;
-         }
-         else
-         {
-             core::array<uchar16_t> a(toUTF16(endian, addBOM));
-             core::array<wchar_t> ret(a.size());
-             ret.set_used(a.size());
-             memcpy((void*)ret.pointer(), (void*)a.pointer(), a.size() * sizeof(uchar16_t));
-             return ret;
-         }
-     }
-     if (sizeof(wchar_t) == 1)
-     {
-         core::array<uchar8_t> a(toUTF8(addBOM));
-         core::array<wchar_t> ret(a.size());
-         ret.set_used(a.size());
-         memcpy((void*)ret.pointer(), (void*)a.pointer(), a.size() * sizeof(uchar8_t));
-         return ret;
-     }
-     // Shouldn't happen.
-     return core::array<wchar_t>();
- }
- //! Converts the string to an io::path.
- //! Uses toWCHAR_s() when _IRR_WCHAR_FILESYSTEM is defined and toUTF8_s() otherwise.
- //! \param endian The desired endianness of the string; only used by the wchar_t variant.
- //! \param addBOM If true, the proper unicode byte-order mark will be prefixed to the string.
- //! \return An io::path string containing the converted string.
- io::path toPATH_s(const unicode::EUTF_ENDIAN endian = unicode::EUTFEE_NATIVE, const bool addBOM = false) const
- {
- #if !defined(_IRR_WCHAR_FILESYSTEM)
-     return toUTF8_s(addBOM);
- #else
-     return toWCHAR_s(endian, addBOM);
- #endif
- }
- //! Replaces the contents of this string with data of unknown encoding.
- //! The encoding is chosen from the result of unicode::determineUnicodeBOM(); anything
- //! not reported as UTF-16 or UTF-32 is appended as UTF-8. Useful for loading from files.
- //! \param data The data stream to load from; if null the string is just cleared.
- //! \param data_size The length of the data in bytes.
- //! \return A reference to our current string.
- ustring16<TAlloc>& loadDataStream(const char* data, size_t data_size)
- {
-     // Clear our string.
-     *this = "";
-     if (!data)
-         return *this;
-     switch (unicode::determineUnicodeBOM(data))
-     {
-         case unicode::EUTFE_UTF16:
-         case unicode::EUTFE_UTF16_BE:
-         case unicode::EUTFE_UTF16_LE:
-             // Two bytes per unit.
-             append((uchar16_t*)data, data_size / 2);
-             break;
-         case unicode::EUTFE_UTF32:
-         case unicode::EUTFE_UTF32_BE:
-         case unicode::EUTFE_UTF32_LE:
-             // Four bytes per unit.
-             append((uchar32_t*)data, data_size / 4);
-             break;
-         case unicode::EUTFE_UTF8:
-         default:
-             append((uchar8_t*)data, data_size);
-             break;
-     }
-     return *this;
- }
- //! Reports the encoding recorded for this string.
- //! \return The value of the `encoding` member.
- const unicode::EUTF_ENCODE getEncoding() const
- {
-     return this->encoding;
- }
- //! Reports the byte order implied by the string's encoding.
- //! \return unicode::EUTFEE_LITTLE when the encoding is EUTFE_UTF16_LE or EUTFE_UTF32_LE,
- //! unicode::EUTFEE_BIG for every other encoding.
- const unicode::EUTF_ENDIAN getEndianness() const
- {
-     const bool little = (encoding == unicode::EUTFE_UTF16_LE ||
-         encoding == unicode::EUTFE_UTF32_LE);
-     return little ? unicode::EUTFEE_LITTLE : unicode::EUTFEE_BIG;
- }
- private:
- //! Moves the string into a newly allocated buffer, which may be larger or smaller.
- //! \param new_size Number of units the new buffer must hold; one extra element is
- //! allocated for the terminator.
- void reallocate(u32 new_size)
- {
-     uchar16_t* const previous = array;
-     array = allocator.allocate(new_size + 1); //new u16[new_size];
-     allocated = new_size + 1;
-     // Without an old buffer there is nothing to copy or release.
-     if (previous == 0)
-         return;
-     // Copy as much of the old content as fits, including one trailing element.
-     const u32 amount = used < new_size ? used : new_size;
-     u32 i = 0;
-     while (i <= amount)
-     {
-         array[i] = previous[i];
-         ++i;
-     }
-     // When shrinking below the current length, cut the string to the new capacity.
-     if (allocated <= used)
-         used = allocated - 1;
-     array[used] = 0;
-     allocator.deallocate(previous); // delete [] old_array;
- }
- //--- member variables
- uchar16_t* array; // buffer of UTF-16 code units; the methods keep array[used] set to 0
- unicode::EUTF_ENCODE encoding; // value returned by getEncoding() and examined by getEndianness()
- u32 allocated; // number of elements allocated for array (set in reallocate())
- u32 used; // number of code units in use, excluding the terminating 0 (returned by size_raw())
- TAlloc allocator; // allocates and releases array in reallocate()
- //irrAllocator<uchar16_t> allocator;
- };
- typedef ustring16<irrAllocator<uchar16_t> > ustring; // ustring16 instantiated with irrAllocator<uchar16_t>
- //! Concatenates two ustring16s into a new string.
- //! \return A copy of left with right appended.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const ustring16<TAlloc>& right)
- {
-     ustring16<TAlloc> joined(left);
-     joined += right;
-     return joined;
- }
- //! Concatenates a ustring16 with character data given by pointer.
- //! \param left The string that forms the beginning of the result.
- //! \param right Pointer to the characters to add; no length is passed, so it is presumably null-terminated.
- //! \return A new ustring16; \p left is not modified.
- template <typename TAlloc, class B>
- inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const B* const right)
- {
-     ustring16<TAlloc> joined(left);
-     joined += right;
-     return joined;
- }
- //! Concatenates character data given by pointer with a ustring16.
- //! \param left Pointer to the leading characters; no length is passed, so it is presumably null-terminated.
- //! \param right The string added after the converted \p left.
- //! \return A new ustring16 constructed from \p left and extended with \p right.
- template <class B, typename TAlloc>
- inline ustring16<TAlloc> operator+(const B* const left, const ustring16<TAlloc>& right)
- {
-     ustring16<TAlloc> joined(left);
-     joined += right;
-     return joined;
- }
- //! Concatenates a ustring16 with an Irrlicht string.
- //! \param left The string that forms the beginning of the result.
- //! \param right The Irrlicht string added with operator+=.
- //! \return A new ustring16; \p left is not modified.
- template <typename TAlloc, typename B, typename BAlloc>
- inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const string<B, BAlloc>& right)
- {
-     ustring16<TAlloc> joined(left);
-     joined += right;
-     return joined;
- }
- //! Concatenates an Irrlicht string with a ustring16.
- //! \param left The Irrlicht string the result is constructed from.
- //! \param right The string added after the converted \p left.
- //! \return A new ustring16; neither operand is modified.
- template <typename TAlloc, typename B, typename BAlloc>
- inline ustring16<TAlloc> operator+(const string<B, BAlloc>& left, const ustring16<TAlloc>& right)
- {
-     ustring16<TAlloc> joined(left);
-     joined += right;
-     return joined;
- }
- //! Concatenates a ustring16 with a std::basic_string.
- //! \param left The string that forms the beginning of the result.
- //! \param right The standard string added with operator+=.
- //! \return A new ustring16; \p left is not modified.
- template <typename TAlloc, typename B, typename A, typename BAlloc>
- inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const std::basic_string<B, A, BAlloc>& right)
- {
-     ustring16<TAlloc> joined(left);
-     joined += right;
-     return joined;
- }
- //! Concatenates a std::basic_string with a ustring16.
- //! \param left The standard string the result is constructed from.
- //! \param right The string added after the converted \p left.
- //! \return A new ustring16; neither operand is modified.
- template <typename TAlloc, typename B, typename A, typename BAlloc>
- inline ustring16<TAlloc> operator+(const std::basic_string<B, A, BAlloc>& left, const ustring16<TAlloc>& right)
- {
-     ustring16<TAlloc> joined(left);
-     joined += right;
-     return joined;
- }
- //! Concatenates a ustring16 with a single char.
- //! \param left The string that forms the beginning of the result.
- //! \param right The character added with operator+=.
- //! \return A new ustring16; \p left is not modified.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const char right)
- {
-     ustring16<TAlloc> joined(left);
-     joined += right;
-     return joined;
- }
- //! Concatenates a single char with a ustring16.
- //! \param left The character the result is constructed from.
- //! \param right The string added after \p left.
- //! \return A new ustring16; \p right is not modified.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(const char left, const ustring16<TAlloc>& right)
- {
-     ustring16<TAlloc> joined(left);
-     joined += right;
-     return joined;
- }
- #ifdef USTRING_CPP0X_NEWLITERALS
- //! Concatenates a ustring16 with a single uchar32_t value.
- //! \param left The string that forms the beginning of the result.
- //! \param right The uchar32_t value added with operator+=.
- //! \return A new ustring16; \p left is not modified.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const uchar32_t right)
- {
-     ustring16<TAlloc> joined(left);
-     joined += right;
-     return joined;
- }
- //! Concatenates a single uchar32_t value with a ustring16.
- //! \param left The uchar32_t value the result is constructed from.
- //! \param right The string added after \p left.
- //! \return A new ustring16; \p right is not modified.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(const uchar32_t left, const ustring16<TAlloc>& right)
- {
-     ustring16<TAlloc> joined(left);
-     joined += right;
-     return joined;
- }
- #endif
- //! Concatenates a ustring16 with the text form of a short.
- //! \param left The string that forms the beginning of the result.
- //! \param right The number; it is converted through core::stringc before being added.
- //! \return A new ustring16; \p left is not modified.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const short right)
- {
-     ustring16<TAlloc> joined(left);
-     const core::stringc text(right);
-     joined += text;
-     return joined;
- }
- //! Concatenates the text form of a short with a ustring16.
- //! \param left The number; it is converted through core::stringc to start the result.
- //! \param right The string added after the converted number.
- //! \return A new ustring16; \p right is not modified.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(const short left, const ustring16<TAlloc>& right)
- {
-     const core::stringc text(left);
-     ustring16<TAlloc> joined(text);
-     joined += right;
-     return joined;
- }
- //! Concatenates a ustring16 with the text form of an unsigned short.
- //! \param left The string that forms the beginning of the result.
- //! \param right The number; it is converted through core::stringc before being added.
- //! \return A new ustring16; \p left is not modified.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const unsigned short right)
- {
-     ustring16<TAlloc> joined(left);
-     const core::stringc text(right);
-     joined += text;
-     return joined;
- }
- //! Concatenates the text form of an unsigned short with a ustring16.
- //! \param left The number; it is converted through core::stringc to start the result.
- //! \param right The string added after the converted number.
- //! \return A new ustring16; \p right is not modified.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(const unsigned short left, const ustring16<TAlloc>& right)
- {
-     const core::stringc text(left);
-     ustring16<TAlloc> joined(text);
-     joined += right;
-     return joined;
- }
- //! Concatenates a ustring16 with the text form of an int.
- //! \param left The string that forms the beginning of the result.
- //! \param right The number; it is converted through core::stringc before being added.
- //! \return A new ustring16; \p left is not modified.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const int right)
- {
-     ustring16<TAlloc> joined(left);
-     const core::stringc text(right);
-     joined += text;
-     return joined;
- }
- //! Concatenates the text form of an int with a ustring16.
- //! \param left The number; it is converted through core::stringc to start the result.
- //! \param right The string added after the converted number.
- //! \return A new ustring16; \p right is not modified.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(const int left, const ustring16<TAlloc>& right)
- {
-     const core::stringc text(left);
-     ustring16<TAlloc> joined(text);
-     joined += right;
-     return joined;
- }
- //! Concatenates a ustring16 with the text form of an unsigned int.
- //! \param left The string that forms the beginning of the result.
- //! \param right The number; it is converted through core::stringc before being added.
- //! \return A new ustring16; \p left is not modified.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const unsigned int right)
- {
-     ustring16<TAlloc> joined(left);
-     const core::stringc text(right);
-     joined += text;
-     return joined;
- }
- //! Concatenates the text form of an unsigned int with a ustring16.
- //! \param left The number; it is converted through core::stringc to start the result.
- //! \param right The string added after the converted number.
- //! \return A new ustring16; \p right is not modified.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(const unsigned int left, const ustring16<TAlloc>& right)
- {
-     const core::stringc text(left);
-     ustring16<TAlloc> joined(text);
-     joined += right;
-     return joined;
- }
- //! Concatenates a ustring16 with the text form of a long.
- //! \param left The string that forms the beginning of the result.
- //! \param right The number; it is converted through core::stringc before being added.
- //! \return A new ustring16; \p left is not modified.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const long right)
- {
-     ustring16<TAlloc> joined(left);
-     const core::stringc text(right);
-     joined += text;
-     return joined;
- }
- //! Concatenates the text form of a long with a ustring16.
- //! \param left The number; it is converted through core::stringc to start the result.
- //! \param right The string added after the converted number.
- //! \return A new ustring16; \p right is not modified.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(const long left, const ustring16<TAlloc>& right)
- {
-     const core::stringc text(left);
-     ustring16<TAlloc> joined(text);
-     joined += right;
-     return joined;
- }
- //! Concatenates a ustring16 with the text form of an unsigned long.
- //! \param left The string that forms the beginning of the result.
- //! \param right The number; it is converted through core::stringc before being added.
- //! \return A new ustring16; \p left is not modified.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const unsigned long right)
- {
-     ustring16<TAlloc> joined(left);
-     const core::stringc text(right);
-     joined += text;
-     return joined;
- }
- //! Concatenates the text form of an unsigned long with a ustring16.
- //! \param left The number; it is converted through core::stringc to start the result.
- //! \param right The string added after the converted number.
- //! \return A new ustring16; \p right is not modified.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(const unsigned long left, const ustring16<TAlloc>& right)
- {
-     const core::stringc text(left);
-     ustring16<TAlloc> joined(text);
-     joined += right;
-     return joined;
- }
- //! Concatenates a ustring16 with the text form of a float.
- //! \param left The string that forms the beginning of the result.
- //! \param right The number; it is converted through core::stringc before being added.
- //! \return A new ustring16; \p left is not modified.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const float right)
- {
-     ustring16<TAlloc> joined(left);
-     const core::stringc text(right);
-     joined += text;
-     return joined;
- }
- //! Concatenates the text form of a float with a ustring16.
- //! \param left The number; it is converted through core::stringc to start the result.
- //! \param right The string added after the converted number.
- //! \return A new ustring16; \p right is not modified.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(const float left, const ustring16<TAlloc>& right)
- {
-     const core::stringc text(left);
-     ustring16<TAlloc> joined(text);
-     joined += right;
-     return joined;
- }
- //! Concatenates a ustring16 with the text form of a double.
- //! \param left The string that forms the beginning of the result.
- //! \param right The number; it is converted through core::stringc before being added.
- //! \return A new ustring16; \p left is not modified.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, const double right)
- {
-     ustring16<TAlloc> joined(left);
-     const core::stringc text(right);
-     joined += text;
-     return joined;
- }
- //! Concatenates the text form of a double with a ustring16.
- //! \param left The number; it is converted through core::stringc to start the result.
- //! \param right The string added after the converted number.
- //! \return A new ustring16; \p right is not modified.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(const double left, const ustring16<TAlloc>& right)
- {
-     const core::stringc text(left);
-     ustring16<TAlloc> joined(text);
-     joined += right;
-     return joined;
- }
- #ifdef USTRING_CPP0X
- //! Concatenates a ustring16 with an expiring ustring16, building the result inside \p right.
- //! \param left The string placed at the front of the result; it is not modified.
- //! \param right Expiring string; \p left is inserted at position 0 and the result is moved out of it.
- //! \return The combined string, by value. An rvalue reference is deliberately not returned:
- //! it would refer to the caller's temporary and dangle as soon as the result is bound to a reference.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(const ustring16<TAlloc>& left, ustring16<TAlloc>&& right)
- {
-     // Prepend in place instead of building a fresh copy.
-     right.insert(left, 0);
-     return std::move(right);
- }
- //! Concatenates an expiring ustring16 with a ustring16, building the result inside \p left.
- //! \param left Expiring string; \p right is appended to it and the result is moved out of it.
- //! \param right The string placed at the end of the result; it is not modified.
- //! \return The combined string, by value. An rvalue reference is deliberately not returned:
- //! it would refer to the caller's temporary and dangle as soon as the result is bound to a reference.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const ustring16<TAlloc>& right)
- {
-     // Extend in place instead of building a fresh copy.
-     left.append(right);
-     return std::move(left);
- }
- //! Concatenates two expiring ustring16 values, building the result inside one of them.
- //! The result is built in \p left when \p right fits into left's spare room
- //! (capacity() - size_raw()), or when right's spare room is smaller than left's raw size;
- //! otherwise \p left is inserted at the front of \p right.
- //! \param left Expiring string that supplies the beginning of the result.
- //! \param right Expiring string that supplies the end of the result.
- //! \return The combined string, by value. An rvalue reference is deliberately not returned:
- //! it would refer to one of the caller's temporaries and dangle once bound to a reference.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, ustring16<TAlloc>&& right)
- {
-     const bool build_in_left =
-         (right.size_raw() <= left.capacity() - left.size_raw()) ||
-         (right.capacity() - right.size_raw() < left.size_raw());
-     if (build_in_left)
-     {
-         left.append(right);
-         return std::move(left);
-     }
-     right.insert(left, 0);
-     return std::move(right);
- }
- //! Concatenates an expiring ustring16 with character data given by pointer.
- //! \param left Expiring string; \p right is appended to it and the result is moved out of it.
- //! \param right Pointer to the characters to add; no length is passed, so it is presumably null-terminated.
- //! \return The combined string, by value. An rvalue reference is deliberately not returned:
- //! it would refer to the caller's temporary and dangle as soon as the result is bound to a reference.
- template <typename TAlloc, class B>
- inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const B* const right)
- {
-     left.append(right);
-     return std::move(left);
- }
- //! Concatenates character data given by pointer with an expiring ustring16.
- //! \param left Pointer to the leading characters; no length is passed, so it is presumably null-terminated.
- //! \param right Expiring string; \p left is inserted at position 0 and the result is moved out of it.
- //! \return The combined string, by value. An rvalue reference is deliberately not returned:
- //! it would refer to the caller's temporary and dangle as soon as the result is bound to a reference.
- template <class B, typename TAlloc>
- inline ustring16<TAlloc> operator+(const B* const left, ustring16<TAlloc>&& right)
- {
-     right.insert(left, 0);
-     return std::move(right);
- }
- //! Concatenates an Irrlicht string with an expiring ustring16.
- //! \param left The Irrlicht string placed at the front of the result; it is not modified.
- //! \param right Expiring string; \p left is inserted at position 0 and the result is moved out of it.
- //! \return The combined string, by value. An rvalue reference is deliberately not returned:
- //! it would refer to the caller's temporary and dangle as soon as the result is bound to a reference.
- template <typename TAlloc, typename B, typename BAlloc>
- inline ustring16<TAlloc> operator+(const string<B, BAlloc>& left, ustring16<TAlloc>&& right)
- {
-     right.insert(left, 0);
-     return std::move(right);
- }
- //! Concatenates an expiring ustring16 with an Irrlicht string.
- //! \param left Expiring string; \p right is appended to it and the result is moved out of it.
- //! \param right The Irrlicht string placed at the end of the result; it is not modified.
- //! \return The combined string, by value. An rvalue reference is deliberately not returned:
- //! it would refer to the caller's temporary and dangle as soon as the result is bound to a reference.
- template <typename TAlloc, typename B, typename BAlloc>
- inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const string<B, BAlloc>& right)
- {
-     left.append(right);
-     return std::move(left);
- }
- //! Concatenates a std::basic_string with an expiring ustring16.
- //! \param left The standard string; it is converted to a ustring16 and placed at the front of the result.
- //! \param right Expiring string; the converted \p left is inserted at position 0 and the result is moved out of it.
- //! \return The combined string, by value. An rvalue reference is deliberately not returned:
- //! it would refer to the caller's temporary and dangle as soon as the result is bound to a reference.
- template <typename TAlloc, typename B, typename A, typename BAlloc>
- inline ustring16<TAlloc> operator+(const std::basic_string<B, A, BAlloc>& left, ustring16<TAlloc>&& right)
- {
-     right.insert(core::ustring16<TAlloc>(left), 0);
-     return std::move(right);
- }
- //! Concatenates an expiring ustring16 with a std::basic_string.
- //! \param left Expiring string; \p right is appended to it and the result is moved out of it.
- //! \param right The standard string placed at the end of the result; it is not modified.
- //! \return The combined string, by value. An rvalue reference is deliberately not returned:
- //! it would refer to the caller's temporary and dangle as soon as the result is bound to a reference.
- template <typename TAlloc, typename B, typename A, typename BAlloc>
- inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const std::basic_string<B, A, BAlloc>& right)
- {
-     left.append(right);
-     return std::move(left);
- }
- //! Concatenates an expiring ustring16 with a single char.
- //! \param left Expiring string; the character is appended to it and the result is moved out of it.
- //! \param right The character, converted to uchar32_t before being appended.
- //! \return The combined string, move-constructed from \p left.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const char right)
- {
-     const uchar32_t code = static_cast<uchar32_t>(right);
-     left.append(code);
-     return std::move(left);
- }
- //! Concatenates a single char with an expiring ustring16.
- //! \param left The character, converted to uchar32_t before being inserted.
- //! \param right Expiring string; the character is inserted at position 0 and the result is moved out of it.
- //! \return The combined string, move-constructed from \p right.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(const char left, ustring16<TAlloc>&& right)
- {
-     const uchar32_t code = static_cast<uchar32_t>(left);
-     right.insert(code, 0);
-     return std::move(right);
- }
- #ifdef USTRING_CPP0X_NEWLITERALS
- //! Concatenates an expiring ustring16 with a single uchar32_t value.
- //! \param left Expiring string; the value is appended to it and the result is moved out of it.
- //! \param right The uchar32_t value to append.
- //! \return The combined string, move-constructed from \p left.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const uchar32_t right)
- {
-     ustring16<TAlloc>& target = left;
-     target.append(right);
-     return std::move(target);
- }
- //! Concatenates a single uchar32_t value with an expiring ustring16.
- //! \param left The uchar32_t value to place at the front.
- //! \param right Expiring string; the value is inserted at position 0 and the result is moved out of it.
- //! \return The combined string, move-constructed from \p right.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(const uchar32_t left, ustring16<TAlloc>&& right)
- {
-     ustring16<TAlloc>& target = right;
-     target.insert(left, 0);
-     return std::move(target);
- }
- #endif
- //! Concatenates an expiring ustring16 with the text form of a short.
- //! \param left Expiring string; the converted number is appended to it and the result is moved out of it.
- //! \param right The number; it is converted through core::stringc.
- //! \return The combined string, move-constructed from \p left.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const short right)
- {
-     const core::stringc text(right);
-     left.append(text);
-     return std::move(left);
- }
- //! Concatenates the text form of a short with an expiring ustring16.
- //! \param left The number; it is converted through core::stringc.
- //! \param right Expiring string; the converted number is inserted at position 0 and the result is moved out of it.
- //! \return The combined string, move-constructed from \p right.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(const short left, ustring16<TAlloc>&& right)
- {
-     const core::stringc text(left);
-     right.insert(text, 0);
-     return std::move(right);
- }
- //! Concatenates an expiring ustring16 with the text form of an unsigned short.
- //! \param left Expiring string; the converted number is appended to it and the result is moved out of it.
- //! \param right The number; it is converted through core::stringc.
- //! \return The combined string, move-constructed from \p left.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const unsigned short right)
- {
-     const core::stringc text(right);
-     left.append(text);
-     return std::move(left);
- }
- //! Concatenates the text form of an unsigned short with an expiring ustring16.
- //! \param left The number; it is converted through core::stringc.
- //! \param right Expiring string; the converted number is inserted at position 0 and the result is moved out of it.
- //! \return The combined string, move-constructed from \p right.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(const unsigned short left, ustring16<TAlloc>&& right)
- {
-     const core::stringc text(left);
-     right.insert(text, 0);
-     return std::move(right);
- }
- //! Concatenates an expiring ustring16 with the text form of an int.
- //! \param left Expiring string; the converted number is appended to it and the result is moved out of it.
- //! \param right The number; it is converted through core::stringc.
- //! \return The combined string, move-constructed from \p left.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const int right)
- {
-     const core::stringc text(right);
-     left.append(text);
-     return std::move(left);
- }
- //! Concatenates the text form of an int with an expiring ustring16.
- //! \param left The number; it is converted through core::stringc.
- //! \param right Expiring string; the converted number is inserted at position 0 and the result is moved out of it.
- //! \return The combined string, move-constructed from \p right.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(const int left, ustring16<TAlloc>&& right)
- {
-     const core::stringc text(left);
-     right.insert(text, 0);
-     return std::move(right);
- }
- //! Concatenates an expiring ustring16 with the text form of an unsigned int.
- //! \param left Expiring string; the converted number is appended to it and the result is moved out of it.
- //! \param right The number; it is converted through core::stringc.
- //! \return The combined string, move-constructed from \p left.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const unsigned int right)
- {
-     const core::stringc text(right);
-     left.append(text);
-     return std::move(left);
- }
- //! Concatenates the text form of an unsigned int with an expiring ustring16.
- //! \param left The number; it is converted through core::stringc.
- //! \param right Expiring string; the converted number is inserted at position 0 and the result is moved out of it.
- //! \return The combined string, move-constructed from \p right.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(const unsigned int left, ustring16<TAlloc>&& right)
- {
-     const core::stringc text(left);
-     right.insert(text, 0);
-     return std::move(right);
- }
- //! Concatenates an expiring ustring16 with the text form of a long.
- //! \param left Expiring string; the converted number is appended to it and the result is moved out of it.
- //! \param right The number; it is converted through core::stringc.
- //! \return The combined string, move-constructed from \p left.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const long right)
- {
-     const core::stringc text(right);
-     left.append(text);
-     return std::move(left);
- }
- //! Concatenates the text form of a long with an expiring ustring16.
- //! \param left The number; it is converted through core::stringc.
- //! \param right Expiring string; the converted number is inserted at position 0 and the result is moved out of it.
- //! \return The combined string, move-constructed from \p right.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(const long left, ustring16<TAlloc>&& right)
- {
-     const core::stringc text(left);
-     right.insert(text, 0);
-     return std::move(right);
- }
- //! Concatenates an expiring ustring16 with the text form of an unsigned long.
- //! \param left Expiring string; the converted number is appended to it and the result is moved out of it.
- //! \param right The number; it is converted through core::stringc.
- //! \return The combined string, move-constructed from \p left.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const unsigned long right)
- {
-     const core::stringc text(right);
-     left.append(text);
-     return std::move(left);
- }
- //! Concatenates the text form of an unsigned long with an expiring ustring16.
- //! \param left The number; it is converted through core::stringc.
- //! \param right Expiring string; the converted number is inserted at position 0 and the result is moved out of it.
- //! \return The combined string, move-constructed from \p right.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(const unsigned long left, ustring16<TAlloc>&& right)
- {
-     const core::stringc text(left);
-     right.insert(text, 0);
-     return std::move(right);
- }
- //! Concatenates an expiring ustring16 with the text form of a float.
- //! \param left Expiring string; the converted number is appended to it and the result is moved out of it.
- //! \param right The number; it is converted through core::stringc.
- //! \return The combined string, move-constructed from \p left.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const float right)
- {
-     const core::stringc text(right);
-     left.append(text);
-     return std::move(left);
- }
- //! Concatenates the text form of a float with an expiring ustring16.
- //! \param left The number; it is converted through core::stringc.
- //! \param right Expiring string; the converted number is inserted at position 0 and the result is moved out of it.
- //! \return The combined string, move-constructed from \p right.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(const float left, ustring16<TAlloc>&& right)
- {
-     const core::stringc text(left);
-     right.insert(text, 0);
-     return std::move(right);
- }
- //! Concatenates an expiring ustring16 with the text form of a double.
- //! \param left Expiring string; the converted number is appended to it and the result is moved out of it.
- //! \param right The number; it is converted through core::stringc.
- //! \return The combined string, move-constructed from \p left.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(ustring16<TAlloc>&& left, const double right)
- {
-     const core::stringc text(right);
-     left.append(text);
-     return std::move(left);
- }
- //! Concatenates the text form of a double with an expiring ustring16.
- //! \param left The number; it is converted through core::stringc.
- //! \param right Expiring string; the converted number is inserted at position 0 and the result is moved out of it.
- //! \return The combined string, move-constructed from \p right.
- template <typename TAlloc>
- inline ustring16<TAlloc> operator+(const double left, ustring16<TAlloc>&& right)
- {
-     const core::stringc text(left);
-     right.insert(text, 0);
-     return std::move(right);
- }
- #endif
- #ifndef USTRING_NO_STL
- //! Streams a ustring16 to a narrow output stream.
- //! \param out The destination stream.
- //! \param in The string to write; the characters obtained from toUTF8_s().c_str() are inserted.
- //! \return \p out, so insertions can be chained.
- template <typename TAlloc>
- inline std::ostream& operator<<(std::ostream& out, const ustring16<TAlloc>& in)
- {
-     return out << in.toUTF8_s().c_str();
- }
- //! Streams a ustring16 to a wide output stream.
- //! \param out The destination stream.
- //! \param in The string to write; the characters obtained from toWCHAR_s().c_str() are inserted.
- //! \return \p out, so insertions can be chained.
- template <typename TAlloc>
- inline std::wostream& operator<<(std::wostream& out, const ustring16<TAlloc>& in)
- {
-     return out << in.toWCHAR_s().c_str();
- }
- #endif
- #ifndef USTRING_NO_STL
- namespace unicode
- {
- //! Hash functor for core::ustring, intended for containers such as unordered_map.
- //! The original note says the algorithm was taken from std::hash<std::string>. It starts from
- //! 2166136261 and, per sampled character, multiplies by 16777619 and then xors the character in
- //! (these are the 32-bit FNV offset basis and prime).
- //! Only every (1 + size_raw() / 10)-th character is sampled, so long strings are not hashed in full.
- //! The argument_type / result_type typedefs are declared directly rather than inherited from
- //! std::unary_function, which was deprecated in C++11 and removed in C++17.
- class hash
- {
- public:
-     typedef core::ustring argument_type;
-     typedef size_t result_type;
-     //! Computes the hash of a string.
-     //! \param s The string to hash.
-     //! \return The hash value; 2166136261 when the loop body never runs (begin() == end()).
-     size_t operator()(const core::ustring& s) const
-     {
-         size_t ret = 2166136261U;
-         size_t index = 0;
-         const size_t stride = 1 + s.size_raw() / 10;
-         // The iterator only decides when to stop; the character itself is read by index.
-         core::ustring::const_iterator i = s.begin();
-         while (i != s.end())
-         {
-             // TODO: Don't force u32 on an x64 OS. Make it agnostic.
-             ret = (16777619U * ret) ^ (size_t)s[(u32)index];
-             index += stride;
-             i += stride;
-         }
-         return ret;
-     }
- };
- } // end namespace unicode
- #endif
- } // end namespace core
- } // end namespace irr
|