00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #include <vlCore/String.hpp>
00033 #include <vlCore/String_Tables.hpp>
00034 #include <vlCore/Log.hpp>
00035 #include <vlCore/Say.hpp>
00036 #include <vlCore/FileSystem.hpp>
00037 #include <vlCore/VirtualFile.hpp>
00038 #include <vlCore/VisualizationLibrary.hpp>
00039 #include <stdio.h>
00040
00041 using namespace vl;
00042
00043
00044
00045
00046 String::String()
00047 {
00048 #if VL_STRING_COPY_ON_WRITE == 0
00049 acquireData();
00050 #endif
00051 }
00052
00053 String::String(const String& other)
00054 {
00055 operator=(other);
00056 }
00057
00058 String::String(const wchar_t* wstr)
00059 {
00060 #if VL_STRING_COPY_ON_WRITE == 0
00061 acquireData();
00062 #endif
00063 if (wstr)
00064 *this = wstr;
00065 }
00066
00067 String::String(const char* str)
00068 {
00069 #if VL_STRING_COPY_ON_WRITE == 0
00070 acquireData();
00071 #endif
00072 if (str)
00073 *this = str;
00074 }
00075
00076 String::String(wchar_t ch, int count)
00077 {
00078 #if VL_STRING_COPY_ON_WRITE == 0
00079 acquireData();
00080 #endif
00081 for(int i=0; i<count; ++i)
00082 *this += ch;
00083 }
00084
00085 String String::loadText(const String& path, EStringEncoding default_encoding)
00086 {
00087 ref<VirtualFile> file = defFileSystem()->locateFile(path);
00088 if (file)
00089 return loadText( file.get(), default_encoding );
00090 else
00091 {
00092 Log::error( Say("Could not locate '%s'.\n") << path );
00093 return String();
00094 }
00095 }
00096
00097 String String::loadText(VirtualFile* file, EStringEncoding default_encoding)
00098 {
00099 std::vector<unsigned char> buffer;
00100 file->load( buffer );
00101 file->close();
00102
00103 if ( buffer.size() )
00104 {
00105 return loadText( &buffer[0], (int)buffer.size(), default_encoding );
00106 }
00107 else
00108 {
00109 return String();
00110 }
00111 }
00112
00113 String String::loadText(void* data, int bytes, EStringEncoding default_encoding )
00114 {
00115 EStringEncoding enc = detectEncoding( data, bytes, default_encoding );
00116 String text;
00117 switch(enc)
00118 {
00119 case SE_ASCII:
00120 return fromAscii((char*)data, bytes);
00121 break;
00122 case SE_LATIN1:
00123 return fromLatin1((char*)data, bytes);
00124 break;
00125 case SE_UTF8:
00126 return fromUTF8((char*)data, bytes);
00127 break;
00128 case SE_UTF16_BE:
00129 return fromUTF16BE((unsigned short*)data, bytes );
00130 break;
00131 case SE_UTF16_LE:
00132 return fromUTF16LE((unsigned short*)data, bytes );
00133 break;
00134 case SE_Unknown:
00135 Log::error("String::loadText() unknown encoding.\n");
00136 break;
00137 case SE_UTF32_BE:
00138 case SE_UTF32_LE:
00139 Log::error("String::loadText() SE_UTF32_BE/SE_UTF32_LE encoding not supported.\n");
00140 break;
00141 }
00142 return String();
00143 }
00144
00145 String& String::resize(int character_count)
00146 {
00147 acquireData();
00148 mString->resize(character_count);
00149 return *this;
00150 }
00151
00152 String String::substring(int start, int count) const
00153 {
00154
00155 if ( empty() )
00156 return String();
00157
00158 if (start<0)
00159 start = 0;
00160 if (count<0)
00161 count = length();
00162 int end_idx = start+count-1;
00163 if (end_idx > length()-1 )
00164 end_idx = length()-1;
00165
00166 String str;
00167 str.acquireData();
00168 int sz = end_idx - start + 1;
00169 sz = sz < 0 ? 0 : sz;
00170 str.mString->resize( sz );
00171 for(int i=0; i<(int)str.mString->length(); ++i)
00172 (*str.mString)[i] = (*mString)[start+i];
00173 return str;
00174 }
00175
00176 int String::findBackwards(wchar_t ch) const
00177 {
00178
00179 if (empty())
00180 return -1;
00181
00182 for(int i=length(); i--; )
00183 if ((*mString)[i] == ch)
00184 return i;
00185 return -1;
00186 }
00187
00188 int String::findBackwards(const String& str) const
00189 {
00190
00191 if (empty())
00192 return -1;
00193
00194 if (str.length() < length())
00195 {
00196 for(int i = length() - str.length()+1; i--; )
00197 {
00198 int j=0;
00199 for(; j<str.length(); ++j)
00200 {
00201 if ( str[j] != (*mString)[i+j] )
00202 break;
00203 }
00204 if ( j == str.length() )
00205 return i;
00206 }
00207 }
00208 return -1;
00209 }
00210
00211 bool String::contains(wchar_t ch) const
00212 {
00213 return find(ch) != -1;
00214 }
00215
00216 int String::find(wchar_t ch, int start) const
00217 {
00218
00219 if (empty())
00220 return -1;
00221
00222 for(int i=start; i<length(); ++i)
00223 if ((*mString)[i] == ch)
00224 return i;
00225 return -1;
00226 }
00227
00228 bool String::contains(const String& substr) const
00229 {
00230 return find(substr) != -1;
00231 }
00232
namespace
{
  // Quick Search (bad-character shift on the character just past the window).
  // Looks for pattern x[0..m) inside text y[0..n).
  // Returns the index of the first match, or -1 if there is none.
  //
  // Fixes over the previous version:
  //  - the shift table was indexed with the raw wchar_t value, which is out of bounds
  //    when wchar_t is wider than 16 bits or signed; characters are now folded into
  //    the table with `& 0xFFFF` (a collision can only shorten a shift, which is safe);
  //  - y[j + m] was read even when j + m == n, i.e. one element past the text.
  // Note: the table takes 256 KB of stack, as before.
  int String_Quick_Search(const wchar_t* x, int m, const wchar_t* y, int n)
  {
    if (m < 0 || n < m)
      return -1;

    const int table_size = 0x10000;
    int qsBc[table_size];
    for (int i = 0; i < table_size; ++i)
      qsBc[i] = m + 1;
    for (int i = 0; i < m; ++i)
      qsBc[x[i] & 0xFFFF] = m - i;

    for (int j = 0; j <= n - m; )
    {
      if (memcmp(x, y + j, m * sizeof(wchar_t)) == 0)
        return j;
      if (j + m >= n) // no character after the window: nothing left to try
        break;
      j += qsBc[y[j + m] & 0xFFFF];
    }
    return -1;
  }
}
00249
00250 int String::findInLargeText(const String& substr, int start) const
00251 {
00252
00253 if (empty())
00254 return -1;
00255
00256 if ( substr.length() > length() || start >= length() || substr.empty() || empty() )
00257 return -1;
00258 {
00259 int pos = String_Quick_Search( &(*substr.mString)[0], substr.length(), &(*mString)[0]+start, length()-start );
00260 return pos >= 0 ? pos + start : pos;
00261 }
00262 }
00263
00264 int String::find(const String& substr, int start) const
00265 {
00266
00267 if (empty())
00268 return -1;
00269
00270 if ( substr.length() > length() || start >= length() || substr.empty() || empty() )
00271 return -1;
00272 {
00273 int max = length() - substr.length();
00274 for(int i=start; i<=max; ++i)
00275 {
00276 int j=0;
00277 for(; j<substr.length(); ++j)
00278 {
00279 if (substr[j] != (*mString)[i+j])
00280 break;
00281 }
00282 if (j == substr.length())
00283 return i;
00284 }
00285 return -1;
00286 }
00287 }
00288
00289 void String::squeeze()
00290 {
00291 if(empty())
00292 return;
00293
00294 mString->squeeze();
00295 }
00296
00297 String& String::fill(wchar_t ch)
00298 {
00299 acquireData();
00300
00301 for(int i=0; i<length(); ++i)
00302 (*mString)[i] = ch;
00303 return *this;
00304 }
00305
00306 String& String::trim(const String& chars)
00307 {
00308 acquireData();
00309
00310 while( chars.length() )
00311 {
00312 int len = length();
00313 for( int i=0; i<chars.length(); ++i)
00314 trim(chars[i]);
00315 if ( len == length())
00316 break;
00317 }
00318 return *this;
00319 }
00320
00321 String& String::trim(wchar_t ch)
00322 {
00323 acquireData();
00324
00325 if (length())
00326 {
00327 int pos = 0;
00328 while( (*mString)[pos] == ch )
00329 pos++;
00330 if (pos)
00331 *this = substring(pos);
00332 pos = length()-1;
00333 while( pos >=0 && (*mString)[pos] == ch )
00334 pos--;
00335 pos++;
00336 if (pos != length())
00337 *this = substring(0,pos);
00338 }
00339 return *this;
00340 }
00341
00342 String& String::trim()
00343 {
00344 acquireData();
00345
00346 trim("\n\r\t\v ");
00347
00348 return *this;
00349 }
00350
00351 void String::split(const String& separator_list, std::vector<String>& fields, bool remove_empty) const
00352 {
00353 fields.clear();
00354
00355
00356 if (empty())
00357 return;
00358
00359 if ( length() )
00360 {
00361 fields.push_back( String() );
00362 fields.back().acquireData();
00363 fields.back().mString->clear();
00364 for(int i=0; i<length(); ++i)
00365 {
00366 if ( separator_list.contains((*mString)[i]) )
00367 {
00368 fields.push_back( String() );
00369 fields.back().acquireData();
00370 fields.back().mString->clear();
00371 continue;
00372 }
00373 fields.back().mString->push_back( (*mString)[i] );
00374 }
00375 }
00376
00377 if (remove_empty)
00378 {
00379 for ( size_t i=fields.size(); i--; )
00380 if (fields[i].empty())
00381 fields.erase(fields.begin() + i);
00382 }
00383 }
00384
00385 void String::split(wchar_t separator, std::vector<String>& fields, bool remove_empty) const
00386 {
00387 fields.clear();
00388
00389
00390 if (empty())
00391 return;
00392
00393 if ( length() )
00394 {
00395 fields.push_back( String() );
00396 fields.back().acquireData();
00397 fields.back().mString->clear();
00398 for(int i=0; i<length(); ++i)
00399 {
00400 if ((*mString)[i] == separator)
00401 {
00402 fields.push_back( String() );
00403 fields.back().acquireData();
00404 fields.back().mString->clear();
00405 continue;
00406 }
00407 fields.back().mString->push_back( (*mString)[i] );
00408 }
00409 }
00410
00411 if (remove_empty)
00412 {
00413 for ( size_t i=fields.size(); i--; )
00414 if (fields[i].empty())
00415 fields.erase(fields.begin() + i);
00416 }
00417 }
00418
00419 void String::splitLines(std::vector<String>& lines) const
00420 {
00421 lines.clear();
00422
00423
00424 if (empty())
00425 return;
00426
00427 if ( length() )
00428 {
00429 lines.push_back( String() );
00430 lines.back().acquireData();
00431 lines.back().mString->clear();
00432 for(int i=0; i<length(); ++i)
00433 {
00434 if ((*mString)[i] == '\n' || (*mString)[i] == '\r')
00435 {
00436 lines.push_back( String() );
00437 lines.back().acquireData();
00438 lines.back().mString->clear();
00439
00440 if ((*mString)[i] == '\n' && (*mString)[i+1] == '\r')
00441 ++i;
00442 else
00443 if ((*mString)[i] == '\r' && (*mString)[i+1] == '\n')
00444 ++i;
00445 continue;
00446 }
00447 lines.back().mString->push_back( (*mString)[i] );
00448 }
00449 }
00450 }
00451
00452 String String::field(wchar_t separator, int field_index) const
00453 {
00454 String field;
00455 int field_count = 0;
00456 int i=0;
00457 for(; i<length() && field_count<field_index; ++i)
00458 {
00459 if ( (*this)[i] == separator )
00460 ++field_count;
00461 }
00462
00463 for(; i<length() && (*this)[i] != separator; ++i)
00464 field+=(*this)[i];
00465 return field;
00466 }
00467
00468 String& String::remove(const String& str, int start, int count)
00469 {
00470 acquireData();
00471
00472 if (count == 0)
00473 return *this;
00474 if (count<0)
00475 count = length();
00476 int removed = 0;
00477 for( int pos = find(str, start); pos != -1 && removed<count; start=pos, pos=find(str, start), ++removed)
00478 remove( pos, str.length() );
00479 return *this;
00480 }
00481
00482 String& String::remove( int start, int count )
00483 {
00484 if (count == 0)
00485 return *this;
00486
00487 acquireData();
00488
00489 String tmp;
00490 tmp.acquireData();
00491 tmp.mString->clear();
00492 int end = start + count-1;
00493 for( int i=0; i<length(); i++ )
00494 if (i<start || i>end)
00495 tmp.mString->push_back((*mString)[i]);
00496
00497 mString = tmp.mString;
00498 return *this;
00499 }
00500
00501 String& String::remove(wchar_t ch, int start, int count)
00502 {
00503 acquireData();
00504
00505 if (count<0)
00506 count = length();
00507 String tmp = *this;
00508 tmp.acquireData();
00509 mString->clear();
00510 int removed = 0;
00511 for(int i=0; i<tmp.length(); ++i)
00512 if ( tmp[i]!=ch || removed==count || i<start)
00513 mString->push_back( tmp[i] );
00514 else
00515 ++removed;
00516
00517 return *this;
00518 }
00519
00520 String& String::reverse()
00521 {
00522 acquireData();
00523 int count = length() / 2;
00524 for(int i=0; i<count; ++i)
00525 {
00526 wchar_t tmp = (*this)[i];
00527 (*this)[i] = (*this)[length() - 1 - i];
00528 (*this)[length() - 1 - i] = tmp;
00529 }
00530 return *this;
00531 }
00532
00533 String& String::normalizeSlashes()
00534 {
00535 replace('\\', '/');
00536 int len=0;
00537 do
00538 {
00539 len=length();
00540 replace("//", "/");
00541 }
00542 while(len!=length());
00543 return *this;
00544 }
00545
00546 String& String::append(wchar_t ch, int count)
00547 {
00548 acquireData();
00549
00550 for(int i=0; i<count; ++i)
00551 mString->push_back(ch);
00552 return *this;
00553 }
00554
00555 String& String::append(const String& other)
00556 {
00557 acquireData();
00558
00559 for(int i=0; i<other.length(); ++i)
00560 mString->push_back(other[i]);
00561 return *this;
00562 }
00563
00564 String& String::prepend(const String& str)
00565 {
00566 return insert(0, str);
00567 }
00568
00569 String& String::prepend(wchar_t ch, int count)
00570 {
00571 return insert(0, ch, count);
00572 }
00573
00574 String& String::replace( int start, int count, const String& str )
00575 {
00576 remove(start, count);
00577 insert(start, str);
00578 return *this;
00579 }
00580
00581 String& String::replace( const String& oldstr, const String& newstr, bool case_sensitive )
00582 {
00583 acquireData();
00584 String supstr = case_sensitive ? *this : toLowerCase();
00585 String substr = case_sensitive ? oldstr : oldstr.toLowerCase();
00586
00587 std::vector<int> positions;
00588 for( int pos = 0; (pos=supstr.find(substr,pos)) != -1; pos += substr.length() )
00589 positions.push_back(pos);
00590
00591
00592 for(unsigned i=positions.size(); i--; )
00593 replace(positions[i], oldstr.length(), newstr);
00594
00595 return *this;
00596 }
00597
00598 String& String::replace( int start, int count, wchar_t ch )
00599 {
00600 acquireData();
00601
00602 if (start < 0 )
00603 start = 0;
00604 if (count < 0)
00605 count = length();
00606 int end = start + count;
00607 if (end > length())
00608 end = length();
00609 for(int i=start; i<end; ++i)
00610 (*mString)[i] = ch;
00611 return *this;
00612 }
00613
00614 String& String::replace( wchar_t old_ch, wchar_t new_ch )
00615 {
00616 acquireData();
00617
00618 for(int i=0; i<length(); ++i)
00619 if ((*mString)[i] == old_ch)
00620 (*mString)[i] = new_ch;
00621 return *this;
00622 }
00623
00624 int String::count(wchar_t ch, int start) const
00625 {
00626
00627 if (empty())
00628 return 0;
00629
00630 int num = 0;
00631 for(int i=start; i<length(); ++i)
00632 if ((*mString)[i] == ch)
00633 ++num;
00634 return num;
00635 }
00636
00637 int String::count(const String& str, int start) const
00638 {
00639
00640 if (empty())
00641 return 0;
00642
00643 int found = 0;
00644 for( int pos = find(str, start); pos != -1; start=pos+str.length(), pos=find(str, start))
00645 ++found;
00646 return found;
00647 }
00648
00649 int String::compare(const String& other) const
00650 {
00651 createData();
00652
00653 int min = length() < other.length() ? length() : other.length();
00654 for(int i=0; i<min; ++i)
00655 {
00656 if ( (*mString)[i] != (*other.mString)[i] )
00657 return (int)(*mString)[i] - (int)(*other.mString)[i];
00658 }
00659
00660 return length() - other.length();
00661 }
00662
00663 bool String::endsWith(const String& str) const
00664 {
00665
00666 if (empty())
00667 return false;
00668
00669 if (length() < str.length() || empty() || str.empty() )
00670 return false;
00671 else
00672 {
00673 int offset = length() - str.length();
00674 return memcmp( &(*mString)[0] + offset, &(*str.mString)[0], sizeof((*mString)[0])*str.length() ) == 0;
00675 }
00676 }
00677
00678 bool String::startsWith(const String& str) const
00679 {
00680
00681 if (str.empty())
00682 return true;
00683
00684 if (empty())
00685 return false;
00686
00687 if (length() < str.length() || empty() || str.empty() )
00688 return false;
00689 else
00690 {
00691 return memcmp( &(*mString)[0], &(*str.mString)[0], sizeof((*mString)[0])*str.length() ) == 0;
00692 }
00693 }
00694
00695 bool String::endsWith(wchar_t ch) const
00696 {
00697
00698 if (empty())
00699 return false;
00700
00701 return length() > 0 && (*mString)[length()-1] == ch;
00702 }
00703
00704 bool String::startsWith(wchar_t ch) const
00705 {
00706
00707 if (empty())
00708 return false;
00709
00710 return length() > 0 && (*mString)[0] == ch;
00711 }
00712
00713 String String::toLowerCase() const
00714 {
00715
00716 if (empty())
00717 return String();
00718
00719 String lower = *this;
00720 lower.acquireData();
00721 for(int i=0; i<length(); ++i)
00722 (*lower.mString)[i] = getLowerCase( (*lower.mString)[i] );
00723 return lower;
00724 }
00725
00726 String String::toUpperCase() const
00727 {
00728
00729 if (empty())
00730 return String();
00731
00732 String lower = *this;
00733 lower.acquireData();
00734 for(int i=0; i<length(); ++i)
00735 (*lower.mString)[i] = getUpperCase( (*lower.mString)[i] );
00736 return lower;
00737 }
00738
00739 String& String::insert(int pos, const String& str)
00740 {
00741 if (str.empty())
00742 return *this;
00743
00744 acquireData();
00745
00746 if (pos > length())
00747 return append(str);
00748 int remaining = length() - pos;
00749 mString->resize( mString->length() + str.length() );
00750 memmove( &(*mString)[0]+pos+str.length(), &(*mString)[0]+pos, sizeof(str[0])*remaining );
00751 memcpy( &(*mString)[0]+pos, &(*str.mString)[0], sizeof(str[0])*str.length() );
00752 return *this;
00753 }
00754
00755 String& String::insert(int pos, wchar_t ch, int count)
00756 {
00757 if (count == 0)
00758 return *this;
00759
00760 acquireData();
00761
00762 if (pos >= length())
00763 return append(ch, count);
00764 int remaining = length() - pos;
00765 mString->resize( mString->length() + count );
00766 memmove( &(*mString)[0]+pos+count, &(*mString)[0]+pos, sizeof((*mString)[0])*remaining );
00767 for(int i=0; i<count && i+pos<length(); ++i)
00768 (*mString)[i+pos] = ch;
00769 return *this;
00770 }
00771
00772 String String::left(int count) const
00773 {
00774 if (count<0)
00775 return substring(0, length()+count);
00776 else
00777 return substring(0, count);
00778 }
00779
00780 String String::right(int count) const
00781 {
00782 if (count<0)
00783 return substring(-count, length()+count);
00784 else
00785 return substring(length()-count, count);
00786 }
00787
00788 String String::extractPath() const
00789 {
00790
00791 if (empty())
00792 return String();
00793
00794 String path = *this;
00795 path.normalizeSlashes();
00796 int slash_pos = path.findBackwards('/');
00797 if (slash_pos<0)
00798 return String();
00799 else
00800 return path.substring(0,slash_pos+1);
00801 }
00802
00803 String String::extractFileName() const
00804 {
00805
00806 if (empty())
00807 return String();
00808
00809 int a = findBackwards('/');
00810 int b = findBackwards('\\');
00811 int slash_pos = a > b ? a : b;
00812 return substring(slash_pos+1);
00813 }
00814
00815 String String::extractFileExtension(bool require_dot) const
00816 {
00817
00818 if (empty())
00819 return String();
00820
00821 int dot_pos = findBackwards('.');
00822 if (require_dot && dot_pos == -1)
00823 return String();
00824 else
00825 return substring(dot_pos+1);
00826 }
00827
00828 String String::fromStdWString(const std::wstring& str)
00829 {
00830 String s;
00831 s.acquireData();
00832
00833 s.mString->clear();
00834 for(int i=0; i<(int)str.length(); ++i)
00835 s.mString->push_back( str[i] );
00836 return s;
00837 }
00838
00839 String String::fromStdString(const std::string& str)
00840 {
00841 return fromAscii( str.c_str() );
00842 }
00843
00844 String String::fromAscii(const char* str, int size)
00845 {
00846 String s;
00847 s.acquireData();
00848
00849 if (size<0)
00850 size = (int)strlen(str);
00851 const unsigned char* ascii = (const unsigned char*)str;
00852 s.mString->clear();
00853 for(int i=0; i<size; ++i)
00854 {
00855 if( ascii[i] < 128 )
00856 s.mString->push_back( ascii[i] );
00857 else
00858
00859 s.mString->push_back( L'?' );
00860 }
00861
00862 return s;
00863 }
00864
00865 String String::fromUTF16BE(const unsigned short* str, int byte_count)
00866 {
00867 String s;
00868 s.acquireData();
00869
00870 VL_COMPILE_TIME_CHECK( sizeof(unsigned short) == 2 )
00871 int character_count = byte_count < 0 ? -1 : byte_count / 2;
00872
00873
00874 if (character_count<0)
00875 for(character_count=0; str[character_count]; ) ++character_count;
00876
00877
00878 if (str[0] == 65534)
00879 {
00880 str++;
00881 --character_count;
00882 }
00883
00884 s.mString->clear();
00885 for(int i=0; i<character_count; ++i)
00886 {
00887 const unsigned char* bytes = (const unsigned char*)(str+i);
00888 unsigned int code = bytes[1] + (bytes[0]<<8);
00889
00890 if (code>=0xD800 && code <=0xDC00)
00891 {
00892 s.mString->push_back( '?' );
00893 ++i;
00894 }
00895 else
00896 s.mString->push_back( (wchar_t)code );
00897 }
00898 return s;
00899 }
00900
00901 String String::fromUTF16LE(const unsigned short* str, int byte_count)
00902 {
00903 String s;
00904 s.acquireData();
00905
00906 VL_COMPILE_TIME_CHECK( sizeof(unsigned short) == 2 )
00907 int character_count = byte_count < 0 ? -1 : byte_count / 2;
00908
00909
00910 if (character_count<0)
00911 for(character_count=0; str[character_count]; ) ++character_count;
00912
00913
00914 if (str[0] == 65279)
00915 {
00916 str++;
00917 --character_count;
00918 }
00919
00920 s.mString->clear();
00921 for(int i=0; i<character_count; ++i)
00922 {
00923 unsigned char* bytes = (unsigned char*)(str+i);
00924 unsigned int code = bytes[0] + (bytes[1]<<8);
00925
00926 if (code>=0xD800 && code <=0xDC00)
00927 {
00928 s.mString->push_back( '?' );
00929 ++i;
00930 }
00931 else
00932 s.mString->push_back( (wchar_t)code );
00933 }
00934 return s;
00935 }
00936
00937 String String::fromUTF16(const unsigned short* str, int byte_count)
00938 {
00939 String s;
00940 s.acquireData();
00941
00942 if (str[0] == 65279)
00943 s = fromUTF16LE(str, byte_count);
00944 else
00945 if (str[0] == 65534)
00946 s = fromUTF16BE(str, byte_count);
00947 else
00948 {
00949 Log::error("String::fromUTF16(): not UTF16 BE nor LE found.\n");
00950 s.clear();
00951 }
00952 return s;
00953 }
00954
00955 String String::fromUTF8(const char* str, int byte_count)
00956 {
00957 String s;
00958 s.acquireData();
00959
00960 unsigned char* utf8 = (unsigned char*)str;
00961 int start=0;
00962
00963 if ( utf8[0] == 0xEF && utf8[1] == 0xBB && utf8[2] == 0xBF )
00964 start=3;
00965
00966 if (byte_count<0)
00967 for(byte_count=0; utf8[byte_count]; ) ++byte_count;
00968
00969 s.mString->clear();
00970 const int UTF8_1BYTE = 128;
00971 const int UTF8_2BYTE = 128+64;
00972 const int UTF8_3BYTE = 128+64+32;
00973 const int UTF8_4BYTE = 128+64+32+16;
00974
00975 for( int i=start; i<byte_count; ++i )
00976 {
00977
00978
00979
00980
00981
00982
00983 unsigned int unicode_code_point = 0;
00984 if (utf8[i] < UTF8_1BYTE)
00985 unicode_code_point = utf8[i];
00986 else
00987 if ( (utf8[i] & UTF8_3BYTE) == UTF8_2BYTE )
00988 {
00989 unicode_code_point = ((utf8[i]-UTF8_2BYTE)<<6) + (utf8[i+1]&0x3f);
00990 i+=1;
00991 }
00992 else
00993 if ( (utf8[i] & UTF8_4BYTE) == UTF8_3BYTE )
00994 {
00995 unicode_code_point = ((utf8[i]-UTF8_3BYTE)<<12) + ((utf8[i+1]&0x3f)<<6) + (utf8[i+2]&0x3f);
00996 i+=2;
00997 }
00998 else
00999 {
01000 unicode_code_point = ((utf8[i]-UTF8_4BYTE)<<18) + ((utf8[i+1]&0x3f)<<12) + ((utf8[i+2]&0x3f)<<6) + (utf8[i+3]&0x3f);
01001 i+=3;
01002 }
01003
01004 if (unicode_code_point <= 0xFFFF)
01005 s.mString->push_back((wchar_t)unicode_code_point);
01006 else
01007 s.mString->push_back(L'?');
01008 }
01009 return s;
01010 }
01011
01012 String String::fromLatin1(const char* str, int character_count)
01013 {
01014 String s;
01015 s.acquireData();
01016
01017 unsigned char* latin1 = (unsigned char*)str;
01018 if (character_count<0)
01019 for(character_count=0; latin1[character_count]; ) ++character_count;
01020
01021 s.mString->clear();
01022 for(int i=0; i<character_count; ++i)
01023 s.mString->push_back( latin1_to_unicode[ latin1[i] ] );
01024 return s;
01025 }
01026
01027 String String::fromInt(int value)
01028 {
01029 char buffer[256];
01030 memset(buffer, 0, 256);
01031 sprintf(buffer, "%d", value);
01032 return fromAscii(buffer);
01033 }
01034
01035 String String::fromUInt(unsigned int value)
01036 {
01037 char buffer[256];
01038 memset(buffer, 0, 256);
01039 sprintf(buffer, "%u", value);
01040 return fromAscii(buffer);
01041 }
01042
01043 String String::fromLongLong(long long value)
01044 {
01045 char buffer[256];
01046 memset(buffer, 0, 256);
01047 sprintf(buffer, "%lld", value);
01048 return fromAscii(buffer);
01049 }
01050
01051 String String::fromULongLong(unsigned long long value)
01052 {
01053 char buffer[256];
01054 memset(buffer, 0, 256);
01055 sprintf(buffer, "%llu", value);
01056 return fromAscii(buffer);
01057 }
01058
01059 String String::fromDouble(double value, int decimals)
01060 {
01061 char buffer[256];
01062 memset(buffer, 0, 256);
01063 switch(decimals)
01064 {
01065 case 0: sprintf(buffer, "%.0lf", value); break;
01066 case 1: sprintf(buffer, "%.1lf", value); break;
01067 case 2: sprintf(buffer, "%.2lf", value); break;
01068 case 3: sprintf(buffer, "%.3lf", value); break;
01069 case 4: sprintf(buffer, "%.4lf", value); break;
01070 case 5: sprintf(buffer, "%.5lf", value); break;
01071 case 6: sprintf(buffer, "%.6lf", value); break;
01072 case 7: sprintf(buffer, "%.7lf", value); break;
01073 case 8: sprintf(buffer, "%.8lf", value); break;
01074 case 9: sprintf(buffer, "%.9lf", value); break;
01075 case 10: sprintf(buffer, "%.10lf", value); break;
01076 case 11: sprintf(buffer, "%.11lf", value); break;
01077 case 12: sprintf(buffer, "%.12lf", value); break;
01078 case 13: sprintf(buffer, "%.13lf", value); break;
01079 case 14: sprintf(buffer, "%.14lf", value); break;
01080 case 15: sprintf(buffer, "%.15lf", value); break;
01081 case 16: sprintf(buffer, "%.16lf", value); break;
01082 case 17: sprintf(buffer, "%.17lf", value); break;
01083 case 18: sprintf(buffer, "%.18lf", value); break;
01084 case 19: sprintf(buffer, "%.19lf", value); break;
01085 case 20: sprintf(buffer, "%.20lf", value); break;
01086 default: sprintf(buffer, "%.6lf", value); break;
01087 }
01088 return fromAscii(buffer);
01089 }
01090
01091 std::wstring String::toStdWString() const
01092 {
01093
01094 if (empty())
01095 return std::wstring();
01096
01097 std::wstring ws;
01098 for(int i=0; i<length(); ++i)
01099 ws += (*mString)[i];
01100 return ws;
01101 }
01102
01103 std::string String::toStdString() const
01104 {
01105
01106 if (empty())
01107 return std::string();
01108 std::string std_string;
01109
01110 std::vector<unsigned char> utf8;
01111 toUTF8(utf8, false);
01112 if (utf8.size()>1)
01113 {
01114 std_string.resize(utf8.size()-1);
01115 memcpy(&std_string[0], &utf8[0], utf8.size()-1);
01116 }
01117
01118 return std_string;
01119 }
01120
01121 void String::toAscii(std::string& ascii, bool translate_non_ascii_chars) const
01122 {
01123
01124 if (empty())
01125 {
01126 ascii.clear();
01127 return;
01128 }
01129
01130 ascii.clear();
01131 if (mString->length())
01132 {
01133 for(int i=0; i<(int)mString->length() && (*mString)[i]; ++i)
01134 {
01135 if ( (*mString)[i] < 128 || !translate_non_ascii_chars )
01136 ascii += (char)((*mString)[i] & 0xFF);
01137 else
01138 {
01139 const char* translation = unicode_to_ascii( (*mString)[i] );
01140 if (translation)
01141 {
01142 for(int j=0; translation[j]; ++j)
01143 ascii += translation[j];
01144 }
01145 else
01146 ascii += '?';
01147 }
01148 }
01149 }
01150
01151 }
01152
01153 void String::toUTF8(std::string& str, bool include_utf8_signature) const
01154 {
01155 std::vector<unsigned char> utf8;
01156 toUTF8(utf8, include_utf8_signature);
01157 str.clear();
01158 if (utf8.size())
01159 {
01160 for(int i=0; utf8[i]; ++i)
01161 str.push_back(utf8[i]);
01162 }
01163 }
01164
01165 void String::toUTF8(std::vector<unsigned char>& utf8, bool include_utf8_signature) const
01166 {
01167 utf8.clear();
01168 if(include_utf8_signature)
01169 {
01170 utf8.push_back(0xEF);
01171 utf8.push_back(0xBB);
01172 utf8.push_back(0xBF);
01173 }
01174
01175
01176 if (empty())
01177 {
01178 utf8.push_back(0);
01179 return;
01180 }
01181
01182
01183
01184
01185
01186
01187 for(int i=0; i<length(); ++i)
01188 {
01189 if ( (*mString)[i] < 0x80)
01190 utf8.push_back( (unsigned char)(*mString)[i] );
01191 else
01192 if ( (*mString)[i] < 0x800)
01193 {
01194 int a = 0xC0 | ((*mString)[i]>>6);
01195 int b = 0x80 | ((*mString)[i]&0x3F);
01196 utf8.push_back( (unsigned char)a );
01197 utf8.push_back( (unsigned char)b );
01198 }
01199 else
01200 {
01201 int a = 0xE0 | ((*mString)[i]>>12);
01202 int b = 0x80 | (((*mString)[i]>>6)&0x3F);
01203 int c = 0x80 | ((*mString)[i]&0x3F);
01204 utf8.push_back( (unsigned char)a );
01205 utf8.push_back( (unsigned char)b );
01206 utf8.push_back( (unsigned char)c );
01207 }
01208 }
01209
01210 utf8.push_back(0);
01211 }
01212
01213 void String::toUTF16BE(std::vector<unsigned char>& utf16, bool include_utf16be_signature) const
01214 {
01215 utf16.clear();
01216 if (include_utf16be_signature)
01217 {
01218 utf16.push_back(0xFE);
01219 utf16.push_back(0xFF);
01220 }
01221
01222
01223 if (empty())
01224 {
01225 utf16.push_back(0);
01226 return;
01227 }
01228
01229 for(int i=0; i<length(); ++i)
01230 {
01231 int x = ((*mString)[i]>>8) & 0xFF;
01232 int y = (*mString)[i] & 0xFF;
01233 utf16.push_back( (unsigned char)x );
01234 utf16.push_back( (unsigned char)y );
01235 }
01236 utf16.push_back(0);
01237 }
01238
01239 void String::toUTF16LE(std::vector<unsigned char>& utf16, bool include_utf16le_signature) const
01240 {
01241 utf16.clear();
01242 if (include_utf16le_signature)
01243 {
01244 utf16.push_back(0xFF);
01245 utf16.push_back(0xFE);
01246 }
01247
01248
01249 if (empty())
01250 {
01251 utf16.push_back(0);
01252 return;
01253 }
01254
01255 for(int i=0; i<length(); ++i)
01256 {
01257 int x = (*mString)[i] & 0xFF;
01258 int y = ((*mString)[i]>>8) & 0xFF;
01259 utf16.push_back( (unsigned char)x );
01260 utf16.push_back( (unsigned char)y );
01261 }
01262 utf16.push_back(0);
01263 }
01264
01265 void String::toLatin1(std::vector<unsigned char>& latin1) const
01266 {
01267 latin1.clear();
01268
01269
01270 if (empty())
01271 {
01272 latin1.push_back(0);
01273 return;
01274 }
01275
01276 for(int i=0; i<length(); ++i)
01277 {
01278 if ((*mString)[i] < 128)
01279 latin1.push_back((unsigned char)(*mString)[i]);
01280 else
01281 {
01282
01283 int j=128;
01284 for(; latin1_to_unicode[j]; ++j)
01285 {
01286 if ( latin1_to_unicode[j] == (*mString)[i] )
01287 {
01288 latin1.push_back((unsigned char)j);
01289 break;
01290 }
01291 }
01292 if (j==256)
01293 latin1.push_back('?');
01294 }
01295 }
01296 latin1.push_back(0);
01297 }
01298
01299 int String::toInt(bool hex) const
01300 {
01301
01302 if (empty())
01303 return 0;
01304
01305 if (hex)
01306 {
01307 int i=0;
01308 sscanf(toStdString().c_str(), "%x", &i);
01309 return i;
01310 }
01311 else
01312 return atoi( toStdString().c_str() );
01313 }
01314
01315 double String::toDouble() const
01316 {
01317
01318 if (empty())
01319 return 0.0;
01320
01321 return atof( toStdString().c_str() );
01322 }
01323
01324 void String::filterStrings(std::vector<String>& strings, const String& filter)
01325 {
01326 String match = filter;
01327 int filter_type = 0;
01328 bool filter_ok = filter.empty();
01329
01330 if ( filter.startsWith('*') )
01331 {
01332 filter_type--;
01333 match.remove(0, 1);
01334 filter_ok = true;
01335 }
01336
01337 if ( filter.endsWith('*') )
01338 {
01339 filter_type++;
01340 match.remove(match.length()-1, 1);
01341 filter_ok = true;
01342 }
01343
01344 if ( !filter_ok )
01345 {
01346 Log::error( Say("unacceptable filter '%s'.\n") << filter );
01347 return;
01348 }
01349
01350 if ( filter_type && filter.length() > 1 )
01351 {
01352 for( int i=(int)strings.size(); i--; )
01353 switch(filter_type)
01354 {
01355 case 0: if( strings[i].find(match) == -1 ) strings.erase( strings.begin() + i ); break;
01356 case -1: if( !strings[i].endsWith(match) ) strings.erase( strings.begin() + i ); break;
01357 case +1: if( !strings[i].startsWith(match) ) strings.erase( strings.begin() + i ); break;
01358 }
01359 }
01360 }
01361
01362 EStringEncoding String::detectEncoding(const void* str, int byte_count, EStringEncoding default_encoding)
01363 {
01364 const unsigned char* h = (unsigned char*)str;
01365
01366
01367
01368
01369
01370
01371 if (byte_count>4 && h[0] == 0xFF && h[1] == 0xFE && h[2] == 0 && h[3] == 0 ) return SE_UTF32_LE;
01372 if (byte_count>4 && h[0] == 0 && h[1] == 0 && h[2] == 0xFE && h[3] == 0xFF) return SE_UTF32_BE;
01373 if (byte_count>3 && h[0] == 0xEF && h[1] == 0xBB && h[2] == 0xBF ) return SE_UTF8;
01374 if (byte_count>2 && h[0] == 0xFE && h[1] == 0xFF ) return SE_UTF16_BE;
01375 if (byte_count>2 && h[0] == 0xFF && h[1] == 0xFE ) return SE_UTF16_LE;
01376 return default_encoding;
01377 }
01378
01379 unsigned short String::getUpperCase(unsigned short ch)
01380 {
01381 for(int i=0; i<107; ++i)
01382 {
01383 if (ch >= case_table_start_min_max[i][1] && ch <= case_table_start_min_max[i][2])
01384 {
01385 int index = ch - case_table_start_min_max[i][1] + case_table_start_min_max[i][0];
01386 return case_table_upper_lower_title[index][0];
01387 }
01388 }
01389 return ch;
01390 }
01391
01392 unsigned short String::getLowerCase(unsigned short ch)
01393 {
01394 for(int i=0; i<107; ++i)
01395 {
01396 if (ch >= case_table_start_min_max[i][1] && ch <= case_table_start_min_max[i][2])
01397 {
01398 int index = ch - case_table_start_min_max[i][1] + case_table_start_min_max[i][0];
01399 return case_table_upper_lower_title[index][1];
01400 }
01401 }
01402 return ch;
01403 }
01404
01405 unsigned short String::getTitleCase(unsigned short ch)
01406 {
01407 for(int i=0; i<107; ++i)
01408 {
01409 if (ch >= case_table_start_min_max[i][1] && ch <= case_table_start_min_max[i][2])
01410 {
01411 int index = ch - case_table_start_min_max[i][1] + case_table_start_min_max[i][0];
01412 return case_table_upper_lower_title[index][2];
01413 }
01414 }
01415 return ch;
01416 }
01417
01418 std::string String::trimStdString(const std::string& text)
01419 {
01420 std::string trimmed;
01421 for(unsigned i=0; i<text.length(); ++i)
01422 {
01423 if(text[i] == ' ' ||
01424 text[i] == '\n' ||
01425 text[i] == '\t' ||
01426 text[i] == '\v' ||
01427 text[i] == '\b' ||
01428 text[i] == '\a' ||
01429 text[i] == '\f' ||
01430 text[i] == '\r' )
01431 continue;
01432 else
01433 {
01434 trimmed = text.c_str() + i;
01435 break;
01436 }
01437 }
01438 int i = (int)trimmed.length();
01439 while( i-- )
01440 {
01441 if(trimmed[i] == ' ' ||
01442 trimmed[i] == '\n' ||
01443 trimmed[i] == '\t' ||
01444 trimmed[i] == '\v' ||
01445 trimmed[i] == '\b' ||
01446 trimmed[i] == '\a' ||
01447 trimmed[i] == '\f' ||
01448 trimmed[i] == '\r' )
01449 continue;
01450 else
01451 break;
01452 }
01453 trimmed.resize( i+1 );
01454 return trimmed;
01455 }
01456