| 1 |
// Copyright (c) 2009, Whispersoft s.r.l. |
|---|
| 2 |
// All rights reserved. |
|---|
| 3 |
// |
|---|
| 4 |
// Redistribution and use in source and binary forms, with or without |
|---|
| 5 |
// modification, are permitted provided that the following conditions are |
|---|
| 6 |
// met: |
|---|
| 7 |
// |
|---|
| 8 |
// * Redistributions of source code must retain the above copyright |
|---|
| 9 |
// notice, this list of conditions and the following disclaimer. |
|---|
| 10 |
// * Redistributions in binary form must reproduce the above |
|---|
| 11 |
// copyright notice, this list of conditions and the following disclaimer |
|---|
| 12 |
// in the documentation and/or other materials provided with the |
|---|
| 13 |
// distribution. |
|---|
| 14 |
// * Neither the name of Whispersoft s.r.l. nor the names of its |
|---|
| 15 |
// contributors may be used to endorse or promote products derived from |
|---|
| 16 |
// this software without specific prior written permission. |
|---|
| 17 |
// |
|---|
| 18 |
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
|---|
| 19 |
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
|---|
| 20 |
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
|---|
| 21 |
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
|---|
| 22 |
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
|---|
| 23 |
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
|---|
| 24 |
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
|---|
| 25 |
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
|---|
| 26 |
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|---|
| 27 |
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
|---|
| 28 |
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|---|
| 29 |
// |
|---|
| 30 |
// Author: Cosmin Tudorache |
|---|
| 31 |
|
|---|
| 32 |
|
|---|
| 33 |
|
|---|
| 34 |
#include <string.h> |
|---|
| 35 |
#include <stdarg.h> |
|---|
| 36 |
#include <memory> |
|---|
| 37 |
#include <map> |
|---|
| 38 |
#include <unicode/utf8.h> // ICU/source/common/unicode/utf8.h |
|---|
| 39 |
|
|---|
| 40 |
#include "common/base/log.h" |
|---|
| 41 |
#include "common/base/strutil.h" |
|---|
| 42 |
#include "common/base/scoped_ptr.h" |
|---|
| 43 |
|
|---|
| 44 |
#ifndef PATH_SEPARATOR |
|---|
| 45 |
#define PATH_SEPARATOR '/' |
|---|
| 46 |
#endif |
|---|
| 47 |
|
|---|
| 48 |
namespace strutil { |
|---|
| 49 |
|
|---|
| 50 |
// Case-sensitive equality of two C strings.
// Returns true when both pointers are identical (including both NULL) or
// when the pointed-to strings compare equal. A NULL pointer is never equal
// to a non-NULL string.
bool StrEql(const char* str1, const char* str2) {
  if ( str1 == str2 ) {
    return true;   // same buffer, or both NULL
  }
  if ( str1 == NULL || str2 == NULL ) {
    return false;  // strcmp() must never receive a NULL pointer
  }
  return 0 == strcmp(str1, str2);
}

// Case-sensitive equality of two strings (full contents are compared).
bool StrEql(const string& str1, const string& str2) {
  return str1 == str2;
}
|---|
| 58 |
|
|---|
| 59 |
// Case-insensitive equality of two C strings.
// Returns true when both pointers are identical (including both NULL) or
// when strcasecmp() reports the strings equal. A NULL pointer is never equal
// to a non-NULL string.
bool StrIEql(const char* str1, const char* str2) {
  if ( str1 == str2 ) {
    return true;   // same buffer, or both NULL
  }
  if ( str1 == NULL || str2 == NULL ) {
    return false;  // strcasecmp() must never receive a NULL pointer
  }
  return 0 == strcasecmp(str1, str2);
}

// Case-insensitive equality of two strings.
// The comparison covers the whole contents (embedded NUL bytes included),
// so strings of different sizes are never equal.
bool StrIEql(const string& str1, const string& str2) {
  if ( str1.size() != str2.size() ) {
    return false;
  }
  for ( size_t i = 0; i < str1.size(); ++i ) {
    // The <ctype.h> functions require a value representable as unsigned char.
    if ( ::tolower(static_cast<unsigned char>(str1[i])) !=
         ::tolower(static_cast<unsigned char>(str2[i])) ) {
      return false;
    }
  }
  return true;
}
|---|
| 66 |
|
|---|
| 67 |
// Returns true if `str` begins with `prefix` (case sensitive).
// An empty prefix matches every string; identical pointers always match.
bool StrPrefix(const char* str, const char* prefix) {
  if ( str == prefix ) {
    return true;
  }
  // strncmp() stops at the first difference or at the end of either string,
  // so a `str` shorter than `prefix` compares unequal.
  const size_t prefix_len = ::strlen(prefix);
  return 0 == ::strncmp(str, prefix, prefix_len);
}

// Alias of StrPrefix() for C strings.
bool StrStartsWith(const char* str, const char* prefix) {
  return StrPrefix(str, prefix);
}

// Same test for string arguments; both are compared as C strings.
bool StrStartsWith(const string& str, const string& prefix) {
  const char* const c_str = str.c_str();
  const char* const c_prefix = prefix.c_str();
  return StrStartsWith(c_str, c_prefix);
}
|---|
| 85 |
|
|---|
| 86 |
// Returns true if `str` begins with `prefix`, ignoring character case.
// An empty prefix matches every string; identical pointers always match.
bool StrCasePrefix(const char* str, const char* prefix) {
  if ( str == prefix ) {
    return true;
  }
  while ( *str && *prefix ) {
    // toupper() is only defined for values representable as unsigned char
    // (or EOF), so bytes >= 0x80 must not be passed as negative chars.
    const unsigned char a = static_cast<unsigned char>(*str);
    const unsigned char b = static_cast<unsigned char>(*prefix);
    if ( a != b && ::toupper(a) != ::toupper(b) ) {
      break;
    }
    ++str;
    ++prefix;
  }
  // The whole prefix was consumed only if every character matched.
  return *prefix == '\0';
}

// Alias of StrCasePrefix() for C strings.
bool StrIStartsWith(const char* str, const char* prefix) {
  return StrCasePrefix(str, prefix);
}

// Same test for string arguments; both are compared as C strings.
bool StrIStartsWith(const string& str, const string& prefix) {
  return StrIStartsWith(str.c_str(), prefix.c_str());
}
|---|
| 105 |
|
|---|
| 106 |
// Returns true if `str` ends with `suffix` (case sensitive).
// An empty suffix matches every string.
bool StrSuffix(const char* str, const char* suffix) {
  const size_t str_len = ::strlen(str);
  const size_t suffix_len = ::strlen(suffix);
  if ( suffix_len > str_len ) {
    return false;
  }
  // Compare only the tail of `str`; no byte outside either string is read.
  return 0 == ::memcmp(str + (str_len - suffix_len), suffix, suffix_len);
}

// Same test for string arguments; both are compared as C strings.
bool StrSuffix(const string& str, const string& suffix) {
  return StrSuffix(str.c_str(), suffix.c_str());
}

// Alias of StrSuffix() for C strings.
bool StrEndsWith(const char* str, const char* suffix) {
  return StrSuffix(str, suffix);
}

// Alias of StrSuffix() for string arguments.
bool StrEndsWith(const string& str, const string& suffix) {
  return StrEndsWith(str.c_str(), suffix.c_str());
}
|---|
| 130 |
|
|---|
| 131 |
// Returns a pointer to the first character of `str` that is not whitespace
// according to isspace(). The result points into the caller's buffer (at the
// terminating NUL when the whole string is whitespace).
const char* StrFrontTrim(const char* str) {
  // isspace() is only defined for values representable as unsigned char.
  while ( isspace(static_cast<unsigned char>(*str)) ) {
    ++str;
  }
  return str;
}

// Returns a copy of `str` without its leading whitespace.
string StrFrontTrim(const string& str) {
  size_t first = 0;
  while ( first < str.size() &&
          isspace(static_cast<unsigned char>(str[first])) ) {
    ++first;
  }
  return str.substr(first);
}
|---|
| 144 |
|
|---|
| 145 |
// Returns a copy of `str` with leading and trailing whitespace (as defined
// by isspace()) removed. A string made only of whitespace yields "".
string StrTrim(const string& str) {
  size_t first = 0;
  size_t last = str.size();  // one past the last character to keep
  // isspace() is only defined for values representable as unsigned char.
  while ( first < last && isspace(static_cast<unsigned char>(str[first])) ) {
    ++first;
  }
  while ( last > first &&
          isspace(static_cast<unsigned char>(str[last - 1])) ) {
    --last;
  }
  return str.substr(first, last - first);
}
|---|
| 159 |
|
|---|
| 160 |
// Returns a copy of `str` with every leading and trailing character that
// appears in the NUL-terminated set `chars_to_trim` removed.
// Only the characters listed in `chars_to_trim` are trimmed; in particular a
// NUL byte inside `str` is kept (the set's terminator is not a member).
string StrTrimChars(const string& str, const char* chars_to_trim) {
  const string::size_type first = str.find_first_not_of(chars_to_trim);
  if ( first == string::npos ) {
    return string("");  // empty, or made only of trimmable characters
  }
  const string::size_type last = str.find_last_not_of(chars_to_trim);
  return str.substr(first, last - first + 1);
}
|---|
| 174 |
|
|---|
| 175 |
// Returns a copy of `str` from which every whitespace character (as defined
// by isspace()) has been removed, wherever it appears in the string.
string StrTrimCompress(const string& str) {
  string out;
  out.reserve(str.size());
  for ( size_t i = 0; i < str.size(); ++i ) {
    const char c = str[i];
    // isspace() is only defined for values representable as unsigned char.
    if ( !isspace(static_cast<unsigned char>(c)) ) {
      out.push_back(c);
    }
  }
  return out;
}
|---|
| 183 |
|
|---|
| 184 |
|
|---|
| 185 |
void ShiftLeftBuffer(void* buf, |
|---|
| 186 |
size_t buf_size, |
|---|
| 187 |
size_t shift_size, |
|---|
| 188 |
int fill_value) { |
|---|
| 189 |
if ( shift_size == 0 ) return; |
|---|
| 190 |
CHECK_GE(buf_size, shift_size); |
|---|
| 191 |
uint8* const data = reinterpret_cast<uint8*>(buf); |
|---|
| 192 |
const size_t cb = buf_size - shift_size; |
|---|
| 193 |
memmove(data, data + shift_size, cb); |
|---|
| 194 |
memset(data + cb, fill_value, shift_size); |
|---|
| 195 |
} |
|---|
| 196 |
|
|---|
| 197 |
// Returns the part of `filename` that follows the last PATH_SEPARATOR, or
// `filename` itself when it contains no separator. The result points into
// the caller's buffer.
const char* Basename(const char* filename) {
  const char* const last_sep = strrchr(filename, PATH_SEPARATOR);
  if ( last_sep == NULL ) {
    return filename;
  }
  return last_sep + 1;
}

// string flavour of Basename(); the name is processed as a C string.
string Basename(const string& filename) {
  const char* const base = Basename(filename.c_str());
  return base;
}
|---|
| 205 |
|
|---|
| 206 |
// Returns everything in `filename` that precedes the last PATH_SEPARATOR
// (the separator itself is not included). Yields "" when there is no
// separator, and also when the only separator is the first character.
string Dirname(const string& filename) {
  const string::size_type last_sep = filename.rfind(PATH_SEPARATOR);
  if ( last_sep == string::npos ) {
    return string();
  }
  return filename.substr(0, last_sep);
}
|---|
| 210 |
// Returns `filename` without its extension, i.e. without the last '.' of the
// final path component and everything after it. Names whose final component
// has no '.' are returned unchanged; a '.' that belongs to a directory
// component (one located before the last PATH_SEPARATOR) is not an
// extension marker.
string CutExtension(const string& filename) {
  const string::size_type dot_pos = filename.rfind('.');
  if ( dot_pos == string::npos ) {
    return filename;
  }
  const string::size_type sep_pos = filename.rfind(PATH_SEPARATOR);
  if ( sep_pos != string::npos && sep_pos > dot_pos ) {
    return filename;  // the last dot is part of a directory name
  }
  return filename.substr(0, dot_pos);
}
|---|
| 214 |
// Returns the extension of `filename`: the text after the last '.' of the
// final path component, without the dot. Yields "" when the final component
// has no '.'; a '.' that belongs to a directory component (one located
// before the last PATH_SEPARATOR) is not an extension marker.
string Extension(const string& filename) {
  const string::size_type dot_pos = filename.rfind('.');
  if ( dot_pos == string::npos ) {
    return string("");
  }
  const string::size_type sep_pos = filename.rfind(PATH_SEPARATOR);
  if ( sep_pos != string::npos && sep_pos > dot_pos ) {
    return string("");  // the last dot is part of a directory name
  }
  return filename.substr(dot_pos + 1);
}
|---|
| 218 |
|
|---|
| 219 |
string NormalizeUrlPath(const string& path) { |
|---|
| 220 |
if ( path == "" ) { |
|---|
| 221 |
return "/"; |
|---|
| 222 |
} |
|---|
| 223 |
string ret(strutil::NormalizePath(path)); |
|---|
| 224 |
int i = 0; |
|---|
| 225 |
while ( i < ret.size() && ret[i] == '/' ) { |
|---|
| 226 |
++i; |
|---|
| 227 |
} |
|---|
| 228 |
if ( i >= ret.size() ) { |
|---|
| 229 |
return "/"; |
|---|
| 230 |
} |
|---|
| 231 |
if ( i == 0 ) { |
|---|
| 232 |
return ret; |
|---|
| 233 |
} |
|---|
| 234 |
return ret.substr(i - 1); |
|---|
| 235 |
} |
|---|
| 236 |
|
|---|
| 237 |
// Returns a normalized copy of `path`:
//  - every '\' becomes '/';
//  - runs of slashes are collapsed to one, except that a leading "//" is
//    preserved;
//  - "/./" segments and a trailing "/." are dropped;
//  - "/../" segments and a trailing "/.." remove the preceding component.
// Relative paths stay relative: a '/' is prepended while processing and
// removed again before returning. When ".." climbs above the top of the
// path the result is "/" for absolute paths and "" for relative ones.
string NormalizePath(const string& path) {
  string norm(path);

  // Use forward slashes only.
  for ( string::iterator it = norm.begin(); it != norm.end(); ++it ) {
    if ( *it == '\\' ) {
      *it = '/';
    }
  }

  // Work on a path that always starts with '/', remembering whether the
  // leading slash is ours.
  const bool slash_added = norm.empty() || norm[0] != '/';
  if ( slash_added ) {
    norm = string("/") + norm;
  }
  // Value returned when ".." leaves nothing but the root.
  const char* const only_root = slash_added ? "" : "/";

  string::size_type at;
  // "///" -> "/"
  while ( (at = norm.find("///")) != string::npos ) {
    norm.erase(at, 2);
  }
  // "//" -> "/", except at the very beginning of the path
  while ( (at = norm.find("//", 1)) != string::npos ) {
    norm.erase(at, 1);
  }
  // "/./" -> "/"
  while ( (at = norm.find("/./")) != string::npos ) {
    norm.erase(at, 2);
  }
  // "<parent>/../" -> "/": drop the component that precedes the dots
  while ( (at = norm.find("/../")) != string::npos ) {
    if ( at == 0 ) {
      return only_root;
    }
    const string::size_type parent = norm.find_last_of('/', at - 1);
    if ( parent == string::npos ) {
      return only_root;
    }
    norm.erase(parent, at + 3 - parent);
  }

  // Trailing "/."
  const string::size_type len_dot = norm.length();
  if ( len_dot >= 2 && norm.compare(len_dot - 2, 2, "/.") == 0 ) {
    norm.erase(len_dot - 2);
  }
  // Trailing "/.."
  const string::size_type len_dots = norm.length();
  if ( len_dots >= 3 && norm.compare(len_dots - 3, 3, "/..") == 0 ) {
    const string::size_type dots_at = len_dots - 3;
    if ( dots_at == 0 ) {
      return only_root;
    }
    const string::size_type parent = norm.find_last_of('/', dots_at - 1);
    if ( parent == string::npos ) {
      return only_root;
    }
    norm.erase(parent);
  }
  if ( !slash_added && norm.empty() ) {
    norm = "/";
  }

  if ( !slash_added || norm.empty() ) {
    return norm;
  }
  return norm.substr(1);  // remove the slash added above
}
|---|
| 303 |
|
|---|
| 304 |
// Concatenates the first `size` C strings of `pieces`, inserting `glue`
// between consecutive elements. Zero elements give an empty string.
string JoinStrings(const char* pieces[], size_t size, const char* glue) {
  string joined;
  for ( size_t idx = 0; idx < size; ++idx ) {
    if ( idx != 0 ) {
      joined += glue;
    }
    joined += pieces[idx];
  }
  return joined;
}

// Concatenates the elements of `pieces`, inserting `glue` between
// consecutive elements. An empty vector gives an empty string.
string JoinStrings(const vector<string>& pieces, const char* glue) {
  string joined;
  for ( vector<string>::const_iterator it = pieces.begin();
        it != pieces.end(); ++it ) {
    if ( it != pieces.begin() ) {
      joined += glue;
    }
    joined += *it;
  }
  return joined;
}
|---|
| 327 |
|
|---|
| 328 |
string PrintableDataBuffer(const void* pbuffer, size_t size) { |
|---|
| 329 |
const uint8* buffer = reinterpret_cast<const uint8*>(pbuffer); |
|---|
| 330 |
string l1, l2; |
|---|
| 331 |
l1.reserve(size * 8 + (size / 16) * 10); |
|---|
| 332 |
l2.reserve(size * 8 + (size / 16) * 10); |
|---|
| 333 |
for ( size_t i = 0; i < size; i++ ) { |
|---|
| 334 |
if ( i % 16 == 0 ) { |
|---|
| 335 |
l1 += strutil::StringPrintf("\n%06d", static_cast<int32>(i)); |
|---|
| 336 |
l2 += strutil::StringPrintf("\n%06d", static_cast<int32>(i)); |
|---|
| 337 |
} |
|---|
| 338 |
l1 += strutil::StringPrintf(" 0x%02x, ", |
|---|
| 339 |
static_cast<int32>(buffer[i] & 0xff)); |
|---|
| 340 |
if ( buffer[i] >= ' ' && buffer[i] <= '}' ) { |
|---|
| 341 |
l2 += strutil::StringPrintf(" '%c', ", |
|---|
| 342 |
static_cast<char>(buffer[i])); |
|---|
| 343 |
} else { |
|---|
| 344 |
l2 += strutil::StringPrintf(" '\\x%02x',", |
|---|
| 345 |
static_cast<int32>(buffer[i] & 0xff)); |
|---|
| 346 |
} |
|---|
| 347 |
} |
|---|
| 348 |
const string str_size = StringOf(size); |
|---|
| 349 |
return "#" + str_size + " bytes HEXA: \n" + l1 + "\n" + |
|---|
| 350 |
"#" + str_size + " bytes CHAR: \n" + l2 + "\n"; |
|---|
| 351 |
} |
|---|
| 352 |
string PrintableDataBufferHexa(const void* pbuffer, size_t size) { |
|---|
| 353 |
const uint8* buffer = reinterpret_cast<const uint8*>(pbuffer); |
|---|
| 354 |
string l1; |
|---|
| 355 |
l1.reserve(size * 8 + (size / 16) * 10); |
|---|
| 356 |
for ( size_t i = 0; i < size; i++ ) { |
|---|
| 357 |
if ( i % 16 == 0 ) { |
|---|
| 358 |
l1 += strutil::StringPrintf("\n%06d", static_cast<int32>(i)); |
|---|
| 359 |
} |
|---|
| 360 |
l1 += strutil::StringPrintf(" 0x%02x, ", |
|---|
| 361 |
static_cast<int32>(buffer[i] & 0xff)); |
|---|
| 362 |
} |
|---|
| 363 |
const string str_size = StringOf(size); |
|---|
| 364 |
return "#" + str_size + " bytes HEXA: \n" + l1 + "\n"; |
|---|
| 365 |
} |
|---|
| 366 |
|
|---|
| 367 |
|
|---|
| 368 |
// Splits `s` at every occurrence of `separator` and appends the pieces to
// `*output` (existing content of `*output` is kept).
// Empty pieces are preserved, so an input without any separator - including
// the empty string - produces exactly one piece. With an empty `separator`
// the string is split into its individual characters.
void SplitString(const string& s,
                 const string& separator,
                 vector<string>* output) {
  if ( separator.empty() ) {
    for ( size_t i = 0; i < s.size(); ++i ) {
      output->push_back(string(1, s[i]));
    }
    return;
  }

  size_t start = 0;
  while ( true ) {
    const size_t found = s.find(separator, start);
    if ( found == string::npos ) {
      output->push_back(s.substr(start));  // last piece, possibly empty
      return;
    }
    output->push_back(s.substr(start, found - start));
    start = found + separator.size();
  }
}

// Splits `s` into elements at `elements_separator` (see SplitString()), then
// splits every element at the first occurrence of `pair_separator` into a
// (first, second) pair appended to `*output`. Elements that do not contain
// `pair_separator` produce (element, "").
// The whole `pair_separator` is skipped, whatever its length, and the two
// halves are copied in full.
void SplitPairs(const string& s,
                const string& elements_separator,  // sep1
                const string& pair_separator,      // sep2
                vector< pair<string, string> >* output) {
  vector<string> elements;
  SplitString(s, elements_separator, &elements);
  for ( size_t i = 0; i < elements.size(); ++i ) {
    const string& element = elements[i];
    const size_t pos_sep = element.find(pair_separator);
    if ( pos_sep != string::npos ) {
      output->push_back(
          make_pair(element.substr(0, pos_sep),
                    element.substr(pos_sep + pair_separator.size())));
    } else {
      output->push_back(make_pair(element, string("")));
    }
  }
}
|---|
| 407 |
|
|---|
| 408 |
// Splits `s` at every `separator` that is not enclosed between
// `open_bracket` and `close_bracket`, appending the pieces to `*output`.
// Returns false - possibly after having appended some pieces - when a
// closing bracket appears without a matching opening one, or when brackets
// are still open at the end of the string. An empty piece after the last
// separator is not appended; other empty pieces are.
bool SplitBracketedString(const char* s,
                          const char separator,
                          const char open_bracket,
                          const char close_bracket,
                          vector<string>* output) {
  const char* piece_start = s;
  const char* cursor = s;
  int depth = 0;  // number of currently open brackets
  for ( ; *cursor != '\0'; ++cursor ) {
    const char c = *cursor;
    if ( c == open_bracket ) {
      ++depth;
    } else if ( c == close_bracket ) {
      if ( --depth < 0 ) {
        return false;  // closing bracket without an opening one
      }
    } else if ( c == separator && depth == 0 ) {
      output->push_back(string(piece_start, cursor - piece_start));
      piece_start = cursor + 1;
    }
  }
  if ( depth != 0 ) {
    return false;  // unbalanced: some bracket was never closed
  }
  if ( cursor > piece_start ) {
    output->push_back(string(piece_start, cursor - piece_start));
  }
  return true;
}
|---|
| 434 |
// Returns `s` without its first and last character when these are
// `open_bracket` and `close_bracket` respectively; otherwise returns `s`
// unchanged. A string holding just the two brackets yields "".
string RemoveOutermostBrackets(const string& s,
                               const char open_bracket,
                               const char close_bracket) {
  // Two characters are enough to hold a bracket pair.
  if ( s.length() >= 2 &&
       s[0] == open_bracket && s[s.length() - 1] == close_bracket ) {
    return s.substr(1, s.length() - 2);
  }
  return s;
}
|---|
| 444 |
|
|---|
| 445 |
namespace {
// The helpers below treat `escapes` as a 256-bit set stored in eight uint32
// words: the byte value of a character selects word (value >> 5) and bit
// (value & 0x1f).

// Adds character `c` to the set.
inline void escape_set_bit(uint32* escapes, char c) {
  const uint8 id = static_cast<uint8>(c);
  // Shift an unsigned one: bit 31 must not go through a signed int.
  escapes[id >> 5] |= (static_cast<uint32>(1) << (id & 0x1f));
}

// Returns true if character `c` is in the set.
inline bool is_set_bit(const uint32* escapes, char c) {
  const uint8 id = static_cast<uint8>(c);
  return (escapes[id >> 5] & (static_cast<uint32>(1) << (id & 0x1f))) != 0;
}

// Returns the numeric value (0..15) of hexadecimal digit `c`, accepting both
// letter cases. Any other character yields 0.
inline uint8 hexval(char c) {
  if ( c >= '0' && c <= '9' ) return static_cast<uint8>(c - '0');
  if ( c >= 'a' && c <= 'f' ) return static_cast<uint8>(10 + c - 'a');
  if ( c >= 'A' && c <= 'F' ) return static_cast<uint8>(10 + c - 'A');
  return 0;
}
}
|---|
| 463 |
|
|---|
| 464 |
string StrNEscape(const char* text, size_t size, char escape, |
|---|
| 465 |
const char* chars_to_escape) { |
|---|
| 466 |
uint32 escapes[256/32] = { 0, 0, 0, 0, |
|---|
| 467 |
0, 0, 0, 0 }; |
|---|
| 468 |
const char* p = chars_to_escape; |
|---|
| 469 |
while ( *p ) { |
|---|
| 470 |
escape_set_bit(escapes, *p++); |
|---|
| 471 |
} |
|---|
| 472 |
escape_set_bit(escapes, escape); |
|---|
| 473 |
const char* t = text; |
|---|
| 474 |
int i = 0; |
|---|
| 475 |
string ret; |
|---|
| 476 |
while ( i++ < size ) { |
|---|
| 477 |
if ( is_set_bit(escapes, *t) || *t < 32 || *t > 126 ) { |
|---|
| 478 |
ret.append(strutil::StringPrintf("%c%02x", |
|---|
| 479 |
escape, |
|---|
| 480 |
static_cast<unsigned int>( |
|---|
| 481 |
static_cast<uint8>(*t) & 0xff))); |
|---|
| 482 |
} else { |
|---|
| 483 |
ret.push_back(*t); |
|---|
| 484 |
} |
|---|
| 485 |
++t; |
|---|
| 486 |
} |
|---|
| 487 |
return ret; |
|---|
| 488 |
} |
|---|
| 489 |
|
|---|
| 490 |
// Drops every trailing '\xff' character of `prefix`, then returns what is
// left with its last character incremented by one. Returns "" when `prefix`
// is empty or made only of '\xff' characters.
string GetNextInLexicographicOrder(const string& prefix) {
  // Index of the last character that can still be incremented.
  const string::size_type last = prefix.find_last_not_of('\xff');
  if ( last == string::npos ) {
    return "";
  }
  string upper_bound(prefix, 0, last + 1);
  upper_bound[last] = prefix[last] + 1;
  return upper_bound;
}
|---|
| 502 |
|
|---|
| 503 |
string StrEscape(const char* text, char escape, |
|---|
| 504 |
const char* chars_to_escape) { |
|---|
| 505 |
return StrNEscape(text, strlen(text), escape, chars_to_escape); |
|---|
| 506 |
} |
|---|
| 507 |
|
|---|
| 508 |
string StrEscape(const string& text, |
|---|
| 509 |
char escape, |
|---|
| 510 |
const char* chars_to_escape) { |
|---|
| 511 |
return StrNEscape(text.data(), text.size(),escape, chars_to_escape); |
|---|
| 512 |
} |
|---|
| 513 |
|
|---|
| 514 |
string StrUnescape(const char* text, char escape) { |
|---|
| 515 |
const char* p = text; |
|---|
| 516 |
size_t esc_count = 0; |
|---|
| 517 |
|
|---|
| 518 |
scoped_array<char> dest(new char[strlen(text)+1]); |
|---|
| 519 |
char* d = dest.get(); |
|---|
| 520 |
while ( *p ) { |
|---|
| 521 |
if ( *p == escape ) { |
|---|
| 522 |
esc_count = 1; |
|---|
| 523 |
} else if ( esc_count ) { |
|---|
| 524 |
if ( esc_count == 2 ) { |
|---|
| 525 |
*d++ = static_cast<char>(hexval(*p) + 16 * hexval(*(p-1))); |
|---|
| 526 |
esc_count = 0; |
|---|
| 527 |
} else { |
|---|
| 528 |
++esc_count; |
|---|
| 529 |
} |
|---|
| 530 |
} else { |
|---|
| 531 |
*d++ = *p; |
|---|
| 532 |
} |
|---|
| 533 |
++p; |
|---|
| 534 |
} |
|---|
| 535 |
return string(dest.get(), d - dest.get()); |
|---|
| 536 |
} |
|---|
| 537 |
|
|---|
| 538 |
string StrUnescape(const string& text, char escape) { |
|---|
| 539 |
return StrUnescape(text.c_str(), escape); |
|---|
| 540 |
} |
|---|
| 541 |
} |
|---|
| 542 |
|
|---|
| 543 |
namespace {
// Lower-case hexadecimal digits, indexed by nibble value (0..15).
// Read-only lookup table.
const char kHexaDigits[] = "0123456789abcdef";
}
|---|
| 546 |
|
|---|
| 547 |
namespace strutil { |
|---|
| 548 |
// Old implementation : user 0m6.368s |
|---|
| 549 |
// New implementation : user 0m0.568s |
|---|
| 550 |
|
|---|
| 551 |
string JsonStrEscape(const char* text, size_t size) { |
|---|
| 552 |
const char* p = text; |
|---|
| 553 |
scoped_array<char> dest(new char[size * 3 + 4]); |
|---|
| 554 |
uint8* d = reinterpret_cast<uint8*>(dest.get()); |
|---|
| 555 |
string s; |
|---|
| 556 |
while ( size-- ) { |
|---|
| 557 |
const uint8 c = *p++; |
|---|
| 558 |
if ( c >= ' ' && c <= '~' ) { |
|---|
| 559 |
// ASCII printable |
|---|
| 560 |
if ( c == '\\' || c == '\"' || c == '/' ) { |
|---|
| 561 |
*d++ = '\\'; |
|---|
| 562 |
} |
|---|
| 563 |
*d++ = c; |
|---|
| 564 |
} else { |
|---|
| 565 |
*d++ = '\\'; |
|---|
| 566 |
switch ( c ) { |
|---|
| 567 |
case '\b': *d++ = 'b'; break; |
|---|
| 568 |
case '\f': *d++ = 'f'; break; |
|---|
| 569 |
case '\n': *d++ = 'n'; break; |
|---|
| 570 |
case '\r': *d++ = 'r'; break; |
|---|
| 571 |
case '\t': *d++ = 't'; break; |
|---|
| 572 |
default: |
|---|
| 573 |
*d++ = 'u'; |
|---|
| 574 |
if ( size > 0 ) { |
|---|
| 575 |
--size; |
|---|
| 576 |
const uint8 c2 = *p++; |
|---|
| 577 |
*d++ = kHexaDigits[c >> 4]; |
|---|
| 578 |
*d++ = kHexaDigits[c & 0xf]; |
|---|
| 579 |
*d++ = kHexaDigits[c2 >> 4]; |
|---|
| 580 |
*d++ = kHexaDigits[c2 & 0xf]; |
|---|
| 581 |
} else { |
|---|
| 582 |
*d++ = '0'; |
|---|
| 583 |
*d++ = '0'; |
|---|
| 584 |
*d++ = kHexaDigits[c >> 4]; |
|---|
| 585 |
*d++ = kHexaDigits[c & 0xf]; |
|---|
| 586 |
} |
|---|
| 587 |
} |
|---|
| 588 |
} |
|---|
| 589 |
} |
|---|
| 590 |
return string(dest.get(), d - reinterpret_cast<uint8*>(dest.get())); |
|---|
| 591 |
} |
|---|
| 592 |
|
|---|
| 593 |
string JsonStrUnescape(const char* text, size_t size) { |
|---|
| 594 |
const char* p = text; |
|---|
| 595 |
scoped_array<char> dest(new char[size * 3 + 4]); |
|---|
| 596 |
uint8* d = reinterpret_cast<uint8*>(dest.get()); |
|---|
| 597 |
bool in_escape = false; |
|---|
| 598 |
while ( size ) { |
|---|
| 599 |
--size; |
|---|
| 600 |
if ( in_escape ) { |
|---|
| 601 |
switch ( *p ) { |
|---|
| 602 |
case '\\': *d++ = '\\'; break; |
|---|
| 603 |
case '\"': *d++ = '\"'; break; |
|---|
| 604 |
case '/': *d++ = '/' ; break; |
|---|
| 605 |
case 'b': *d++ = '\b'; break; |
|---|
| 606 |
case 'f': *d++ = '\f'; break; |
|---|
| 607 |
case 'n': *d++ = '\n'; break; |
|---|
| 608 |
case 'r': *d++ = '\r'; break; |
|---|
| 609 |
case 't': *d++ = '\t'; break; |
|---|
| 610 |
case 'u': { |
|---|
| 611 |
if ( size < 4 ) { |
|---|
| 612 |
break; |
|---|
| 613 |
} |
|---|
| 614 |
++p; |
|---|
| 615 |
*d++ = static_cast<uint8>(hexval(*(p+1)) + |
|---|
| 616 |
16 * hexval(*p-1)); |
|---|
| 617 |
++p; ++p; |
|---|
| 618 |
*d++ = static_cast<uint8>(hexval(*(p+1)) + |
|---|
| 619 |
16 * hexval(*p-1)); |
|---|
| 620 |
++p; // leave room for the next ++ |
|---|
| 621 |
size -= 4; |
|---|
| 622 |
} |
|---|
| 623 |
break; |
|---|
| 624 |
default: *d++ = *p; break; |
|---|
| 625 |
} |
|---|
| 626 |
in_escape = false; |
|---|
| 627 |
} else if ( *p == '\\' ) { |
|---|
| 628 |
in_escape = true; |
|---|
| 629 |
} else { |
|---|
| 630 |
if ( !U8_IS_SINGLE(*p) ) { |
|---|
| 631 |
// this is not a US-ASCII 0..0x7f but a UTF-8 lead or trail byte |
|---|
| 632 |
} else { |
|---|
| 633 |
*d++ = *p; |
|---|
| 634 |
} |
|---|
| 635 |
} |
|---|
| 636 |
++p; |
|---|
| 637 |
} |
|---|
| 638 |
return string(dest.get(), d - reinterpret_cast<uint8*>(dest.get())); |
|---|
| 639 |
} |
|---|
| 640 |
|
|---|
| 641 |
} |
|---|
| 642 |
|
|---|
| 643 |
namespace {
// States of the scanner implemented by strutil::StrMapFormat().
enum StrFormatState {
  IN_TEXT = 0,           // copying ordinary text to the output
  IN_SYMBOL = 1,         // collecting a symbol name, after the begin marker
  IN_ESCAPE_TEXT = 2,    // right after an escape character met in text
  IN_ESCAPE_SYMBOL = 3   // right after an escape character met in a symbol
};
}
|---|
| 651 |
|
|---|
| 652 |
namespace strutil { |
|---|
| 653 |
string StrMapFormat(const char* s, |
|---|
| 654 |
const map<string, string>& m, |
|---|
| 655 |
const char* arg_begin, |
|---|
| 656 |
const char* arg_end, |
|---|
| 657 |
char escape_char) { |
|---|
| 658 |
CHECK(*arg_begin != '\0'); |
|---|
| 659 |
CHECK(*arg_end != '\0'); |
|---|
| 660 |
const int size_begin = strlen(arg_begin); |
|---|
| 661 |
const int size_end = strlen(arg_end); |
|---|
| 662 |
StrFormatState state = IN_TEXT; |
|---|
| 663 |
|
|---|
| 664 |
string out; |
|---|
| 665 |
string symbol; |
|---|
| 666 |
const char* begin = s; |
|---|
| 667 |
const char* p = s; |
|---|
| 668 |
while ( *p ) { |
|---|
| 669 |
switch ( state ) { |
|---|
| 670 |
case IN_TEXT: |
|---|
| 671 |
if ( *p == escape_char ) { |
|---|
| 672 |
state = IN_ESCAPE_TEXT; |
|---|
| 673 |
out.append(begin, p - begin); |
|---|
| 674 |
++p; |
|---|
| 675 |
begin = p; |
|---|
| 676 |
} else if ( StrPrefix(p, arg_begin) ) { |
|---|
| 677 |
out.append(begin, p - begin); |
|---|
| 678 |
state = IN_SYMBOL; |
|---|
| 679 |
symbol.clear(); |
|---|
| 680 |
p += size_begin; |
|---|
| 681 |
begin = p; |
|---|
| 682 |
} else { |
|---|
| 683 |
++p; |
|---|
| 684 |
} |
|---|
| 685 |
break; |
|---|
| 686 |
case IN_SYMBOL: |
|---|
| 687 |
if ( *p == escape_char ) { |
|---|
| 688 |
state = IN_ESCAPE_SYMBOL; |
|---|
| 689 |
symbol.append(begin, p - begin); |
|---|
| 690 |
++p; |
|---|
| 691 |
begin = p; |
|---|
| 692 |
} else if ( StrPrefix(p, arg_end) ) { |
|---|
| 693 |
symbol.append(begin, p - begin); |
|---|
| 694 |
state = IN_TEXT; |
|---|
| 695 |
p += size_end; |
|---|
| 696 |
begin = p; |
|---|
| 697 |
map<string, string>::const_iterator it = m.find(symbol); |
|---|
| 698 |
if ( it != m.end() ) { |
|---|
| 699 |
out.append(it->second); |
|---|
| 700 |
} |
|---|
| 701 |
} else { |
|---|
| 702 |
++p; |
|---|
| 703 |
} |
|---|
| 704 |
break; |
|---|
| 705 |
case IN_ESCAPE_TEXT: |
|---|
| 706 |
if ( *p != *arg_begin ) { |
|---|
| 707 |
out.append(1, escape_char); |
|---|
| 708 |
} |
|---|
| 709 |
out.append(1, *p); |
|---|
| 710 |
++p; |
|---|
| 711 |
begin = p; |
|---|
| 712 |
state = IN_TEXT; |
|---|
| 713 |
break; |
|---|
| 714 |
case IN_ESCAPE_SYMBOL: |
|---|
| 715 |
if ( *p != *arg_end ) { |
|---|
| 716 |
symbol.append(1, escape_char); |
|---|
| 717 |
} |
|---|
| 718 |
symbol.append(1, *p); |
|---|
| 719 |
++p; |
|---|
| 720 |
begin = p; |
|---|
| 721 |
state = IN_SYMBOL; |
|---|
| 722 |
break; |
|---|
| 723 |
} |
|---|
| 724 |
} |
|---|
| 725 |
switch ( state ) { |
|---|
| 726 |
case IN_TEXT: |
|---|
| 727 |
out.append(begin, p - begin); |
|---|
| 728 |
break; |
|---|
| 729 |
case IN_SYMBOL: |
|---|
| 730 |
case IN_ESCAPE_SYMBOL: |
|---|
| 731 |
// LOG_WARNING << " Invalid escape string received for formatting: [" |
|---|
| 732 |
// << JsonStrEscape(s) << "]"; |
|---|
| 733 |
break; |
|---|
| 734 |
case IN_ESCAPE_TEXT: |
|---|
| 735 |
out.append(1, escape_char); |
|---|
| 736 |
break; |
|---|
| 737 |
} |
|---|
| 738 |
return out; |
|---|
| 739 |
} |
|---|
| 740 |
|
|---|
| 741 |
} |
|---|