C++ Utilities  4.17.0
Useful C++ classes and routines such as argument parser, IO and conversion utilities
stringconversion.h
Go to the documentation of this file.
1 #ifndef CONVERSION_UTILITIES_STRINGCONVERSION_H
2 #define CONVERSION_UTILITIES_STRINGCONVERSION_H
3 
4 #include "./binaryconversion.h"
6 
7 #include "../misc/traits.h"
8 
9 #include <cstdlib>
10 #include <cstring>
11 #include <initializer_list>
12 #include <iomanip>
13 #include <list>
14 #include <memory>
15 #include <sstream>
16 #include <string>
17 #include <vector>
18 
19 namespace ConversionUtilities {
20 
/*!
 * \brief The StringDataDeleter struct deletes the data of a StringData instance.
 *
 * It is used as deleter of the std::unique_ptr held by StringData.
 */
struct StringDataDeleter {
    /*!
     * \brief Deletes the specified \a stringData with std::free().
     * \param stringData Pointer to release; passing a null pointer is harmless because std::free() ignores it.
     */
    void operator()(char *stringData)
    {
        std::free(stringData);
    }
};
34 
38 using StringData = std::pair<std::unique_ptr<char[], StringDataDeleter>, std::size_t>;
39 //using StringData = std::pair<std::unique_ptr<char>, std::size_t>; // might work too
40 
42  const char *fromCharset, const char *toCharset, const char *inputBuffer, std::size_t inputBufferSize, float outputBufferSizeFactor = 1.0f);
43 CPP_UTILITIES_EXPORT StringData convertUtf8ToUtf16LE(const char *inputBuffer, std::size_t inputBufferSize);
44 CPP_UTILITIES_EXPORT StringData convertUtf16LEToUtf8(const char *inputBuffer, std::size_t inputBufferSize);
45 CPP_UTILITIES_EXPORT StringData convertUtf8ToUtf16BE(const char *inputBuffer, std::size_t inputBufferSize);
46 CPP_UTILITIES_EXPORT StringData convertUtf16BEToUtf8(const char *inputBuffer, std::size_t inputBufferSize);
47 CPP_UTILITIES_EXPORT StringData convertLatin1ToUtf8(const char *inputBuffer, std::size_t inputBufferSize);
48 CPP_UTILITIES_EXPORT StringData convertUtf8ToLatin1(const char *inputBuffer, std::size_t inputBufferSize);
49 
50 #ifdef PLATFORM_WINDOWS
51 using WideStringData = std::pair<std::unique_ptr<wchar_t[]>, int>;
52 CPP_UTILITIES_EXPORT WideStringData convertMultiByteToWide(const char *inputBuffer, int inputBufferSize = -1);
53 CPP_UTILITIES_EXPORT WideStringData convertMultiByteToWide(const std::string &inputBuffer);
54 #endif
55 
56 CPP_UTILITIES_EXPORT void truncateString(std::string &str, char terminationChar = '\0');
57 
/*!
 * \brief Joins the given \a strings using the specified \a delimiter.
 *
 * Each joined entry is wrapped as leftClosure + entry + rightClosure and consecutive
 * entries are separated by \a delimiter.
 *
 * \param strings The entries to join.
 * \param delimiter Inserted between two consecutive entries (default: empty).
 * \param omitEmpty Whether empty entries are skipped entirely.
 * \param leftClosure Prepended to every joined entry.
 * \param rightClosure Appended to every joined entry.
 * \returns The joined string; empty if there is nothing to join.
 */
template <class Container = std::initializer_list<std::string>>
typename Container::value_type joinStrings(const Container &strings,
    const typename Container::value_type &delimiter = typename Container::value_type(), bool omitEmpty = false,
    const typename Container::value_type &leftClosure = typename Container::value_type(),
    const typename Container::value_type &rightClosure = typename Container::value_type())
{
    typename Container::value_type res;
    if (!strings.size()) {
        return res;
    }
    // first pass: determine the number of entries and the final size so only one allocation is needed
    std::size_t entries = 0, size = 0;
    for (const auto &str : strings) {
        if (omitEmpty && str.empty()) {
            continue;
        }
        size += str.size();
        ++entries;
    }
    if (!entries) {
        return res;
    }
    size += (entries * leftClosure.size()) + (entries * rightClosure.size()) + ((entries - 1) * delimiter.size());
    res.reserve(size);
    // second pass: track "first entry" explicitly; checking res.empty() instead would drop the
    // delimiter after leading empty entries when the closures are empty as well
    bool firstEntry = true;
    for (const auto &str : strings) {
        if (omitEmpty && str.empty()) {
            continue;
        }
        if (firstEntry) {
            firstEntry = false;
        } else {
            res.append(delimiter);
        }
        res.append(leftClosure);
        res.append(str);
        res.append(rightClosure);
    }
    return res;
}
107 
/*!
 * \brief Converts the specified \a arrayOfLines to a multiline string.
 *
 * The lines are joined with a line feed; empty lines are kept.
 *
 * \returns A single string (the container's value type). The previously declared return type
 *          std::vector<std::string> could not be initialized from the joined string, so the
 *          template failed to compile as soon as it was instantiated.
 */
template <class Container = std::initializer_list<std::string>>
inline typename Container::value_type toMultiline(const Container &arrayOfLines)
{
    return joinStrings(arrayOfLines, "\n", false);
}
115 
/*!
 * \brief Specifies the role of empty parts when splitting strings.
 */
enum class EmptyPartsTreat {
    Keep, /*!< empty parts are added to the result like any other part */
    Omit, /*!< empty parts are dropped from the result */
    Merge /*!< an empty part causes the part following it to be appended (with the delimiter) to the preceding part */
};
124 
134 template <class Container = std::list<std::string>>
135 Container splitString(const typename Container::value_type &string, const typename Container::value_type &delimiter,
136  EmptyPartsTreat emptyPartsRole = EmptyPartsTreat::Keep, int maxParts = -1)
137 {
138  --maxParts;
139  Container res;
140  bool merge = false;
141  for (typename Container::value_type::size_type i = 0, end = string.size(), delimPos; i < end; i = delimPos + delimiter.size()) {
142  delimPos = string.find(delimiter, i);
143  if (!merge && maxParts >= 0 && res.size() == static_cast<typename Container::value_type::size_type>(maxParts)) {
144  if (delimPos == i && emptyPartsRole == EmptyPartsTreat::Merge) {
145  if (!res.empty()) {
146  merge = true;
147  continue;
148  }
149  }
150  delimPos = Container::value_type::npos;
151  }
152  if (delimPos == Container::value_type::npos) {
153  delimPos = string.size();
154  }
155  if (emptyPartsRole == EmptyPartsTreat::Keep || i != delimPos) {
156  if (merge) {
157  res.back().append(delimiter);
158  res.back().append(string.substr(i, delimPos - i));
159  merge = false;
160  } else {
161  res.emplace_back(string.substr(i, delimPos - i));
162  }
163  } else if (emptyPartsRole == EmptyPartsTreat::Merge) {
164  if (!res.empty()) {
165  merge = true;
166  }
167  }
168  }
169  return res;
170 }
171 
/*!
 * \brief Splits the given \a string (which might also be a string view) at the specified \a delimiter.
 * \param string The string to be split.
 * \param delimiter The delimiter to split at. An empty delimiter cannot split anything; in that case the
 *        whole (non-empty) \a string is returned as the only part.
 * \param maxParts Maximal number of parts; the last part receives the unsplit remainder. Values less or
 *        equal zero mean there is no limit.
 * \tparam Container Container type to return, e.g. std::list<std::string> or std::vector<std::string>.
 * \returns The parts, empty parts included. Nothing is added for an empty \a string or for an empty part
 *          after a trailing delimiter.
 */
template <class Container = std::list<std::string>>
Container splitStringSimple(const typename Container::value_type &string, const typename Container::value_type &delimiter, int maxParts = -1)
{
    Container res;
    // an empty delimiter is "found" at every position without ever advancing the scan position which
    // made the loop below spin forever; treat it as "nothing to split at" instead
    if (delimiter.empty()) {
        if (!string.empty()) {
            res.emplace_back(string);
        }
        return res;
    }
    // from here on maxParts is the number of parts after which the remainder goes into the last part
    --maxParts;
    for (typename Container::value_type::size_type i = 0, end = string.size(), delimPos; i < end; i = delimPos + delimiter.size()) {
        delimPos = string.find(delimiter, i);
        if (maxParts >= 0 && res.size() == static_cast<typename Container::value_type::size_type>(maxParts)) {
            // limit reached: ignore further delimiters
            delimPos = Container::value_type::npos;
        }
        if (delimPos == Container::value_type::npos) {
            delimPos = string.size();
        }
        res.emplace_back(string.substr(i, delimPos - i));
    }
    return res;
}
198 
202 template <class Container = std::vector<std::string>> inline std::vector<std::string> toArrayOfLines(const std::string &multilineString)
203 {
204  return splitString<Container>(multilineString, "\n", EmptyPartsTreat::Keep);
205 }
206 
/*!
 * \brief Returns whether \a str starts with \a phrase.
 * \remarks Every string starts with the empty phrase and with itself. (Previously `false` was returned
 *          when \a str and \a phrase were equal because the end of the phrase was never checked after
 *          the last character of \a str.)
 */
template <typename StringType> bool startsWith(const StringType &str, const StringType &phrase)
{
    if (str.size() < phrase.size()) {
        return false;
    }
    // str is at least as long as phrase, so walking along phrase never runs past the end of str
    auto stri = str.cbegin();
    for (auto phrasei = phrase.cbegin(), phraseend = phrase.cend(); phrasei != phraseend; ++phrasei, ++stri) {
        if (*stri != *phrasei) {
            return false;
        }
    }
    return true;
}
224 
/*!
 * \brief Returns whether \a str starts with \a phrase.
 * \param str The string to check.
 * \param phrase Null-terminated phrase to look for at the beginning of \a str.
 * \remarks Every string starts with the empty phrase and with itself. (Previously `false` was returned
 *          when \a str equalled \a phrase because the terminator was never checked after the last
 *          character of \a str.)
 */
template <typename StringType> bool startsWith(const StringType &str, const typename StringType::value_type *phrase)
{
    auto stri = str.cbegin();
    const auto strend = str.cend();
    // walk along the phrase; running out of str first means str is too short
    for (; *phrase; ++phrase, ++stri) {
        if (stri == strend || *stri != *phrase) {
            return false;
        }
    }
    return true;
}
239 
/*!
 * \brief Returns whether \a str contains the specified \a substrings.
 * \remarks The \a substrings must occur in the specified order and must not overlap: the search for
 *          each substring starts right behind the match of the previous one.
 */
template <typename StringType> bool containsSubstrings(const StringType &str, std::initializer_list<StringType> substrings)
{
    typename StringType::size_type searchFrom = 0;
    for (auto current = substrings.begin(), last = substrings.end(); current != last; ++current) {
        searchFrom = str.find(*current, searchFrom);
        if (searchFrom == StringType::npos) {
            return false;
        }
        // continue behind the match
        searchFrom += current->size();
    }
    return true;
}
255 
/*!
 * \brief Returns whether \a str contains the specified \a substrings.
 * \param str The string to search in.
 * \param substrings Null-terminated substrings to look for.
 * \remarks
 * - The \a substrings must occur in the specified order and must not overlap: the search for each
 *   substring starts right behind the match of the previous one.
 * - The length of a substring is determined via std::char_traits so that character types other than
 *   `char` (e.g. std::wstring) are supported as well; std::strlen() only accepts `const char *`.
 */
template <typename StringType>
bool containsSubstrings(const StringType &str, std::initializer_list<const typename StringType::value_type *> substrings)
{
    typename StringType::size_type currentPos = 0;
    for (const auto *substr : substrings) {
        if ((currentPos = str.find(substr, currentPos)) == StringType::npos) {
            return false;
        }
        // continue behind the match
        currentPos += std::char_traits<typename StringType::value_type>::length(substr);
    }
    return true;
}
272 
/*!
 * \brief Replaces all occurrences of \a find with \a replace in the specified \a str.
 * \remarks
 * - The replacement itself is not scanned again, so \a replace may contain \a find.
 * - An empty \a find leaves \a str untouched. (It would match at every position and used to make the
 *   loop run forever.)
 */
template <typename StringType> void findAndReplace(StringType &str, const StringType &find, const StringType &replace)
{
    if (find.empty()) {
        return;
    }
    for (typename StringType::size_type i = 0; (i = str.find(find, i)) != StringType::npos; i += replace.size()) {
        str.replace(i, find.size(), replace);
    }
}
282 
/*!
 * \brief Returns the character representation of the specified \a digit.
 * \remarks Digits up to 9 map to '0'..'9'; greater digits map to upper-case letters starting
 *          with 'A' for 10. The \a digit is not range-checked.
 */
template <typename CharType> constexpr CharType digitToChar(CharType digit)
{
    return static_cast<CharType>(digit > 9 ? (digit + 'A' - 10) : (digit + '0'));
}
293 
300 template <typename IntegralType, class StringType = std::string,
301  Traits::EnableIf<std::is_integral<IntegralType>, std::is_unsigned<IntegralType>> * = nullptr>
302 StringType numberToString(IntegralType number, typename StringType::value_type base = 10)
303 {
304  std::size_t resSize = 0;
305  for (auto n = number; n; n /= base, ++resSize)
306  ;
307  StringType res;
308  res.reserve(resSize);
309  do {
310  res.insert(res.begin(), digitToChar<typename StringType::value_type>(number % base));
311  number /= base;
312  } while (number);
313  return res;
314 }
315 
322 template <typename IntegralType, class StringType = std::string,
323  Traits::EnableIf<std::is_integral<IntegralType>, std::is_signed<IntegralType>> * = nullptr>
324 StringType numberToString(IntegralType number, typename StringType::value_type base = 10)
325 {
326  const bool negative = number < 0;
327  std::size_t resSize;
328  if (negative) {
329  number = -number, resSize = 1;
330  } else {
331  resSize = 0;
332  }
333  for (auto n = number; n; n /= base, ++resSize)
334  ;
335  StringType res;
336  res.reserve(resSize);
337  do {
338  res.insert(res.begin(), digitToChar<typename StringType::value_type>(number % base));
339  number /= base;
340  } while (number);
341  if (negative) {
342  res.insert(res.begin(), '-');
343  }
344  return res;
345 }
346 
355 template <typename FloatingType, class StringType = std::string, Traits::EnableIf<std::is_floating_point<FloatingType>> * = nullptr>
356 StringType numberToString(FloatingType number, typename StringType::value_type base = 10)
357 {
358  std::basic_stringstream<typename StringType::value_type> ss;
359  ss << std::setbase(base) << number;
360  return ss.str();
361 }
362 
368 template <typename CharType> CharType charToDigit(CharType character, CharType base)
369 {
370  CharType res = base;
371  if (character >= '0' && character <= '9') {
372  res = character - '0';
373  } else if (character >= 'a' && character <= 'z') {
374  res = character - 'a' + 10;
375  } else if (character >= 'A' && character <= 'Z') {
376  res = character - 'A' + 10;
377  }
378  if (res < base) {
379  return res;
380  }
381  std::string errorMsg;
382  errorMsg.reserve(36);
383  errorMsg += "The character \"";
384  errorMsg += character;
385  errorMsg += "\" is no valid digit.";
386  throw ConversionException(std::move(errorMsg));
387 }
388 
397 template <typename IntegralType, typename StringType, Traits::EnableIf<std::is_integral<IntegralType>, std::is_unsigned<IntegralType>> * = nullptr>
398 IntegralType stringToNumber(const StringType &string, typename StringType::value_type base = 10)
399 {
400  IntegralType result = 0;
401  for (const auto &c : string) {
402  if (c == ' ') {
403  continue;
404  }
405  result *= base;
406  result += charToDigit<typename StringType::value_type>(c, base);
407  }
408  return result;
409 }
410 
419 template <typename IntegralType, class StringType, Traits::EnableIf<std::is_integral<IntegralType>, std::is_signed<IntegralType>> * = nullptr>
420 IntegralType stringToNumber(const StringType &string, typename StringType::value_type base = 10)
421 {
422  auto i = string.begin();
423  auto end = string.end();
424  for (; i != end && *i == ' '; ++i)
425  ;
426  if (i == end) {
427  return 0;
428  }
429  const bool negative = (*i == '-');
430  if (negative) {
431  ++i;
432  }
433  IntegralType result = 0;
434  for (; i != end; ++i) {
435  if (*i == ' ') {
436  continue;
437  }
438  result *= base;
439  result += charToDigit<typename StringType::value_type>(*i, base);
440  }
441  return negative ? -result : result;
442 }
443 
454 template <typename FloatingType, class StringType, Traits::EnableIf<std::is_floating_point<FloatingType>> * = nullptr>
455 FloatingType stringToNumber(const StringType &string, typename StringType::value_type base = 10)
456 {
457  std::basic_stringstream<typename StringType::value_type> ss;
458  ss << std::setbase(base) << string;
459  FloatingType result;
460  if ((ss >> result) && ss.eof()) {
461  return result;
462  }
463  std::string errorMsg;
464  errorMsg.reserve(42 + string.size());
465  errorMsg += "The string \"";
466  errorMsg += string;
467  errorMsg += "\" is no valid floating number.";
468  throw ConversionException(errorMsg);
469 }
470 
479 template <typename IntegralType, class CharType, Traits::EnableIf<std::is_integral<IntegralType>, std::is_unsigned<IntegralType>> * = nullptr>
480 IntegralType stringToNumber(const CharType *string, unsigned char base = 10)
481 {
482  IntegralType result = 0;
483  for (; *string; ++string) {
484  if (*string == ' ') {
485  continue;
486  }
487  result *= base;
488  result += charToDigit<CharType>(*string, base);
489  }
490  return result;
491 }
492 
503 template <typename FloatingType, class CharType, Traits::EnableIf<std::is_floating_point<FloatingType>> * = nullptr>
504 FloatingType stringToNumber(const CharType *string, unsigned char base = 10)
505 {
506  std::basic_stringstream<CharType> ss;
507  ss << std::setbase(base) << string;
508  FloatingType result;
509  if ((ss >> result) && ss.eof()) {
510  return result;
511  }
512  std::string errorMsg;
513  errorMsg.reserve(42 + std::char_traits<CharType>::length(string));
514  errorMsg += "The string \"";
515  errorMsg += string;
516  errorMsg += "\" is no valid floating number.";
517  throw ConversionException(errorMsg);
518 }
519 
527 template <typename IntegralType, class CharType, Traits::EnableIf<std::is_integral<IntegralType>, std::is_unsigned<IntegralType>> * = nullptr>
528 IntegralType bufferToNumber(const CharType *string, std::size_t size, unsigned char base = 10)
529 {
530  IntegralType result = 0;
531  for (const CharType *end = string + size; string != end; ++string) {
532  if (*string == ' ') {
533  continue;
534  }
535  result *= base;
536  result += charToDigit<CharType>(*string, base);
537  }
538  return result;
539 }
540 
548 template <typename IntegralType, class CharType, Traits::EnableIf<std::is_integral<IntegralType>, std::is_signed<IntegralType>> * = nullptr>
549 IntegralType stringToNumber(const CharType *string, unsigned char base = 10)
550 {
551  if (!*string) {
552  return 0;
553  }
554  for (; *string && *string == ' '; ++string)
555  ;
556  if (!*string) {
557  return 0;
558  }
559  const bool negative = (*string == '-');
560  if (negative) {
561  ++string;
562  }
563  IntegralType result = 0;
564  for (; *string; ++string) {
565  if (*string == ' ') {
566  continue;
567  }
568  result *= base;
569  result += charToDigit<CharType>(*string, base);
570  }
571  return negative ? -result : result;
572 }
573 
581 template <typename IntegralType, class CharType, Traits::EnableIf<std::is_integral<IntegralType>, std::is_signed<IntegralType>> * = nullptr>
582 IntegralType bufferToNumber(const CharType *string, std::size_t size, unsigned char base = 10)
583 {
584  if (!size) {
585  return 0;
586  }
587  const CharType *end = string + size;
588  for (; string != end && *string == ' '; ++string)
589  ;
590  if (string == end) {
591  return 0;
592  }
593  const bool negative = (*string == '-');
594  if (negative) {
595  ++string;
596  }
597  IntegralType result = 0;
598  for (; string != end; ++string) {
599  if (*string == ' ') {
600  continue;
601  }
602  result *= base;
603  result += charToDigit<CharType>(*string, base);
604  }
605  return negative ? -result : result;
606 }
607 
617 template <typename T> std::string interpretIntegerAsString(T integer, int startOffset = 0)
618 {
619  char buffer[sizeof(T)];
620  ConversionUtilities::BE::getBytes(integer, buffer);
621  return std::string(buffer + startOffset, sizeof(T) - startOffset);
622 }
623 
624 CPP_UTILITIES_EXPORT std::string dataSizeToString(uint64 sizeInByte, bool includeByte = false);
625 CPP_UTILITIES_EXPORT std::string bitrateToString(double speedInKbitsPerSecond, bool useByteInsteadOfBits = false);
626 CPP_UTILITIES_EXPORT std::string encodeBase64(const byte *data, uint32 dataSize);
627 CPP_UTILITIES_EXPORT std::pair<std::unique_ptr<byte[]>, uint32> decodeBase64(const char *encodedStr, const uint32 strSize);
628 } // namespace ConversionUtilities
629 
630 #endif // CONVERSION_UTILITIES_STRINGCONVERSION_H
CPP_UTILITIES_EXPORT StringData convertUtf8ToUtf16LE(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-8 string to UTF-16 (little-endian).
bool startsWith(const StringType &str, const StringType &phrase)
Returns whether str starts with phrase.
void operator()(char *stringData)
Deletes the specified stringData with std::free(), because the memory has been allocated using std::malloc().
CPP_UTILITIES_EXPORT std::string encodeBase64(const byte *data, uint32 dataSize)
Encodes the specified data to Base64.
std::pair< std::unique_ptr< char[], StringDataDeleter >, std::size_t > StringData
Type used to return string encoding conversion result.
IntegralType stringToNumber(const StringType &string, typename StringType::value_type base=10)
Converts the given string to an unsigned number assuming string uses the specified base.
Container splitStringSimple(const typename Container::value_type &string, const typename Container::value_type &delimiter, int maxParts=-1)
Splits the given string (which might also be a string view) at the specified delimiter.
#define CPP_UTILITIES_EXPORT
bool containsSubstrings(const StringType &str, std::initializer_list< StringType > substrings)
Returns whether str contains the specified substrings.
The ConversionException class is thrown by the various conversion functions of this library when a conversion error occurs.
std::vector< std::string > toArrayOfLines(const std::string &multilineString)
Converts the specified multilineString to an array of lines.
CPP_UTILITIES_EXPORT StringData convertUtf16BEToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-16 (big-endian) string to UTF-8.
CPP_UTILITIES_EXPORT void truncateString(std::string &str, char terminationChar='\0')
Truncates all characters after the first occurrence of the specified terminationChar and the termination character as well.
StringType numberToString(IntegralType number, typename StringType::value_type base=10)
Converts the given number to its equivalent string representation using the specified base.
std::uint64_t uint64
unsigned 64-bit integer
Definition: types.h:49
The StringDataDeleter struct deletes the data of a StringData instance.
typename std::enable_if< All< Condition... >::value, Detail::Enabler >::type EnableIf
Shortcut for std::enable_if to omit ::value and ::type.
Definition: traits.h:48
std::string interpretIntegerAsString(T integer, int startOffset=0)
Interprets the bytes of the given integer, starting at the specified offset, as std::string using big-endian byte order.
CPP_UTILITIES_EXPORT StringData convertUtf8ToUtf16BE(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-8 string to UTF-16 (big-endian).
EmptyPartsTreat
Specifies the role of empty parts when splitting strings.
constexpr int i
CPP_UTILITIES_EXPORT StringData convertUtf8ToLatin1(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-8 string to Latin-1.
void findAndReplace(StringType &str, const StringType &find, const StringType &replace)
Replaces all occurrences of find with replace in the specified str.
Contains several functions providing conversions between different data types.
std::uint32_t uint32
unsigned 32-bit integer
Definition: types.h:44
IntegralType bufferToNumber(const CharType *string, std::size_t size, unsigned char base=10)
Converts the given string of size characters to an unsigned numeric value using the specified base.
CPP_UTILITIES_EXPORT StringData convertUtf16LEToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified UTF-16 (little-endian) string to UTF-8.
CharType charToDigit(CharType character, CharType base)
Returns number/digit of the specified character representation using the specified base.
Container::value_type joinStrings(const Container &strings, const typename Container::value_type &delimiter=typename Container::value_type(), bool omitEmpty=false, const typename Container::value_type &leftClosure=typename Container::value_type(), const typename Container::value_type &rightClosure=typename Container::value_type())
Joins the given strings using the specified delimiter.
CPP_UTILITIES_EXPORT StringData convertString(const char *fromCharset, const char *toCharset, const char *inputBuffer, std::size_t inputBufferSize, float outputBufferSizeFactor=1.0f)
Converts the specified string from one character set to another.
CPP_UTILITIES_EXPORT StringData convertLatin1ToUtf8(const char *inputBuffer, std::size_t inputBufferSize)
Converts the specified Latin-1 string to UTF-8.
std::vector< std::string > toMultiline(const Container &arrayOfLines)
Converts the specified arrayOfLines to a multiline string.
Container splitString(const typename Container::value_type &string, const typename Container::value_type &delimiter, EmptyPartsTreat emptyPartsRole=EmptyPartsTreat::Keep, int maxParts=-1)
Splits the given string at the specified delimiter.
CPP_UTILITIES_EXPORT std::string dataSizeToString(uint64 sizeInByte, bool includeByte=false)
Converts the specified data size in byte to its equivalent std::string representation.
CPP_UTILITIES_EXPORT std::pair< std::unique_ptr< byte[]>, uint32 > decodeBase64(const char *encodedStr, const uint32 strSize)
Decodes the specified Base64 encoded string.
constexpr CharType digitToChar(CharType digit)
Returns the character representation of the specified digit.
CPP_UTILITIES_EXPORT std::string bitrateToString(double speedInKbitsPerSecond, bool useByteInsteadOfBits=false)
Converts the specified bitrate in kbit/s to its equivalent std::string representation.
CPP_UTILITIES_EXPORT void getBytes(int16 value, char *outputbuffer)
Stores the specified 16-bit signed integer value at a specified position in a char array.