ROSE  0.9.13.81
StringUtility.h
1 #ifndef ROSE_StringUtility_H
2 #define ROSE_StringUtility_H
3 
4 #include "commandline_processing.h"
5 
6 #include <boost/algorithm/string/erase.hpp>
7 #include <boost/algorithm/string/predicate.hpp>
8 #include <boost/algorithm/string/replace.hpp>
9 #include <limits.h>
10 #include <map>
11 #include <rose_constants.h>
12 #include <sstream>
13 #include <stdint.h>
14 #include <string>
15 #include <vector>
16 #include <Sawyer/IntervalSet.h>
17 
18 #if ROSE_MICROSOFT_OS
19 // This is the boost solution for lack of support for stdint.h (e.g. types such as "uint64_t")
20 #include <msvc_stdint.h>
21 #endif
22 
23 namespace Rose {
24 
29 namespace StringUtility {
30 
31 
32 
33 
35 // Character-escaping functions
37 
/** Escapes HTML special characters. */
41 ROSE_UTIL_API std::string htmlEscape(const std::string&);
42 
/** Escapes characters that are special to C/C++.
 *
 *  The @p context argument defaults to a double quote for the string overload and to a single quote for the character
 *  overload. NOTE(review): presumably it names the kind of literal the result will be placed in -- confirm against the
 *  implementation. */
55 ROSE_UTIL_API std::string cEscape(const std::string&, char context = '"');
56 ROSE_UTIL_API std::string cEscape(char, char context = '\'');
/** Unescapes C++ string literals. */
67 ROSE_UTIL_API std::string cUnescape(const std::string&);
68 
/** Escapes characters that are special to the Bourne shell. */
72 ROSE_UTIL_API std::string bourneEscape(const std::string&);
73 
74 // [Robb Matzke 2016-05-06]: I am deprecating escapeNewLineCharaters because:
75 // 1. Its name is spelled wrong: "Charater"
76 // 2. "newline" in this context is a single word and should be capitalized as "Newline" not "NewLine"
77 // 3. Its name is misleading because it also escapes double quotes.
78 // 4. It escapes newlines using "l" rather than the more customary "n".
79 // I would just remove it, but it seems to be used in some projects and the tutorial.
80 
/** Escapes line feeds and double quotes.
 *
 *  The misspelling in the name is part of the existing interface and is kept because callers still use it. */
85 ROSE_UTIL_API std::string escapeNewLineCharaters(const std::string&);
86 
87 // DQ (12/8/2016): This is used in the generation of dot files.
88 ROSE_UTIL_API std::string escapeNewlineAndDoubleQuoteCharacters(const std::string&);
89 
90 
91 
92 
94 // Splitting and joining strings
96 
/** Splits strings into parts.
 *
 *  The separator may be given either as a string or as a single character. @p maxparts defaults to UNLIMITED and
 *  @p trim_white_space defaults to false. NOTE(review): presumably @p maxparts caps the number of returned parts and
 *  @p trim_white_space strips white space from each part -- confirm against the implementation. */
111 ROSE_UTIL_API std::vector<std::string> split(const std::string &separator, const std::string &str, size_t maxparts = UNLIMITED,
112  bool trim_white_space = false);
113 ROSE_UTIL_API std::vector<std::string> split(char separator, const std::string &str, size_t maxparts = UNLIMITED,
114  bool trim_white_space = false);
/** Split a string into a list based on a separator character. */
121 ROSE_UTIL_API std::list<std::string> tokenize(const std::string&, char delim);
122 
/** Join individual items to form a single string.
 *
 *  Streams every element of the half-open range [@p begin, @p end) into one string, writing @p separator between
 *  consecutive elements (never before the first or after the last). An empty range yields an empty string. Elements are
 *  formatted with their stream insertion operator. */
template<class Iterator>
std::string join_range(const std::string &separator, Iterator begin, Iterator end) {
    std::ostringstream out;
    bool needSeparator = false;                         // false only for the very first element
    for (Iterator cur = begin; cur != end; ++cur) {
        if (needSeparator)
            out <<separator;
        out <<*cur;
        needSeparator = true;
    }
    return out.str();
}
137 
/** Join individual items to form a single string.
 *
 *  Joins every element of @p container, from its begin() to its end(), placing @p separator between consecutive
 *  elements. */
template<class Container>
std::string join(const std::string &separator, const Container &container) {
    std::string joined = join_range(separator, container.begin(), container.end());
    return joined;
}

/** Join individual items to form a single string.
 *
 *  Same as the string-separator overload, with the separator given as a single character. */
template<class Container>
std::string join(char separator, const Container &container) {
    const std::string separatorString(1, separator);    // one-character separator
    return join_range(separatorString, container.begin(), container.end());
}
147 
/** Overloads of join for arrays of C strings.
 *
 *  NOTE(review): presumably @p nstrings is the number of elements in @p strings and the result is formed the same way as
 *  for the container overloads -- confirm against the implementation. */
148 ROSE_UTIL_API std::string join(const std::string &separator, char *strings[], size_t nstrings);
149 ROSE_UTIL_API std::string join(const std::string &separator, const char *strings[], size_t nstrings);
150 ROSE_UTIL_API std::string join(char separator, char *strings[], size_t nstrings);
151 ROSE_UTIL_API std::string join(char separator, const char *strings[], size_t nstrings);
152 
/** Join strings as if they were English prose.
 *
 *  @p separator defaults to "," and @p finalIntro defaults to "and". NOTE(review): presumably @p finalIntro is the word
 *  placed before the last phrase -- confirm against the implementation. */
174 ROSE_UTIL_API std::string joinEnglish(const std::vector<std::string> &phrases,
175  const std::string &separator = ",",
176  const std::string &finalIntro = "and");
177 
178 
179 
181 // Container versus scalar functions
182 //
183 // Functions that convert containers of things to a string and vice versa.
185 
/** Generate a string from a list of integers.
 *
 *  @p separateStrings defaults to false; its exact effect on the output is not visible in this header. */
191 ROSE_UTIL_API std::string listToString(const std::list<int>&, bool separateStrings = false);
192 
/** Overloads of listToString taking a list or a vector of strings.
 *
 *  @p separateStrings defaults to false; its exact effect on the output is not visible in this header. */
199 ROSE_UTIL_API std::string listToString(const std::list<std::string>&, bool separateStrings = false);
200 ROSE_UTIL_API std::string listToString(const std::vector<std::string>&, bool separateStrings = false);
/** Split a string into substrings at line feeds. */
211 ROSE_UTIL_API std::list<std::string> stringToList(const std::string&);
212 
/** Splits string into substring based on a separator character.
 *
 *  The substrings are delivered through the @p stringList reference argument rather than a return value. */
217 ROSE_UTIL_API void splitStringIntoStrings(const std::string& inputString, char separator, std::vector<std::string>& stringList);
218 
219 
220 
221 
223 // Functions for converting numbers to strings
225 
/** Convert an integer to a string.
 *
 *  One overload is declared for each of the signed and unsigned int, long, and long long types. */
231 ROSE_UTIL_API std::string numberToString(long long);
232 ROSE_UTIL_API std::string numberToString(unsigned long long);
233 ROSE_UTIL_API std::string numberToString(long);
234 ROSE_UTIL_API std::string numberToString(unsigned long);
235 ROSE_UTIL_API std::string numberToString(int);
236 ROSE_UTIL_API std::string numberToString(unsigned int);
237 
// The 128-bit overloads are declared only when all of the following hold: the compiler is not MSVC, the backend compiler
// is GNU with a version newer than 4.6 (major 4 with minor > 6, or major > 4), and __WORDSIZE is 64.
238 #if !defined(_MSC_VER) && \
239  defined(BACKEND_CXX_IS_GNU_COMPILER) && \
240  ((BACKEND_CXX_COMPILER_MAJOR_VERSION_NUMBER == 4 && BACKEND_CXX_COMPILER_MINOR_VERSION_NUMBER > 6) || \
241  BACKEND_CXX_COMPILER_MAJOR_VERSION_NUMBER > 4) && \
242  __WORDSIZE == 64
243 ROSE_UTIL_API std::string numberToString(__int128 x);
244 ROSE_UTIL_API std::string numberToString(unsigned __int128 x);
245 #endif
246 
/** Overload of numberToString taking a pointer. The output format is not visible in this header. */
250 ROSE_UTIL_API std::string numberToString(const void*);
251 
/** Overload of numberToString taking a double. The output format is not visible in this header. */
255 ROSE_UTIL_API std::string numberToString(double);
256 
/** Convert an integer to a hexadecimal string. */
258 ROSE_UTIL_API std::string intToHex(uint64_t);
259 
/** Convert a number to a hexadecimal and decimal string.
 *
 *  @p value is passed as a 64-bit quantity together with its width @p nbits. For toHex2 the two show_* flags default to
 *  true and @p decimal_threshold defaults to 256. NOTE(review): presumably the flags select which decimal forms accompany
 *  the hexadecimal text and the threshold is the smallest magnitude for which they are shown -- confirm against the
 *  implementation. */
269 ROSE_UTIL_API std::string toHex2(uint64_t value, size_t nbits,
270  bool show_unsigned_decimal=true, bool show_signed_decimal=true,
271  uint64_t decimal_threshold=256);
272 ROSE_UTIL_API std::string signedToHex2(uint64_t value, size_t nbits);
273 ROSE_UTIL_API std::string unsignedToHex2(uint64_t value, size_t nbits);
274 
275 template<typename T> std::string toHex(T value) { return toHex2((uint64_t)value, 8*sizeof(T)); }
276 template<typename T> std::string signedToHex(T value) { return signedToHex2((uint64_t)value, 8*sizeof(T)); }
277 template<typename T> std::string unsignedToHex(T value) { return unsignedToHex2((uint64_t)value, 8*sizeof(T)); }
/** Convert a virtual address to a string.
 *
 *  @p nbits defaults to zero; the meaning of a zero width is not visible in this header. */
285 ROSE_UTIL_API std::string addrToString(uint64_t value, size_t nbits = 0);
286 
/** Overload of addrToString taking an interval of 64-bit values instead of a single value.
 *
 *  @p nbits defaults to zero, as for the single-address overload. */
291 ROSE_UTIL_API std::string addrToString(const Sawyer::Container::Interval<uint64_t> &interval, size_t nbits = 0);
292 
298  size_t nbits = 0);
299 
300 
301 
302 
304 // Number parsing
306 
/** Convert an ASCII hexadecimal character to an integer. */
311 ROSE_UTIL_API unsigned hexadecimalToInt(char);
312 
342 template<class Container, class Stringifier>
343 std::vector<std::string> toStrings(const Container &numbers, const Stringifier &stringifier=numberToString) {
344  return toStrings_range(numbers.begin(), numbers.end(), stringifier);
345 }
346 template<class Iterator, class Stringifier>
347 std::vector<std::string> toStrings_range(Iterator begin, Iterator end, const Stringifier &stringifier=numberToString) {
348  std::vector<std::string> retval;
349  for (/*void*/; begin!=end; ++begin)
350  retval.push_back(stringifier(*begin));
351  return retval;
352 }
358 // String conversion functions
361 
/** Convert to lower case. */
365 ROSE_UTIL_API std::string convertToLowerCase(const std::string &inputString);
366 
/** Normalizes line termination. */
373 ROSE_UTIL_API std::string fixLineTermination(const std::string &input);
374 
/** Insert a prefix string before every line.
 *
 *  @p prefixAtFront defaults to true and @p prefixAtBack defaults to false. NOTE(review): presumably these control whether
 *  the prefix is also emitted before the first line and after a trailing line terminator -- confirm against the
 *  implementation. */
382 ROSE_UTIL_API std::string prefixLines(const std::string &lines, const std::string &prefix,
383  bool prefixAtFront=true, bool prefixAtBack=false);
384 
/** Converts a multi-line string to a single line.
 *
 *  @p replacement defaults to a single space. NOTE(review): presumably it is substituted for each line break -- confirm
 *  against the implementation. */
394 ROSE_UTIL_API std::string makeOneLine(const std::string &s, std::string replacement=" ");
395 
/** Trims white space from the beginning and end of a string.
 *
 *  By default the characters in @p strip are space, horizontal tab, carriage return, and line feed, and both
 *  @p at_beginning and @p at_end are true. */
399 ROSE_UTIL_API std::string trim(const std::string &str, const std::string &strip=" \t\r\n",
400  bool at_beginning=true, bool at_end=true);
401 
/** Expand horizontal tab characters.
 *
 *  @p tabstops defaults to 8 and @p firstcol defaults to 0. */
403 ROSE_UTIL_API std::string untab(const std::string &str, size_t tabstops=8, size_t firstcol=0);
404 
/** Remove redundant and blank lines. */
414 ROSE_UTIL_API std::string removeRedundantSubstrings(const std::string&);
415 
416 
417 
419 // Functions for string encoding/decoding/hashing
421 
428 ROSE_UTIL_API std::string encode_base64(const std::vector<uint8_t> &data, bool do_pad=true);
429 ROSE_UTIL_API std::string encode_base64(const uint8_t *data, size_t nbytes, bool do_padd=true);
433 ROSE_UTIL_API std::vector<uint8_t> decode_base64(const std::string &encoded);
434 
/** Compute a checksum.
 *
 *  The string is taken by value. The checksum algorithm is not visible in this header. */
442 ROSE_UTIL_API unsigned long generate_checksum(std::string s);
443 
444 
445 
446 
448 // Predicates
450 
/** Determines whether one string contains another.
 *
 *  Returns true if @p shortString occurs anywhere within @p longString. */
inline bool isContainedIn(const std::string &longString, const std::string &shortString) {
    const std::string::size_type where = longString.find(shortString);
    return std::string::npos != where;
}
457 
/** Returns true if the string ends with line termination. */
461 ROSE_UTIL_API bool isLineTerminated(const std::string &s);
462 
463 
464 
465 
467 // Functions related to diagnostic messages
469 
/** Formatting support for generated code strings.
 *
 *  NOTE(review): presumably @p statementColumnNumber is the column to which the lines of @p inputString are indented --
 *  confirm against the implementation. */
471 ROSE_UTIL_API std::string indentMultilineString(const std::string& inputString, int statementColumnNumber);
472 
/** Append an abbreviation or full name to a string.
 *
 *  The text is appended to @p result, which is passed by reference. NOTE(review): presumably @p isset selects whether
 *  anything is appended and @p do_pad selects between the @p abbr and @p full forms -- confirm against the
 *  implementation. */
474 ROSE_UTIL_API void add_to_reason_string(std::string &result, bool isset, bool do_pad,
475  const std::string &abbr, const std::string &full);
476 
477 
/** Append an assembly comment to a string. */
492 ROSE_UTIL_API std::string appendAsmComment(const std::string &s, const std::string &comment);
493 
516 template<typename T>
517 std::string plural(T n, const std::string &plural_phrase, const std::string &singular_phrase="") {
518  assert(!plural_phrase.empty());
519  std::string retval = numberToString(n) + " ";
520  if (1==n) {
521  if (!singular_phrase.empty()) {
522  retval += singular_phrase;
523  } else if (boost::ends_with(plural_phrase, "vertices")) {
524  retval += boost::replace_tail_copy(plural_phrase, 8, "vertex");
525  } else if (boost::ends_with(plural_phrase, "indices")) {
526  retval += boost::replace_tail_copy(plural_phrase, 7, "index");
527  } else if (boost::ends_with(plural_phrase, "ies") && plural_phrase.size() > 3) {
528  // string ends with "ies", as in "parties", so emit "party" instead
529  retval += boost::replace_tail_copy(plural_phrase, 3, "y");
530  } else if (boost::ends_with(plural_phrase, "sses") || boost::ends_with(plural_phrase, "indexes")) {
531  // Sometimes we need to drop an "es" rather than just the "s"
532  retval += boost::erase_tail_copy(plural_phrase, 2);
533  } else if (boost::ends_with(plural_phrase, "s") && plural_phrase.size() > 1) {
534  // strings ends with "s", as in "runners", so drop the final "s" to get "runner"
535  retval += boost::erase_tail_copy(plural_phrase, 1);
536  } else {
537  // I give up. Use the plural and risk being grammatically incorrect.
538  retval += plural_phrase;
539  }
540  } else {
541  retval += plural_phrase;
542  }
543  return retval;
544 }
545 
546 // demangledName is defined in rose_support.cpp
/** Compute demangled version of mangled name. */
552 std::string demangledName(std::string);
553 
554 
555 } // namespace
556 } // namespace
557 
558 
559 #endif
ROSE_UTIL_API std::string intToHex(uint64_t)
Convert an integer to a hexadecimal string.
std::string demangledName(std::string)
Compute demangled version of mangled name.
ROSE_UTIL_API std::string signedToHex2(uint64_t value, size_t nbits)
Convert a number to a hexadecimal and decimal string.
ROSE_UTIL_API std::string cUnescape(const std::string &)
Unescapes C++ string literals.
ROSE_UTIL_API std::vector< std::string > split(const std::string &separator, const std::string &str, size_t maxparts=UNLIMITED, bool trim_white_space=false)
Splits strings into parts.
ROSE_UTIL_API std::string numberToString(long long)
Convert an integer to a string.
const size_t UNLIMITED(-1)
Effectively unlimited size.
std::string toHex(T value)
Convert a number to a hexadecimal and decimal string.
std::string plural(T n, const std::string &plural_phrase, const std::string &singular_phrase="")
Helpful way to print singular or plural words.
ROSE_UTIL_API std::string removeRedundantSubstrings(const std::string &)
Remove redundant and blank lines.
std::vector< std::string > toStrings_range(Iterator begin, Iterator end, const Stringifier &stringifier=numberToString)
Converts a bunch of numbers to strings.
ROSE_UTIL_API std::string encode_base64(const std::vector< uint8_t > &data, bool do_pad=true)
Convert binary data to base-64.
Main namespace for the ROSE library.
ROSE_UTIL_API std::string cEscape(const std::string &, char context= '"')
Escapes characters that are special to C/C++.
ROSE_UTIL_API std::string prefixLines(const std::string &lines, const std::string &prefix, bool prefixAtFront=true, bool prefixAtBack=false)
Insert a prefix string before every line.
A container holding a set of values.
Definition: IntervalSet.h:55
ROSE_UTIL_API std::list< std::string > stringToList(const std::string &)
Split a string into substrings at line feeds.
ROSE_UTIL_API std::vector< uint8_t > decode_base64(const std::string &encoded)
Convert base-64 to binary.
std::string join_range(const std::string &separator, Iterator begin, Iterator end)
Join individual items to form a single string.
ROSE_UTIL_API bool isLineTerminated(const std::string &s)
Returns true if the string ends with line termination.
ROSE_UTIL_API std::list< std::string > tokenize(const std::string &, char delim)
Split a string into a list based on a separator character.
ROSE_UTIL_API void splitStringIntoStrings(const std::string &inputString, char separator, std::vector< std::string > &stringList)
Splits string into substring based on a separator character.
ROSE_UTIL_API std::string unsignedToHex2(uint64_t value, size_t nbits)
Convert a number to a hexadecimal and decimal string.
ROSE_UTIL_API std::string bourneEscape(const std::string &)
Escapes characters that are special to the Bourne shell.
ROSE_UTIL_API std::string appendAsmComment(const std::string &s, const std::string &comment)
Append an assembly comment to a string.
ROSE_UTIL_API std::string untab(const std::string &str, size_t tabstops=8, size_t firstcol=0)
Expand horizontal tab characters.
ROSE_UTIL_API std::string convertToLowerCase(const std::string &inputString)
Convert to lower case.
ROSE_UTIL_API unsigned hexadecimalToInt(char)
Convert an ASCII hexadecimal character to an integer.
std::vector< std::string > toStrings(const Container &numbers, const Stringifier &stringifier=numberToString)
Converts a bunch of numbers to strings.
ROSE_UTIL_API std::string htmlEscape(const std::string &)
Escapes HTML special characters.
ROSE_UTIL_API std::string addrToString(uint64_t value, size_t nbits=0)
Convert a virtual address to a string.
ROSE_UTIL_API std::string makeOneLine(const std::string &s, std::string replacement=" ")
Converts a multi-line string to a single line.
ROSE_UTIL_API void add_to_reason_string(std::string &result, bool isset, bool do_pad, const std::string &abbr, const std::string &full)
Append an abbreviation or full name to a string.
ROSE_UTIL_API std::string trim(const std::string &str, const std::string &strip=" \t\r\n", bool at_beginning=true, bool at_end=true)
Trims white space from the beginning and end of a string.
ROSE_UTIL_API unsigned long generate_checksum(std::string s)
Compute a checksum.
ROSE_UTIL_API std::string listToString(const std::list< int > &, bool separateStrings=false)
Generate a string from a list of integers.
ROSE_UTIL_API std::string fixLineTermination(const std::string &input)
Normalizes line termination.
std::string unsignedToHex(T value)
Convert a number to a hexadecimal and decimal string.
std::string join(const std::string &separator, const Container &container)
Join individual items to form a single string.
Range of values delimited by endpoints.
Definition: Interval.h:33
std::string signedToHex(T value)
Convert a number to a hexadecimal and decimal string.
Shortens names of int64_t stringifiers.
Definition: stringify.h:20
ROSE_UTIL_API std::string escapeNewLineCharaters(const std::string &)
Escapes line feeds and double quotes.
ROSE_UTIL_API std::string indentMultilineString(const std::string &inputString, int statementColumnNumber)
Formatting support for generated code strings.
ROSE_UTIL_API std::string toHex2(uint64_t value, size_t nbits, bool show_unsigned_decimal=true, bool show_signed_decimal=true, uint64_t decimal_threshold=256)
Convert a number to a hexadecimal and decimal string.
ROSE_UTIL_API std::string joinEnglish(const std::vector< std::string > &phrases, const std::string &separator=",", const std::string &finalIntro="and")
Join strings as if they were English prose.
bool isContainedIn(const std::string &longString, const std::string &shortString)
Determines whether one string contains another.