ROSE  0.11.51.0
Rose/StringUtility.h
1 #ifndef ROSE_StringUtility_H
2 #define ROSE_StringUtility_H
3 
4 #include "commandline_processing.h"
5 
6 #include <Rose/BitOps.h>
7 #include <boost/algorithm/string/erase.hpp>
8 #include <boost/algorithm/string/predicate.hpp>
9 #include <boost/algorithm/string/replace.hpp>
10 #include <limits.h>
11 #include <map>
12 #include <Rose/Constants.h>
13 #include <sstream>
14 #include <stdint.h>
15 #include <string>
16 #include <vector>
17 #include <Sawyer/IntervalSet.h>
18 
19 #if ROSE_MICROSOFT_OS
20 // This is the boost solution for lack of support for stdint.h (e.g. types such as "uint64_t")
21 #include <msvc_stdint.h>
22 #endif
23 
24 namespace Rose {
25 
30 namespace StringUtility {
31 
32 
33 
34 
36 // Character-escaping functions
38 
42 ROSE_UTIL_API std::string htmlEscape(const std::string&);
43 
56 ROSE_UTIL_API std::string cEscape(const std::string&, char context = '"');
57 ROSE_UTIL_API std::string cEscape(char, char context = '\'');
68 ROSE_UTIL_API std::string cUnescape(const std::string&);
69 
73 ROSE_UTIL_API std::string bourneEscape(const std::string&);
74 
78 ROSE_UTIL_API std::string yamlEscape(const std::string&);
79 
80 // [Robb Matzke 2016-05-06]: I am deprecating escapeNewLineCharaters because:
81 // 1. Its name is spelled wrong: "Charater"
82 // 2. "newline" in this context is a single word and should be capitalized as "Newline" not "NewLine"
83 // 3. Its name is misleading because it also escapes double quotes.
84 // 4. It escapes newlines using "l" rather than the more customary "n".
85 // I would just remove it, but it seems to be used in some projects and the tutorial.
86 
91 ROSE_UTIL_API std::string escapeNewLineCharaters(const std::string&);
92 
93 // DQ (12/8/2016): This is used in the generation of dot files.
94 ROSE_UTIL_API std::string escapeNewlineAndDoubleQuoteCharacters(const std::string&);
95 
96 
97 
98 
100 // Splitting and joining strings
102 
117 ROSE_UTIL_API std::vector<std::string> split(const std::string &separator, const std::string &str, size_t maxparts = UNLIMITED,
118  bool trim_white_space = false);
119 ROSE_UTIL_API std::vector<std::string> split(char separator, const std::string &str, size_t maxparts = UNLIMITED,
120  bool trim_white_space = false);
127 ROSE_UTIL_API std::list<std::string> tokenize(const std::string&, char delim);
128 
/** Join individual items to form a single string.
 *
 *  Every element of the half-open range [@p begin, @p end) is written to a string stream with
 *  <code>operator<<</code>, and @p separator is written between consecutive elements. No separator is
 *  emitted before the first element or after the last one, and an empty range produces an empty string.
 *
 *  @param separator Text placed between adjacent items.
 *  @param begin     Iterator referring to the first item to join.
 *  @param end       Iterator referring to one past the last item to join.
 *  @return The joined text. */
template<class Iterator>
std::string join_range(const std::string &separator, Iterator begin, Iterator end) {
    std::ostringstream retval;
    bool needSeparator = false;
    for (Iterator i = begin; i != end; ++i) {
        // Stream the separator itself instead of building a temporary std::string copy for every item.
        if (needSeparator)
            retval <<separator;
        retval <<*i;
        needSeparator = true;
    }
    return retval.str();
}
143 
/** Join individual items to form a single string.
 *
 *  Convenience wrapper that joins every element of @p container, from <code>container.begin()</code> to
 *  <code>container.end()</code>, by forwarding to join_range.
 *
 *  @param separator Text placed between adjacent items.
 *  @param container Container whose elements are joined.
 *  @return The joined text. */
template<class Container>
std::string
join(const std::string &separator, const Container &container)
{
    return join_range(separator,
                      container.begin(),
                      container.end());
}
148 
/** Join individual items to form a single string, using a single character as the separator.
 *
 *  The character is widened to a one-character string and the work is forwarded to join_range over
 *  <code>container.begin()</code> .. <code>container.end()</code>.
 *
 *  @param separator Character placed between adjacent items.
 *  @param container Container whose elements are joined.
 *  @return The joined text. */
template<class Container>
std::string join(char separator, const Container &container) {
    const std::string separatorText(1, separator);
    return join_range(separatorText, container.begin(), container.end());
}
153 
154 ROSE_UTIL_API std::string join(const std::string &separator, char *strings[], size_t nstrings);
155 ROSE_UTIL_API std::string join(const std::string &separator, const char *strings[], size_t nstrings);
156 ROSE_UTIL_API std::string join(char separator, char *strings[], size_t nstrings);
157 ROSE_UTIL_API std::string join(char separator, const char *strings[], size_t nstrings);
158 
180 ROSE_UTIL_API std::string joinEnglish(const std::vector<std::string> &phrases,
181  const std::string &separator = ",",
182  const std::string &finalIntro = "and");
183 
184 
185 
187 // Container versus scalar functions
188 //
189 // Functions that convert containers of things to a string and vice versa.
191 
197 ROSE_UTIL_API std::string listToString(const std::list<int>&, bool separateStrings = false);
198 
205 ROSE_UTIL_API std::string listToString(const std::list<std::string>&, bool separateStrings = false);
206 ROSE_UTIL_API std::string listToString(const std::vector<std::string>&, bool separateStrings = false);
217 ROSE_UTIL_API std::list<std::string> stringToList(const std::string&);
218 
223 ROSE_UTIL_API void splitStringIntoStrings(const std::string& inputString, char separator, std::vector<std::string>& stringList);
224 
225 
226 
227 
229 // Functions for converting numbers to strings
231 
237 ROSE_UTIL_API std::string numberToString(long long);
238 ROSE_UTIL_API std::string numberToString(unsigned long long);
239 ROSE_UTIL_API std::string numberToString(long);
240 ROSE_UTIL_API std::string numberToString(unsigned long);
241 ROSE_UTIL_API std::string numberToString(int);
242 ROSE_UTIL_API std::string numberToString(unsigned int);
243 
244 #if !defined(_MSC_VER) && \
245  defined(BACKEND_CXX_IS_GNU_COMPILER) && \
246  ((BACKEND_CXX_COMPILER_MAJOR_VERSION_NUMBER == 4 && BACKEND_CXX_COMPILER_MINOR_VERSION_NUMBER > 6) || \
247  BACKEND_CXX_COMPILER_MAJOR_VERSION_NUMBER > 4) && \
248  __WORDSIZE == 64
249 ROSE_UTIL_API std::string numberToString(__int128 x);
250 ROSE_UTIL_API std::string numberToString(unsigned __int128 x);
251 #endif
252 
256 ROSE_UTIL_API std::string numberToString(const void*);
257 
261 ROSE_UTIL_API std::string numberToString(double);
262 
264 ROSE_UTIL_API std::string intToHex(uint64_t);
265 
275 ROSE_UTIL_API std::string toHex2(uint64_t value, size_t nbits,
276  bool show_unsigned_decimal=true, bool show_signed_decimal=true,
277  uint64_t decimal_threshold=256);
278 ROSE_UTIL_API std::string signedToHex2(uint64_t value, size_t nbits);
279 ROSE_UTIL_API std::string unsignedToHex2(uint64_t value, size_t nbits);
280 
281 template<typename T> std::string toHex(T value) { return toHex2((uint64_t)value, 8*sizeof(T)); }
282 template<typename T> std::string signedToHex(T value) { return signedToHex2((uint64_t)value, 8*sizeof(T)); }
283 template<typename T> std::string unsignedToHex(T value) { return unsignedToHex2((uint64_t)value, 8*sizeof(T)); }
291 ROSE_UTIL_API std::string addrToString(uint64_t value, size_t nbits = 0);
292 
297 ROSE_UTIL_API std::string addrToString(const Sawyer::Container::Interval<uint64_t> &interval, size_t nbits = 0);
298 
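303 ROSE_UTIL_API std::string addrToString(const Sawyer::Container::IntervalSet<Sawyer::Container::Interval<uint64_t> > &iset,
304  size_t nbits = 0);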
305 
306 
308 template<typename Unsigned>
309 std::string toBinary(Unsigned value, size_t nBits = 0, size_t groupSize = 4, const std::string groupSeparator = "_") {
310  if (0 == nBits)
311  nBits = BitOps::nBits(value);
312  std::string retval;
313  for (size_t i = nBits; i > 0; --i) {
314  retval += BitOps::bit(value, i-1) ? '1' : '0';
315  if (groupSize > 0 && i > 1 && (i-1) % groupSize == 0)
316  retval += groupSeparator;
317  }
318  return retval;
319 }
320 
322 // Number parsing
324 
329 ROSE_UTIL_API unsigned hexadecimalToInt(char);
330 
360 template<class Container, class Stringifier>
361 std::vector<std::string> toStrings(const Container &numbers, const Stringifier &stringifier=numberToString) {
362  return toStrings_range(numbers.begin(), numbers.end(), stringifier);
363 }
364 template<class Iterator, class Stringifier>
365 std::vector<std::string> toStrings_range(Iterator begin, Iterator end, const Stringifier &stringifier=numberToString) {
366  std::vector<std::string> retval;
367  for (/*void*/; begin!=end; ++begin)
368  retval.push_back(stringifier(*begin));
369  return retval;
370 }
376 // String conversion functions
379 
383 ROSE_UTIL_API std::string convertToLowerCase(const std::string &inputString);
384 
391 ROSE_UTIL_API std::string fixLineTermination(const std::string &input);
392 
400 ROSE_UTIL_API std::string prefixLines(const std::string &lines, const std::string &prefix,
401  bool prefixAtFront=true, bool prefixAtBack=false);
402 
412 ROSE_UTIL_API std::string makeOneLine(const std::string &s, std::string replacement=" ");
413 
417 ROSE_UTIL_API std::string trim(const std::string &str, const std::string &strip=" \t\r\n",
418  bool at_beginning=true, bool at_end=true);
419 
421 ROSE_UTIL_API std::string untab(const std::string &str, size_t tabstops=8, size_t firstcol=0);
422 
432 ROSE_UTIL_API std::string removeRedundantSubstrings(const std::string&);
433 
438 ROSE_UTIL_API std::string removeAnsiEscapes(const std::string&);
439 
441 // Functions for string encoding/decoding/hashing
443 
450 ROSE_UTIL_API std::string encode_base64(const std::vector<uint8_t> &data, bool do_pad=true);
451 ROSE_UTIL_API std::string encode_base64(const uint8_t *data, size_t nbytes, bool do_padd=true);
455 ROSE_UTIL_API std::vector<uint8_t> decode_base64(const std::string &encoded);
456 
464 ROSE_UTIL_API unsigned long generate_checksum(std::string s);
465 
466 
467 
468 
470 // Predicates
472 
/** Determines whether one string contains another.
 *
 *  @param longString  String that is searched.
 *  @param shortString Substring to look for.
 *  @return True if <code>longString.find(shortString)</code> locates a match, false otherwise. */
inline bool isContainedIn(const std::string &longString, const std::string &shortString) {
    const std::string::size_type where = longString.find(shortString);
    return std::string::npos != where;
}
479 
483 ROSE_UTIL_API bool isLineTerminated(const std::string &s);
484 
485 
486 
487 
489 // Functions related to diagnostic messages
491 
493 ROSE_UTIL_API std::string indentMultilineString(const std::string& inputString, int statementColumnNumber);
494 
496 ROSE_UTIL_API void add_to_reason_string(std::string &result, bool isset, bool do_pad,
497  const std::string &abbr, const std::string &full);
498 
499 
514 ROSE_UTIL_API std::string appendAsmComment(const std::string &s, const std::string &comment);
515 
538 template<typename T>
539 std::string plural(T n, const std::string &plural_phrase, const std::string &singular_phrase="") {
540  assert(!plural_phrase.empty());
541  std::string retval = numberToString(n) + " ";
542  if (1==n) {
543  if (!singular_phrase.empty()) {
544  retval += singular_phrase;
545  } else if (boost::ends_with(plural_phrase, "vertices")) {
546  retval += boost::replace_tail_copy(plural_phrase, 8, "vertex");
547  } else if (boost::ends_with(plural_phrase, "indices")) {
548  retval += boost::replace_tail_copy(plural_phrase, 7, "index");
549  } else if (boost::ends_with(plural_phrase, "ies") && plural_phrase.size() > 3) {
550  // string ends with "ies", as in "parties", so emit "party" instead
551  retval += boost::replace_tail_copy(plural_phrase, 3, "y");
552  } else if (boost::ends_with(plural_phrase, "sses") || boost::ends_with(plural_phrase, "indexes")) {
553  // Sometimes we need to drop an "es" rather than just the "s"
554  retval += boost::erase_tail_copy(plural_phrase, 2);
555  } else if (boost::ends_with(plural_phrase, "s") && plural_phrase.size() > 1) {
556  // strings ends with "s", as in "runners", so drop the final "s" to get "runner"
557  retval += boost::erase_tail_copy(plural_phrase, 1);
558  } else {
559  // I give up. Use the plural and risk being grammatically incorrect.
560  retval += plural_phrase;
561  }
562  } else {
563  retval += plural_phrase;
564  }
565  return retval;
566 }
567 
568 // demangledName is defined in rose_support.cpp
574 std::string demangledName(std::string);
575 
576 
577 } // namespace
578 } // namespace
579 
580 
581 #endif
ROSE_UTIL_API std::string intToHex(uint64_t)
Convert an integer to a hexadecimal string.
std::string demangledName(std::string)
Compute demangled version of mangled name.
ROSE_UTIL_API std::string signedToHex2(uint64_t value, size_t nbits)
Convert a number to a hexadecimal and decimal string.
ROSE_UTIL_API std::string cUnescape(const std::string &)
Unescapes C++ string literals.
ROSE_UTIL_API std::vector< std::string > split(const std::string &separator, const std::string &str, size_t maxparts=UNLIMITED, bool trim_white_space=false)
Splits strings into parts.
ROSE_UTIL_API std::string numberToString(long long)
Convert an integer to a string.
std::string toBinary(Unsigned value, size_t nBits=0, size_t groupSize=4, const std::string groupSeparator="_")
Convert a number to a binary string.
const size_t UNLIMITED(-1)
Effectively unlimited size.
std::string toHex(T value)
Convert a number to a hexadecimal and decimal string.
std::string plural(T n, const std::string &plural_phrase, const std::string &singular_phrase="")
Helpful way to print singular or plural words.
ROSE_UTIL_API std::string removeRedundantSubstrings(const std::string &)
Remove redundant and blank lines.
size_t nBits(Unsigned x=Unsigned(0))
Number of bits in a type or value.
Definition: Rose/BitOps.h:17
std::vector< std::string > toStrings_range(Iterator begin, Iterator end, const Stringifier &stringifier=numberToString)
Converts a bunch of numbers to strings.
ROSE_UTIL_API std::string encode_base64(const std::vector< uint8_t > &data, bool do_pad=true)
Convert binary data to base-64.
Main namespace for the ROSE library.
ROSE_UTIL_API std::string cEscape(const std::string &, char context= '"')
Escapes characters that are special to C/C++.
ROSE_UTIL_API std::string prefixLines(const std::string &lines, const std::string &prefix, bool prefixAtFront=true, bool prefixAtBack=false)
Insert a prefix string before every line.
A container holding a set of values.
Definition: IntervalSet.h:55
ROSE_UTIL_API std::list< std::string > stringToList(const std::string &)
Split a string into substrings at line feeds.
ROSE_UTIL_API std::vector< uint8_t > decode_base64(const std::string &encoded)
Convert base-64 to binary.
std::string join_range(const std::string &separator, Iterator begin, Iterator end)
Join individual items to form a single string.
ROSE_UTIL_API bool isLineTerminated(const std::string &s)
Returns true if the string ends with line termination.
ROSE_UTIL_API std::list< std::string > tokenize(const std::string &, char delim)
Split a string into a list based on a separator character.
ROSE_UTIL_API void splitStringIntoStrings(const std::string &inputString, char separator, std::vector< std::string > &stringList)
Splits a string into substrings based on a separator character.
ROSE_UTIL_API std::string unsignedToHex2(uint64_t value, size_t nbits)
Convert a number to a hexadecimal and decimal string.
ROSE_UTIL_API std::string bourneEscape(const std::string &)
Escapes characters that are special to the Bourne shell.
ROSE_UTIL_API std::string appendAsmComment(const std::string &s, const std::string &comment)
Append an assembly comment to a string.
ROSE_UTIL_API std::string untab(const std::string &str, size_t tabstops=8, size_t firstcol=0)
Expand horizontal tab characters.
ROSE_UTIL_API std::string convertToLowerCase(const std::string &inputString)
Convert to lower case.
ROSE_UTIL_API unsigned hexadecimalToInt(char)
Convert an ASCII hexadecimal character to an integer.
std::vector< std::string > toStrings(const Container &numbers, const Stringifier &stringifier=numberToString)
Converts a bunch of numbers to strings.
ROSE_UTIL_API std::string htmlEscape(const std::string &)
Escapes HTML special characters.
ROSE_UTIL_API std::string addrToString(uint64_t value, size_t nbits=0)
Convert a virtual address to a string.
ROSE_UTIL_API std::string makeOneLine(const std::string &s, std::string replacement=" ")
Converts a multi-line string to a single line.
ROSE_UTIL_API void add_to_reason_string(std::string &result, bool isset, bool do_pad, const std::string &abbr, const std::string &full)
Append an abbreviation or full name to a string.
ROSE_UTIL_API std::string trim(const std::string &str, const std::string &strip=" \t\r\n", bool at_beginning=true, bool at_end=true)
Trims white space from the beginning and end of a string.
ROSE_UTIL_API unsigned long generate_checksum(std::string s)
Compute a checksum.
ROSE_UTIL_API std::string listToString(const std::list< int > &, bool separateStrings=false)
Generate a string from a list of integers.
ROSE_UTIL_API std::string fixLineTermination(const std::string &input)
Normalizes line termination.
std::string unsignedToHex(T value)
Convert a number to a hexadecimal and decimal string.
std::string join(const std::string &separator, const Container &container)
Join individual items to form a single string.
bool bit(Unsigned src, size_t i)
Extract a single bit.
Definition: Rose/BitOps.h:172
Range of values delimited by endpoints.
Definition: Interval.h:33
ROSE_UTIL_API std::string yamlEscape(const std::string &)
Escapes characters that are special to YAML strings.
std::string signedToHex(T value)
Convert a number to a hexadecimal and decimal string.
Shortens names of int64_t stringifiers.
Definition: stringify.h:20
ROSE_UTIL_API std::string removeAnsiEscapes(const std::string &)
Remove ANSI escape characters.
ROSE_UTIL_API std::string escapeNewLineCharaters(const std::string &)
Escapes line feeds and double quotes.
ROSE_UTIL_API std::string indentMultilineString(const std::string &inputString, int statementColumnNumber)
Formatting support for generated code strings.
ROSE_UTIL_API std::string toHex2(uint64_t value, size_t nbits, bool show_unsigned_decimal=true, bool show_signed_decimal=true, uint64_t decimal_threshold=256)
Convert a number to a hexadecimal and decimal string.
ROSE_UTIL_API std::string joinEnglish(const std::vector< std::string > &phrases, const std::string &separator=",", const std::string &finalIntro="and")
Join strings as if they were English prose.
bool isContainedIn(const std::string &longString, const std::string &shortString)
Determines whether one string contains another.