#include <wireshark.h>
#include <wsutil/wmem/wmem.h>

Macros
#define	FORMAT_SIZE_PREFIX_SI (1 << 0)

#define	FORMAT_SIZE_PREFIX_IEC (1 << 1)

#define	format_size(size, unit, flags) format_size_wmem(NULL, size, unit, flags)

#define	HEXDUMP_ASCII_MASK (0x0003U)

#define	HEXDUMP_ASCII_OPTION(option) ((option) & HEXDUMP_ASCII_MASK)

#define	HEXDUMP_ASCII_INCLUDE (0x0000U) /* include ASCII section no delimiters (legacy tshark behavior) */

#define	HEXDUMP_ASCII_DELIMIT (0x0001U) /* include ASCII section with delimiters, useful for reliable detection of last hexdata */

#define	HEXDUMP_ASCII_EXCLUDE (0x0002U) /* exclude ASCII section from hexdump reports, if we really don't want or need it */

#define	plurality(d, s, p) ((d) == 1 ? (s) : (p))

#define	true_or_false(val) ((val) ? "TRUE" : "FALSE")

#define	string_or_null(val) ((val) ? (val) : "[NULL]")

Enumerations
enum	format_size_units_e { FORMAT_SIZE_UNIT_NONE , FORMAT_SIZE_UNIT_BYTES , FORMAT_SIZE_UNIT_BITS , FORMAT_SIZE_UNIT_BITS_S , FORMAT_SIZE_UNIT_BYTES_S , FORMAT_SIZE_UNIT_PACKETS , FORMAT_SIZE_UNIT_PACKETS_S , FORMAT_SIZE_UNIT_EVENTS , FORMAT_SIZE_UNIT_EVENTS_S , FORMAT_SIZE_UNIT_FIELDS , FORMAT_SIZE_UNIT_SECONDS , FORMAT_SIZE_UNIT_ERLANGS }

enum	hex_dump_enc { HEXDUMP_ENC_ASCII = 0 , HEXDUMP_ENC_EBCDIC = 1 }

Functions
WS_DLL_PUBLIC char *	wmem_strconcat (wmem_allocator_t *allocator, const char *first,...) G_GNUC_MALLOC G_GNUC_NULL_TERMINATED

WS_DLL_PUBLIC char *	wmem_strjoin (wmem_allocator_t *allocator, const char *separator, const char *first,...) G_GNUC_MALLOC G_GNUC_NULL_TERMINATED

WS_DLL_PUBLIC char *	wmem_strjoinv (wmem_allocator_t *allocator, const char *separator, char **str_array) G_GNUC_MALLOC

WS_DLL_PUBLIC char **	wmem_strsplit (wmem_allocator_t *allocator, const char *src, const char *delimiter, int max_tokens)

WS_DLL_PUBLIC char *	wmem_ascii_strdown (wmem_allocator_t *allocator, const char *str, ssize_t len)

WS_DLL_PUBLIC char *	ascii_strdown_inplace (char *str)

WS_DLL_PUBLIC char *	ascii_strup_inplace (char *str)

WS_DLL_PUBLIC bool	isprint_string (const char *str)

WS_DLL_PUBLIC bool	isprint_utf8_string (const char *str, const unsigned length)

WS_DLL_PUBLIC bool	isdigit_string (const unsigned char *str)

WS_DLL_PUBLIC const char *	ws_ascii_strcasestr (const char *haystack, const char *needle)

WS_DLL_PUBLIC const uint8_t *	ws_memrchr (const void *haystack, int ch, size_t n)

WS_DLL_PUBLIC char *	ws_escape_string (wmem_allocator_t *alloc, const char *string, bool add_quotes)

WS_DLL_PUBLIC char *	ws_escape_string_len (wmem_allocator_t *alloc, const char *string, ssize_t len, bool add_quotes)

WS_DLL_PUBLIC char *	ws_escape_null (wmem_allocator_t *alloc, const char *string, size_t len, bool add_quotes)

WS_DLL_PUBLIC char *	ws_escape_csv (wmem_allocator_t *alloc, const char *string, bool add_quotes, char quote_char, bool double_quote, bool escape_whitespace)

WS_DLL_PUBLIC int	ws_xton (char ch)

WS_DLL_PUBLIC char *	format_units (wmem_allocator_t *allocator, double size, format_size_units_e unit, uint16_t flags, int precision)

WS_DLL_PUBLIC char *	format_size_wmem (wmem_allocator_t *allocator, int64_t size, format_size_units_e unit, uint16_t flags)

WS_DLL_PUBLIC char	printable_char_or_period (char c)

WS_DLL_PUBLIC WS_RETNONNULL const char *	ws_strerrorname_r (int errnum, char *buf, size_t buf_size)

WS_DLL_PUBLIC char *	ws_strdup_underline (wmem_allocator_t *allocator, long offset, size_t len)

WS_DLL_PUBLIC char *	format_text (wmem_allocator_t *allocator, const char *string, size_t len)

WS_DLL_PUBLIC char *	format_text_string (wmem_allocator_t *allocator, const char *string)

WS_DLL_PUBLIC char *	format_text_wsp (wmem_allocator_t *allocator, const char *line, size_t len)

WS_DLL_PUBLIC char *	format_text_chr (wmem_allocator_t *allocator, const char *string, size_t len, char chr)

WS_DLL_PUBLIC char *	format_char (wmem_allocator_t *allocator, char c)

WS_DLL_PUBLIC char *	ws_utf8_truncate (char *string, size_t len)

WS_DLL_PUBLIC void	EBCDIC_to_ASCII (uint8_t *buf, unsigned bytes)

WS_DLL_PUBLIC uint8_t	EBCDIC_to_ASCII1 (uint8_t c)

WS_DLL_PUBLIC bool	hex_dump_buffer (bool(*print_line)(void *, const char *), void *fp, const unsigned char *cp, unsigned length, hex_dump_enc encoding, unsigned ascii_option)

Detailed Description

String utility definitions

SPDX-License-Identifier: GPL-2.0-or-later

Macro Definition Documentation

◆ FORMAT_SIZE_PREFIX_IEC

#define FORMAT_SIZE_PREFIX_IEC (1 << 1)

IEC (power of 1024) prefixes will be used.

◆ FORMAT_SIZE_PREFIX_SI

#define FORMAT_SIZE_PREFIX_SI (1 << 0)

SI (power of 1000) prefixes will be used.

Enumeration Type Documentation

◆ format_size_units_e

enum format_size_units_e

Enumerator
FORMAT_SIZE_UNIT_NONE	No unit will be appended. You must supply your own.
FORMAT_SIZE_UNIT_BYTES	"bytes" for un-prefixed sizes, "B" otherwise.
FORMAT_SIZE_UNIT_BITS	"bits" for un-prefixed sizes, "b" otherwise.
FORMAT_SIZE_UNIT_BITS_S	"bits/s" for un-prefixed sizes, "bps" otherwise.
FORMAT_SIZE_UNIT_BYTES_S	"bytes/s" for un-prefixed sizes, "Bps" otherwise.
FORMAT_SIZE_UNIT_PACKETS	"packets"
FORMAT_SIZE_UNIT_PACKETS_S	"packets/s"
FORMAT_SIZE_UNIT_EVENTS	"events"
FORMAT_SIZE_UNIT_EVENTS_S	"events/s"
FORMAT_SIZE_UNIT_FIELDS	"fields"
FORMAT_SIZE_UNIT_SECONDS	"seconds" for un-prefixed sizes, "s" otherwise.
FORMAT_SIZE_UNIT_ERLANGS	"erlangs" for un-prefixed sizes, "E" otherwise.

Function Documentation

◆ ascii_strdown_inplace()

WS_DLL_PUBLIC char * ascii_strdown_inplace ( char * str )

Convert all upper-case ASCII letters to their ASCII lower-case equivalents, in place, with a simple non-locale-dependent ASCII mapping (A-Z -> a-z). All other characters are left unchanged, as the mapping to lower case may be locale-dependent.

The string is assumed to be in a character encoding, such as an ISO 8859 or other EUC encoding, or UTF-8, in which all bytes in the range 0x00 through 0x7F are ASCII characters and non-ASCII characters are constructed from one or more bytes in the range 0x80 through 0xFF.

Parameters

str	The string to be lower-cased.

Returns: ptr to the string

◆ ascii_strup_inplace()

WS_DLL_PUBLIC char * ascii_strup_inplace ( char * str )

Convert all lower-case ASCII letters to their ASCII upper-case equivalents, in place, with a simple non-locale-dependent ASCII mapping (a-z -> A-Z). All other characters are left unchanged, as the mapping to upper case may be locale-dependent.

The string is assumed to be in a character encoding, such as an ISO 8859 or other EUC encoding, or UTF-8, in which all bytes in the range 0x00 through 0x7F are ASCII characters and non-ASCII characters are constructed from one or more bytes in the range 0x80 through 0xFF.

Parameters

str	The string to be upper-cased.

Returns: ptr to the string

◆ format_char()

WS_DLL_PUBLIC char * format_char	(	wmem_allocator_t *	allocator,
		char	c
	)

Given a wmem scope and an 8-bit character generate a valid UTF-8 string from it, allocated in the specified wmem scope, that:

shows printable Unicode characters as themselves;

shows non-printable ASCII characters as C-style escapes (hex if not one of the standard ones such as LF -> '\n');

and return a pointer to it.

Parameters

allocator	The wmem scope
c	A character to format

Returns: A pointer to the formatted string

◆ format_size_wmem()

WS_DLL_PUBLIC char * format_size_wmem	(	wmem_allocator_t *	allocator,
		int64_t	size,
		format_size_units_e	unit,
		uint16_t	flags
	)

Given a size, return its value in a human-readable format

Prefixes up to "T/Ti" (tera, tebi) are currently supported.

Parameters

size	The size value
flags	Flags to control the output (unit of measurement, SI vs IEC, etc). Unit and prefix flags may be ORed together.

Returns: A newly-allocated string representing the value.

◆ format_text()

WS_DLL_PUBLIC char * format_text	(	wmem_allocator_t *	allocator,
		const char *	string,
		size_t	len
	)

Given a wmem scope, a not-necessarily-null-terminated string, expected to be in UTF-8 but possibly containing invalid sequences (as it may have come from packet data), and the length of the string, generate a valid UTF-8 string from it, allocated in the specified wmem scope, that:

shows printable Unicode characters as themselves;

shows non-printable ASCII characters as C-style escapes (octal if not one of the standard ones such as LF -> '\n');

shows non-printable Unicode-but-not-ASCII characters as their universal character names;

Replaces illegal UTF-8 sequences with U+FFFD (replacement character) ;

and return a pointer to it.

Parameters

allocator	The wmem scope
string	A pointer to the input string
len	The length of the input string

Returns: A pointer to the formatted string

See also: tvb_format_text()

◆ format_text_chr()

WS_DLL_PUBLIC char * format_text_chr	(	wmem_allocator_t *	allocator,
		const char *	string,
		size_t	len,
		char	chr
	)

Given a string, generate a string from it that shows non-printable characters as the chr parameter passed, except a whitespace character (space, tab, carriage return, new line, vertical tab, or formfeed) which will be replaced by a space, and return a pointer to it.

This does not treat the input string as UTF-8.

This is useful for displaying binary data that frequently but not always contains text; otherwise the number of C escape codes makes it unreadable.

Parameters

allocator	The wmem scope
string	A pointer to the input string
len	The length of the input string
chr	The character to use to replace non-printable characters

Returns: A pointer to the formatted string

◆ format_text_string()

WS_DLL_PUBLIC char * format_text_string	(	wmem_allocator_t *	allocator,
		const char *	string
	)

Same as format_text() but accepts a nul-terminated string.

Parameters

allocator	The wmem scope
string	A pointer to the input string

Returns: A pointer to the formatted string

See also: tvb_format_text()

Given a wmem scope and a null-terminated string, expected to be in UTF-8 but possibly containing invalid sequences (as it may have come from packet data), and the length of the string, generate a valid UTF-8 string from it, allocated in the specified wmem scope, that:

shows printable Unicode characters as themselves;

shows non-printable ASCII characters as C-style escapes (octal if not one of the standard ones such as LF -> '\n');

shows non-printable Unicode-but-not-ASCII characters as their universal character names;

shows illegal UTF-8 sequences as a sequence of bytes represented as C-style hex escapes;

and return a pointer to it.

◆ format_text_wsp()

WS_DLL_PUBLIC char * format_text_wsp	(	wmem_allocator_t *	allocator,
		const char *	line,
		size_t	len
	)

Same as format_text() but replaces any whitespace characters (space, tab, carriage return, new line, vertical tab, or formfeed) with a space.

Parameters

allocator	The wmem scope
line	A pointer to the input string
len	The length of the input string

Returns: A pointer to the formatted string

◆ format_units()

WS_DLL_PUBLIC char * format_units	(	wmem_allocator_t *	allocator,
		double	size,
		format_size_units_e	unit,
		uint16_t	flags,
		int	precision
	)

Given a floating point value, return it in a human-readable format

Prefixes up to "E/Ei" (exa, exbi) and down to "a" (atto; negative prefixes are SI-only) are currently supported. Values outside that range will use scientific notation.

Parameters

size	The size value
flags	Flags to control the output (unit of measurement, SI vs IEC, etc). Unit and prefix flags may be ORed together.
precision	Maximum number of digits to appear after the decimal point. Trailing zeros are removed, as is the decimal point if no digits follow it.

Returns: A newly-allocated string representing the value.

◆ isdigit_string()

WS_DLL_PUBLIC bool isdigit_string ( const unsigned char * str )

Check if an entire string consists of digits

Parameters

str	The string to be checked

Returns: true if the entire string is digits, otherwise false

◆ isprint_string()

WS_DLL_PUBLIC bool isprint_string ( const char * str )

Check if an entire string consists of printable characters

Parameters

str	The string to be checked

Returns: true if the entire string is printable, otherwise false

◆ isprint_utf8_string()

WS_DLL_PUBLIC bool isprint_utf8_string	(	const char *	str,
		const unsigned	length
	)

Given a not-necessarily-null-terminated string, expected to be in UTF-8 but possibly containing invalid sequences (as it may have come from packet data), and the length of the string, determine if the string is valid UTF-8 consisting entirely of printable characters.

This means that it:

does not contain an illegal UTF-8 sequence (including overlong encodings, the sequences reserved for UTF-16 surrogate halves, and the values for code points above U+10FFFF that are no longer in Unicode)

does not contain a non-printable Unicode character such as control characters (including internal NULL bytes)

does not end in a partial sequence that could begin a valid character;

does not start with a partial sequence that could end a valid character;

and thus guarantees that the result of format_text() would be the same as that of wmem_strndup() with the same parameters.

Parameters

str	The string to be checked
length	The number of bytes to validate

Returns: true if the entire string is valid and printable UTF-8, otherwise false

◆ wmem_ascii_strdown()

WS_DLL_PUBLIC char * wmem_ascii_strdown	(	wmem_allocator_t *	allocator,
		const char *	str,
		ssize_t	len
	)

wmem_ascii_strdown: Based on g_ascii_strdown

Parameters

allocator	An enumeration of the different types of available allocators.
str	a string.
len	length of str in bytes, or -1 if str is nul-terminated.

Converts all upper case ASCII letters to lower case ASCII letters.

Return value: a newly-allocated string, with all the upper case characters in str converted to lower case, with semantics that exactly match g_ascii_tolower(). (Note that this is unlike the old g_strdown(), which modified the string in place.)

◆ wmem_strjoinv()

WS_DLL_PUBLIC char * wmem_strjoinv	(	wmem_allocator_t *	allocator,
		const char *	separator,
		char **	str_array
	)

As g_strjoinv, with the returned string wmem allocated. Joins a number of strings together to form one long string, with the optional separator inserted between each of them.

Parameters

allocator	The wmem scope to use to allocate the returned string
separator	A string to insert between each of the strings, or NULL.
str_array	A NULL-terminated array of strings to join

Note: If str_array has no items, the return value is an empty string. str_array should not be NULL (NULL is returned with a warning). NULL as a separator is equivalent to the empty string.

◆ wmem_strsplit()

WS_DLL_PUBLIC char ** wmem_strsplit	(	wmem_allocator_t *	allocator,
		const char *	src,
		const char *	delimiter,
		int	max_tokens
	)

Splits a string into a maximum of max_tokens pieces, using the given delimiter. If max_tokens is reached, the remainder of the string is appended to the last token. Successive delimiters are not folded and will instead result in an empty string as an element.

If src or delimiter are NULL, or if delimiter is empty, this will return NULL.

Do not use with a NULL allocator, use g_strsplit instead.

◆ ws_ascii_strcasestr()

WS_DLL_PUBLIC const char * ws_ascii_strcasestr	(	const char *	haystack,
		const char *	needle
	)

Finds the first occurrence of string 'needle' in string 'haystack'. The matching is done ignoring the case of ASCII characters in a non-locale-dependent way.

The string is assumed to be in a character encoding, such as an ISO 8859 or other EUC encoding, or UTF-8, in which all bytes in the range 0x00 through 0x7F are ASCII characters and non-ASCII characters are constructed from one or more bytes in the range 0x80 through 0xFF.

Parameters

haystack	The string possibly containing the substring
needle	The substring to be searched

Returns: A pointer into 'haystack' where 'needle' is first found. Otherwise it returns NULL.

◆ ws_memrchr()

WS_DLL_PUBLIC const uint8_t * ws_memrchr	(	const void *	haystack,
		int	ch,
		size_t	n
	)

Like the memchr() function, except it scans backwards from the end.

Parameters

haystack	Pointer to the bytes of memory to search
ch	The character to search
n	The length of bytes to search from the end

Returns: A pointer to the last occurrence of "ch" in "haystack". If "ch" isn't found or "n" is 0, returns NULL.

◆ ws_utf8_truncate()

WS_DLL_PUBLIC char * ws_utf8_truncate	(	char *	string,
		size_t	len
	)

Truncate a UTF-8 string in place so that it is no larger than len bytes, ensuring that the string is null terminated and ends with a complete character instead of a partial sequence (e.g., possibly truncating up to 3 additional bytes if the terminal character is 4 bytes long).

The buffer holding the string must be large enough (at least len + 1 including the null terminator), and the first len bytes of the buffer must be a valid UTF-8 string, except for possibly ending in a partial sequence or not being null terminated. This is a convenience function that for speed does not check either of those conditions.

A common use case is when a valid UTF-8 string has been copied into a buffer of length len+1 via snprintf, strlcpy, or strlcat and truncated, to ensure that the final UTF-8 character is not a partial sequence.

Parameters

string	A pointer to the input string
len	The maximum length to truncate to

Returns: ptr to the string

Macros

Enumerations

Functions

Detailed Description

Macro Definition Documentation

◆ FORMAT_SIZE_PREFIX_IEC

◆ FORMAT_SIZE_PREFIX_SI

Enumeration Type Documentation

◆ format_size_units_e

Function Documentation

◆ ascii_strdown_inplace()

◆ ascii_strup_inplace()

◆ format_char()

◆ format_size_wmem()

◆ format_text()

◆ format_text_chr()

◆ format_text_string()

◆ format_text_wsp()

◆ format_units()

◆ isdigit_string()

◆ isprint_string()

◆ isprint_utf8_string()

◆ wmem_ascii_strdown()

◆ wmem_strjoinv()

◆ wmem_strsplit()

◆ ws_ascii_strcasestr()

◆ ws_memrchr()

◆ ws_utf8_truncate()