#include <wireshark.h>

Macros
#define	DEBUG_UTF_8_ENABLED false

#define	_CHECK_UTF_8(level, str, len)

#define	WS_UTF_8_CHECK(str, len) _CHECK_UTF_8(LOG_LEVEL_DEBUG, str, len)

#define	WS_UTF_8_DEBUG_HERE(str, len) _CHECK_UTF_8(LOG_LEVEL_ECHO, str, len)

#define	ws_utf8_char_len(ch) (ws_utf8_seqlen[(ch)])
	Returns the length of a UTF-8 multibyte sequence from its first byte.

#define	IS_LEAD_SURROGATE(uchar2) ((uchar2) >= 0xd800 && (uchar2) < 0xdc00)

#define	IS_TRAIL_SURROGATE(uchar2) ((uchar2) >= 0xdc00 && (uchar2) < 0xe000)

#define	SURROGATE_VALUE(lead, trail) (((((lead) - 0xd800) << 10) | ((trail) - 0xdc00)) + 0x10000)

Functions
WS_DLL_PUBLIC uint8_t *	ws_utf8_make_valid (wmem_allocator_t *scope, const uint8_t *ptr, ssize_t length)
	Validates and sanitizes a UTF-8 byte sequence.

WS_DLL_PUBLIC wmem_strbuf_t *	ws_utf8_make_valid_strbuf (wmem_allocator_t *scope, const uint8_t *ptr, ssize_t length)
	Validates a UTF-8 byte sequence and returns a string buffer.

Variables
WSUTIL_EXPORT const int	ws_utf8_seqlen [256]

Detailed Description

Unicode convenience routines.

Macro Definition Documentation

◆ _CHECK_UTF_8

#define _CHECK_UTF_8	(	level,
		str,
		len
	)

Value:

    do {                                                                \
        const char *__uni_endptr;                                       \
        if (DEBUG_UTF_8_ENABLED && (str) != NULL &&                     \
                        !g_utf8_validate(str, len, &__uni_endptr)) {    \
            ws_log_utf8(str, len, __uni_endptr);                        \
        }                                                               \
    } while (0)

◆ ws_utf8_char_len

#define ws_utf8_char_len(ch) (ws_utf8_seqlen[(ch)])

Returns the length of a UTF-8 multibyte sequence from its first byte.

Determines the expected number of bytes in a UTF-8 encoded code point based on the leading byte. Returns 0 if the byte is invalid as a UTF-8 starter.

Parameters

ch	The first byte of a UTF-8 sequence.

Returns: Length of the UTF-8 sequence (1–4), or 0 if invalid.

Function Documentation

◆ ws_utf8_make_valid()

WS_DLL_PUBLIC uint8_t * ws_utf8_make_valid	(	wmem_allocator_t *	scope,
		const uint8_t *	ptr,
		ssize_t	length
	)

Validates and sanitizes a UTF-8 byte sequence.

Processes a raw byte string of `length` bytes, replacing any ill-formed UTF-8 sequences with the Unicode REPLACEMENT CHARACTER (U+FFFD). The result is allocated using the provided wmem scope.

Parameters

scope	Memory allocator scope for the returned string.
ptr	Pointer to the input byte sequence.
length	Length of the input sequence.

Returns: Pointer to a valid UTF-8 string, allocated via scope.

◆ ws_utf8_make_valid_strbuf()

WS_DLL_PUBLIC wmem_strbuf_t * ws_utf8_make_valid_strbuf	(	wmem_allocator_t *	scope,
		const uint8_t *	ptr,
		ssize_t	length
	)

Validates a UTF-8 byte sequence and returns a string buffer.

Similar to ws_utf8_make_valid(), but returns a wmem_strbuf_t object for easier manipulation and appending. Ill-formed sequences are replaced with the Unicode REPLACEMENT CHARACTER.

Parameters

scope	Memory allocator scope for the returned buffer.
ptr	Pointer to the input byte sequence.
length	Length of the input sequence.

Returns: Pointer to a valid UTF-8 string buffer.

Macros

Functions

Variables

Detailed Description

Macro Definition Documentation

◆ _CHECK_UTF_8

◆ ws_utf8_char_len

Function Documentation

◆ ws_utf8_make_valid()

◆ ws_utf8_make_valid_strbuf()