Wireshark 4.7.0
The Wireshark network protocol analyzer
Loading...
Searching...
No Matches
charsets.h
Go to the documentation of this file.
1
10#pragma once
11#include "ws_symbol_export.h"
12
13#ifdef __cplusplus
14extern "C" {
15#endif /* __cplusplus */
16
17/*
18 * Translation tables that map the upper 128 code points in single-byte
19 * "extended ASCII" character encodings to Unicode code points in the
20 * Basic Multilingual Plane.
21 */
22
23/* Table for windows-1250 */
24extern const gunichar2 charset_table_cp1250[0x80];
25/* Table for windows-1251 */
26extern const gunichar2 charset_table_cp1251[0x80];
27/* Table for windows-1252 */
28extern const gunichar2 charset_table_cp1252[0x80];
29
30/* Tables for ISO-8859-X */
31extern const gunichar2 charset_table_iso_8859_2[0x80];
32extern const gunichar2 charset_table_iso_8859_3[0x80];
33extern const gunichar2 charset_table_iso_8859_4[0x80];
34extern const gunichar2 charset_table_iso_8859_5[0x80];
35extern const gunichar2 charset_table_iso_8859_6[0x80];
36extern const gunichar2 charset_table_iso_8859_7[0x80];
37extern const gunichar2 charset_table_iso_8859_8[0x80];
38extern const gunichar2 charset_table_iso_8859_9[0x80];
39extern const gunichar2 charset_table_iso_8859_10[0x80];
40extern const gunichar2 charset_table_iso_8859_11[0x80];
41extern const gunichar2 charset_table_iso_8859_13[0x80];
42extern const gunichar2 charset_table_iso_8859_14[0x80];
43extern const gunichar2 charset_table_iso_8859_15[0x80];
44extern const gunichar2 charset_table_iso_8859_16[0x80];
45
46/* Tables for Mac character sets */
47extern const gunichar2 charset_table_mac_roman[0x80];
48
49/* Tables for DOS code pages */
50extern const gunichar2 charset_table_cp437[0x80];
51extern const gunichar2 charset_table_cp855[0x80];
52extern const gunichar2 charset_table_cp866[0x80];
53
54/*
55 * Translation tables that map the lower 128 code points in single-byte
56 * ISO 646-based character encodings to Unicode code points in the
57 * Basic Multilingual Plane.
58 */
59extern const gunichar2 charset_table_iso_646_basic[0x80];
60
61/* Tables for EBCDIC code pages */
62extern const gunichar2 charset_table_ebcdic[256];
63extern const gunichar2 charset_table_ebcdic_cp037[256];
64extern const gunichar2 charset_table_ebcdic_cp500[256];
65
82WS_DLL_PUBLIC uint8_t *
83get_ascii_string(wmem_allocator_t *scope, const uint8_t *ptr, size_t length);
84
102WS_DLL_PUBLIC uint8_t *
103get_utf_8_string(wmem_allocator_t *scope, const uint8_t *ptr, size_t length);
104
124WS_DLL_PUBLIC uint8_t *
125get_iso_646_string(wmem_allocator_t *scope, const uint8_t *ptr, size_t length, const gunichar2 table[0x80]);
126
139WS_DLL_PUBLIC uint8_t *
140get_8859_1_string(wmem_allocator_t *scope, const uint8_t *ptr, size_t length);
141
142/*
143 * Given a wmem scope, a pointer, a length, and a translation table with
144 * 128 entries, treat the string of bytes referred to by the pointer and
145 * length as a string encoded using one octet per character, with octets
146 * with the high-order bit clear being ASCII and octets with the high-order
147 * bit set being mapped by the translation table to 2-byte Unicode Basic
148 * Multilingual Plane characters (including REPLACEMENT CHARACTER), and
149 * return a pointer to a UTF-8 string, allocated using the wmem scope.
150 */
151WS_DLL_PUBLIC uint8_t *
152get_unichar2_string(wmem_allocator_t *scope, const uint8_t *ptr, size_t length, const gunichar2 table[0x80]);
153
173WS_DLL_PUBLIC uint8_t *
174get_ucs_2_string(wmem_allocator_t *scope, const uint8_t *ptr, size_t length, unsigned encoding);
175
195WS_DLL_PUBLIC uint8_t *
196get_utf_16_string(wmem_allocator_t *scope, const uint8_t *ptr, size_t length, unsigned encoding);
197
215WS_DLL_PUBLIC uint8_t *
216get_ucs_4_string(wmem_allocator_t *scope, const uint8_t *ptr, size_t length, unsigned encoding);
217
230WS_DLL_PUBLIC uint8_t *
231get_ts_23_038_7bits_string_packed(wmem_allocator_t *scope, const uint8_t *ptr,
232 const size_t bit_offset, size_t no_of_chars);
233
242WS_DLL_PUBLIC uint8_t *
243get_ts_23_038_7bits_string_unpacked(wmem_allocator_t *scope, const uint8_t *ptr,
244 size_t length);
245
254WS_DLL_PUBLIC uint8_t *
255get_etsi_ts_102_221_annex_a_string(wmem_allocator_t *scope, const uint8_t *ptr,
256 size_t length);
257
267WS_DLL_PUBLIC uint8_t *
268get_ascii_7bits_string(wmem_allocator_t *scope, const uint8_t *ptr,
269 const size_t bit_offset, size_t no_of_chars);
270
287WS_DLL_PUBLIC uint8_t *
288get_nonascii_unichar2_string(wmem_allocator_t *scope, const uint8_t *ptr, size_t length, const gunichar2 table[256]);
289
307WS_DLL_PUBLIC uint8_t *
308get_gb18030_string(wmem_allocator_t *scope, const uint8_t *ptr, size_t length);
309
325WS_DLL_PUBLIC uint8_t *
326get_euc_kr_string(wmem_allocator_t *scope, const uint8_t *ptr, size_t length);
327
336WS_DLL_PUBLIC uint8_t *
337get_t61_string(wmem_allocator_t *scope, const uint8_t *ptr, size_t length);
338
347WS_DLL_PUBLIC uint8_t *
348get_dect_standard_8bits_string(wmem_allocator_t *scope, const uint8_t *ptr, size_t length);
349#ifdef __cplusplus
350}
351#endif /* __cplusplus */
352
353/*
354 * Editor modelines - https://www.wireshark.org/tools/modelines.html
355 *
356 * Local variables:
357 * c-basic-offset: 4
358 * tab-width: 8
359 * indent-tabs-mode: nil
360 * End:
361 *
362 * vi: set shiftwidth=4 tabstop=8 expandtab:
363 * :indentSize=4:tabSize=8:noTabs=true:
364 */
WS_DLL_PUBLIC uint8_t * get_ascii_7bits_string(wmem_allocator_t *scope, const uint8_t *ptr, const size_t bit_offset, size_t no_of_chars)
Convert a sequence of 7-bit ASCII characters to a UTF-8 string.
Definition charsets.c:1222
WS_DLL_PUBLIC uint8_t * get_utf_8_string(wmem_allocator_t *scope, const uint8_t *ptr, size_t length)
Validate and normalize a UTF‑8 byte sequence, replacing invalid sequences with the Unicode REPLACEMEN...
Definition charsets.c:114
WS_DLL_PUBLIC uint8_t * get_8859_1_string(wmem_allocator_t *scope, const uint8_t *ptr, size_t length)
Convert an ISO 8859/1 string to UTF‑8.
Definition charsets.c:178
WS_DLL_PUBLIC uint8_t * get_etsi_ts_102_221_annex_a_string(wmem_allocator_t *scope, const uint8_t *ptr, size_t length)
Retrieves a string based on ETSI TS 102 221 Annex A encoding.
Definition charsets.c:1095
WS_DLL_PUBLIC uint8_t * get_t61_string(wmem_allocator_t *scope, const uint8_t *ptr, size_t length)
Converts a T.61 encoded string to a UTF-8 string.
Definition charsets.c:1828
WS_DLL_PUBLIC uint8_t * get_ts_23_038_7bits_string_unpacked(wmem_allocator_t *scope, const uint8_t *ptr, size_t length)
Extracts a 7-bit string from TS 23.038 data.
Definition charsets.c:1076
WS_DLL_PUBLIC uint8_t * get_ascii_string(wmem_allocator_t *scope, const uint8_t *ptr, size_t length)
Convert an ASCII byte sequence to a UTF‑8 string using a wmem scope.
Definition charsets.c:83
WS_DLL_PUBLIC uint8_t * get_ts_23_038_7bits_string_packed(wmem_allocator_t *scope, const uint8_t *ptr, const size_t bit_offset, size_t no_of_chars)
Extracts a 7-bit encoded string from packed data.
Definition charsets.c:1004
WS_DLL_PUBLIC uint8_t * get_euc_kr_string(wmem_allocator_t *scope, const uint8_t *ptr, size_t length)
Convert a EUC-KR encoded string to UTF-8.
Definition charsets.c:1567
WS_DLL_PUBLIC uint8_t * get_nonascii_unichar2_string(wmem_allocator_t *scope, const uint8_t *ptr, size_t length, const gunichar2 table[256])
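Convert a string encoded with one octet per character to a UTF-8 string, mapping each octet through the supplied 256-entry translation table (the previous summary said "GB18030", apparently copied from get_gb18030_string; confirm against charsets.c) ...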
Definition charsets.c:1412
WS_DLL_PUBLIC uint8_t * get_utf_16_string(wmem_allocator_t *scope, const uint8_t *ptr, size_t length, unsigned encoding)
Convert a UTF‑16 encoded string to UTF‑8.
Definition charsets.c:763
WS_DLL_PUBLIC uint8_t * get_gb18030_string(wmem_allocator_t *scope, const uint8_t *ptr, size_t length)
Convert a GB18030 encoded string to UTF-8.
Definition charsets.c:1535
WS_DLL_PUBLIC uint8_t * get_ucs_2_string(wmem_allocator_t *scope, const uint8_t *ptr, size_t length, unsigned encoding)
Convert a UCS‑2 encoded string to UTF‑8.
Definition charsets.c:711
WS_DLL_PUBLIC uint8_t * get_iso_646_string(wmem_allocator_t *scope, const uint8_t *ptr, size_t length, const gunichar2 table[0x80])
Convert a string encoded in an ISO 646-based character set to UTF‑8.
Definition charsets.c:152
WS_DLL_PUBLIC uint8_t * get_dect_standard_8bits_string(wmem_allocator_t *scope, const uint8_t *ptr, size_t length)
Converts a DECT standard 8-bit string to a UTF-8 string.
Definition charsets.c:1890
WS_DLL_PUBLIC uint8_t * get_ucs_4_string(wmem_allocator_t *scope, const uint8_t *ptr, size_t length, unsigned encoding)
Convert a UCS‑4 encoded string to UTF‑8.
Definition charsets.c:863
Internal memory allocator interface used by the wmem subsystem.
Definition wmem_allocator.h:34