Line data Source code
1 : #include "test_helpers.h"
2 : #include "imap_util.h"
3 : #include "raii.h"
4 : #include <string.h>
5 : #include <stdlib.h>
6 :
7 1 : void test_imap_util(void) {
8 : /* Exercises imap_utf7_decode() on plain, encoded, empty, NULL and malformed inputs; each case lives in its own block. NOTE(review): the strcmp() calls assume ASSERT(r != NULL) stops the case on failure — confirm. */
9 : /* Pure ASCII — no encoding, output identical to input */
10 : {
11 2 : RAII_STRING char *r = imap_utf7_decode("INBOX");
12 1 : ASSERT(r != NULL, "imap_utf7_decode: ASCII should not return NULL");
13 1 : ASSERT(strcmp(r, "INBOX") == 0, "ASCII passthrough mismatch");
14 : }
15 :
16 : /* Literal ampersand: "&-" → "&" */
17 : {
18 2 : RAII_STRING char *r = imap_utf7_decode("foo&-bar");
19 1 : ASSERT(r != NULL, "imap_utf7_decode: literal & should not return NULL");
20 1 : ASSERT(strcmp(r, "foo&bar") == 0, "Literal & decoding mismatch");
21 : }
22 :
23 : /* Single accented character: é = U+00E9 → "&AOk-" */
24 : {
25 2 : RAII_STRING char *r = imap_utf7_decode("&AOk-");
26 1 : ASSERT(r != NULL, "imap_utf7_decode: single char should not return NULL");
27 1 : ASSERT(strcmp(r, "\xC3\xA9") == 0, "é (U+00E9) decoding mismatch");
28 : }
29 :
30 : /* Hungarian pangram: árvíztűrőtükörfúrógép */
31 : {
32 : /* Modified UTF-7 encoded version of the pangram */
33 1 : const char *encoded =
34 : "&AOE-rv&AO0-zt&AXE-r&AVE-t&APw-k&APY-rf&APo-r&APM-g&AOk-p";
35 2 : RAII_STRING char *r = imap_utf7_decode(encoded);
36 1 : ASSERT(r != NULL, "imap_utf7_decode: pangram should not return NULL");
37 1 : ASSERT(strcmp(r, "\xC3\xA1" /* á */
38 : "rv"
39 : "\xC3\xAD" /* í */
40 : "zt"
41 : "\xC5\xB1" /* ű */
42 : "r"
43 : "\xC5\x91" /* ő */
44 : "t"
45 : "\xC3\xBC" /* ü */
46 : "k"
47 : "\xC3\xB6" /* ö */
48 : "rf"
49 : "\xC3\xBA" /* ú */
50 : "r"
51 : "\xC3\xB3" /* ó */
52 : "g"
53 : "\xC3\xA9" /* é */
54 : "p") == 0,
55 : "Hungarian pangram decoding mismatch");
56 : }
57 :
58 : /* Mixed ASCII and encoded segments (folder path) */
59 : {
60 2 : RAII_STRING char *r = imap_utf7_decode("INBOX.&AOk-rtes&AO0-t&AOk-s");
61 1 : ASSERT(r != NULL, "imap_utf7_decode: mixed path should not return NULL");
62 1 : ASSERT(strcmp(r, "INBOX."
63 : "\xC3\xA9" /* é */
64 : "rtes"
65 : "\xC3\xAD" /* í */
66 : "t"
67 : "\xC3\xA9" /* é */
68 : "s") == 0,
69 : "Mixed folder path decoding mismatch");
70 : }
71 :
72 : /* Empty string: expects a non-NULL, empty result */
73 : {
74 2 : RAII_STRING char *r = imap_utf7_decode("");
75 1 : ASSERT(r != NULL, "imap_utf7_decode: empty string should not return NULL");
76 1 : ASSERT(strcmp(r, "") == 0, "Empty string result mismatch");
77 : }
78 :
79 : /* NULL input: expects NULL back. NOTE(review): r is a plain pointer here (no RAII_STRING, unlike the other cases), so an unexpected non-NULL result would presumably not be released — confirm. */
80 : {
81 1 : char *r = imap_utf7_decode(NULL);
82 1 : ASSERT(r == NULL, "imap_utf7_decode: NULL input should return NULL");
83 : }
84 :
85 : /* mod64 '+' (62) and ',' (63) characters: U+FBF0 → "&+,A-" */
86 : {
87 : /* UTF-16BE: FB F0 → 6-bit groups: 111110(+) 111111(,) 000000(A) */
88 2 : RAII_STRING char *r = imap_utf7_decode("&+,A-");
89 1 : ASSERT(r != NULL, "imap_utf7_decode: '+' ',' in base64 should not return NULL");
90 1 : ASSERT(strcmp(r, "\xEF\xAF\xB0") == 0, "U+FBF0 via '+' and ',' decoding mismatch");
91 : }
92 :
93 : /* Invalid character in base64 run: covers mod64_value() return -1 path; expects a non-NULL, empty result */
94 : {
95 2 : RAII_STRING char *r = imap_utf7_decode("&!-");
96 1 : ASSERT(r != NULL, "imap_utf7_decode: invalid base64 char should not return NULL");
97 1 : ASSERT(strcmp(r, "") == 0, "Invalid base64 segment should produce empty output");
98 : }
99 :
100 : /* ASCII codepoint via encoded path: U+0041 'A' → "&AEE-"
101 : * Tests utf8_encode() cp < 0x80 branch (1-byte output). */
102 : {
103 : /* UTF-16BE: 00 41 → base64: A(0) E(4) E(4), decodes to 0x00 0x41 = U+0041 */
104 2 : RAII_STRING char *r = imap_utf7_decode("&AEE-");
105 1 : ASSERT(r != NULL, "imap_utf7_decode: ASCII-via-encoding should not return NULL");
106 1 : ASSERT(strcmp(r, "A") == 0, "U+0041 via encoded path mismatch");
107 : }
108 :
109 : /* CJK 3-byte UTF-8: U+4E2D (中) → "&Ti0-"
110 : * Tests utf8_encode() 0x800 <= cp < 0x10000 branch. */
111 : {
112 : /* UTF-16BE: 4E 2D → base64: T(19) i(34) 0(52) */
113 2 : RAII_STRING char *r = imap_utf7_decode("&Ti0-");
114 1 : ASSERT(r != NULL, "imap_utf7_decode: CJK should not return NULL");
115 1 : ASSERT(strcmp(r, "\xE4\xB8\xAD") == 0, "U+4E2D (middle) CJK decoding mismatch");
116 : }
117 :
118 : /* UTF-16BE surrogate pair: U+10000 (𐀀) → "&2ADcAA-"
119 : * Tests utf8_encode() cp >= 0x10000 branch and surrogate-pair reassembly. */
120 : {
121 : /* High=0xD800, Low=0xDC00; UTF-16BE: D8 00 DC 00 → 2ADcAA */
122 2 : RAII_STRING char *r = imap_utf7_decode("&2ADcAA-");
123 1 : ASSERT(r != NULL, "imap_utf7_decode: surrogate pair should not return NULL");
124 1 : ASSERT(strcmp(r, "\xF0\x90\x80\x80") == 0, "U+10000 surrogate pair decoding mismatch");
125 : }
126 :
127 : /* Unpaired high surrogate followed by non-surrogate: covers cp=unit pass-through path */
128 : {
129 : /* UTF-16BE: D8 00 (high surrogate) 00 41 (U+0041, not a low surrogate)
130 : * → base64: 2AAAQQ; result is U+D800 (invalid UTF-8) + 'A', i.e. the bytes ED A0 80 followed by 'A' */
131 2 : RAII_STRING char *r = imap_utf7_decode("&2AAAQQ-");
132 1 : ASSERT(r != NULL, "imap_utf7_decode: unpaired high surrogate must not crash");
133 1 : ASSERT(strcmp(r, "\xED\xA0\x80" "A") == 0,
134 : "Unpaired high surrogate pass-through mismatch");
135 : }
136 : }
|