Line data Source code
1 : #include "imap_util.h"
2 : #include <stdio.h>
3 : #include <stdlib.h>
4 : #include <string.h>
5 : #include <stdint.h>
6 :
7 : /**
8 : * @file imap_util.c
9 : * @brief IMAP Modified UTF-7 encoder/decoder (RFC 3501 §5.1.3) and UID-set expansion.
10 : */
11 :
/*
 * Map one character of the modified base64 alphabet to its 6-bit value:
 *   'A'..'Z' -> 0..25, 'a'..'z' -> 26..51, '0'..'9' -> 52..61,
 *   '+' -> 62, ',' -> 63.
 * Any other character yields -1.
 */
static int mod64_value(char c) {
    /* The two punctuation symbols have no range to compute from. */
    switch (c) {
    case '+':
        return 62;
    case ',':
        return 63;
    default:
        break;
    }

    return (c >= 'A' && c <= 'Z') ? (c - 'A')
         : (c >= 'a' && c <= 'z') ? (26 + (c - 'a'))
         : (c >= '0' && c <= '9') ? (52 + (c - '0'))
         : -1;
}
21 :
/*
 * Write the UTF-8 encoding of code point `cp` into `out`.
 * `out` must have room for up to 4 bytes; no terminator is written.
 * Returns the number of bytes stored (1-4).
 */
static int utf8_encode(uint32_t cp, char *out) {
    /* Lead-byte marker indexed by sequence length (entries 0 and 1 unused). */
    static const uint8_t lead_mark[5] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0 };

    const int nbytes = (cp < 0x80) ? 1
                     : (cp < 0x800) ? 2
                     : (cp < 0x10000) ? 3
                     : 4;

    if (nbytes == 1) {
        out[0] = (char)cp;
        return 1;
    }

    /* Fill continuation bytes from the tail, peeling 6 bits at a time. */
    uint32_t rest = cp;
    for (int k = nbytes - 1; k > 0; k--) {
        out[k] = (char)(0x80 | (rest & 0x3F));
        rest >>= 6;
    }
    /* Whatever is left belongs in the lead byte. */
    out[0] = (char)(lead_mark[nbytes] | rest);
    return nbytes;
}
45 :
/**
 * Decode an IMAP Modified UTF-7 string (RFC 3501 §5.1.3) into UTF-8.
 *
 * Characters outside a shift sequence are copied verbatim. "&-" yields a
 * literal '&'. Any other "&...-" sequence is treated as modified base64
 * carrying UTF-16BE code units, which are re-encoded as UTF-8.
 *
 * The base64 run is decoded in streaming fashion, so runs of any length are
 * handled (no fixed-size staging buffer that could truncate long names).
 *
 * Lenient handling of malformed input:
 *  - a character outside the base64 alphabet ends the run and is consumed;
 *  - a trailing odd byte or leftover bits at the end of a run are dropped;
 *  - a surrogate without a valid partner is passed to the UTF-8 encoder
 *    as-is rather than rejected.
 *
 * @param s  NUL-terminated input; may be NULL.
 * @return   Newly malloc'd NUL-terminated string that the caller must free,
 *           or NULL if @p s is NULL or the allocation cannot be made.
 */
char *imap_utf7_decode(const char *s) {
    if (!s) return NULL;
    size_t len = strlen(s);

    /*
     * Plain bytes copy 1:1 and every UTF-16 unit (>= 2 base64 chars) yields
     * at most 3 UTF-8 bytes, so 4 bytes per input byte is a safe upper bound.
     */
    if (len > (SIZE_MAX - 1) / 4) return NULL;   /* size computation would wrap */
    char *out = malloc(len * 4 + 1);
    if (!out) return NULL;
    char *dst = out;

    const char *p = s;
    while (*p) {
        if (*p != '&') {
            *dst++ = *p++;
            continue;
        }
        p++; /* skip '&' */

        if (*p == '-') {
            /* "&-" is a literal '&' */
            *dst++ = '&';
            p++;
            continue;
        }

        unsigned int bits = 0;    /* bit accumulator, holds < 14 live bits   */
        int bit_cnt = 0;          /* number of live bits in `bits`           */
        int have_hi = 0;          /* non-zero when hi_byte holds half a unit */
        uint8_t hi_byte = 0;      /* first byte of the current UTF-16BE unit */
        uint16_t pending = 0;     /* high surrogate awaiting its partner     */

        while (*p && *p != '-') {
            int v = mod64_value(*p++);
            if (v < 0) break;     /* invalid character terminates the run */

            bits = (bits << 6) | (unsigned int)v;
            bit_cnt += 6;
            if (bit_cnt < 8) continue;

            /* A full byte is available: peel it off the top. */
            bit_cnt -= 8;
            uint8_t byte = (uint8_t)(bits >> bit_cnt);
            bits &= (1u << bit_cnt) - 1u;

            if (!have_hi) {
                hi_byte = byte;
                have_hi = 1;
                continue;
            }
            have_hi = 0;
            uint16_t unit = (uint16_t)(((unsigned int)hi_byte << 8) | byte);

            if (pending) {
                if (unit >= 0xDC00 && unit <= 0xDFFF) {
                    /* Valid surrogate pair: combine into one code point. */
                    uint32_t cp = 0x10000u
                                + ((uint32_t)(pending - 0xD800) << 10)
                                + (uint32_t)(unit - 0xDC00);
                    dst += utf8_encode(cp, dst);
                    pending = 0;
                    continue;
                }
                /* Unpaired high surrogate: pass through, then handle `unit`. */
                dst += utf8_encode(pending, dst);
                pending = 0;
            }

            if (unit >= 0xD800 && unit <= 0xDBFF) {
                pending = unit;   /* wait for the following unit */
            } else {
                dst += utf8_encode(unit, dst);
            }
        }

        /* Run ended right after a high surrogate: pass it through. */
        if (pending) dst += utf8_encode(pending, dst);

        if (*p == '-') p++;
    }
    *dst = '\0';
    return out;
}
118 :
/**
 * Expand an IMAP UID set such as "1:3,7" into an array of UID strings.
 *
 * Each UID is formatted as a 16-digit, zero-padded decimal string. Elements
 * are separated by ','; spaces and tabs before an element are skipped. A
 * range "a:b" covers both endpoints and everything between them regardless
 * of order, so "5:3" expands like "3:5" (RFC 3501 seq-range). A range with a
 * missing upper bound ("5:") is treated as the single value 5.
 *
 * Parsing stops quietly at the first token that is not a number or that
 * does not fit in 32 bits (IMAP numbers are 32-bit); whatever was expanded
 * up to that point is returned.
 *
 * @param set        UID set text; NULL or "" yields an empty result.
 * @param uids_out   Receives a malloc'd array of char[17] (caller frees with
 *                   free()), or NULL when no UID was produced.
 * @param count_out  Receives the number of entries in *uids_out.
 * @return 0 on success, -1 on allocation failure (outputs stay NULL / 0).
 */
int imap_uid_set_expand(const char *set, char (**uids_out)[17], int *count_out) {
    *uids_out = NULL;
    *count_out = 0;
    if (!set || !*set) return 0;

    int cap = 32, cnt = 0;
    char (*uids)[17] = NULL;

    const char *p = set;
    while (*p) {
        while (*p == ' ' || *p == '\t') p++;
        if (!*p) break;

        char *end;
        unsigned long lo = strtoul(p, &end, 10);
        if (end == p) break;            /* not a number — stop */
        if (lo > UINT32_MAX) break;     /* cannot be a 32-bit IMAP number */
        p = end;

        unsigned long hi = lo;
        if (*p == ':') {
            p++;
            hi = strtoul(p, &end, 10);
            if (end == p) {
                hi = lo;                /* malformed range: single value */
            } else {
                if (hi > UINT32_MAX) break;
                p = end;
            }
        }

        /* RFC 3501: "4:2" and "2:4" denote the same range. */
        if (lo > hi) {
            unsigned long tmp = lo;
            lo = hi;
            hi = tmp;
        }

        /*
         * Terminate by testing uid == hi after emitting rather than
         * uid <= hi, which can never become false when hi is the largest
         * representable value.
         */
        for (unsigned long uid = lo; ; uid++) {
            if (!uids) {
                uids = malloc((size_t)cap * sizeof *uids);
                if (!uids) return -1;
            }
            if (cnt == cap) {
                /* Refuse to grow once doubling could overflow int. */
                if ((unsigned int)cap > (~0u >> 2)) {
                    free(uids);
                    return -1;
                }
                cap *= 2;
                char (*tmp)[17] = realloc(uids, (size_t)cap * sizeof *uids);
                if (!tmp) { free(uids); return -1; }
                uids = tmp;
            }
            snprintf(uids[cnt++], sizeof uids[0], "%016lu", uid);
            if (uid == hi) break;
        }

        if (*p == ',') p++;
    }

    *uids_out = uids;
    *count_out = cnt;
    return 0;
}
166 :
/**
 * Encode a UTF-8 string as IMAP Modified UTF-7 (RFC 3501 §5.1.3).
 *
 * Printable ASCII (0x20..0x7E) other than '&' is copied unchanged; '&' is
 * written as "&-". Every maximal run of other bytes is decoded from UTF-8,
 * converted to UTF-16BE (surrogate pairs for code points above U+FFFF) and
 * written as "&" + modified base64 + "-".
 *
 * Malformed UTF-8 is replaced by U+FFFD instead of being turned into
 * arbitrary or ill-formed UTF-16:
 *  - stray continuation bytes, 0xC0/0xC1 and lead bytes 0xF5..0xFF each
 *    become one U+FFFD;
 *  - a sequence cut short by a non-continuation byte becomes one U+FFFD for
 *    the bytes consumed so far;
 *  - overlong forms, encoded surrogates (U+D800..U+DFFF) and code points
 *    above U+10FFFF become one U+FFFD for the whole sequence.
 *
 * @param s  NUL-terminated UTF-8 input; may be NULL.
 * @return   Newly malloc'd NUL-terminated string that the caller must free,
 *           or NULL if @p s is NULL or the allocation cannot be made.
 */
char *imap_utf7_encode(const char *s) {
    if (!s) return NULL;
    size_t len = strlen(s);

    /* Upper bound: every input byte can expand to at most 8 output chars. */
    if (len > (SIZE_MAX - 4) / 8) return NULL;   /* size computation would wrap */
    char *out = malloc(len * 8 + 4);
    if (!out) return NULL;
    char *dst = out;

    static const char mod64[] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";

    const unsigned char *p = (const unsigned char *)s;
    while (*p) {
        if (*p == '&') {
            /* '&' is escaped as "&-" */
            *dst++ = '&';
            *dst++ = '-';
            p++;
            continue;
        }
        if (*p >= 0x20 && *p <= 0x7E) {
            /* Printable ASCII: pass through */
            *dst++ = (char)*p++;
            continue;
        }

        /* Non-printable / non-ASCII run: UTF-16BE in modified base64. */
        *dst++ = '&';
        unsigned int bits = 0;
        int bit_cnt = 0;

        while (*p && !(*p >= 0x20 && *p <= 0x7E)) {
            /* Decode one UTF-8 code point. */
            uint32_t cp;
            uint32_t min_cp = 0;   /* smallest value this length may encode */
            int seqlen;
            if (*p < 0x80) {
                cp = *p; seqlen = 1;
            } else if (*p < 0xC2 || *p > 0xF4) {
                /* Never a valid lead byte. */
                cp = 0xFFFD; seqlen = 1;
            } else if (*p < 0xE0) {
                cp = *p & 0x1Fu; seqlen = 2; min_cp = 0x80;
            } else if (*p < 0xF0) {
                cp = *p & 0x0Fu; seqlen = 3; min_cp = 0x800;
            } else {
                cp = *p & 0x07u; seqlen = 4; min_cp = 0x10000;
            }

            int complete = 1;
            for (int i = 1; i < seqlen; i++) {
                /* Also stops at the terminating NUL, so no over-read. */
                if ((p[i] & 0xC0) != 0x80) {
                    seqlen = i;
                    complete = 0;
                    break;
                }
                cp = (cp << 6) | (p[i] & 0x3Fu);
            }
            p += seqlen;

            /* Reject truncated, overlong, surrogate and out-of-range values. */
            if (!complete || cp < min_cp || cp > 0x10FFFFu ||
                (cp >= 0xD800u && cp <= 0xDFFFu)) {
                cp = 0xFFFD;
            }

            /* Split into UTF-16 code units (BMP char or surrogate pair). */
            uint16_t units[2];
            int nunit;
            if (cp <= 0xFFFFu) {
                units[0] = (uint16_t)cp;
                nunit = 1;
            } else {
                cp -= 0x10000u;
                units[0] = (uint16_t)(0xD800u | (cp >> 10));
                units[1] = (uint16_t)(0xDC00u | (cp & 0x3FFu));
                nunit = 2;
            }

            /* Feed each unit big-endian into the base64 bit stream. */
            for (int j = 0; j < nunit; j++) {
                const uint8_t be[2] = {
                    (uint8_t)(units[j] >> 8),
                    (uint8_t)(units[j] & 0xFF)
                };
                for (int k = 0; k < 2; k++) {
                    bits = (bits << 8) | be[k];
                    bit_cnt += 8;
                    while (bit_cnt >= 6) {
                        bit_cnt -= 6;
                        *dst++ = mod64[(bits >> bit_cnt) & 0x3F];
                        bits &= (1u << bit_cnt) - 1u;
                    }
                }
            }
        }

        /* Flush remaining bits (zero-padded to the next 6-bit boundary). */
        if (bit_cnt > 0)
            *dst++ = mod64[(bits << (6 - bit_cnt)) & 0x3F];

        *dst++ = '-';
    }
    *dst = '\0';
    return out;
}
|