Line data Source code
#include "imap_util.h"

#include <errno.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
6 :
7 : /**
8 : * @file imap_util.c
9 : * @brief IMAP Modified UTF-7 decoder and encoder (RFC 3501 §5.1.3), plus UID-set expansion.
10 : */
11 :
/**
 * Translate one character of the modified Base64 alphabet into its 6-bit
 * value.
 *
 * Alphabet layout: 'A'-'Z' -> 0..25, 'a'-'z' -> 26..51, '0'-'9' -> 52..61,
 * '+' -> 62 and ',' -> 63.
 *
 * @param c  Character to look up.
 * @return   Value in the range 0..63, or -1 if @p c is not in the alphabet.
 */
static int mod64_value(char c)
{
    int value;

    if ('A' <= c && c <= 'Z') {
        value = c - 'A';
    } else if ('a' <= c && c <= 'z') {
        value = 26 + (c - 'a');
    } else if ('0' <= c && c <= '9') {
        value = 52 + (c - '0');
    } else {
        switch (c) {
        case '+':
            value = 62;
            break;
        case ',':
            value = 63;
            break;
        default:
            value = -1;
            break;
        }
    }
    return value;
}
21 :
/**
 * Write the UTF-8 encoding of a code point into a caller-supplied buffer.
 *
 * The sequence length is chosen from the magnitude of @p cp (below 0x80,
 * 0x800, 0x10000, or anything larger); no validity check is performed on
 * the code point itself. No NUL terminator is written.
 *
 * @param cp   Code point to encode.
 * @param out  Destination buffer with room for at least 4 bytes.
 * @return     Number of bytes written (1 to 4).
 */
static int utf8_encode(uint32_t cp, char *out)
{
    /* Lead-byte marker indexed by sequence length (entries 0 and 1 unused). */
    static const unsigned char lead_marker[5] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0 };

    int len = (cp < 0x80) ? 1 : (cp < 0x800) ? 2 : (cp < 0x10000) ? 3 : 4;

    if (len == 1) {
        out[0] = (char)cp;
        return 1;
    }

    /* Fill continuation bytes from the tail, peeling off 6 bits at a time. */
    uint32_t rest = cp;
    for (int idx = len - 1; idx > 0; idx--) {
        out[idx] = (char)(0x80 | (rest & 0x3F));
        rest >>= 6;
    }

    /* Whatever remains belongs in the lead byte. */
    out[0] = (char)(lead_marker[len] | rest);
    return len;
}
45 :
/**
 * Decode an IMAP Modified UTF-7 string (RFC 3501 §5.1.3) into UTF-8.
 *
 * Characters outside a shifted run are copied through unchanged. "&-" yields
 * a literal '&'. Any other '&' opens a modified-Base64 run that lasts until
 * the closing '-', the end of the string, or the first character that is not
 * in the modified-Base64 alphabet (that character is consumed and dropped).
 * The run's payload is interpreted as UTF-16BE and re-encoded as UTF-8.
 *
 * The run is decoded as a stream, so its length is not limited: bits are
 * collected into bytes, bytes into 16-bit units, and each unit is emitted as
 * soon as it is complete. A high surrogate is held back for one unit so it can
 * be combined with a following low surrogate. Unpaired surrogates are passed
 * through as-is, and a trailing odd byte or leftover bits are discarded.
 *
 * @param s  NUL-terminated Modified UTF-7 input; may be NULL.
 * @return   Newly malloc()ed NUL-terminated string that the caller must
 *           free(), or NULL if @p s is NULL or the allocation fails.
 */
char *imap_utf7_decode(const char *s)
{
    if (!s) {
        return NULL;
    }
    size_t len = strlen(s);

    /*
     * Worst case: a literal byte maps to one byte, and a shifted run of k
     * UTF-16 units needs more than 2k input characters for at most 3k output
     * bytes, so 4 bytes per input byte is always enough.
     */
    if (len > (SIZE_MAX - 1) / 4) {
        return NULL;
    }
    char *out = malloc(len * 4 + 1);
    if (!out) {
        return NULL;
    }
    char *dst = out;

    const char *p = s;
    while (*p) {
        if (*p != '&') {
            *dst++ = *p++;
            continue;
        }
        p++; /* skip '&' */

        if (*p == '-') {
            /* "&-" is a literal '&' */
            *dst++ = '&';
            p++;
            continue;
        }

        uint32_t bits = 0;         /* Base64 bit accumulator */
        int bit_cnt = 0;           /* number of valid bits in `bits` */
        uint32_t unit = 0;         /* UTF-16 unit being assembled */
        int have_high_byte = 0;    /* non-zero once the unit's first byte is in */
        uint32_t pending_high = 0; /* high surrogate awaiting its partner, or 0 */

        while (*p && *p != '-') {
            int v = mod64_value(*p++);
            if (v < 0) {
                break;
            }
            bits = (bits << 6) | (uint32_t)v;
            bit_cnt += 6;
            if (bit_cnt < 8) {
                continue;
            }

            /* A full byte is available: peel it off the top of the accumulator. */
            bit_cnt -= 8;
            uint32_t byte = (bits >> bit_cnt) & 0xFFu;
            bits &= (1u << bit_cnt) - 1u;

            if (!have_high_byte) {
                unit = byte << 8;
                have_high_byte = 1;
                continue;
            }
            unit |= byte;
            have_high_byte = 0;

            if (pending_high) {
                uint32_t high = pending_high;
                pending_high = 0;
                if (unit >= 0xDC00 && unit <= 0xDFFF) {
                    /* Valid surrogate pair: combine into one code point. */
                    uint32_t cp = 0x10000u
                                + ((high - 0xD800u) << 10)
                                + (unit - 0xDC00u);
                    dst += utf8_encode(cp, dst);
                    continue;
                }
                /* Unpaired high surrogate: pass through, then handle `unit`. */
                dst += utf8_encode(high, dst);
            }

            if (unit >= 0xD800 && unit <= 0xDBFF) {
                pending_high = unit;
            } else {
                dst += utf8_encode(unit, dst);
            }
        }

        /* A run that ends right after a high surrogate: pass it through. */
        if (pending_high) {
            dst += utf8_encode(pending_high, dst);
        }
        if (*p == '-') {
            p++;
        }
    }
    *dst = '\0';
    return out;
}
118 :
/**
 * Parse one unsigned decimal UID at *pp.
 *
 * Leading whitespace is skipped (as strtoul() would), but unlike strtoul() a
 * sign is rejected, and values that overflow or exceed 32 bits are treated as
 * invalid. On success *pp is advanced past the digits; on failure *pp and
 * *value are left untouched. The caller's errno is preserved.
 *
 * @return 1 if a valid number was read, 0 otherwise.
 */
static int imap_uid_parse_number(const char **pp, unsigned long *value)
{
    const char *p = *pp;
    while (*p == ' ' || (*p >= '\t' && *p <= '\r')) {
        p++;
    }
    if (*p < '0' || *p > '9') {
        return 0;
    }

    int saved_errno = errno;
    errno = 0;
    char *end;
    unsigned long v = strtoul(p, &end, 10);
    int overflowed = (errno == ERANGE);
    errno = saved_errno;

    if (overflowed || v > 0xFFFFFFFFul) {
        return 0;
    }
    *pp = end;
    *value = v;
    return 1;
}

/**
 * Expand an IMAP UID set such as "1:3,7" into an array of UID strings.
 *
 * Each UID is formatted as a zero-padded 16-digit decimal string. Items are
 * separated by ','; spaces and tabs before an item are skipped. A range
 * "a:b" expands to every UID between the two bounds inclusive, in ascending
 * order; reversed bounds ("5:3") are equivalent to "3:5" per RFC 3501. A
 * range whose upper bound is missing or invalid (e.g. "2:*") contributes only
 * its lower bound. Parsing stops silently at the first item that is not a
 * valid unsigned 32-bit decimal number; UIDs collected so far are returned.
 *
 * @param set        NUL-terminated UID set; NULL or "" yields an empty result.
 * @param uids_out   Receives a malloc()ed array of 17-byte strings that the
 *                   caller must free(), or NULL when no UID was produced.
 * @param count_out  Receives the number of entries in *uids_out.
 * @return 0 on success; -1 if memory allocation fails or the expansion would
 *         not fit in an int count (outputs are then NULL / 0).
 */
int imap_uid_set_expand(const char *set, char (**uids_out)[17], int *count_out)
{
    *uids_out = NULL;
    *count_out = 0;
    if (!set || !*set) {
        return 0;
    }

    int cap = 0;
    int cnt = 0;
    char (*uids)[17] = NULL;

    const char *p = set;
    while (*p) {
        while (*p == ' ' || *p == '\t') {
            p++;
        }
        if (!*p) {
            break;
        }

        unsigned long lo;
        if (!imap_uid_parse_number(&p, &lo)) {
            break; /* not a number: stop */
        }

        unsigned long hi = lo;
        if (*p == ':') {
            p++;
            if (!imap_uid_parse_number(&p, &hi)) {
                hi = lo; /* malformed range: keep only the lower bound */
            }
        }
        if (hi < lo) {
            /* RFC 3501: "4:2" and "2:4" denote the same range. */
            unsigned long swap = lo;
            lo = hi;
            hi = swap;
        }

        /* Refuse expansions whose total count cannot be represented as int. */
        unsigned long span = hi - lo;
        if (span >= (unsigned long)(INT_MAX - cnt)) {
            free(uids);
            return -1;
        }
        int needed = cnt + (int)span + 1;

        if (needed > cap) {
            int new_cap = (cap > 0) ? cap : 32;
            while (new_cap < needed) {
                new_cap = (new_cap > INT_MAX / 2) ? INT_MAX : new_cap * 2;
            }
            char (*tmp)[17] = NULL;
            if ((size_t)new_cap <= SIZE_MAX / sizeof *tmp) {
                tmp = realloc(uids, (size_t)new_cap * sizeof *tmp);
            }
            if (!tmp) {
                free(uids);
                return -1;
            }
            uids = tmp;
            cap = new_cap;
        }

        /* Terminate on equality so the loop cannot wrap past `hi`. */
        for (unsigned long uid = lo;; uid++) {
            snprintf(uids[cnt++], sizeof uids[0], "%016lu", uid);
            if (uid == hi) {
                break;
            }
        }

        if (*p == ',') {
            p++;
        }
    }

    *uids_out = uids;
    *count_out = cnt;
    return 0;
}
166 :
/**
 * Encode a UTF-8 string as IMAP Modified UTF-7 (RFC 3501 §5.1.3).
 *
 * Printable ASCII (0x20..0x7E) other than '&' is copied through, and '&' is
 * written as "&-". Every maximal run of other bytes is decoded from UTF-8,
 * converted to UTF-16BE (with surrogate pairs above U+FFFF) and written as
 * '&' + modified Base64 + '-'.
 *
 * Ill-formed UTF-8 is replaced by U+FFFD so the output never contains invalid
 * UTF-16. This covers stray continuation bytes, lead bytes that can never be
 * valid (0xC0, 0xC1, 0xF5..0xFF), truncated sequences, overlong forms,
 * encoded surrogates and code points above U+10FFFF.
 *
 * @param s  NUL-terminated UTF-8 input; may be NULL.
 * @return   Newly malloc()ed NUL-terminated string that the caller must
 *           free(), or NULL if @p s is NULL or the allocation fails.
 */
char *imap_utf7_encode(const char *s)
{
    if (!s) {
        return NULL;
    }
    size_t len = strlen(s);

    /*
     * Generous upper bound of 8 output chars per input byte; the costliest
     * case is a lone non-printable byte: '&' + 3 Base64 chars + '-'.
     */
    if (len > (SIZE_MAX - 4) / 8) {
        return NULL;
    }
    char *out = malloc(len * 8 + 4);
    if (!out) {
        return NULL;
    }
    char *dst = out;

    static const char mod64[] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";

    const unsigned char *p = (const unsigned char *)s;
    while (*p) {
        if (*p == '&') {
            /* '&' is escaped as "&-" */
            *dst++ = '&';
            *dst++ = '-';
            p++;
            continue;
        }
        if (*p >= 0x20 && *p <= 0x7E) {
            /* Printable ASCII: pass through */
            *dst++ = (char)*p++;
            continue;
        }

        /* Non-printable / non-ASCII run: UTF-16BE in modified Base64 */
        *dst++ = '&';
        unsigned int bits = 0;
        int bit_cnt = 0;

        while (*p && !(*p >= 0x20 && *p <= 0x7E)) {
            /* Decode one UTF-8 code point. */
            uint32_t cp;
            uint32_t min_cp = 0; /* smallest value allowed for this length */
            int seqlen;
            if (*p < 0x80) {
                cp = *p;
                seqlen = 1;
            } else if (*p < 0xC2 || *p > 0xF4) {
                /* Stray continuation byte or a lead byte that is never valid. */
                cp = 0xFFFD;
                seqlen = 1;
            } else if (*p < 0xE0) {
                cp = *p & 0x1Fu;
                seqlen = 2;
                min_cp = 0x80;
            } else if (*p < 0xF0) {
                cp = *p & 0x0Fu;
                seqlen = 3;
                min_cp = 0x800;
            } else {
                cp = *p & 0x07u;
                seqlen = 4;
                min_cp = 0x10000;
            }
            for (int i = 1; i < seqlen; i++) {
                /* The NUL terminator fails this check too, so we never overrun. */
                if ((p[i] & 0xC0) != 0x80) {
                    seqlen = i;
                    cp = 0xFFFD;
                    break;
                }
                cp = (cp << 6) | (p[i] & 0x3Fu);
            }
            p += seqlen;

            /* Reject overlong forms, encoded surrogates and out-of-range values. */
            if (cp < min_cp || cp > 0x10FFFFu || (cp >= 0xD800u && cp <= 0xDFFFu)) {
                cp = 0xFFFD;
            }

            /* Convert to UTF-16 (BMP unit or surrogate pair). */
            uint16_t units[2];
            int nunit;
            if (cp <= 0xFFFFu) {
                units[0] = (uint16_t)cp;
                nunit = 1;
            } else {
                cp -= 0x10000u;
                units[0] = (uint16_t)(0xD800u | (cp >> 10));
                units[1] = (uint16_t)(0xDC00u | (cp & 0x3FFu));
                nunit = 2;
            }

            /* Feed each unit, big-endian, into the Base64 bit stream. */
            for (int j = 0; j < nunit; j++) {
                const uint8_t be[2] = {
                    (uint8_t)(units[j] >> 8),
                    (uint8_t)(units[j] & 0xFF),
                };
                for (int k = 0; k < 2; k++) {
                    bits = (bits << 8) | be[k];
                    bit_cnt += 8;
                    while (bit_cnt >= 6) {
                        bit_cnt -= 6;
                        *dst++ = mod64[(bits >> bit_cnt) & 0x3F];
                        bits &= (1u << bit_cnt) - 1u;
                    }
                }
            }
        }

        /* Flush remaining bits, zero-padded to the next 6-bit boundary. */
        if (bit_cnt > 0) {
            *dst++ = mod64[(bits << (6 - bit_cnt)) & 0x3F];
        }
        *dst++ = '-';
    }
    *dst = '\0';
    return out;
}
|