Line data Source code
1 : #include "mime_util.h"
2 : #include "html_render.h"
3 : #include "raii.h"
4 : #include <stdio.h>
5 : #include <stdlib.h>
6 : #include <string.h>
7 : #include <ctype.h>
8 : #include <time.h>
9 : #include <iconv.h>
10 : #include <errno.h>
11 :
12 : /* ── Header extraction ──────────────────────────────────────────────── */
13 :
/* Append one byte to a growable buffer. The capacity is doubled whenever
 * fewer than two free bytes remain, so there is always room left for a
 * terminating NUL. Returns 0 on success, -1 if realloc fails (the buffer is
 * left untouched and still owned by the caller). */
static int header_buf_put(char **buf, size_t *len, size_t *cap, char c) {
    if (*len + 1 >= *cap) {
        size_t bigger = *cap * 2;
        char *grown = realloc(*buf, bigger);
        if (grown == NULL) return -1;
        *buf = grown;
        *cap = bigger;
    }
    (*buf)[(*len)++] = c;
    return 0;
}

/* Look up a header field by name (case-insensitive) in the header block at
 * the start of msg and return its value as a malloc'd string.
 *
 * Only lines before the first blank line are examined. Leading spaces/tabs of
 * the value are dropped, and folded continuation lines (a line break followed
 * by a space or tab) are joined with a single space.
 *
 * Returns NULL if msg or name is NULL, if the header is absent, or if an
 * allocation fails. The caller frees the result. */
char *mime_get_header(const char *msg, const char *name) {
    if (msg == NULL || name == NULL) return NULL;

    const size_t name_len = strlen(name);
    const char *line = msg;

    while (line != NULL && *line != '\0') {
        /* A line that starts with CR or LF is the header/body separator. */
        if (*line == '\r' || *line == '\n') return NULL;

        if (strncasecmp(line, name, name_len) != 0 || line[name_len] != ':') {
            /* Not our header: move to the line after the next '\n'. */
            const char *eol = strchr(line, '\n');
            line = eol ? eol + 1 : NULL;
            continue;
        }

        const char *src = line + name_len + 1;
        while (*src == ' ' || *src == '\t') src++;

        size_t cap = 512;
        size_t len = 0;
        char *value = malloc(cap);
        if (value == NULL) return NULL;

        while (*src != '\0') {
            if (*src != '\r' && *src != '\n') {
                if (header_buf_put(&value, &len, &cap, *src) < 0) {
                    free(value);
                    return NULL;
                }
                src++;
                continue;
            }

            /* At a line break: peek past an optional CR and an optional LF. */
            const char *after = src;
            if (*after == '\r') after++;
            if (*after == '\n') after++;
            if (*after != ' ' && *after != '\t') break;  /* header ends here */

            /* Folded line: drop the break and its leading whitespace, and
             * separate the joined pieces with exactly one space. */
            src = after;
            while (*src == ' ' || *src == '\t') src++;
            if (len > 0 && value[len - 1] != ' ') {
                if (header_buf_put(&value, &len, &cap, ' ') < 0) {
                    free(value);
                    return NULL;
                }
            }
        }

        value[len] = '\0';
        return value;
    }
    return NULL;
}
77 :
78 : /* ── Base64 decoder ─────────────────────────────────────────────────── */
79 :
/* Map one base64 alphabet character to its 6-bit value.
 * Returns -1 for anything outside A-Z, a-z, 0-9, '+', '/' (including '='
 * padding, whitespace and line breaks). */
static int b64val(unsigned char c) {
    if (c >= 'A' && c <= 'Z') return c - 'A';
    if (c >= 'a' && c <= 'z') return c - 'a' + 26;
    if (c >= '0' && c <= '9') return c - '0' + 52;
    if (c == '+') return 62;
    if (c == '/') return 63;
    return -1;
}

/* Decode inlen bytes of base64 text from in.
 *
 * Characters outside the base64 alphabet (padding, whitespace, line breaks)
 * are skipped. The result is a malloc'd buffer that is additionally
 * NUL-terminated; the number of decoded bytes (excluding that terminator) is
 * stored in *out_len when out_len is non-NULL.
 *
 * Returns NULL only when allocation fails. The caller frees the result. */
static char *decode_base64(const char *in, size_t inlen, size_t *out_len) {
    /* Every 4 input symbols yield at most 3 bytes; the slack covers a partial
     * trailing group and the terminator. */
    size_t max = (inlen / 4 + 1) * 3 + 4;
    char *out = malloc(max);
    if (!out) return NULL;

    size_t n = 0;
    /* Unsigned accumulator, masked after each emitted byte: it never holds
     * more than 7 + 6 bits, so the shift below cannot overflow no matter how
     * long the input is (a plain, never-masked int would overflow after a
     * handful of symbols, which is undefined behaviour). */
    unsigned int acc = 0;
    int bits = 0;

    for (size_t i = 0; i < inlen; i++) {
        int v = b64val((unsigned char)in[i]);
        if (v < 0) continue;
        acc = (acc << 6) | (unsigned int)v;
        bits += 6;
        if (bits >= 8) {
            bits -= 8;
            out[n++] = (char)((acc >> bits) & 0xFFu);
            acc &= (1u << bits) - 1u;   /* keep only the not-yet-emitted bits */
        }
    }

    out[n] = '\0';
    if (out_len) *out_len = n;
    return out;
}
109 :
110 : /* ── Quoted-Printable decoder ───────────────────────────────────────── */
111 :
/* Decode inlen bytes of quoted-printable text.
 *
 *   "=" followed by CR and/or LF  -> soft line break, removed entirely
 *   "=" followed by two hex digits -> the byte with that value
 *   anything else                  -> copied through unchanged
 *
 * The output never exceeds the input length. Returns a malloc'd,
 * NUL-terminated buffer, or NULL if allocation fails. */
static char *decode_qp(const char *in, size_t inlen) {
    char *decoded = malloc(inlen + 1);
    if (decoded == NULL) return NULL;

    size_t w = 0;   /* write position in decoded */
    size_t r = 0;   /* read position in in       */

    while (r < inlen) {
        const char c = in[r];

        if (c != '=') {
            decoded[w++] = c;
            r++;
            continue;
        }

        if (r + 1 < inlen && (in[r + 1] == '\r' || in[r + 1] == '\n')) {
            /* Soft break: swallow '=' plus an optional CR and an optional LF. */
            r++;
            if (r < inlen && in[r] == '\r') r++;
            if (r < inlen && in[r] == '\n') r++;
            continue;
        }

        if (r + 2 < inlen &&
            isxdigit((unsigned char)in[r + 1]) &&
            isxdigit((unsigned char)in[r + 2])) {
            const char pair[3] = { in[r + 1], in[r + 2], '\0' };
            decoded[w++] = (char)strtol(pair, NULL, 16);
            r += 3;
            continue;
        }

        /* A '=' that starts no valid escape is kept literally. */
        decoded[w++] = c;
        r++;
    }

    decoded[w] = '\0';
    return decoded;
}
136 :
137 : /* ── Body helpers ───────────────────────────────────────────────────── */
138 :
/* Locate the start of the body of a message or MIME part.
 *
 * The body begins after the first blank line, which may be written as
 * CRLF CRLF or LF LF. Whichever separator occurs EARLIER in msg wins, so a
 * body that itself contains CRLF CRLF cannot hide an earlier LF LF header
 * terminator. A part with no header fields at all (it starts directly with a
 * blank line) has its body right after that first line break.
 *
 * Returns a pointer into msg, or NULL if msg is NULL or contains no blank
 * line. */
static const char *body_start(const char *msg) {
    if (!msg) return NULL;

    /* Empty header block: the very first line is the separator. */
    if (msg[0] == '\n') return msg + 1;
    if (msg[0] == '\r' && msg[1] == '\n') return msg + 2;

    const char *crlf = strstr(msg, "\r\n\r\n");
    const char *lf = strstr(msg, "\n\n");

    if (crlf && (!lf || crlf < lf)) return crlf + 4;
    if (lf) return lf + 2;
    return NULL;
}
146 :
/* Undo the Content-Transfer-Encoding of a body.
 *
 * enc is compared case-insensitively: "base64" and "quoted-printable" are
 * decoded; any other value (or NULL) yields a plain copy of the first len
 * bytes. Returns a malloc'd, NUL-terminated buffer, or NULL on allocation
 * failure. */
static char *decode_transfer(const char *body, size_t len, const char *enc) {
    if (enc != NULL) {
        if (strcasecmp(enc, "base64") == 0) {
            return decode_base64(body, len, NULL);
        }
        if (strcasecmp(enc, "quoted-printable") == 0) {
            return decode_qp(body, len);
        }
    }
    return strndup(body, len);
}
154 :
/* Pull the charset parameter out of a Content-Type header value, e.g.
 * "text/plain; charset=iso-8859-2" -> "iso-8859-2".
 *
 * The "charset=" key is matched case-insensitively; one optional opening
 * double quote is skipped, and the value runs up to the next ';', space, tab,
 * double quote, CR, LF or end of string.
 *
 * Returns a malloc'd string, or NULL when ctype is NULL, the parameter is
 * missing, or its value is empty. */
static char *extract_charset(const char *ctype) {
    static const char key[] = "charset=";

    if (ctype == NULL) return NULL;

    const char *hit = strcasestr(ctype, key);
    if (hit == NULL) return NULL;

    const char *value = hit + (sizeof key - 1);
    if (*value == '"') value++;

    const size_t len = strcspn(value, "; \t\"\r\n");
    if (len == 0) return NULL;
    return strndup(value, len);
}
170 :
/* Re-encode the NUL-terminated string s from from_charset into UTF-8.
 *
 * A NULL s yields NULL. When from_charset is NULL or names UTF-8 / US-ASCII
 * (case-insensitive), or when iconv cannot open or carry out the conversion,
 * the result is simply strdup(s). The caller frees the returned string. */
static char *charset_to_utf8(const char *s, const char *from_charset) {
    if (s == NULL) return NULL;

    const int passthrough = from_charset == NULL
                         || strcasecmp(from_charset, "utf-8") == 0
                         || strcasecmp(from_charset, "utf8") == 0
                         || strcasecmp(from_charset, "us-ascii") == 0;
    if (passthrough) return strdup(s);

    iconv_t conv = iconv_open("UTF-8", from_charset);
    if (conv == (iconv_t)-1) return strdup(s);

    /* Output buffer sized at four bytes per input byte, plus the terminator. */
    const size_t src_len = strlen(s);
    const size_t dst_cap = src_len * 4 + 1;
    char *dst = malloc(dst_cap);
    if (dst == NULL) {
        iconv_close(conv);
        return strdup(s);
    }

    char *src_cur = (char *)s;   /* iconv takes char **; s is only read */
    char *dst_cur = dst;
    size_t src_left = src_len;
    size_t dst_left = dst_cap - 1;
    const size_t rc = iconv(conv, &src_cur, &src_left, &dst_cur, &dst_left);
    iconv_close(conv);

    if (rc == (size_t)-1) {
        free(dst);
        return strdup(s);
    }

    *dst_cur = '\0';
    return dst;
}
200 :
201 : static char *text_from_part(const char *part);
202 :
/* Return the text of the first sub-part of a multipart entity for which
 * text_from_part() produces a result.
 *
 * msg   - the multipart entity (headers and body).
 * ctype - its Content-Type header value; the boundary parameter is read from
 *         it (quoted or unquoted, at most 511 characters are used).
 *
 * The cursor always rests on a delimiter ("--" + boundary). Each iteration
 * takes the text between the end of that delimiter line and the next
 * delimiter as one part, then moves the cursor to that next delimiter. The
 * closing delimiter ("--boundary--") ends the walk. Keeping the cursor on the
 * delimiter is what guarantees that the first header line of the second and
 * later parts is not skipped and that the scan never steps past the end of
 * msg.
 *
 * Returns a malloc'd string, or NULL if there is no boundary, no part yields
 * text, or an allocation fails. */
static char *text_from_multipart(const char *msg, const char *ctype) {
    const char *b = strcasestr(ctype, "boundary=");
    if (!b) return NULL;
    b += strlen("boundary=");

    char boundary[512] = {0};
    if (*b == '"') {
        b++;
        const char *end = strchr(b, '"');
        if (!end) return NULL;
        snprintf(boundary, sizeof(boundary), "%.*s", (int)(end - b), b);
    } else {
        size_t i = 0;
        while (*b && *b != ';' && *b != ' ' && *b != '\r' && *b != '\n' &&
               i < sizeof(boundary) - 1)
            boundary[i++] = *b++;
        boundary[i] = '\0';
    }
    if (!boundary[0]) return NULL;

    char delim[520];
    snprintf(delim, sizeof(delim), "--%s", boundary);
    size_t dlen = strlen(delim);

    const char *p = strstr(msg, delim);
    while (p) {
        /* "--boundary--" closes the multipart: nothing follows. */
        if (p[dlen] == '-' && p[dlen + 1] == '-') break;

        /* The part starts on the line after the delimiter line. */
        const char *start = strchr(p + dlen, '\n');
        if (!start) break;
        start++;

        const char *next = strstr(start, delim);
        if (!next) break;

        char *part = strndup(start, (size_t)(next - start));
        if (!part) break;
        char *result = text_from_part(part);
        free(part);
        if (result) return result;

        p = next;   /* stay on the delimiter for the next iteration */
    }
    return NULL;
}
250 :
/* Produce a UTF-8 text rendering of one MIME entity (headers + body).
 *
 *   - no Content-Type, or text/plain: the body is transfer-decoded and
 *     converted from its declared charset;
 *   - multipart/...: delegated to text_from_multipart();
 *   - text/html: decoded and converted the same way, then passed through
 *     html_render(utf8, 0, 0) (declared in html_render.h, not visible here);
 *   - any other type: no result.
 *
 * Returns a malloc'd string or NULL. */
static char *text_from_part(const char *part) {
    char *ctype = mime_get_header(part, "Content-Type");
    char *enc = mime_get_header(part, "Content-Transfer-Encoding");
    char *charset = extract_charset(ctype);
    const char *body = body_start(part);
    char *result = NULL;

    /* Classify once; the three tests are evaluated in the same order as the
     * dispatch requires (plain, then multipart, then html). */
    const int is_plain = !ctype || strncasecmp(ctype, "text/plain", 10) == 0;
    const int is_multi = !is_plain && strncasecmp(ctype, "multipart/", 10) == 0;
    const int is_html = !is_plain && !is_multi &&
                        strncasecmp(ctype, "text/html", 9) == 0;

    if (is_multi) {
        result = text_from_multipart(part, ctype);
    } else if ((is_plain || is_html) && body) {
        char *raw = decode_transfer(body, strlen(body), enc);
        if (raw) {
            char *utf8 = charset_to_utf8(raw, charset);
            free(raw);
            if (is_plain) {
                result = utf8;
            } else if (utf8) {
                result = html_render(utf8, 0, 0);
                free(utf8);
            }
        }
    }

    free(ctype);
    free(enc);
    free(charset);
    return result;
}
287 :
288 : /* ── RFC 2047 encoded-word decoder ──────────────────────────────────── */
289 :
/**
 * Decode the payload of a single RFC 2047 encoded word and convert it to
 * UTF-8.
 *
 * charset:  charset name declared inside the encoded word.
 * enc:      'Q' or 'q' selects the Q encoding ('_' stands for a space, "=XX"
 *           for a hex-escaped byte); any other value is treated as base64.
 * text:     the encoded payload, text_len bytes long (no NUL required).
 *
 * If the charset is already UTF-8, or iconv cannot open or perform the
 * conversion, the decoded bytes are returned unconverted.
 *
 * Returns a malloc'd NUL-terminated string, or NULL if the decode buffer
 * cannot be allocated.
 */
static char *decode_encoded_word(const char *charset, char enc,
                                 const char *text, size_t text_len) {
    char *bytes;

    if (enc != 'Q' && enc != 'q') {
        /* B encoding */
        bytes = decode_base64(text, text_len, NULL);
        if (bytes == NULL) return NULL;
    } else {
        bytes = malloc(text_len + 1);
        if (bytes == NULL) return NULL;

        size_t w = 0;
        for (size_t r = 0; r < text_len; ) {
            const char c = text[r];
            if (c == '_') {
                bytes[w++] = ' ';
                r++;
            } else if (c == '=' && r + 2 < text_len &&
                       isxdigit((unsigned char)text[r + 1]) &&
                       isxdigit((unsigned char)text[r + 2])) {
                const char pair[3] = { text[r + 1], text[r + 2], '\0' };
                bytes[w++] = (char)strtol(pair, NULL, 16);
                r += 3;
            } else {
                bytes[w++] = c;
                r++;
            }
        }
        bytes[w] = '\0';
    }

    /* Nothing to convert when the payload is declared as UTF-8. */
    if (strcasecmp(charset, "utf-8") == 0 || strcasecmp(charset, "utf8") == 0) {
        return bytes;
    }

    iconv_t conv = iconv_open("UTF-8", charset);
    if (conv == (iconv_t)-1) {
        return bytes;   /* unknown charset: hand back the decoded bytes */
    }

    const size_t src_len = strlen(bytes);
    const size_t dst_cap = src_len * 4 + 1;
    char *converted = malloc(dst_cap);
    if (converted == NULL) {
        iconv_close(conv);
        return bytes;
    }

    char *src_cur = bytes;
    char *dst_cur = converted;
    size_t src_left = src_len;
    size_t dst_left = dst_cap - 1;
    const size_t rc = iconv(conv, &src_cur, &src_left, &dst_cur, &dst_left);
    iconv_close(conv);

    if (rc == (size_t)-1) {
        free(converted);
        return bytes;
    }

    *dst_cur = '\0';
    free(bytes);
    return converted;
}
355 :
/**
 * Attempt to parse and decode one encoded word located exactly at *pp.
 * Expected shape: =?charset?Q|B?encoded_text?=
 *
 * The charset must be 1..63 characters long and the encoding indicator one
 * of Q, q, B, b; the payload extends to the first "?=" that follows.
 *
 * On success the decoded string (malloc'd) is returned and *pp is moved just
 * past the closing "?=". On any failure NULL is returned and *pp is left
 * untouched.
 */
static char *try_decode_encoded_word(const char **pp) {
    const char *cur = *pp;
    if (cur[0] != '=' || cur[1] != '?') return NULL;

    /* charset: everything up to the next '?' */
    const char *cs_begin = cur + 2;
    const char *cs_end = strchr(cs_begin, '?');
    if (cs_end == NULL) return NULL;

    char charset[64];
    const size_t cs_len = (size_t)(cs_end - cs_begin);
    if (cs_len == 0 || cs_len >= sizeof charset) return NULL;
    memcpy(charset, cs_begin, cs_len);
    charset[cs_len] = '\0';

    /* encoding indicator, which must itself be followed by '?' */
    const char enc = cs_end[1];
    switch (enc) {
    case 'Q': case 'q': case 'B': case 'b':
        break;
    default:
        return NULL;
    }
    if (cs_end[2] != '?') return NULL;

    /* payload: runs up to the first "?=" */
    const char *payload = cs_end + 3;
    const char *closer = strstr(payload, "?=");
    if (closer == NULL) return NULL;

    char *decoded = decode_encoded_word(charset, enc, payload,
                                        (size_t)(closer - payload));
    if (decoded == NULL) return NULL;

    *pp = closer + 2;
    return decoded;
}
399 :
/* Decode every RFC 2047 encoded word ("=?charset?Q|B?text?=") in value and
 * return the result as a malloc'd UTF-8 string. Text outside encoded words,
 * and anything that merely looks like an encoded word but fails to parse, is
 * copied through unchanged.
 *
 * RFC 2047 section 6.2: spaces/tabs between two adjacent encoded words are
 * not part of the text and are dropped. The whitespace is dropped only once
 * the following token has actually decoded; if it turns out not to be a valid
 * encoded word the whitespace is preserved.
 *
 * Returns NULL if value is NULL or the initial allocation fails. If a later
 * buffer growth fails, the text decoded so far is returned. The caller frees
 * the result. */
char *mime_decode_words(const char *value) {
    if (!value) return NULL;

    size_t vlen = strlen(value);
    /* Sized at four output bytes per input byte; grown below if a decoded
     * word would not fit. */
    size_t cap = vlen * 4 + 1;
    char *out = malloc(cap);
    if (!out) return NULL;

    size_t n = 0;
    const char *p = value;
    int prev_encoded = 0;

    while (*p) {
        /* Candidate start of an encoded word: p itself, or - right after a
         * decoded word - the first character past any run of spaces/tabs. */
        const char *word = p;
        if (prev_encoded) {
            while (*word == ' ' || *word == '\t') word++;
        }

        if (word[0] == '=' && word[1] == '?') {
            const char *after = word;
            char *decoded = try_decode_encoded_word(&after);
            if (decoded) {
                size_t dlen = strlen(decoded);
                if (n + dlen >= cap) {
                    size_t newcap = n + dlen + vlen + 1;
                    char *tmp = realloc(out, newcap);
                    if (!tmp) { free(decoded); break; }
                    out = tmp;
                    cap = newcap;
                }
                memcpy(out + n, decoded, dlen);
                n += dlen;
                free(decoded);
                p = after;          /* also consumes any skipped whitespace */
                prev_encoded = 1;
                continue;
            }
        }

        /* Literal character (including whitespace that did not separate two
         * valid encoded words). */
        prev_encoded = 0;
        out[n++] = *p++;
    }

    out[n] = '\0';
    return out;
}
450 :
451 : /* ── Date formatting ────────────────────────────────────────────────── */
452 :
/* Convert an RFC 5322-style Date header value into "YYYY-MM-DD HH:MM" in the
 * process's local time zone.
 *
 * Returns NULL when date is NULL or empty, or when the output buffer cannot
 * be allocated. If the value cannot be parsed or converted, a copy of the
 * original string is returned instead. The caller frees the result. */
char *mime_format_date(const char *date) {
    if (!date || !*date) return NULL;

    static const char * const fmts[] = {
        "%a, %d %b %Y %T %z",   /* "Tue, 10 Mar 2026 15:07:40 +0000" */
        "%d %b %Y %T %z",       /* "10 Mar 2026 15:07:40 +0000"      */
        "%a, %d %b %Y %T %Z",   /* "Tue, 24 Mar 2026 16:38:21 GMT"   */
        "%d %b %Y %T %Z",       /* "24 Mar 2026 16:38:21 UTC"        */
        NULL
    };

    struct tm tm;
    int parsed = 0;
    for (int i = 0; fmts[i]; i++) {
        memset(&tm, 0, sizeof(tm));
        if (strptime(date, fmts[i], &tm)) { parsed = 1; break; }
    }
    if (!parsed) return strdup(date);

    /* Read the parsed UTC offset before timegm() gets to normalise the
     * struct. timegm() interprets the broken-down fields as UTC, so the
     * offset is subtracted afterwards to obtain the real instant. The
     * failure check must look at timegm()'s own return value: once the
     * offset has been subtracted, an error result no longer equals -1. */
    long gmtoff = tm.tm_gmtoff;
    time_t as_utc = timegm(&tm);
    if (as_utc == (time_t)-1) return strdup(date);
    time_t utc = as_utc - gmtoff;

    struct tm local;
    /* On failure `local` is not filled in and must not be formatted. */
    if (!localtime_r(&utc, &local)) return strdup(date);

    char *buf = malloc(17);     /* "YYYY-MM-DD HH:MM\0" */
    if (!buf) return NULL;
    if (strftime(buf, 17, "%Y-%m-%d %H:%M", &local) == 0) {
        free(buf);
        return strdup(date);
    }
    return buf;
}
490 :
491 : /* ── HTML part extractor ────────────────────────────────────────────── */
492 :
493 : static char *html_from_part(const char *part);
494 :
/* Return the first HTML body found among the sub-parts of a multipart entity.
 *
 * msg   - the multipart entity (headers and body).
 * ctype - its Content-Type header value, from which the boundary parameter
 *         (quoted or unquoted, at most 511 characters) is taken.
 *
 * Each stretch of text between two consecutive delimiter lines is handed to
 * html_from_part(); the first non-NULL result is returned. The closing
 * delimiter ("--boundary--") ends the walk.
 *
 * Returns a malloc'd string, or NULL if nothing was found. */
static char *html_from_multipart(const char *msg, const char *ctype) {
    static const char key[] = "boundary=";

    const char *val = strcasestr(ctype, key);
    if (val == NULL) return NULL;
    val += sizeof key - 1;

    char boundary[512] = {0};
    if (*val != '"') {
        /* Unquoted: runs up to ';', space, CR, LF or end of string. */
        size_t len = strcspn(val, "; \r\n");
        if (len > sizeof boundary - 1) len = sizeof boundary - 1;
        memcpy(boundary, val, len);
        boundary[len] = '\0';
    } else {
        val++;
        const char *close = strchr(val, '"');
        if (close == NULL) return NULL;
        snprintf(boundary, sizeof(boundary), "%.*s", (int)(close - val), val);
    }
    if (boundary[0] == '\0') return NULL;

    char delim[520];
    snprintf(delim, sizeof(delim), "--%s", boundary);
    const size_t dlen = strlen(delim);

    /* `at` always rests on a delimiter. */
    for (const char *at = strstr(msg, delim); at != NULL; ) {
        if (at[dlen] == '-' && at[dlen + 1] == '-') break;   /* end boundary */

        const char *eol = strchr(at + dlen, '\n');
        if (eol == NULL) break;
        const char *part_begin = eol + 1;

        const char *following = strstr(part_begin, delim);
        if (following == NULL) break;

        char *part = strndup(part_begin, (size_t)(following - part_begin));
        if (part == NULL) break;
        char *html = html_from_part(part);
        free(part);
        if (html != NULL) return html;

        at = following;
    }
    return NULL;
}
537 :
/* Extract the HTML body of one MIME entity (headers + body) as UTF-8.
 *
 * A text/html entity has its body transfer-decoded and converted from the
 * declared charset; a multipart/... entity is searched via
 * html_from_multipart(). Anything else, including a missing Content-Type,
 * gives NULL. The result is malloc'd. */
static char *html_from_part(const char *part) {
    char *ctype = mime_get_header(part, "Content-Type");
    char *enc = mime_get_header(part, "Content-Transfer-Encoding");
    char *charset = extract_charset(ctype);
    const char *body = body_start(part);
    char *html = NULL;

    if (ctype != NULL) {
        if (strncasecmp(ctype, "text/html", 9) == 0) {
            char *raw = body ? decode_transfer(body, strlen(body), enc) : NULL;
            if (raw != NULL) {
                html = charset_to_utf8(raw, charset);
                free(raw);
            }
        } else if (strncasecmp(ctype, "multipart/", 10) == 0) {
            html = html_from_multipart(part, ctype);
        }
    }

    free(ctype);
    free(enc);
    free(charset);
    return html;
}
560 :
561 : /* ── Public API ─────────────────────────────────────────────────────── */
562 :
/* Public entry point: text rendering of a whole message, as produced by
 * text_from_part(). Returns a malloc'd string, or NULL if msg is NULL or no
 * text could be extracted. */
char *mime_get_text_body(const char *msg) {
    return msg ? text_from_part(msg) : NULL;
}
567 :
/* Public entry point: HTML body of a whole message, as produced by
 * html_from_part(). Returns a malloc'd string, or NULL if msg is NULL or the
 * message has no HTML part. */
char *mime_get_html_part(const char *msg) {
    return msg ? html_from_part(msg) : NULL;
}
572 :
573 : /* ── Attachment extraction ──────────────────────────────────────────── */
574 :
/* Extract the value of a MIME header parameter, e.g. filename="foo.pdf" or
 * name=bar, from a header value.
 *
 * The parameter name is matched case-insensitively and must begin at a
 * parameter boundary: at the very start of header, or directly after ';',
 * a space, a tab, CR or LF. Without that anchor a lookup of "name" would
 * match the tail of "filename=" and return the wrong parameter's value.
 *
 * A quoted value extends to the closing double quote (NULL if it is never
 * closed); an unquoted value ends at ';', whitespace, or end of string.
 *
 * Returns a malloc'd string, or NULL if either argument is NULL, the
 * parameter is absent, or an unquoted value is empty. */
static char *extract_param(const char *header, const char *param) {
    if (!header || !param) return NULL;

    char search[64];
    snprintf(search, sizeof(search), "%s=", param);
    size_t slen = strlen(search);

    /* Find the first occurrence that starts at a parameter boundary. */
    const char *p = header;
    for (;;) {
        p = strcasestr(p, search);
        if (!p) return NULL;
        if (p == header) break;
        char before = p[-1];
        if (before == ';' || before == ' ' || before == '\t' ||
            before == '\r' || before == '\n')
            break;
        p++;    /* embedded in a longer token: keep looking */
    }
    p += slen;

    if (*p == '"') {
        p++;
        const char *end = strchr(p, '"');
        if (!end) return NULL;
        return strndup(p, (size_t)(end - p));
    }

    /* unquoted value: ends at ';', whitespace, or end-of-string */
    const char *end = p;
    while (*end && *end != ';' && *end != ' ' && *end != '\t' &&
           *end != '\r' && *end != '\n')
        end++;
    if (end == p) return NULL;
    return strndup(p, (size_t)(end - p));
}
598 :
/* Reduce an untrusted attachment name to a bare file name.
 *
 * Everything up to and including the last '/' or '\' is discarded, then any
 * leading dots are removed (no hidden files, no ".." components).
 *
 * Returns a malloc'd string:
 *   - NULL           if name is NULL/empty or ends with a separator;
 *   - "attachment"   if nothing but dots remains;
 *   - the cleaned base name otherwise. */
static char *sanitise_filename(const char *name) {
    if (name == NULL || *name == '\0') return NULL;

    /* Walk back from the end to just after the last directory separator. */
    const char *base = name + strlen(name);
    while (base > name && base[-1] != '/' && base[-1] != '\\') {
        base--;
    }
    if (*base == '\0') return NULL;

    const char *visible = base + strspn(base, ".");
    if (*visible == '\0') return strdup("attachment");
    return strdup(visible);
}
616 :
617 : /* Dynamic array for building the attachment list */
618 : typedef struct { MimeAttachment *data; int count; int cap; } AttachList;
619 :
620 62 : static int alist_push(AttachList *al, MimeAttachment att) {
621 62 : if (al->count >= al->cap) {
622 35 : int newcap = al->cap ? al->cap * 2 : 4;
623 35 : MimeAttachment *tmp = realloc(al->data,
624 35 : (size_t)newcap * sizeof(MimeAttachment));
625 35 : if (!tmp) return -1;
626 35 : al->data = tmp;
627 35 : al->cap = newcap;
628 : }
629 62 : al->data[al->count++] = att;
630 62 : return 0;
631 : }
632 :
633 : /* Forward declaration */
634 : static void collect_parts(const char *msg, AttachList *al, int *unnamed_idx);
635 :
636 : /* Walk a multipart body and collect attachments from each sub-part. */
637 35 : static void collect_multipart_attachments(const char *msg, const char *ctype,
638 : AttachList *al, int *idx) {
639 35 : const char *b = strcasestr(ctype, "boundary=");
640 35 : if (!b) return;
641 35 : b += strlen("boundary=");
642 :
643 35 : char boundary[512] = {0};
644 35 : if (*b == '"') {
645 35 : b++;
646 35 : const char *end = strchr(b, '"');
647 35 : if (!end) return;
648 35 : snprintf(boundary, sizeof(boundary), "%.*s", (int)(end - b), b);
649 : } else {
650 0 : size_t i = 0;
651 0 : while (*b && *b != ';' && *b != ' ' && *b != '\r' && *b != '\n' &&
652 : i < sizeof(boundary) - 1)
653 0 : boundary[i++] = *b++;
654 0 : boundary[i] = '\0';
655 : }
656 35 : if (!boundary[0]) return;
657 :
658 : char delim[520];
659 35 : snprintf(delim, sizeof(delim), "--%s", boundary);
660 35 : size_t dlen = strlen(delim);
661 :
662 35 : const char *p = strstr(msg, delim);
663 124 : while (p) {
664 124 : p = strchr(p + dlen, '\n');
665 124 : if (!p) break;
666 124 : p++;
667 :
668 124 : const char *next = strstr(p, delim);
669 124 : if (!next) break;
670 :
671 124 : size_t partlen = (size_t)(next - p);
672 124 : char *part = strndup(p, partlen);
673 124 : if (!part) break;
674 124 : collect_parts(part, al, idx);
675 124 : free(part);
676 :
677 124 : p = next + dlen;
678 124 : if (p[0] == '-' && p[1] == '-') break;
679 89 : p = strchr(p, '\n');
680 89 : if (p) p++;
681 : }
682 : }
683 :
684 : /* Examine one MIME part (headers + body) and add to al if it is an attachment. */
685 163 : static void collect_parts(const char *msg, AttachList *al, int *unnamed_idx) {
686 163 : char *ctype = mime_get_header(msg, "Content-Type");
687 163 : char *disp = mime_get_header(msg, "Content-Disposition");
688 163 : char *enc = mime_get_header(msg, "Content-Transfer-Encoding");
689 :
690 : /* Recurse into multipart containers */
691 163 : if (ctype && strncasecmp(ctype, "multipart/", 10) == 0) {
692 35 : collect_multipart_attachments(msg, ctype, al, unnamed_idx);
693 35 : free(ctype); free(disp); free(enc);
694 101 : return;
695 : }
696 :
697 : /* Determine filename from Content-Disposition or Content-Type name= */
698 128 : char *filename = NULL;
699 128 : int explicit_attach = 0;
700 128 : if (disp) {
701 62 : if (strncasecmp(disp, "attachment", 10) == 0) explicit_attach = 1;
702 62 : filename = extract_param(disp, "filename");
703 : /* RFC 5987: filename*=charset''encoded — simplified: strip trailing * */
704 62 : if (!filename) filename = extract_param(disp, "filename*");
705 : }
706 128 : if (!filename && ctype)
707 39 : filename = extract_param(ctype, "name");
708 :
709 : /* Skip non-attachment text and multipart parts unless explicitly marked */
710 128 : if (!explicit_attach) {
711 66 : if (!filename) {
712 66 : free(ctype); free(disp); free(enc);
713 66 : return; /* no filename → body part, skip */
714 : }
715 : /* text/plain and text/html without attachment disposition are body parts */
716 0 : if (ctype && (strncasecmp(ctype, "text/plain", 10) == 0 ||
717 0 : strncasecmp(ctype, "text/html", 9) == 0)) {
718 0 : free(ctype); free(disp); free(enc); free(filename);
719 0 : return;
720 : }
721 : }
722 :
723 62 : const char *body = body_start(msg);
724 62 : if (!body) {
725 0 : free(ctype); free(disp); free(enc); free(filename);
726 0 : return;
727 : }
728 :
729 : /* Decode body content; for base64 capture exact decoded byte count. */
730 62 : size_t data_size = 0;
731 : unsigned char *data;
732 62 : if (enc && strcasecmp(enc, "base64") == 0)
733 62 : data = (unsigned char *)decode_base64(body, strlen(body), &data_size);
734 : else {
735 0 : data = (unsigned char *)decode_transfer(body, strlen(body), enc);
736 0 : data_size = data ? strlen((char *)data) : 0;
737 : }
738 :
739 : /* Sanitise / generate filename */
740 62 : char *safe_name = NULL;
741 62 : if (filename) {
742 62 : char *decoded = mime_decode_words(filename);
743 62 : free(filename);
744 62 : safe_name = sanitise_filename(decoded ? decoded : "");
745 62 : free(decoded);
746 : }
747 62 : if (!safe_name) {
748 : char gen[32];
749 0 : snprintf(gen, sizeof(gen), "attachment-%d.bin", ++(*unnamed_idx));
750 0 : safe_name = strdup(gen);
751 : }
752 :
753 62 : MimeAttachment att = {0};
754 62 : att.filename = safe_name;
755 62 : att.content_type = ctype ? strdup(ctype) : strdup("application/octet-stream");
756 62 : att.data = data;
757 62 : att.size = data_size;
758 :
759 62 : if (alist_push(al, att) < 0) {
760 0 : free(att.filename); free(att.content_type); free(att.data);
761 : }
762 :
763 62 : free(ctype); free(disp); free(enc);
764 : }
765 :
766 39 : MimeAttachment *mime_list_attachments(const char *msg, int *count_out) {
767 39 : if (!msg || !count_out) { if (count_out) *count_out = 0; return NULL; }
768 39 : AttachList al = {NULL, 0, 0};
769 39 : int idx = 0;
770 39 : collect_parts(msg, &al, &idx);
771 39 : *count_out = al.count;
772 39 : if (al.count == 0) { free(al.data); return NULL; }
773 35 : return al.data;
774 : }
775 :
776 27 : void mime_free_attachments(MimeAttachment *list, int count) {
777 27 : if (!list) return;
778 61 : for (int i = 0; i < count; i++) {
779 38 : free(list[i].filename);
780 38 : free(list[i].content_type);
781 38 : free(list[i].data);
782 : }
783 23 : free(list);
784 : }
785 :
786 9 : int mime_save_attachment(const MimeAttachment *att, const char *dest_path) {
787 9 : if (!att || !dest_path || !att->data) return -1;
788 18 : RAII_FILE FILE *f = fopen(dest_path, "wb");
789 9 : if (!f) return -1;
790 : /* Write the full decoded buffer; for base64 the NUL terminator is not
791 : * part of the content — use att->size if accurate, else strlen fallback. */
792 9 : size_t n = att->size > 0 ? att->size : strlen((char *)att->data);
793 9 : size_t written = fwrite(att->data, 1, n, f);
794 9 : return (written != n) ? -1 : 0;
795 : }
796 :
/* Extract the payload of the first "{N}" literal in an IMAP response.
 *
 * The text after the first '{' must be a positive decimal count immediately
 * followed by '}'. One optional CR and one optional LF after the brace are
 * skipped, and up to N of the following bytes are copied; if fewer than N are
 * available, whatever is there is returned.
 *
 * Returns a malloc'd NUL-terminated string, or NULL if response is NULL, no
 * well-formed literal header is found, or N is not positive. */
char *mime_extract_imap_literal(const char *response) {
    if (response == NULL) return NULL;

    const char *open = strchr(response, '{');
    if (open == NULL) return NULL;

    char *close = NULL;
    const long declared = strtol(open + 1, &close, 10);
    if (close == NULL || *close != '}' || declared <= 0) return NULL;

    const char *payload = close + 1;
    if (*payload == '\r') payload++;
    if (*payload == '\n') payload++;

    /* Never copy more than the response actually contains. */
    const size_t have = strlen(payload);
    const size_t want = (size_t)declared;
    return strndup(payload, have < want ? have : want);
}
|