Line data Source code
1 : #include "mime_util.h"
2 : #include "html_render.h"
3 : #include "raii.h"
4 : #include <stdio.h>
5 : #include <stdlib.h>
6 : #include <string.h>
7 : #include <ctype.h>
8 : #include <time.h>
9 : #include <iconv.h>
10 : #include <errno.h>
11 :
12 : /* ── Header extraction ──────────────────────────────────────────────── */
13 :
/* Append byte c to the growable buffer *buf (current length *len, capacity
 * *cap).  The capacity is doubled whenever fewer than two free bytes remain,
 * so there is always room left for a terminating NUL.
 * Returns 0 on success, -1 if realloc fails (the buffer is left untouched and
 * is still owned by the caller). */
static int header_value_put(char **buf, size_t *len, size_t *cap, char c) {
    if (*len + 1 >= *cap) {
        size_t grown = *cap * 2;
        char *bigger = realloc(*buf, grown);
        if (bigger == NULL)
            return -1;
        *buf = bigger;
        *cap = grown;
    }
    (*buf)[(*len)++] = c;
    return 0;
}

/* Look up header `name` (case-insensitive) in the header section of `msg`.
 *
 * Only lines before the first blank line are examined.  The value has its
 * leading spaces/tabs removed, and folded continuation lines (a line break
 * followed by a space or tab) are joined with a single space.
 *
 * Returns a malloc'd NUL-terminated string the caller must free, or NULL when
 * either argument is NULL, the header is absent, or allocation fails. */
char *mime_get_header(const char *msg, const char *name) {
    if (msg == NULL || name == NULL)
        return NULL;

    const size_t name_len = strlen(name);
    const char *line = msg;

    while (line != NULL && *line != '\0') {
        /* A line that starts with a line break is the header/body separator. */
        if (*line == '\r' || *line == '\n')
            return NULL;

        if (strncasecmp(line, name, name_len) != 0 || line[name_len] != ':') {
            /* Not our header: move on to the line after the next '\n'. */
            const char *eol = strchr(line, '\n');
            line = (eol != NULL) ? eol + 1 : NULL;
            continue;
        }

        const char *src = line + name_len + 1;
        while (*src == ' ' || *src == '\t')
            src++;

        size_t cap = 512;
        size_t len = 0;
        char *value = malloc(cap);
        if (value == NULL)
            return NULL;

        for (;;) {
            const char c = *src;
            if (c == '\0')
                break;

            if (c == '\r' || c == '\n') {
                /* Peek past an optional CR and an optional LF. */
                const char *after = src;
                if (*after == '\r') after++;
                if (*after == '\n') after++;

                /* Anything but leading whitespace ends this header. */
                if (*after != ' ' && *after != '\t')
                    break;

                /* Folded line: drop the break and the indentation ... */
                src = after;
                while (*src == ' ' || *src == '\t')
                    src++;

                /* ... and separate the pieces with exactly one space. */
                if (len > 0 && value[len - 1] != ' ' &&
                    header_value_put(&value, &len, &cap, ' ') < 0) {
                    free(value);
                    return NULL;
                }
                continue;
            }

            if (header_value_put(&value, &len, &cap, c) < 0) {
                free(value);
                return NULL;
            }
            src++;
        }

        value[len] = '\0';
        return value;
    }

    return NULL;
}
77 :
78 : /* ── Base64 decoder ─────────────────────────────────────────────────── */
79 :
/* Map one base64 alphabet character to its 6-bit value (0..63).
 * Returns -1 for every byte outside the standard alphabet, which includes
 * the '=' padding character and all whitespace. */
static int b64val(unsigned char c) {
    int sextet = -1;

    if (c >= 'A' && c <= 'Z') {
        sextet = c - 'A';
    } else if (c >= 'a' && c <= 'z') {
        sextet = 26 + (c - 'a');
    } else if (c >= '0' && c <= '9') {
        sextet = 52 + (c - '0');
    } else {
        switch (c) {
        case '+': sextet = 62; break;
        case '/': sextet = 63; break;
        default:  break;
        }
    }
    return sextet;
}
88 :
/* Decode `inlen` bytes of base64 text from `in`.
 *
 * Bytes that are not part of the base64 alphabet (padding, line breaks, any
 * other character) are skipped.  The result is malloc'd and always
 * NUL-terminated; the terminator is not counted in the length.  When
 * `out_len` is non-NULL it receives the number of decoded bytes, which is
 * the only reliable length for binary data that may contain NUL bytes.
 *
 * Returns NULL only if allocation fails. */
static char *decode_base64(const char *in, size_t inlen, size_t *out_len) {
    /* Every 4 input characters yield at most 3 bytes; keep some slack for
     * a trailing partial group and the terminator. */
    size_t max = (inlen / 4 + 1) * 3 + 4;
    char *out = malloc(max);
    if (!out) return NULL;

    size_t n = 0;
    /* Unsigned accumulator: the bits that were already emitted are masked
     * off after each byte, so it never holds more than 12 bits and the left
     * shift can never overflow (shifting a signed int past its width is
     * undefined behaviour). */
    unsigned int acc = 0;
    int bits = 0;

    for (size_t i = 0; i < inlen; i++) {
        int v = b64val((unsigned char)in[i]);
        if (v < 0) continue;

        acc = (acc << 6) | (unsigned int)v;
        bits += 6;
        if (bits >= 8) {
            bits -= 8;
            out[n++] = (char)((acc >> bits) & 0xFFu);
            acc &= (1u << bits) - 1u;   /* keep only the unconsumed bits */
        }
    }

    out[n] = '\0';
    if (out_len) *out_len = n;
    return out;
}
109 :
110 : /* ── Quoted-Printable decoder ───────────────────────────────────────── */
111 :
/* Decode `inlen` bytes of quoted-printable text.
 *
 *   "=" followed by a line break   -> soft line break, removed
 *   "=" followed by two hex digits -> the byte with that value
 *   anything else                  -> copied unchanged (including a stray '=')
 *
 * Returns a malloc'd NUL-terminated string, or NULL if allocation fails. */
static char *decode_qp(const char *in, size_t inlen) {
    char *decoded = malloc(inlen + 1);
    if (decoded == NULL)
        return NULL;

    size_t w = 0;
    size_t r = 0;
    while (r < inlen) {
        const char c = in[r];
        if (c != '=') {
            decoded[w++] = c;
            r++;
            continue;
        }

        /* Number of input bytes available after the '='. */
        const size_t following = inlen - r - 1;

        if (following >= 1 && (in[r + 1] == '\r' || in[r + 1] == '\n')) {
            /* Soft line break: swallow '=', an optional CR, an optional LF. */
            r++;
            if (r < inlen && in[r] == '\r') r++;
            if (r < inlen && in[r] == '\n') r++;
            continue;
        }

        if (following >= 2 &&
            isxdigit((unsigned char)in[r + 1]) &&
            isxdigit((unsigned char)in[r + 2])) {
            const char hex[3] = { in[r + 1], in[r + 2], '\0' };
            decoded[w++] = (char)strtol(hex, NULL, 16);
            r += 3;
            continue;
        }

        /* Malformed escape: keep the '=' literally. */
        decoded[w++] = c;
        r++;
    }

    decoded[w] = '\0';
    return decoded;
}
136 :
137 : /* ── Body helpers ───────────────────────────────────────────────────── */
138 :
/* Locate the first byte after the blank line that separates headers from
 * body.
 *
 * Both CRLF CRLF and LF LF separators are recognised, and whichever occurs
 * FIRST in the message wins.  Searching the whole message for CRLF CRLF
 * before considering LF LF would mis-split an LF-only message whose body
 * happens to contain a CRLF CRLF sequence.
 *
 * Returns a pointer into `msg`, or NULL when no blank line exists.
 * `msg` must be a non-NULL NUL-terminated string. */
static const char *body_start(const char *msg) {
    for (const char *nl = strchr(msg, '\n'); nl != NULL;
         nl = strchr(nl + 1, '\n')) {
        /* "\n\n": the body begins right after the second LF. */
        if (nl[1] == '\n')
            return nl + 2;

        /* "\r\n\r\n": nl is the first LF, so the body begins 3 bytes on.
         * The short-circuit stops the scan from reading past the NUL. */
        if (nl > msg && nl[-1] == '\r' && nl[1] == '\r' && nl[2] == '\n')
            return nl + 3;
    }
    return NULL;
}
146 :
/* Undo the Content-Transfer-Encoding named by `enc` on the first `len`
 * bytes of `body`.
 *
 * "base64" and "quoted-printable" (case-insensitive) are decoded; a NULL or
 * any other encoding yields a plain copy of the input.
 * Returns a malloc'd NUL-terminated buffer, or NULL if allocation fails. */
static char *decode_transfer(const char *body, size_t len, const char *enc) {
    if (enc != NULL) {
        if (strcasecmp(enc, "base64") == 0)
            return decode_base64(body, len, NULL);
        if (strcasecmp(enc, "quoted-printable") == 0)
            return decode_qp(body, len);
    }
    return strndup(body, len);
}
154 :
/* Pull the charset parameter out of a Content-Type header value.
 * Example: "text/plain; charset=iso-8859-2" yields "iso-8859-2".
 *
 * The "charset=" key is matched case-insensitively.  One optional opening
 * double quote is skipped, and the value ends at the first ';', space, tab,
 * double quote, CR, LF, or at the end of the string.
 *
 * Returns a malloc'd string, or NULL when `ctype` is NULL, the parameter is
 * absent, or its value is empty. */
static char *extract_charset(const char *ctype) {
    if (ctype == NULL)
        return NULL;

    const char *hit = strcasestr(ctype, "charset=");
    if (hit == NULL)
        return NULL;

    const char *begin = hit + 8;        /* 8 == strlen("charset=") */
    if (*begin == '"')
        begin++;

    const size_t len = strcspn(begin, "; \t\"\r\n");
    if (len == 0)
        return NULL;
    return strndup(begin, len);
}
170 :
/* Re-encode the NUL-terminated string `s` from `from_charset` into UTF-8
 * using iconv.
 *
 * A NULL charset, "utf-8", "utf8" and "us-ascii" (case-insensitive) need no
 * conversion and yield a plain copy.  If iconv cannot be opened, the output
 * buffer cannot be allocated, or the conversion reports an error, a plain
 * copy of `s` is returned as well.
 *
 * Returns a malloc'd string (NULL only when `s` is NULL or the copy itself
 * cannot be allocated). */
static char *charset_to_utf8(const char *s, const char *from_charset) {
    if (s == NULL)
        return NULL;

    const int needs_no_conversion =
        from_charset == NULL ||
        strcasecmp(from_charset, "utf-8") == 0 ||
        strcasecmp(from_charset, "utf8") == 0 ||
        strcasecmp(from_charset, "us-ascii") == 0;
    if (needs_no_conversion)
        return strdup(s);

    iconv_t conv = iconv_open("UTF-8", from_charset);
    if (conv != (iconv_t)-1) {
        const size_t src_len = strlen(s);
        /* Room for four output bytes per input byte, plus the terminator. */
        const size_t dst_cap = src_len * 4 + 1;
        char *dst = malloc(dst_cap);

        if (dst != NULL) {
            char *src_cursor = (char *)s;   /* iconv takes a non-const pointer */
            char *dst_cursor = dst;
            size_t src_left = src_len;
            size_t dst_left = dst_cap - 1;

            const size_t rc = iconv(conv, &src_cursor, &src_left,
                                    &dst_cursor, &dst_left);
            iconv_close(conv);

            if (rc != (size_t)-1) {
                *dst_cursor = '\0';
                return dst;
            }
            free(dst);
        } else {
            iconv_close(conv);
        }
    }

    /* Every failure path falls back to the unconverted bytes. */
    return strdup(s);
}
200 :
static char *text_from_part(const char *part);

/* Walk the parts of a multipart entity and return the text produced by the
 * first part for which text_from_part() yields a result.
 *
 * msg   - the multipart entity (headers + body), NUL-terminated.
 * ctype - its Content-Type value; the boundary parameter is read from it
 *         (quoted or unquoted, at most 511 characters).
 *
 * Returns a malloc'd string, or NULL when there is no usable boundary or no
 * part yields text.
 *
 * The cursor `p` always rests on a delimiter ("--boundary").  Advancing it
 * to the start of the following part and then skipping "a delimiter line"
 * again at the top of the loop would throw away the first header line of
 * every part after the first one. */
static char *text_from_multipart(const char *msg, const char *ctype) {
    const char *b = strcasestr(ctype, "boundary=");
    if (!b) return NULL;
    b += strlen("boundary=");

    char boundary[512] = {0};
    if (*b == '"') {
        b++;
        const char *end = strchr(b, '"');
        if (!end) return NULL;
        snprintf(boundary, sizeof(boundary), "%.*s", (int)(end - b), b);
    } else {
        size_t i = 0;
        while (*b && *b != ';' && *b != ' ' && *b != '\r' && *b != '\n' &&
               i < sizeof(boundary) - 1)
            boundary[i++] = *b++;
        boundary[i] = '\0';
    }
    if (!boundary[0]) return NULL;

    char delim[520];
    snprintf(delim, sizeof(delim), "--%s", boundary);
    size_t dlen = strlen(delim);

    const char *p = strstr(msg, delim);
    while (p) {
        /* "--boundary--" closes the multipart: nothing follows it. */
        if (p[dlen] == '-' && p[dlen + 1] == '-') break;

        /* The part starts on the line after the delimiter line. */
        const char *start = strchr(p + dlen, '\n');
        if (!start) break;
        start++;

        /* ... and runs up to the next delimiter. */
        const char *next = strstr(start, delim);
        if (!next) break;

        char *part = strndup(start, (size_t)(next - start));
        if (!part) break;
        char *result = text_from_part(part);
        free(part);
        if (result) return result;

        p = next;   /* stay on the delimiter for the next iteration */
    }
    return NULL;
}
250 :
/* Produce displayable text for one MIME entity (headers + body).
 *
 *   no Content-Type, or text/plain : transfer-decode the body and convert
 *                                    it to UTF-8
 *   multipart/...                  : delegate to text_from_multipart()
 *   text/html                      : transfer-decode, convert to UTF-8,
 *                                    then pass through html_render(.., 0, 0)
 *   anything else                  : no text
 *
 * Returns a malloc'd string or NULL. */
static char *text_from_part(const char *part) {
    char *ctype   = mime_get_header(part, "Content-Type");
    char *enc     = mime_get_header(part, "Content-Transfer-Encoding");
    char *charset = extract_charset(ctype);
    const char *body = body_start(part);
    char *text = NULL;

    /* Classify once; the checks are ordered exactly plain, multipart, html. */
    const int is_plain =
        ctype == NULL || strncasecmp(ctype, "text/plain", 10) == 0;
    const int is_multipart =
        !is_plain && strncasecmp(ctype, "multipart/", 10) == 0;
    const int is_html =
        !is_plain && !is_multipart && strncasecmp(ctype, "text/html", 9) == 0;

    if (is_multipart) {
        text = text_from_multipart(part, ctype);
    } else if ((is_plain || is_html) && body != NULL) {
        char *raw = decode_transfer(body, strlen(body), enc);
        char *utf8 = (raw != NULL) ? charset_to_utf8(raw, charset) : NULL;
        free(raw);

        if (is_plain) {
            text = utf8;
        } else if (utf8 != NULL) {
            text = html_render(utf8, 0, 0);
            free(utf8);
        }
    }

    free(ctype);
    free(enc);
    free(charset);
    return text;
}
287 :
288 : /* ── RFC 2047 encoded-word decoder ──────────────────────────────────── */
289 :
/**
 * Decode the text portion of one RFC 2047 encoded word and convert it to
 * UTF-8.
 *
 * charset:  declared charset of the encoded bytes (NUL-terminated).
 * enc:      'Q'/'q' selects the quoted-printable variant ('_' means space,
 *           "=XX" is a hex-escaped byte); any other value is treated as
 *           base64.
 * text:     the encoded text; only the first text_len bytes are read.
 *
 * The length of the decoded bytes is tracked explicitly rather than
 * recomputed with strlen(), so payloads containing NUL bytes (e.g. UTF-16
 * encoded words) reach iconv in full instead of being cut at the first NUL.
 *
 * Returns a malloc'd NUL-terminated string, or NULL if allocation fails.
 * When the charset is "utf-8"/"utf8", is unknown to iconv, or the
 * conversion fails, the decoded bytes are returned unconverted.
 */
static char *decode_encoded_word(const char *charset, char enc,
                                 const char *text, size_t text_len) {
    char *raw = NULL;
    size_t raw_len = 0;

    if (enc == 'Q' || enc == 'q') {
        raw = malloc(text_len + 1);
        if (!raw) return NULL;
        size_t i = 0, j = 0;
        while (i < text_len) {
            if (text[i] == '_') {
                raw[j++] = ' ';
                i++;
            } else if (text[i] == '=' && i + 2 < text_len &&
                       isxdigit((unsigned char)text[i + 1]) &&
                       isxdigit((unsigned char)text[i + 2])) {
                char hex[3] = { text[i + 1], text[i + 2], '\0' };
                raw[j++] = (char)strtol(hex, NULL, 16);
                i += 3;
            } else {
                raw[j++] = text[i++];
            }
        }
        raw[j] = '\0';
        raw_len = j;
    } else {
        /* B encoding */
        raw = decode_base64(text, text_len, &raw_len);
        if (!raw) return NULL;
    }

    /* If the declared charset is already UTF-8, return as-is. */
    if (strcasecmp(charset, "utf-8") == 0 || strcasecmp(charset, "utf8") == 0)
        return raw;

    /* Otherwise convert via iconv. */
    iconv_t cd = iconv_open("UTF-8", charset);
    if (cd == (iconv_t)-1)
        return raw;  /* unknown charset: return raw bytes */

    /* Room for four output bytes per input byte, plus the terminator. */
    size_t out_size = raw_len * 4 + 1;
    char *utf8 = malloc(out_size);
    if (!utf8) { iconv_close(cd); return raw; }

    char *inp = raw;
    char *outp = utf8;
    size_t inbytes = raw_len;
    size_t outbytes = out_size - 1;
    size_t r = iconv(cd, &inp, &inbytes, &outp, &outbytes);
    iconv_close(cd);

    if (r == (size_t)-1) { free(utf8); return raw; }

    *outp = '\0';
    free(raw);
    return utf8;
}
355 :
/**
 * Attempt to parse and decode a single encoded word located exactly at *pp.
 * Expected shape: =?charset?Q|B?encoded_text?=
 *
 * The charset must be 1..63 characters long and the encoding letter one of
 * Q, q, B, b.  The encoded text runs up to the first "?=" that follows.
 *
 * Success: returns the decoded string (malloc'd) and moves *pp just past
 *          the closing "?=".
 * Failure: returns NULL and leaves *pp untouched.
 */
static char *try_decode_encoded_word(const char **pp) {
    const char *word = *pp;
    if (word[0] != '=' || word[1] != '?')
        return NULL;

    /* charset: everything up to the next '?' */
    const char *cs_begin = word + 2;
    const char *cs_end = strchr(cs_begin, '?');
    if (cs_end == NULL)
        return NULL;

    char charset[64];
    const size_t cs_len = (size_t)(cs_end - cs_begin);
    if (cs_len == 0 || cs_len >= sizeof(charset))
        return NULL;
    memcpy(charset, cs_begin, cs_len);
    charset[cs_len] = '\0';

    /* encoding indicator, which must be followed by another '?' */
    const char enc = cs_end[1];
    switch (enc) {
    case 'Q': case 'q':
    case 'B': case 'b':
        break;
    default:
        return NULL;    /* also covers the string ending here */
    }
    if (cs_end[2] != '?')
        return NULL;

    /* encoded text ends at the next "?=" */
    const char *payload = cs_end + 3;
    const char *closing = strstr(payload, "?=");
    if (closing == NULL)
        return NULL;

    char *decoded = decode_encoded_word(charset, enc, payload,
                                        (size_t)(closing - payload));
    if (decoded == NULL)
        return NULL;

    *pp = closing + 2;
    return decoded;
}
399 :
/* Decode every RFC 2047 encoded word ("=?charset?Q|B?text?=") found in
 * `value` and return the result as one string.
 *
 * Text outside encoded words is copied through unchanged, as is any "=?"
 * sequence that fails to decode.  Spaces and tabs that sit between one
 * decoded word and a following "=?" are dropped (RFC 2047 section 6.2).
 *
 * Returns a malloc'd NUL-terminated string, or NULL when `value` is NULL or
 * the initial allocation fails.  If the buffer cannot be grown later, the
 * text decoded so far is returned. */
char *mime_decode_words(const char *value) {
    if (value == NULL)
        return NULL;

    const size_t vlen = strlen(value);
    /* Initial guess: four output bytes for every input byte. */
    size_t cap = vlen * 4 + 1;
    char *out = malloc(cap);
    if (out == NULL)
        return NULL;

    size_t used = 0;
    int after_word = 0;     /* was the previous token a decoded word? */
    const char *cur = value;

    while (*cur != '\0') {
        /* Whitespace separating two adjacent encoded words is ignored. */
        if (after_word && (*cur == ' ' || *cur == '\t')) {
            const char *peek = cur + strspn(cur, " \t");
            if (peek[0] == '=' && peek[1] == '?') {
                cur = peek;
                continue;
            }
        }

        char *word = NULL;
        if (cur[0] == '=' && cur[1] == '?')
            word = try_decode_encoded_word(&cur);

        if (word == NULL) {
            /* Ordinary byte (or an undecodable "=?"): copy it literally. */
            after_word = 0;
            out[used++] = *cur++;
            continue;
        }

        const size_t wlen = strlen(word);
        if (used + wlen >= cap) {
            /* Leave room for this word plus everything still unread. */
            cap = used + wlen + vlen + 1;
            char *grown = realloc(out, cap);
            if (grown == NULL) {
                free(word);
                break;
            }
            out = grown;
        }
        memcpy(out + used, word, wlen);
        used += wlen;
        free(word);
        after_word = 1;
    }

    out[used] = '\0';
    return out;
}
450 :
451 : /* ── Date formatting ────────────────────────────────────────────────── */
452 :
/* Parse a mail Date header value and re-format it in the local time zone as
 * "YYYY-MM-DD HH:MM".
 *
 * Four strptime() patterns are tried in order (with or without a leading
 * weekday, numeric or named zone).
 *
 * Returns a malloc'd string the caller must free:
 *   - NULL when `date` is NULL or empty, or the result buffer cannot be
 *     allocated;
 *   - a copy of `date` when it cannot be parsed or converted;
 *   - the formatted local time otherwise. */
char *mime_format_date(const char *date) {
    if (!date || !*date) return NULL;

    static const char * const fmts[] = {
        "%a, %d %b %Y %T %z",   /* "Tue, 10 Mar 2026 15:07:40 +0000" */
        "%d %b %Y %T %z",       /* "10 Mar 2026 15:07:40 +0000"      */
        "%a, %d %b %Y %T %Z",   /* "Tue, 24 Mar 2026 16:38:21 GMT"   */
        "%d %b %Y %T %Z",       /* "24 Mar 2026 16:38:21 UTC"        */
        NULL
    };

    struct tm tm;
    int parsed = 0;
    for (int i = 0; fmts[i]; i++) {
        memset(&tm, 0, sizeof(tm));
        if (strptime(date, fmts[i], &tm)) { parsed = 1; break; }
    }
    if (!parsed) return strdup(date);

    /* Save tm_gmtoff before calling timegm(): timegm() normalises the struct
     * and resets tm_gmtoff to 0. */
    long gmtoff = tm.tm_gmtoff;

    /* timegm() treats the fields as UTC.  Its (time_t)-1 failure value must
     * be tested BEFORE the offset is applied, otherwise any non-zero offset
     * hides the error. */
    time_t as_utc = timegm(&tm);
    if (as_utc == (time_t)-1) return strdup(date);
    time_t utc = as_utc - gmtoff;   /* subtracting the offset gives true UTC */

    struct tm local;
    if (!localtime_r(&utc, &local)) return strdup(date);

    char *buf = malloc(17); /* "YYYY-MM-DD HH:MM\0" */
    if (!buf) return NULL;
    if (strftime(buf, 17, "%Y-%m-%d %H:%M", &local) == 0) {
        free(buf);
        return strdup(date);
    }
    return buf;
}
490 :
491 : /* ── HTML part extractor ────────────────────────────────────────────── */
492 :
static char *html_from_part(const char *part);

/* Walk the parts of a multipart entity and return the HTML produced by the
 * first part for which html_from_part() yields a result.
 *
 * msg   - the multipart entity (headers + body), NUL-terminated.
 * ctype - its Content-Type value; the boundary parameter is read from it
 *         (quoted or unquoted, at most 511 characters).
 *
 * Returns a malloc'd string, or NULL when there is no usable boundary or no
 * part yields HTML. */
static char *html_from_multipart(const char *msg, const char *ctype) {
    static const char key[] = "boundary=";

    const char *val = strcasestr(ctype, key);
    if (val == NULL)
        return NULL;
    val += sizeof(key) - 1;

    char boundary[512] = {0};
    if (*val == '"') {
        /* Quoted form: everything up to the closing quote. */
        const char *open = val + 1;
        const char *close = strchr(open, '"');
        if (close == NULL)
            return NULL;
        snprintf(boundary, sizeof(boundary), "%.*s", (int)(close - open), open);
    } else {
        /* Bare form: ends at ';', space, CR, LF or end of string. */
        size_t len = 0;
        for (; len < sizeof(boundary) - 1; len++) {
            const char c = val[len];
            if (c == '\0' || c == ';' || c == ' ' || c == '\r' || c == '\n')
                break;
            boundary[len] = c;
        }
        boundary[len] = '\0';
    }
    if (boundary[0] == '\0')
        return NULL;

    char delim[520];
    snprintf(delim, sizeof(delim), "--%s", boundary);
    const size_t dlen = strlen(delim);

    /* `at` always rests on a delimiter. */
    const char *at = strstr(msg, delim);
    while (at != NULL) {
        if (at[dlen] == '-' && at[dlen + 1] == '-')
            break;                              /* closing delimiter */

        const char *eol = strchr(at + dlen, '\n');
        if (eol == NULL)
            break;
        const char *part_begin = eol + 1;

        const char *next = strstr(part_begin, delim);
        if (next == NULL)
            break;

        char *part = strndup(part_begin, (size_t)(next - part_begin));
        if (part == NULL)
            break;
        char *html = html_from_part(part);
        free(part);
        if (html != NULL)
            return html;

        at = next;
    }
    return NULL;
}
537 :
/* Return the HTML content of one MIME entity (headers + body).
 *
 *   text/html     : transfer-decode the body and convert it to UTF-8
 *   multipart/... : delegate to html_from_multipart()
 *   anything else, or no Content-Type : NULL
 *
 * Returns a malloc'd string or NULL. */
static char *html_from_part(const char *part) {
    char *ctype   = mime_get_header(part, "Content-Type");
    char *enc     = mime_get_header(part, "Content-Transfer-Encoding");
    char *charset = extract_charset(ctype);
    const char *body = body_start(part);
    char *html = NULL;

    if (ctype != NULL) {
        if (strncasecmp(ctype, "text/html", 9) == 0) {
            if (body != NULL) {
                char *raw = decode_transfer(body, strlen(body), enc);
                if (raw != NULL) {
                    html = charset_to_utf8(raw, charset);
                    free(raw);
                }
            }
        } else if (strncasecmp(ctype, "multipart/", 10) == 0) {
            html = html_from_multipart(part, ctype);
        }
    }

    free(ctype);
    free(enc);
    free(charset);
    return html;
}
560 :
561 : /* ── Public API ─────────────────────────────────────────────────────── */
562 :
/* Public entry point: extract displayable text from a complete message.
 * Returns whatever text_from_part() produces for `msg` (a malloc'd string
 * or NULL); a NULL `msg` yields NULL. */
char *mime_get_text_body(const char *msg) {
    return (msg != NULL) ? text_from_part(msg) : NULL;
}
567 :
/* Public entry point: extract the HTML part from a complete message.
 * Returns whatever html_from_part() produces for `msg` (a malloc'd string
 * or NULL); a NULL `msg` yields NULL. */
char *mime_get_html_part(const char *msg) {
    return (msg != NULL) ? html_from_part(msg) : NULL;
}
572 :
573 : /* ── Attachment extraction ──────────────────────────────────────────── */
574 :
/* Fetch the value of parameter `param` from a MIME header value, for
 * example filename="foo.pdf" or name=bar.
 *
 * "<param>=" is located case-insensitively (first occurrence).  A value that
 * starts with a double quote runs to the next double quote; a bare value
 * runs to the first ';', space, tab, CR, LF or the end of the string.
 *
 * Returns a malloc'd string, or NULL when an argument is NULL, the parameter
 * is missing, a quoted value is unterminated, or a bare value is empty. */
static char *extract_param(const char *header, const char *param) {
    if (header == NULL || param == NULL)
        return NULL;

    char needle[64];
    snprintf(needle, sizeof(needle), "%s=", param);

    const char *hit = strcasestr(header, needle);
    if (hit == NULL)
        return NULL;

    const char *val = hit + strlen(needle);
    if (*val == '"') {
        const char *open = val + 1;
        const char *close = strchr(open, '"');
        return (close != NULL) ? strndup(open, (size_t)(close - open)) : NULL;
    }

    const size_t len = strcspn(val, "; \t\r\n");
    return (len != 0) ? strndup(val, len) : NULL;
}
598 :
/* Reduce an attachment name to a bare file name.
 *
 * Everything up to and including the last '/' or '\' is discarded, then any
 * leading dots are removed.  A name that consists only of dots becomes
 * "attachment".
 *
 * Returns a malloc'd string, or NULL when `name` is NULL or empty, when
 * nothing follows the last separator, or when allocation fails. */
static char *sanitise_filename(const char *name) {
    if (name == NULL || *name == '\0')
        return NULL;

    /* Keep only what follows the last path separator. */
    const char *base = name;
    for (const char *sep = strpbrk(name, "/\\"); sep != NULL;
         sep = strpbrk(sep + 1, "/\\"))
        base = sep + 1;
    if (*base == '\0')
        return NULL;

    /* Leading dots would make hidden files or ".." style names. */
    const char *visible = base + strspn(base, ".");
    if (*visible == '\0')
        return strdup("attachment");

    return strdup(visible);
}
616 :
617 : /* Dynamic array for building the attachment list */
618 : typedef struct { MimeAttachment *data; int count; int cap; } AttachList;
619 :
620 73 : static int alist_push(AttachList *al, MimeAttachment att) {
621 73 : if (al->count >= al->cap) {
622 45 : int newcap = al->cap ? al->cap * 2 : 4;
623 45 : MimeAttachment *tmp = realloc(al->data,
624 45 : (size_t)newcap * sizeof(MimeAttachment));
625 45 : if (!tmp) return -1;
626 45 : al->data = tmp;
627 45 : al->cap = newcap;
628 : }
629 73 : al->data[al->count++] = att;
630 73 : return 0;
631 : }
632 :
633 : /* Forward declaration */
634 : static void collect_parts(const char *msg, AttachList *al, int *unnamed_idx);
635 :
636 : /* Walk a multipart body and collect attachments from each sub-part. */
637 48 : static void collect_multipart_attachments(const char *msg, const char *ctype,
638 : AttachList *al, int *idx) {
639 48 : const char *b = strcasestr(ctype, "boundary=");
640 48 : if (!b) return;
641 48 : b += strlen("boundary=");
642 :
643 48 : char boundary[512] = {0};
644 48 : if (*b == '"') {
645 47 : b++;
646 47 : const char *end = strchr(b, '"');
647 47 : if (!end) return;
648 47 : snprintf(boundary, sizeof(boundary), "%.*s", (int)(end - b), b);
649 : } else {
650 1 : size_t i = 0;
651 5 : while (*b && *b != ';' && *b != ' ' && *b != '\r' && *b != '\n' &&
652 : i < sizeof(boundary) - 1)
653 4 : boundary[i++] = *b++;
654 1 : boundary[i] = '\0';
655 : }
656 48 : if (!boundary[0]) return;
657 :
658 : char delim[520];
659 48 : snprintf(delim, sizeof(delim), "--%s", boundary);
660 48 : size_t dlen = strlen(delim);
661 :
662 48 : const char *p = strstr(msg, delim);
663 142 : while (p) {
664 142 : p = strchr(p + dlen, '\n');
665 142 : if (!p) break;
666 142 : p++;
667 :
668 142 : const char *next = strstr(p, delim);
669 142 : if (!next) break;
670 :
671 142 : size_t partlen = (size_t)(next - p);
672 142 : char *part = strndup(p, partlen);
673 142 : if (!part) break;
674 142 : collect_parts(part, al, idx);
675 142 : free(part);
676 :
677 142 : p = next + dlen;
678 142 : if (p[0] == '-' && p[1] == '-') break;
679 94 : p = strchr(p, '\n');
680 94 : if (p) p++;
681 : }
682 : }
683 :
684 : /* Examine one MIME part (headers + body) and add to al if it is an attachment. */
685 206 : static void collect_parts(const char *msg, AttachList *al, int *unnamed_idx) {
686 206 : char *ctype = mime_get_header(msg, "Content-Type");
687 206 : char *disp = mime_get_header(msg, "Content-Disposition");
688 206 : char *enc = mime_get_header(msg, "Content-Transfer-Encoding");
689 :
690 : /* Recurse into multipart containers */
691 206 : if (ctype && strncasecmp(ctype, "multipart/", 10) == 0) {
692 48 : collect_multipart_attachments(msg, ctype, al, unnamed_idx);
693 48 : free(ctype); free(disp); free(enc);
694 133 : return;
695 : }
696 :
697 : /* Determine filename from Content-Disposition or Content-Type name= */
698 158 : char *filename = NULL;
699 158 : int explicit_attach = 0;
700 158 : if (disp) {
701 76 : if (strncasecmp(disp, "attachment", 10) == 0) explicit_attach = 1;
702 76 : filename = extract_param(disp, "filename");
703 : /* RFC 5987: filename*=charset''encoded — simplified: strip trailing * */
704 76 : if (!filename) filename = extract_param(disp, "filename*");
705 : }
706 158 : if (!filename && ctype)
707 52 : filename = extract_param(ctype, "name");
708 :
709 : /* Skip non-attachment text and multipart parts unless explicitly marked */
710 158 : if (!explicit_attach) {
711 84 : if (!filename) {
712 82 : free(ctype); free(disp); free(enc);
713 82 : return; /* no filename → body part, skip */
714 : }
715 : /* text/plain and text/html without attachment disposition are body parts */
716 2 : if (ctype && (strncasecmp(ctype, "text/plain", 10) == 0 ||
717 1 : strncasecmp(ctype, "text/html", 9) == 0)) {
718 2 : free(ctype); free(disp); free(enc); free(filename);
719 2 : return;
720 : }
721 : }
722 :
723 74 : const char *body = body_start(msg);
724 74 : if (!body) {
725 1 : free(ctype); free(disp); free(enc); free(filename);
726 1 : return;
727 : }
728 :
729 : /* Decode body content; for base64 capture exact decoded byte count. */
730 73 : size_t data_size = 0;
731 : unsigned char *data;
732 73 : if (enc && strcasecmp(enc, "base64") == 0)
733 67 : data = (unsigned char *)decode_base64(body, strlen(body), &data_size);
734 : else {
735 6 : data = (unsigned char *)decode_transfer(body, strlen(body), enc);
736 6 : data_size = data ? strlen((char *)data) : 0;
737 : }
738 :
739 : /* Sanitise / generate filename */
740 73 : char *safe_name = NULL;
741 73 : if (filename) {
742 72 : char *decoded = mime_decode_words(filename);
743 72 : free(filename);
744 72 : safe_name = sanitise_filename(decoded ? decoded : "");
745 72 : free(decoded);
746 : }
747 73 : if (!safe_name) {
748 : char gen[32];
749 1 : snprintf(gen, sizeof(gen), "attachment-%d.bin", ++(*unnamed_idx));
750 1 : safe_name = strdup(gen);
751 : }
752 :
753 73 : MimeAttachment att = {0};
754 73 : att.filename = safe_name;
755 73 : att.content_type = ctype ? strdup(ctype) : strdup("application/octet-stream");
756 73 : att.data = data;
757 73 : att.size = data_size;
758 :
759 73 : if (alist_push(al, att) < 0) {
760 0 : free(att.filename); free(att.content_type); free(att.data);
761 : }
762 :
763 73 : free(ctype); free(disp); free(enc);
764 : }
765 :
766 65 : MimeAttachment *mime_list_attachments(const char *msg, int *count_out) {
767 65 : if (!msg || !count_out) { if (count_out) *count_out = 0; return NULL; }
768 64 : AttachList al = {NULL, 0, 0};
769 64 : int idx = 0;
770 64 : collect_parts(msg, &al, &idx);
771 64 : *count_out = al.count;
772 64 : if (al.count == 0) { free(al.data); return NULL; }
773 45 : return al.data;
774 : }
775 :
776 52 : void mime_free_attachments(MimeAttachment *list, int count) {
777 52 : if (!list) return;
778 82 : for (int i = 0; i < count; i++) {
779 49 : free(list[i].filename);
780 49 : free(list[i].content_type);
781 49 : free(list[i].data);
782 : }
783 33 : free(list);
784 : }
785 :
786 12 : int mime_save_attachment(const MimeAttachment *att, const char *dest_path) {
787 12 : if (!att || !dest_path || !att->data) return -1;
788 22 : RAII_FILE FILE *f = fopen(dest_path, "wb");
789 11 : if (!f) return -1;
790 : /* Write the full decoded buffer; for base64 the NUL terminator is not
791 : * part of the content — use att->size if accurate, else strlen fallback. */
792 11 : size_t n = att->size > 0 ? att->size : strlen((char *)att->data);
793 11 : size_t written = fwrite(att->data, 1, n, f);
794 11 : return (written != n) ? -1 : 0;
795 : }
796 :
/* Extract the payload of the first IMAP literal ("{N}" followed by a line
 * break and N bytes) found in `response`.
 *
 * The byte count is read after the first '{' and must be a positive decimal
 * number immediately followed by '}'.  One optional CR and one optional LF
 * after the '}' are skipped.  When fewer than N bytes remain, whatever is
 * left is returned.
 *
 * Returns a malloc'd NUL-terminated copy, or NULL when `response` is NULL,
 * contains no well-formed literal header, or the count is not positive. */
char *mime_extract_imap_literal(const char *response) {
    if (response == NULL)
        return NULL;

    const char *open = strchr(response, '{');
    if (open == NULL)
        return NULL;

    char *after_num = NULL;
    const long declared = strtol(open + 1, &after_num, 10);
    if (after_num == NULL || *after_num != '}' || declared <= 0)
        return NULL;

    const char *payload = after_num + 1;
    if (*payload == '\r') payload++;
    if (*payload == '\n') payload++;

    /* Never copy more than is actually present. */
    const size_t available = strlen(payload);
    const size_t wanted = (size_t)declared;
    return strndup(payload, available < wanted ? available : wanted);
}
|