Line data Source code
1 : #include "mime_util.h"
2 : #include "html_render.h"
3 : #include <stdio.h>
4 : #include <stdlib.h>
5 : #include <string.h>
6 : #include <ctype.h>
7 : #include <time.h>
8 : #include <iconv.h>
9 : #include <errno.h>
10 :
11 : /* ── Header extraction ──────────────────────────────────────────────── */
12 :
/**
 * Return the value of the first header field called `name` in the header
 * section of `msg`.
 *
 * The name is compared case-insensitively and must be followed directly by
 * ':'. Scanning stops at the first line that begins with CR or LF (the blank
 * line ending the headers). Leading spaces/tabs of the value are skipped and
 * folded continuation lines are unfolded: the line break plus the following
 * run of spaces/tabs becomes a single space (unless the collected value is
 * empty or already ends in a space).
 *
 * Returns a malloc'd NUL-terminated string the caller must free, or NULL when
 * either argument is NULL, the header is absent, or allocation fails.
 */
char *mime_get_header(const char *msg, const char *name) {
    if (msg == NULL || name == NULL)
        return NULL;

    const size_t name_len = strlen(name);
    const char *line = msg;

    while (line != NULL && *line != '\0') {
        /* A line starting with CR/LF is the header/body separator. */
        if (*line == '\r' || *line == '\n')
            break;

        if (strncasecmp(line, name, name_len) != 0 || line[name_len] != ':') {
            /* Not our header: move to the start of the next physical line. */
            const char *eol = strchr(line, '\n');
            line = eol ? eol + 1 : NULL;
            continue;
        }

        const char *src = line + name_len + 1;
        while (*src == ' ' || *src == '\t')
            src++;

        size_t capacity = 512;
        size_t used = 0;
        char *value = malloc(capacity);
        if (value == NULL)
            return NULL;

        while (*src != '\0') {
            char ch = *src;

            if (ch == '\r' || ch == '\n') {
                const char *after = src;
                if (*after == '\r') after++;
                if (*after == '\n') after++;

                /* Anything but a space/tab after the break ends this header. */
                if (*after != ' ' && *after != '\t')
                    break;

                /* Folded line: drop the break and its leading whitespace. */
                src = after;
                while (*src == ' ' || *src == '\t')
                    src++;

                if (used == 0 || value[used - 1] == ' ')
                    continue;
                ch = ' ';
            } else {
                src++;
            }

            /* Always keep one spare byte for the terminator. */
            if (used + 1 >= capacity) {
                capacity *= 2;
                char *grown = realloc(value, capacity);
                if (grown == NULL) {
                    free(value);
                    return NULL;
                }
                value = grown;
            }
            value[used++] = ch;
        }

        value[used] = '\0';
        return value;
    }

    return NULL;
}
76 :
77 : /* ── Base64 decoder ─────────────────────────────────────────────────── */
78 :
/* Map one base64 alphabet character to its 6-bit value (0..63).
 * Returns -1 for every byte outside the standard alphabet, including '='. */
static int b64val(unsigned char c) {
    if (c == '+') return 62;
    if (c == '/') return 63;
    if (c >= '0' && c <= '9') return 52 + (c - '0');
    if (c >= 'a' && c <= 'z') return 26 + (c - 'a');
    if (c >= 'A' && c <= 'Z') return c - 'A';
    return -1;
}
87 :
/**
 * Decode `inlen` bytes of base64 text from `in`.
 *
 * Bytes outside the base64 alphabet (line breaks, '=' padding, anything
 * else b64val() rejects) are skipped. The result is a malloc'd buffer
 * holding the decoded bytes followed by a NUL terminator; the decoded
 * length itself is not reported, so embedded NUL bytes cannot be told
 * apart from the terminator by the caller. Returns NULL if allocation fails.
 */
static char *decode_base64(const char *in, size_t inlen) {
    /* Every 4 input symbols give at most 3 bytes; slack covers the NUL. */
    size_t max = (inlen / 4 + 1) * 3 + 4;
    char *out = malloc(max);
    if (!out) return NULL;

    size_t n = 0;
    /* Unsigned accumulator: shifting a signed int past its range is
     * undefined behaviour, which the old never-masked `int buf` hit
     * after about six symbols. */
    unsigned int acc = 0;
    int bits = 0;

    for (size_t i = 0; i < inlen; i++) {
        int v = b64val((unsigned char)in[i]);
        if (v < 0) continue;

        acc = (acc << 6) | (unsigned int)v;
        bits += 6;
        if (bits >= 8) {
            bits -= 8;
            out[n++] = (char)((acc >> bits) & 0xFFu);
            /* Keep only the bits that have not been emitted yet. */
            acc &= (1u << bits) - 1u;
        }
    }

    out[n] = '\0';
    return out;
}
107 :
108 : /* ── Quoted-Printable decoder ───────────────────────────────────────── */
109 :
/**
 * Decode `inlen` bytes of quoted-printable text from `in`.
 *
 *  - "=" followed by CR and/or LF is a soft line break and is removed.
 *  - "=" followed by two hex digits becomes the byte with that value.
 *  - Everything else (including a stray "=") is copied unchanged.
 *
 * Returns a malloc'd, NUL-terminated buffer, or NULL if allocation fails.
 */
static char *decode_qp(const char *in, size_t inlen) {
    char *decoded = malloc(inlen + 1);
    if (decoded == NULL)
        return NULL;

    size_t w = 0;   /* write position in decoded */
    size_t r = 0;   /* read position in in */

    while (r < inlen) {
        const char c = in[r];

        if (c != '=') {
            decoded[w++] = c;
            r++;
            continue;
        }

        if (r + 1 < inlen && (in[r + 1] == '\r' || in[r + 1] == '\n')) {
            /* Soft line break: swallow '=', then an optional CR and LF. */
            r++;
            if (r < inlen && in[r] == '\r') r++;
            if (r < inlen && in[r] == '\n') r++;
            continue;
        }

        if (r + 2 < inlen &&
            isxdigit((unsigned char)in[r + 1]) &&
            isxdigit((unsigned char)in[r + 2])) {
            const char digits[3] = { in[r + 1], in[r + 2], '\0' };
            decoded[w++] = (char)strtol(digits, NULL, 16);
            r += 3;
            continue;
        }

        /* '=' that starts no valid escape is kept literally. */
        decoded[w++] = c;
        r++;
    }

    decoded[w] = '\0';
    return decoded;
}
134 :
135 : /* ── Body helpers ───────────────────────────────────────────────────── */
136 :
/**
 * Locate the first byte of the body of `msg` (headers, blank line, body).
 *
 * A message or MIME part may have an empty header section, in which case it
 * begins directly with the blank line; the body then starts right after
 * that line break. Otherwise the body follows the earliest blank-line
 * separator, whether it is written as CRLF CRLF or as LF LF. Picking the
 * earliest one matters for LF-only headers followed by a body that itself
 * contains CRLF CRLF.
 *
 * `msg` must not be NULL. Returns a pointer into `msg`, or NULL when no
 * separator exists.
 */
static const char *body_start(const char *msg) {
    /* Empty header section: the very first line is the separator. */
    if (msg[0] == '\n')
        return msg + 1;
    if (msg[0] == '\r' && msg[1] == '\n')
        return msg + 2;

    const char *crlf = strstr(msg, "\r\n\r\n");
    const char *lf = strstr(msg, "\n\n");

    if (crlf && (!lf || crlf < lf))
        return crlf + 4;
    if (lf)
        return lf + 2;
    return NULL;
}
144 :
/* Undo the Content-Transfer-Encoding named by `enc` on the first `len`
 * bytes of `body`. "base64" and "quoted-printable" (case-insensitive) are
 * decoded; a NULL or any other encoding yields a plain copy.
 * Returns a malloc'd NUL-terminated buffer, or NULL on allocation failure. */
static char *decode_transfer(const char *body, size_t len, const char *enc) {
    if (enc != NULL) {
        if (strcasecmp(enc, "base64") == 0)
            return decode_base64(body, len);
        if (strcasecmp(enc, "quoted-printable") == 0)
            return decode_qp(body, len);
    }
    return strndup(body, len);
}
152 :
/* Pull the charset parameter out of a Content-Type header value,
 * e.g. "text/plain; charset=iso-8859-2" gives "iso-8859-2".
 * The match on "charset=" is case-insensitive; one optional opening quote is
 * skipped and the value ends at ';', space, tab, '"', CR, LF or end of string.
 * Returns a malloc'd string, or NULL if ctype is NULL, the parameter is
 * missing, or its value is empty. */
static char *extract_charset(const char *ctype) {
    static const char key[] = "charset=";

    if (ctype == NULL)
        return NULL;

    const char *hit = strcasestr(ctype, key);
    if (hit == NULL)
        return NULL;

    const char *begin = hit + (sizeof key - 1);
    if (*begin == '"')
        begin++;

    const size_t len = strcspn(begin, "; \t\"\r\n");
    if (len == 0)
        return NULL;
    return strndup(begin, len);
}
168 :
/* Convert the NUL-terminated string s from from_charset to UTF-8 using iconv.
 * A NULL charset, "utf-8", "utf8" and "us-ascii" (case-insensitive) need no
 * conversion and produce a plain copy. If the converter cannot be opened,
 * the output buffer cannot be allocated, or the conversion itself fails, a
 * plain copy of s is returned as well.
 * Returns a malloc'd string, or NULL when s is NULL. */
static char *charset_to_utf8(const char *s, const char *from_charset) {
    if (s == NULL)
        return NULL;

    const int passthrough =
        from_charset == NULL ||
        strcasecmp(from_charset, "utf-8") == 0 ||
        strcasecmp(from_charset, "utf8") == 0 ||
        strcasecmp(from_charset, "us-ascii") == 0;
    if (passthrough)
        return strdup(s);

    iconv_t conv = iconv_open("UTF-8", from_charset);
    if (conv == (iconv_t)-1)
        return strdup(s);

    /* Worst case: each input byte becomes four UTF-8 bytes, plus the NUL. */
    const size_t src_len = strlen(s);
    const size_t dst_cap = src_len * 4 + 1;
    char *dst = malloc(dst_cap);
    if (dst == NULL) {
        iconv_close(conv);
        return strdup(s);
    }

    /* iconv() takes a non-const input pointer although it only reads it. */
    char *src_cur = (char *)s;
    char *dst_cur = dst;
    size_t src_left = src_len;
    size_t dst_left = dst_cap - 1;
    const size_t rc = iconv(conv, &src_cur, &src_left, &dst_cur, &dst_left);
    iconv_close(conv);

    if (rc != (size_t)-1) {
        *dst_cur = '\0';
        return dst;
    }

    free(dst);
    return strdup(s);
}
198 :
static char *text_from_part(const char *part);

/**
 * Walk the parts of a multipart entity and return the text of the first
 * part for which text_from_part() produces a result.
 *
 * msg:   the complete multipart entity (headers and body).
 * ctype: its Content-Type header value; the boundary= parameter (quoted or
 *        bare) is read from here.
 *
 * Each part runs from the line after a "--boundary" delimiter up to the next
 * occurrence of the delimiter. The cursor is kept ON the delimiter between
 * iterations so every part, not just the first, is handed over starting at
 * its first header line. Walking stops at the closing "--boundary--".
 *
 * Returns a malloc'd string, or NULL if there is no boundary, no part
 * yields text, or allocation fails.
 */
static char *text_from_multipart(const char *msg, const char *ctype) {
    const char *b = strcasestr(ctype, "boundary=");
    if (!b) return NULL;
    b += strlen("boundary=");

    char boundary[512] = {0};
    if (*b == '"') {
        b++;
        const char *end = strchr(b, '"');
        if (!end) return NULL;
        snprintf(boundary, sizeof(boundary), "%.*s", (int)(end - b), b);
    } else {
        size_t i = 0;
        while (*b && *b != ';' && *b != ' ' && *b != '\r' && *b != '\n' &&
               i < sizeof(boundary) - 1)
            boundary[i++] = *b++;
        boundary[i] = '\0';
    }
    if (!boundary[0]) return NULL;

    char delim[520];
    snprintf(delim, sizeof(delim), "--%s", boundary);
    size_t dlen = strlen(delim);

    /* Invariant: at the top of the loop p points at a delimiter. */
    const char *p = strstr(msg, delim);
    while (p) {
        if (p[dlen] == '-' && p[dlen + 1] == '-') break;   /* closing delimiter */

        /* The part starts on the line after the delimiter line. */
        const char *start = strchr(p + dlen, '\n');
        if (!start) break;
        start++;

        const char *next = strstr(start, delim);
        if (!next) break;

        char *part = strndup(start, (size_t)(next - start));
        if (!part) break;
        char *result = text_from_part(part);
        free(part);
        if (result) return result;

        p = next;
    }
    return NULL;
}
248 :
/**
 * Produce displayable UTF-8 text for one MIME entity (headers + body).
 *
 *  - no Content-Type, or text/plain: the body is transfer-decoded and
 *    converted to UTF-8 from the declared charset;
 *  - multipart/...: delegated to text_from_multipart();
 *  - text/html: decoded and converted as above, then passed through
 *    html_render();
 *  - any other type: no result.
 *
 * Returns a malloc'd string or NULL.
 */
static char *text_from_part(const char *part) {
    char *content_type = mime_get_header(part, "Content-Type");
    char *encoding = mime_get_header(part, "Content-Transfer-Encoding");
    char *charset = extract_charset(content_type);
    const char *payload = body_start(part);
    char *text = NULL;

    /* Classify once; the three kinds are mutually exclusive. */
    const int is_plain = content_type == NULL ||
                         strncasecmp(content_type, "text/plain", 10) == 0;
    const int is_multi = !is_plain &&
                         strncasecmp(content_type, "multipart/", 10) == 0;
    const int is_html = !is_plain && !is_multi &&
                        strncasecmp(content_type, "text/html", 9) == 0;

    if (is_multi) {
        text = text_from_multipart(part, content_type);
    } else if ((is_plain || is_html) && payload != NULL) {
        char *decoded = decode_transfer(payload, strlen(payload), encoding);
        char *utf8 = decoded ? charset_to_utf8(decoded, charset) : NULL;
        free(decoded);

        if (is_plain) {
            text = utf8;
        } else if (utf8 != NULL) {
            text = html_render(utf8, 0, 0);
            free(utf8);
        }
    }

    free(content_type);
    free(encoding);
    free(charset);
    return text;
}
285 :
286 : /* ── RFC 2047 encoded-word decoder ──────────────────────────────────── */
287 :
/**
 * Decode the payload of a single RFC 2047 encoded word and convert it
 * to UTF-8.
 *
 * charset:  charset label taken from the encoded word.
 * enc:      'Q'/'q' selects the quoted-printable variant ('_' means space,
 *           "=XX" is a hex-escaped byte); any other value is handled as
 *           base64.
 * text:     the encoded text, text_len bytes long (need not be terminated).
 *
 * When the charset is "utf-8"/"utf8", or when iconv cannot open or perform
 * the conversion, the transfer-decoded bytes are returned unconverted.
 *
 * Returns a malloc'd NUL-terminated string, or NULL if allocation fails.
 */
static char *decode_encoded_word(const char *charset, char enc,
                                 const char *text, size_t text_len) {
    char *bytes;

    if (enc != 'Q' && enc != 'q') {
        /* B encoding */
        bytes = decode_base64(text, text_len);
        if (bytes == NULL)
            return NULL;
    } else {
        bytes = malloc(text_len + 1);
        if (bytes == NULL)
            return NULL;

        size_t out = 0;
        for (size_t in = 0; in < text_len; ) {
            const char c = text[in];

            if (c == '_') {
                bytes[out++] = ' ';
                in++;
            } else if (c == '=' && in + 2 < text_len &&
                       isxdigit((unsigned char)text[in + 1]) &&
                       isxdigit((unsigned char)text[in + 2])) {
                const char digits[3] = { text[in + 1], text[in + 2], '\0' };
                bytes[out++] = (char)strtol(digits, NULL, 16);
                in += 3;
            } else {
                bytes[out++] = c;
                in++;
            }
        }
        bytes[out] = '\0';
    }

    /* Already UTF-8: nothing to convert. */
    if (strcasecmp(charset, "utf-8") == 0 || strcasecmp(charset, "utf8") == 0)
        return bytes;

    iconv_t conv = iconv_open("UTF-8", charset);
    if (conv == (iconv_t)-1)
        return bytes;               /* unknown charset: hand back raw bytes */

    const size_t src_len = strlen(bytes);
    const size_t dst_cap = src_len * 4 + 1;
    char *converted = malloc(dst_cap);
    if (converted == NULL) {
        iconv_close(conv);
        return bytes;
    }

    char *src_cur = bytes;
    char *dst_cur = converted;
    size_t src_left = src_len;
    size_t dst_left = dst_cap - 1;
    const size_t rc = iconv(conv, &src_cur, &src_left, &dst_cur, &dst_left);
    iconv_close(conv);

    if (rc == (size_t)-1) {
        free(converted);
        return bytes;
    }

    *dst_cur = '\0';
    free(bytes);
    return converted;
}
353 :
/**
 * Attempt to parse and decode one encoded word that begins exactly at *pp.
 * Expected shape: =?charset?Q|B?encoded_text?=
 *
 * The charset must be 1..63 bytes long and the encoding letter one of
 * Q, q, B, b. The encoded text runs up to the next "?=".
 *
 * Success: returns the decoded malloc'd string and moves *pp just past the
 * closing "?=". Failure: returns NULL and leaves *pp untouched.
 */
static char *try_decode_encoded_word(const char **pp) {
    const char *cur = *pp;
    if (cur[0] != '=' || cur[1] != '?')
        return NULL;
    cur += 2;

    /* charset */
    const char *cs_end = strchr(cur, '?');
    if (cs_end == NULL)
        return NULL;
    const size_t cs_len = (size_t)(cs_end - cur);
    if (cs_len == 0 || cs_len >= 64)
        return NULL;
    char charset[64];
    memcpy(charset, cur, cs_len);
    charset[cs_len] = '\0';
    cur = cs_end + 1;

    /* encoding letter, followed by '?' */
    const char enc = *cur;
    switch (enc) {
    case 'Q': case 'q': case 'B': case 'b':
        break;
    default:
        return NULL;
    }
    if (cur[1] != '?')
        return NULL;
    cur += 2;

    /* encoded text, terminated by "?=" */
    const char *closing = strstr(cur, "?=");
    if (closing == NULL)
        return NULL;

    char *decoded = decode_encoded_word(charset, enc, cur,
                                        (size_t)(closing - cur));
    if (decoded == NULL)
        return NULL;

    *pp = closing + 2;
    return decoded;
}
397 :
/**
 * Decode every RFC 2047 encoded word in a header value.
 *
 * Text outside encoded words is copied through unchanged, as is anything
 * that starts with "=?" but does not parse as an encoded word. Spaces/tabs
 * between two adjacent encoded words are dropped (RFC 2047 section 6.2).
 * They are dropped only once the second word has actually been decoded;
 * if it turns out not to be a valid encoded word the whitespace is kept.
 *
 * Returns a malloc'd NUL-terminated string the caller must free, or NULL if
 * value is NULL or the initial allocation fails. If a later reallocation
 * fails, the text decoded so far is returned.
 */
char *mime_decode_words(const char *value) {
    if (!value) return NULL;

    size_t vlen = strlen(value);
    /* Upper bound: each raw byte can expand to at most 4 UTF-8 bytes. */
    size_t cap = vlen * 4 + 1;
    char *out = malloc(cap);
    if (!out) return NULL;

    size_t n = 0;
    const char *p = value;
    int prev_encoded = 0;

    while (*p) {
        /* Candidate start of an encoded word: p itself, or, right after a
         * decoded word, the first byte following a run of spaces/tabs. */
        const char *word = p;
        if (prev_encoded && (*p == ' ' || *p == '\t')) {
            while (*word == ' ' || *word == '\t') word++;
        }

        if (word[0] == '=' && word[1] == '?') {
            const char *after = word;
            char *decoded = try_decode_encoded_word(&after);
            if (decoded) {
                size_t dlen = strlen(decoded);
                if (n + dlen >= cap) {
                    size_t newcap = n + dlen + vlen + 1;
                    char *tmp = realloc(out, newcap);
                    if (!tmp) { free(decoded); break; }
                    out = tmp;
                    cap = newcap;
                }
                memcpy(out + n, decoded, dlen);
                n += dlen;
                free(decoded);
                prev_encoded = 1;
                /* Committing here also discards any skipped whitespace. */
                p = after;
                continue;
            }
        }

        /* Ordinary byte (possibly whitespace we looked past): copy it. */
        prev_encoded = 0;
        out[n++] = *p++;
    }

    out[n] = '\0';
    return out;
}
448 :
449 : /* ── Date formatting ────────────────────────────────────────────────── */
450 :
/**
 * Reformat an RFC 5322-style Date header value as "YYYY-MM-DD HH:MM" in the
 * process's local time zone.
 *
 * Several strptime() layouts are tried (with or without the weekday, numeric
 * offset or zone name). Returns:
 *  - NULL when date is NULL or empty, or the result buffer cannot be
 *    allocated;
 *  - a malloc'd copy of the input when it cannot be parsed or converted;
 *  - otherwise the malloc'd formatted string.
 */
char *mime_format_date(const char *date) {
    if (!date || !*date) return NULL;

    static const char * const fmts[] = {
        "%a, %d %b %Y %T %z",   /* "Tue, 10 Mar 2026 15:07:40 +0000" */
        "%d %b %Y %T %z",       /* "10 Mar 2026 15:07:40 +0000"      */
        "%a, %d %b %Y %T %Z",   /* "Tue, 24 Mar 2026 16:38:21 GMT"   */
        "%d %b %Y %T %Z",       /* "24 Mar 2026 16:38:21 UTC"        */
        NULL
    };

    struct tm tm;
    int parsed = 0;
    for (int i = 0; fmts[i]; i++) {
        memset(&tm, 0, sizeof(tm));
        if (strptime(date, fmts[i], &tm)) { parsed = 1; break; }
    }
    if (!parsed) return strdup(date);

    /* Save tm_gmtoff before calling timegm(): timegm() normalises the struct
     * and resets tm_gmtoff to 0. timegm() treats the fields as UTC, so
     * subtracting the original offset converts to true UTC. */
    long gmtoff = tm.tm_gmtoff;

    /* Test timegm()'s own return value: once the offset has been subtracted
     * a failure can no longer be recognised. */
    time_t as_utc = timegm(&tm);
    if (as_utc == (time_t)-1) return strdup(date);
    time_t utc = as_utc - gmtoff;

    struct tm local;
    if (!localtime_r(&utc, &local)) return strdup(date);

    char *buf = malloc(17); /* "YYYY-MM-DD HH:MM\0" */
    if (!buf) return NULL;
    if (strftime(buf, 17, "%Y-%m-%d %H:%M", &local) == 0) {
        free(buf);
        return strdup(date);
    }
    return buf;
}
488 :
489 : /* ── HTML part extractor ────────────────────────────────────────────── */
490 :
static char *html_from_part(const char *part);

/**
 * Walk the parts of a multipart entity and return the HTML of the first
 * part for which html_from_part() produces a result.
 *
 * msg:   the complete multipart entity (headers and body).
 * ctype: its Content-Type value, from which boundary= (quoted or bare) is
 *        taken.
 *
 * Returns a malloc'd string, or NULL when there is no boundary, no part
 * yields HTML, or allocation fails.
 */
static char *html_from_multipart(const char *msg, const char *ctype) {
    static const char key[] = "boundary=";

    const char *src = strcasestr(ctype, key);
    if (src == NULL)
        return NULL;
    src += sizeof key - 1;

    char boundary[512] = {0};
    if (*src != '"') {
        /* Bare token: ends at ';', space, CR, LF or when the buffer is full. */
        size_t len = 0;
        while (*src && *src != ';' && *src != ' ' && *src != '\r' &&
               *src != '\n' && len < sizeof(boundary) - 1)
            boundary[len++] = *src++;
        boundary[len] = '\0';
    } else {
        src++;
        const char *closing = strchr(src, '"');
        if (closing == NULL)
            return NULL;
        snprintf(boundary, sizeof(boundary), "%.*s", (int)(closing - src), src);
    }
    if (boundary[0] == '\0')
        return NULL;

    char delim[520];
    snprintf(delim, sizeof(delim), "--%s", boundary);
    const size_t delim_len = strlen(delim);

    /* `at` always points at a delimiter occurrence. */
    for (const char *at = strstr(msg, delim); at != NULL; ) {
        if (at[delim_len] == '-' && at[delim_len + 1] == '-')
            break;                                  /* end boundary */

        const char *begin = strchr(at + delim_len, '\n');
        if (begin == NULL)
            break;
        begin++;

        const char *next = strstr(begin, delim);
        if (next == NULL)
            break;

        char *part = strndup(begin, (size_t)(next - begin));
        if (part == NULL)
            break;
        char *html = html_from_part(part);
        free(part);
        if (html != NULL)
            return html;

        at = next;
    }
    return NULL;
}
535 :
/**
 * Return the raw HTML carried by one MIME entity (headers + body).
 *
 * A text/html entity has its body transfer-decoded and converted to UTF-8;
 * a multipart entity is searched via html_from_multipart(). Entities with
 * no Content-Type or any other type give no result.
 *
 * Returns a malloc'd string or NULL.
 */
static char *html_from_part(const char *part) {
    char *content_type = mime_get_header(part, "Content-Type");
    char *encoding = mime_get_header(part, "Content-Transfer-Encoding");
    char *charset = extract_charset(content_type);
    const char *payload = body_start(part);
    char *html = NULL;

    if (content_type != NULL) {
        if (strncasecmp(content_type, "text/html", 9) == 0) {
            if (payload != NULL) {
                char *decoded = decode_transfer(payload, strlen(payload), encoding);
                if (decoded != NULL) {
                    html = charset_to_utf8(decoded, charset);
                    free(decoded);
                }
            }
        } else if (strncasecmp(content_type, "multipart/", 10) == 0) {
            html = html_from_multipart(part, content_type);
        }
    }

    free(content_type);
    free(encoding);
    free(charset);
    return html;
}
558 :
559 : /* ── Public API ─────────────────────────────────────────────────────── */
560 :
/* Public entry point: extract displayable text from a whole message.
 * Returns a malloc'd string, or NULL when msg is NULL or no text is found. */
char *mime_get_text_body(const char *msg) {
    return msg != NULL ? text_from_part(msg) : NULL;
}
565 :
/* Public entry point: extract the HTML part of a whole message.
 * Returns a malloc'd string, or NULL when msg is NULL or no HTML is found. */
char *mime_get_html_part(const char *msg) {
    return msg != NULL ? html_from_part(msg) : NULL;
}
570 :
571 : /* ── Attachment extraction ──────────────────────────────────────────── */
572 :
/* Extract a MIME header parameter value, e.g. filename="foo.pdf" or name=bar.
 *
 * The parameter name is matched case-insensitively and must be a whole
 * token: it has to sit at the very start of the header value or directly
 * after ';', space, tab, CR or LF. Without that check a lookup of "name"
 * would also hit the tail of "filename=".
 *
 * Quoted values end at the next '"'; unquoted values end at ';', whitespace
 * or end of string. Returns a malloc'd string, or NULL if an argument is
 * NULL, the parameter is absent, a quoted value is unterminated, or an
 * unquoted value is empty. */
static char *extract_param(const char *header, const char *param) {
    if (!header || !param) return NULL;

    char search[64];
    snprintf(search, sizeof(search), "%s=", param);
    size_t slen = strlen(search);

    /* Find the first occurrence that begins a parameter token. */
    const char *p = header;
    while ((p = strcasestr(p, search)) != NULL) {
        if (p == header) break;
        char prev = p[-1];
        if (prev == ';' || prev == ' ' || prev == '\t' ||
            prev == '\r' || prev == '\n')
            break;
        p++;    /* matched inside a longer name: keep searching */
    }
    if (!p) return NULL;
    p += slen;

    if (*p == '"') {
        p++;
        const char *end = strchr(p, '"');
        if (!end) return NULL;
        return strndup(p, (size_t)(end - p));
    }

    /* unquoted value: ends at ';', whitespace, or end-of-string */
    const char *end = p;
    while (*end && *end != ';' && *end != ' ' && *end != '\t' &&
           *end != '\r' && *end != '\n')
        end++;
    if (end == p) return NULL;
    return strndup(p, (size_t)(end - p));
}
596 :
/* Make an attachment filename safe to use as a single path component.
 * Only the text after the last '/' or '\\' is kept, and leading dots are
 * removed from it. A name consisting solely of dots becomes "attachment".
 * Returns a malloc'd string, or NULL when name is NULL/empty, ends in a
 * directory separator, or allocation fails. */
static char *sanitise_filename(const char *name) {
    if (name == NULL || *name == '\0')
        return NULL;

    /* Locate the last path component. */
    const char *leaf = name;
    for (const char *c = name; *c != '\0'; c++) {
        if (*c == '/' || *c == '\\')
            leaf = c + 1;
    }
    if (*leaf == '\0')
        return NULL;

    char *copy = strdup(leaf);
    if (copy == NULL)
        return NULL;

    /* Drop leading dots (hidden files / directory traversal). */
    const size_t dots = strspn(copy, ".");
    if (copy[dots] == '\0') {
        free(copy);
        return strdup("attachment");
    }
    if (dots > 0)
        memmove(copy, copy + dots, strlen(copy + dots) + 1);
    return copy;
}
614 :
615 : /* Dynamic array for building the attachment list */
616 : typedef struct { MimeAttachment *data; int count; int cap; } AttachList;
617 :
618 0 : static int alist_push(AttachList *al, MimeAttachment att) {
619 0 : if (al->count >= al->cap) {
620 0 : int newcap = al->cap ? al->cap * 2 : 4;
621 0 : MimeAttachment *tmp = realloc(al->data,
622 0 : (size_t)newcap * sizeof(MimeAttachment));
623 0 : if (!tmp) return -1;
624 0 : al->data = tmp;
625 0 : al->cap = newcap;
626 : }
627 0 : al->data[al->count++] = att;
628 0 : return 0;
629 : }
630 :
631 : /* Forward declaration */
632 : static void collect_parts(const char *msg, AttachList *al, int *unnamed_idx);
633 :
634 : /* Walk a multipart body and collect attachments from each sub-part. */
635 0 : static void collect_multipart_attachments(const char *msg, const char *ctype,
636 : AttachList *al, int *idx) {
637 0 : const char *b = strcasestr(ctype, "boundary=");
638 0 : if (!b) return;
639 0 : b += strlen("boundary=");
640 :
641 0 : char boundary[512] = {0};
642 0 : if (*b == '"') {
643 0 : b++;
644 0 : const char *end = strchr(b, '"');
645 0 : if (!end) return;
646 0 : snprintf(boundary, sizeof(boundary), "%.*s", (int)(end - b), b);
647 : } else {
648 0 : size_t i = 0;
649 0 : while (*b && *b != ';' && *b != ' ' && *b != '\r' && *b != '\n' &&
650 : i < sizeof(boundary) - 1)
651 0 : boundary[i++] = *b++;
652 0 : boundary[i] = '\0';
653 : }
654 0 : if (!boundary[0]) return;
655 :
656 0 : char delim[520];
657 0 : snprintf(delim, sizeof(delim), "--%s", boundary);
658 0 : size_t dlen = strlen(delim);
659 :
660 0 : const char *p = strstr(msg, delim);
661 0 : while (p) {
662 0 : p = strchr(p + dlen, '\n');
663 0 : if (!p) break;
664 0 : p++;
665 :
666 0 : const char *next = strstr(p, delim);
667 0 : if (!next) break;
668 :
669 0 : size_t partlen = (size_t)(next - p);
670 0 : char *part = strndup(p, partlen);
671 0 : if (!part) break;
672 0 : collect_parts(part, al, idx);
673 0 : free(part);
674 :
675 0 : p = next + dlen;
676 0 : if (p[0] == '-' && p[1] == '-') break;
677 0 : p = strchr(p, '\n');
678 0 : if (p) p++;
679 : }
680 : }
681 :
682 : /* Examine one MIME part (headers + body) and add to al if it is an attachment. */
683 1 : static void collect_parts(const char *msg, AttachList *al, int *unnamed_idx) {
684 1 : char *ctype = mime_get_header(msg, "Content-Type");
685 1 : char *disp = mime_get_header(msg, "Content-Disposition");
686 1 : char *enc = mime_get_header(msg, "Content-Transfer-Encoding");
687 :
688 : /* Recurse into multipart containers */
689 1 : if (ctype && strncasecmp(ctype, "multipart/", 10) == 0) {
690 0 : collect_multipart_attachments(msg, ctype, al, unnamed_idx);
691 0 : free(ctype); free(disp); free(enc);
692 0 : return;
693 : }
694 :
695 : /* Determine filename from Content-Disposition or Content-Type name= */
696 1 : char *filename = NULL;
697 1 : int explicit_attach = 0;
698 1 : if (disp) {
699 0 : if (strncasecmp(disp, "attachment", 10) == 0) explicit_attach = 1;
700 0 : filename = extract_param(disp, "filename");
701 : /* RFC 5987: filename*=charset''encoded — simplified: strip trailing * */
702 0 : if (!filename) filename = extract_param(disp, "filename*");
703 : }
704 1 : if (!filename && ctype)
705 1 : filename = extract_param(ctype, "name");
706 :
707 : /* Skip non-attachment text and multipart parts unless explicitly marked */
708 1 : if (!explicit_attach) {
709 1 : if (!filename) {
710 1 : free(ctype); free(disp); free(enc);
711 1 : return; /* no filename → body part, skip */
712 : }
713 : /* text/plain and text/html without attachment disposition are body parts */
714 0 : if (ctype && (strncasecmp(ctype, "text/plain", 10) == 0 ||
715 0 : strncasecmp(ctype, "text/html", 9) == 0)) {
716 0 : free(ctype); free(disp); free(enc); free(filename);
717 0 : return;
718 : }
719 : }
720 :
721 0 : const char *body = body_start(msg);
722 0 : if (!body) {
723 0 : free(ctype); free(disp); free(enc); free(filename);
724 0 : return;
725 : }
726 :
727 : /* Decode body content */
728 0 : unsigned char *data = (unsigned char *)decode_transfer(body, strlen(body), enc);
729 0 : size_t data_size = data ? strlen((char *)data) : 0;
730 : /* For binary (base64-decoded) content the length may contain NUL bytes —
731 : * use the decoded output length from decode_base64, which null-terminates
732 : * but the real size is the base64-decoded byte count. */
733 0 : if (enc && strcasecmp(enc, "base64") == 0 && data) {
734 : /* decode_base64 returns the decoded bytes; count excludes the trailing NUL */
735 : /* we need actual binary size — recount via the decoded buffer length */
736 0 : size_t raw_enc_len = strlen(body);
737 0 : data_size = (raw_enc_len / 4) * 3; /* upper bound */
738 : /* trim padding: accurate enough for display; file write uses full buffer */
739 : }
740 :
741 : /* Sanitise / generate filename */
742 0 : char *safe_name = NULL;
743 0 : if (filename) {
744 0 : char *decoded = mime_decode_words(filename);
745 0 : free(filename);
746 0 : safe_name = sanitise_filename(decoded ? decoded : "");
747 0 : free(decoded);
748 : }
749 0 : if (!safe_name) {
750 0 : char gen[32];
751 0 : snprintf(gen, sizeof(gen), "attachment-%d.bin", ++(*unnamed_idx));
752 0 : safe_name = strdup(gen);
753 : }
754 :
755 0 : MimeAttachment att = {0};
756 0 : att.filename = safe_name;
757 0 : att.content_type = ctype ? strdup(ctype) : strdup("application/octet-stream");
758 0 : att.data = data;
759 0 : att.size = data_size;
760 :
761 0 : if (alist_push(al, att) < 0) {
762 0 : free(att.filename); free(att.content_type); free(att.data);
763 : }
764 :
765 0 : free(ctype); free(disp); free(enc);
766 : }
767 :
768 1 : MimeAttachment *mime_list_attachments(const char *msg, int *count_out) {
769 1 : if (!msg || !count_out) { if (count_out) *count_out = 0; return NULL; }
770 1 : AttachList al = {NULL, 0, 0};
771 1 : int idx = 0;
772 1 : collect_parts(msg, &al, &idx);
773 1 : *count_out = al.count;
774 1 : if (al.count == 0) { free(al.data); return NULL; }
775 0 : return al.data;
776 : }
777 :
778 1 : void mime_free_attachments(MimeAttachment *list, int count) {
779 1 : if (!list) return;
780 0 : for (int i = 0; i < count; i++) {
781 0 : free(list[i].filename);
782 0 : free(list[i].content_type);
783 0 : free(list[i].data);
784 : }
785 0 : free(list);
786 : }
787 :
788 0 : int mime_save_attachment(const MimeAttachment *att, const char *dest_path) {
789 0 : if (!att || !dest_path || !att->data) return -1;
790 0 : FILE *f = fopen(dest_path, "wb");
791 0 : if (!f) return -1;
792 : /* Write the full decoded buffer; for base64 the NUL terminator is not
793 : * part of the content — use att->size if accurate, else strlen fallback. */
794 0 : size_t n = att->size > 0 ? att->size : strlen((char *)att->data);
795 0 : size_t written = fwrite(att->data, 1, n, f);
796 0 : int err = (written != n) ? -1 : 0;
797 0 : fclose(f);
798 0 : return err;
799 : }
800 :
/* Extract the payload of the first IMAP literal ("{N}" followed by a line
 * break and N bytes of data) found in response.
 *
 * The byte count must be a positive decimal number directly followed by
 * '}'. One optional CR and one optional LF after the brace are skipped.
 * If fewer than N bytes remain before the terminating NUL, only the
 * available bytes are returned.
 *
 * Returns a malloc'd NUL-terminated copy, or NULL when response is NULL, no
 * well-formed literal header is present, the count is not positive, or
 * allocation fails. */
char *mime_extract_imap_literal(const char *response) {
    if (response == NULL)
        return NULL;

    const char *open_brace = strchr(response, '{');
    if (open_brace == NULL)
        return NULL;

    char *after_digits = NULL;
    const long declared = strtol(open_brace + 1, &after_digits, 10);
    if (after_digits == NULL || *after_digits != '}' || declared <= 0)
        return NULL;

    const char *payload = after_digits + 1;
    if (*payload == '\r') payload++;
    if (*payload == '\n') payload++;

    /* Never copy more than what is actually present. */
    const size_t present = strlen(payload);
    const size_t wanted = (size_t)declared;
    return strndup(payload, present < wanted ? present : wanted);
}
|