LCOV - code coverage report
Current view: top level - libemail/src/core - mime_util.c (source / functions) Coverage Total Hit
Test: coverage.info Lines: 98.7 % 519 512
Test Date: 2026-05-07 15:53:07 Functions: 100.0 % 27 27

            Line data    Source code
       1              : #include "mime_util.h"
       2              : #include "html_render.h"
       3              : #include "raii.h"
       4              : #include <stdio.h>
       5              : #include <stdlib.h>
       6              : #include <string.h>
       7              : #include <ctype.h>
       8              : #include <time.h>
       9              : #include <iconv.h>
      10              : #include <errno.h>
      11              : 
      12              : /* ── Header extraction ──────────────────────────────────────────────── */
      13              : 
      14         6451 : char *mime_get_header(const char *msg, const char *name) {
      15         6451 :     if (!msg || !name) return NULL;
      16         6449 :     size_t nlen = strlen(name);
      17         6449 :     const char *p = msg;
      18              : 
      19        24352 :     while (p && *p) {
      20              :         /* Stop at the blank line separating headers from body. */
      21        17899 :         if (*p == '\r' || *p == '\n')
      22              :             break;
      23              : 
      24        16514 :         if (strncasecmp(p, name, nlen) == 0 && p[nlen] == ':') {
      25         5060 :             const char *val = p + nlen + 1;
      26        10120 :             while (*val == ' ' || *val == '\t') val++;
      27              : 
      28         5060 :             size_t cap = 512, n = 0;
      29         5060 :             char *result = malloc(cap);
      30         5060 :             if (!result) return NULL;
      31              : 
      32              :             /* Collect value, unfolding continuation lines */
      33       145714 :             while (*val) {
      34       145714 :                 if (*val == '\r' || *val == '\n') {
      35         5248 :                     const char *next = val;
      36         5248 :                     if (*next == '\r') next++;
      37         5248 :                     if (*next == '\n') next++;
      38         5248 :                     if (*next == ' ' || *next == '\t') {
      39              :                         /* Continuation line: skip CRLF and the leading whitespace */
      40          188 :                         val = next;
      41          376 :                         while (*val == ' ' || *val == '\t') val++;
      42              :                         /* Add a single space to separate folded content if needed */
      43          188 :                         if (n > 0 && result[n-1] != ' ') {
      44          188 :                             if (n + 1 >= cap) {
      45            1 :                                 cap *= 2;
      46            1 :                                 char *tmp = realloc(result, cap);
      47            1 :                                 if (!tmp) { free(result); return NULL; }
      48            1 :                                 result = tmp;
      49              :                             }
      50          188 :                             result[n++] = ' ';
      51              :                         }
      52          188 :                         continue;
      53              :                     } else {
      54              :                         /* Not a continuation line: we are done with this header */
      55              :                         break;
      56              :                     }
      57              :                 }
      58              : 
      59       140466 :                 if (n + 1 >= cap) {
      60            1 :                     cap *= 2;
      61            1 :                     char *tmp = realloc(result, cap);
      62            1 :                     if (!tmp) { free(result); return NULL; }
      63            1 :                     result = tmp;
      64              :                 }
      65       140466 :                 result[n++] = *val++;
      66              :             }
      67         5060 :             result[n] = '\0';
      68         5060 :             return result;
      69              :         }
      70              : 
      71              :         /* Advance to next line */
      72        11454 :         p = strchr(p, '\n');
      73        11454 :         if (p) p++;
      74              :     }
      75         1389 :     return NULL;
      76              : }
      77              : 
      78              : /* ── Base64 decoder ─────────────────────────────────────────────────── */
      79              : 
      80         3632 : static int b64val(unsigned char c) {
      81         3632 :     if (c >= 'A' && c <= 'Z') return c - 'A';
      82         1282 :     if (c >= 'a' && c <= 'z') return c - 'a' + 26;
      83          294 :     if (c >= '0' && c <= '9') return c - '0' + 52;
      84          176 :     if (c == '+') return 62;
      85          176 :     if (c == '/') return 63;
      86          176 :     return -1;
      87              : }
      88              : 
      89           75 : static char *decode_base64(const char *in, size_t inlen, size_t *out_len) {
      90           75 :     size_t max = (inlen / 4 + 1) * 3 + 4;
      91           75 :     char *out = malloc(max);
      92           75 :     if (!out) return NULL;
      93           75 :     size_t n = 0;
      94           75 :     int buf = 0, bits = 0;
      95         3707 :     for (size_t i = 0; i < inlen; i++) {
      96         3632 :         int v = b64val((unsigned char)in[i]);
      97         3632 :         if (v < 0) continue;
      98         3456 :         buf = (buf << 6) | v;
      99         3456 :         bits += 6;
     100         3456 :         if (bits >= 8) {
     101         2582 :             bits -= 8;
     102         2582 :             out[n++] = (char)((buf >> bits) & 0xFF);
     103              :         }
     104              :     }
     105           75 :     out[n] = '\0';
     106           75 :     if (out_len) *out_len = n;
     107           75 :     return out;
     108              : }
     109              : 
     110              : /* ── Quoted-Printable decoder ───────────────────────────────────────── */
     111              : 
     112            2 : static char *decode_qp(const char *in, size_t inlen) {
     113            2 :     char *out = malloc(inlen + 1);
     114            2 :     if (!out) return NULL;
     115            2 :     size_t n = 0, i = 0;
     116           26 :     while (i < inlen) {
     117           24 :         if (in[i] == '=' && i + 1 < inlen &&
     118            4 :             (in[i + 1] == '\r' || in[i + 1] == '\n')) {
     119              :             /* Soft line break — skip */
     120            1 :             i++;
     121            1 :             if (i < inlen && in[i] == '\r') i++;
     122            1 :             if (i < inlen && in[i] == '\n') i++;
     123           23 :         } else if (in[i] == '=' && i + 2 < inlen &&
     124            3 :                    isxdigit((unsigned char)in[i + 1]) &&
     125            3 :                    isxdigit((unsigned char)in[i + 2])) {
     126            3 :             char hex[3] = { in[i + 1], in[i + 2], '\0' };
     127            3 :             out[n++] = (char)strtol(hex, NULL, 16);
     128            3 :             i += 3;
     129              :         } else {
     130           20 :             out[n++] = in[i++];
     131              :         }
     132              :     }
     133            2 :     out[n] = '\0';
     134            2 :     return out;
     135              : }
     136              : 
     137              : /* ── Body helpers ───────────────────────────────────────────────────── */
     138              : 
     139          269 : static const char *body_start(const char *msg) {
     140          269 :     const char *p = strstr(msg, "\r\n\r\n");
     141          269 :     if (p) return p + 4;
     142            4 :     p = strstr(msg, "\n\n");
     143            4 :     if (p) return p + 2;
     144            3 :     return NULL;
     145              : }
     146              : 
     147           81 : static char *decode_transfer(const char *body, size_t len, const char *enc) {
     148           81 :     if (enc && strcasecmp(enc, "base64") == 0)
     149            1 :         return decode_base64(body, len, NULL);
     150           80 :     if (enc && strcasecmp(enc, "quoted-printable") == 0)
     151            2 :         return decode_qp(body, len);
     152           78 :     return strndup(body, len);
     153              : }
     154              : 
     155              : /* Extract the charset parameter value from a Content-Type header value.
     156              :  * E.g. "text/plain; charset=iso-8859-2" → "iso-8859-2".
     157              :  * Returns a malloc'd string or NULL if not found. */
     158          195 : static char *extract_charset(const char *ctype) {
     159          195 :     if (!ctype) return NULL;
     160          180 :     const char *p = strcasestr(ctype, "charset=");
     161          180 :     if (!p) return NULL;
     162          109 :     p += 8;
     163          109 :     if (*p == '"') p++;          /* skip optional opening quote */
     164          109 :     const char *start = p;
     165          654 :     while (*p && *p != ';' && *p != ' ' && *p != '\t' && *p != '"' && *p != '\r' && *p != '\n')
     166          545 :         p++;
     167          109 :     if (p == start) return NULL;
     168          108 :     return strndup(start, (size_t)(p - start));
     169              : }
     170              : 
     171              : /* Convert s from from_charset to UTF-8 via iconv.
     172              :  * Returns a malloc'd UTF-8 string; on failure returns strdup(s). */
     173           75 : static char *charset_to_utf8(const char *s, const char *from_charset) {
     174           75 :     if (!s) return NULL;
     175           75 :     if (!from_charset ||
     176           56 :         strcasecmp(from_charset, "utf-8")  == 0 ||
     177            1 :         strcasecmp(from_charset, "utf8")   == 0 ||
     178            1 :         strcasecmp(from_charset, "us-ascii") == 0)
     179           74 :         return strdup(s);
     180              : 
     181            1 :     iconv_t cd = iconv_open("UTF-8", from_charset);
     182            1 :     if (cd == (iconv_t)-1) return strdup(s);
     183              : 
     184            1 :     size_t in_len   = strlen(s);
     185            1 :     size_t out_size = in_len * 4 + 1;
     186            1 :     char  *out      = malloc(out_size);
     187            1 :     if (!out) { iconv_close(cd); return strdup(s); }
     188              : 
     189            1 :     char  *inp      = (char *)s;
     190            1 :     char  *outp     = out;
     191            1 :     size_t inbytes  = in_len;
     192            1 :     size_t outbytes = out_size - 1;
     193            1 :     size_t r        = iconv(cd, &inp, &inbytes, &outp, &outbytes);
     194            1 :     iconv_close(cd);
     195              : 
     196            1 :     if (r == (size_t)-1) { free(out); return strdup(s); }
     197            1 :     *outp = '\0';
     198            1 :     return out;
     199              : }
     200              : 
     201              : static char *text_from_part(const char *part);
     202              : 
     203            9 : static char *text_from_multipart(const char *msg, const char *ctype) {
     204            9 :     const char *b = strcasestr(ctype, "boundary=");
     205            9 :     if (!b) return NULL;
     206            9 :     b += strlen("boundary=");
     207              : 
     208            9 :     char boundary[512] = {0};
     209            9 :     if (*b == '"') {
     210            7 :         b++;
     211            7 :         const char *end = strchr(b, '"');
     212            7 :         if (!end) return NULL;
     213            7 :         snprintf(boundary, sizeof(boundary), "%.*s", (int)(end - b), b);
     214              :     } else {
     215            2 :         size_t i = 0;
     216           11 :         while (*b && *b != ';' && *b != ' ' && *b != '\r' && *b != '\n' &&
     217              :                i < sizeof(boundary) - 1)
     218            9 :             boundary[i++] = *b++;
     219            2 :         boundary[i] = '\0';
     220              :     }
     221            9 :     if (!boundary[0]) return NULL;
     222              : 
     223              :     char delim[520];
     224            9 :     snprintf(delim, sizeof(delim), "--%s", boundary);
     225            9 :     size_t dlen = strlen(delim);
     226              : 
     227            9 :     const char *p = strstr(msg, delim);
     228           10 :     while (p) {
     229           10 :         p = strchr(p + dlen, '\n');
     230           10 :         if (!p) break;
     231           10 :         p++;
     232              : 
     233           10 :         const char *next = strstr(p, delim);
     234           10 :         if (!next) break;
     235              : 
     236           10 :         size_t partlen = (size_t)(next - p);
     237           10 :         char *part = strndup(p, partlen);
     238           10 :         if (!part) break;
     239           10 :         char *result = text_from_part(part);
     240           10 :         free(part);
     241           10 :         if (result) return result;
     242              : 
     243            2 :         p = next + dlen;
     244            2 :         if (p[0] == '-' && p[1] == '-') break;
     245            1 :         p = strchr(p, '\n');
     246            1 :         if (p) p++;
     247              :     }
     248            1 :     return NULL;
     249              : }
     250              : 
     251           51 : static char *text_from_part(const char *part) {
     252           51 :     char *ctype   = mime_get_header(part, "Content-Type");
     253           51 :     char *enc     = mime_get_header(part, "Content-Transfer-Encoding");
     254           51 :     char *charset = extract_charset(ctype);
     255           51 :     const char *body = body_start(part);
     256           51 :     char *result = NULL;
     257              : 
     258           51 :     if (!ctype || strncasecmp(ctype, "text/plain", 10) == 0) {
     259           40 :         if (body) {
     260           38 :             char *raw = decode_transfer(body, strlen(body), enc);
     261           38 :             if (raw) {
     262           38 :                 result = charset_to_utf8(raw, charset);
     263           38 :                 free(raw);
     264              :             }
     265              :         }
     266           11 :     } else if (strncasecmp(ctype, "multipart/", 10) == 0) {
     267            9 :         result = text_from_multipart(part, ctype);
     268            2 :     } else if (strncasecmp(ctype, "text/html", 9) == 0) {
     269            1 :         if (body) {
     270            1 :             char *raw = decode_transfer(body, strlen(body), enc);
     271            1 :             if (raw) {
     272            1 :                 char *utf8 = charset_to_utf8(raw, charset);
     273            1 :                 free(raw);
     274            1 :                 if (utf8) {
     275            1 :                     result = html_render(utf8, 0, 0);
     276            1 :                     free(utf8);
     277              :                 }
     278              :             }
     279              :         }
     280              :     }
     281              : 
     282           51 :     free(ctype);
     283           51 :     free(enc);
     284           51 :     free(charset);
     285           51 :     return result;
     286              : }
     287              : 
     288              : /* ── RFC 2047 encoded-word decoder ──────────────────────────────────── */
     289              : 
     290              : /**
     291              :  * Decode the text portion of one encoded word and convert to UTF-8.
     292              :  *
     293              :  * enc == 'Q'/'q': quoted-printable variant (underscore = space).
     294              :  * enc == 'B'/'b': base64.
     295              :  * charset: the declared charset of the encoded bytes.
     296              :  *
     297              :  * Returns a malloc'd NUL-terminated UTF-8 string, or NULL on failure.
     298              :  */
     299          200 : static char *decode_encoded_word(const char *charset, char enc,
     300              :                                   const char *text, size_t text_len) {
     301          200 :     char *raw = NULL;
     302              : 
     303          200 :     if (enc == 'Q' || enc == 'q') {
     304          193 :         raw = malloc(text_len + 1);
     305          193 :         if (!raw) return NULL;
     306          193 :         size_t i = 0, j = 0;
     307         1936 :         while (i < text_len) {
     308         1743 :             if (text[i] == '_') {
     309          191 :                 raw[j++] = ' ';
     310          191 :                 i++;
     311         1552 :             } else if (text[i] == '=' && i + 2 < text_len &&
     312           11 :                        isxdigit((unsigned char)text[i + 1]) &&
     313           11 :                        isxdigit((unsigned char)text[i + 2])) {
     314           11 :                 char hex[3] = { text[i + 1], text[i + 2], '\0' };
     315           11 :                 raw[j++] = (char)strtol(hex, NULL, 16);
     316           11 :                 i += 3;
     317              :             } else {
     318         1541 :                 raw[j++] = text[i++];
     319              :             }
     320              :         }
     321          193 :         raw[j] = '\0';
     322              :     } else {
     323              :         /* B encoding */
     324            7 :         raw = decode_base64(text, text_len, NULL);
     325            7 :         if (!raw) return NULL;
     326              :     }
     327              : 
     328              :     /* If the declared charset is already UTF-8, return as-is. */
     329          200 :     if (strcasecmp(charset, "utf-8") == 0 || strcasecmp(charset, "utf8") == 0)
     330          198 :         return raw;
     331              : 
     332              :     /* Otherwise convert via iconv. */
     333            2 :     iconv_t cd = iconv_open("UTF-8", charset);
     334            2 :     if (cd == (iconv_t)-1)
     335            1 :         return raw;   /* unknown charset — return raw bytes */
     336              : 
     337            1 :     size_t raw_len   = strlen(raw);
     338            1 :     size_t out_size  = raw_len * 4 + 1;
     339            1 :     char  *utf8      = malloc(out_size);
     340            1 :     if (!utf8) { iconv_close(cd); return raw; }
     341              : 
     342            1 :     char   *inp      = raw;
     343            1 :     char   *outp     = utf8;
     344            1 :     size_t  inbytes  = raw_len;
     345            1 :     size_t  outbytes = out_size - 1;
     346            1 :     size_t  r        = iconv(cd, &inp, &inbytes, &outp, &outbytes);
     347            1 :     iconv_close(cd);
     348              : 
     349            1 :     if (r == (size_t)-1) { free(utf8); return raw; }
     350              : 
     351            1 :     *outp = '\0';
     352            1 :     free(raw);
     353            1 :     return utf8;
     354              : }
     355              : 
     356              : /**
     357              :  * Try to parse and decode one encoded word starting exactly at *pp.
     358              :  * Format: =?charset?Q|B?encoded_text?=
     359              :  *
     360              :  * On success, *pp is advanced past the closing "?=" and the decoded
     361              :  * UTF-8 string (malloc'd) is returned.
     362              :  * On failure, *pp is unchanged and NULL is returned.
     363              :  */
     364          200 : static char *try_decode_encoded_word(const char **pp) {
     365          200 :     const char *p = *pp;
     366          200 :     if (p[0] != '=' || p[1] != '?') return NULL;
     367          200 :     p += 2;
     368              : 
     369              :     /* charset */
     370          200 :     const char *cs = p;
     371         1217 :     while (*p && *p != '?') p++;
     372          200 :     if (!*p) return NULL;
     373          200 :     size_t cs_len = (size_t)(p - cs);
     374          200 :     if (cs_len == 0 || cs_len >= 64) return NULL;
     375              :     char charset[64];
     376          200 :     memcpy(charset, cs, cs_len);
     377          200 :     charset[cs_len] = '\0';
     378          200 :     p++;   /* skip ? */
     379              : 
     380              :     /* encoding indicator */
     381          200 :     char enc = *p;
     382          200 :     if (enc != 'Q' && enc != 'q' && enc != 'B' && enc != 'b') return NULL;
     383          200 :     p++;
     384          200 :     if (*p != '?') return NULL;
     385          200 :     p++;   /* skip ? */
     386              : 
     387              :     /* encoded text — ends at next ?= */
     388          200 :     const char *txt = p;
     389         4373 :     while (*p && !(*p == '?' && p[1] == '=')) p++;
     390          200 :     if (!*p) return NULL;
     391          200 :     size_t txt_len = (size_t)(p - txt);
     392          200 :     p += 2;   /* skip ?= */
     393              : 
     394          200 :     char *decoded = decode_encoded_word(charset, enc, txt, txt_len);
     395          200 :     if (!decoded) return NULL;
     396          200 :     *pp = p;
     397          200 :     return decoded;
     398              : }
     399              : 
     400         2807 : char *mime_decode_words(const char *value) {
     401         2807 :     if (!value) return NULL;
     402              : 
     403         2806 :     size_t vlen = strlen(value);
     404              :     /* Upper bound: each raw byte can expand to at most 4 UTF-8 bytes. */
     405         2806 :     size_t cap = vlen * 4 + 1;
     406         2806 :     char  *out = malloc(cap);
     407         2806 :     if (!out) return NULL;
     408              : 
     409         2806 :     size_t      n             = 0;
     410         2806 :     const char *p             = value;
     411         2806 :     int         prev_encoded  = 0;
     412              : 
     413        70484 :     while (*p) {
     414              :         /* RFC 2047 §6.2: linear whitespace between adjacent encoded words
     415              :          * must be ignored. */
     416        67678 :         if (prev_encoded && (*p == ' ' || *p == '\t')) {
     417          189 :             const char *ws = p;
     418          378 :             while (*ws == ' ' || *ws == '\t') ws++;
     419          189 :             if (ws[0] == '=' && ws[1] == '?') {
     420            1 :                 p = ws;
     421            1 :                 continue;
     422              :             }
     423              :         }
     424              : 
     425        67677 :         if (p[0] == '=' && p[1] == '?') {
     426          200 :             char *decoded = try_decode_encoded_word(&p);
     427          200 :             if (decoded) {
     428          200 :                 size_t dlen = strlen(decoded);
     429          200 :                 if (n + dlen >= cap) {
     430            0 :                     cap = n + dlen + vlen + 1;
     431            0 :                     char *tmp = realloc(out, cap);
     432            0 :                     if (!tmp) { free(decoded); break; }
     433            0 :                     out = tmp;
     434              :                 }
     435          200 :                 memcpy(out + n, decoded, dlen);
     436          200 :                 n += dlen;
     437          200 :                 free(decoded);
     438          200 :                 prev_encoded = 1;
     439          200 :                 continue;
     440              :             }
     441              :         }
     442              : 
     443        67477 :         prev_encoded = 0;
     444        67477 :         out[n++] = *p++;
     445              :     }
     446              : 
     447         2806 :     out[n] = '\0';
     448         2806 :     return out;
     449              : }
     450              : 
     451              : /* ── Date formatting ────────────────────────────────────────────────── */
     452              : 
     453         1492 : char *mime_format_date(const char *date) {
     454         1492 :     if (!date || !*date) return NULL;
     455              : 
     456              :     static const char * const fmts[] = {
     457              :         "%a, %d %b %Y %T %z",  /* "Tue, 10 Mar 2026 15:07:40 +0000"     */
     458              :         "%d %b %Y %T %z",       /* "10 Mar 2026 15:07:40 +0000"          */
     459              :         "%a, %d %b %Y %T %Z",  /* "Tue, 24 Mar 2026 16:38:21 GMT"       */
     460              :         "%d %b %Y %T %Z",       /* "24 Mar 2026 16:38:21 UTC"            */
     461              :         NULL
     462              :     };
     463              : 
     464              :     struct tm tm;
     465         1491 :     int parsed = 0;
     466         1498 :     for (int i = 0; fmts[i]; i++) {
     467         1497 :         memset(&tm, 0, sizeof(tm));
     468         1497 :         if (strptime(date, fmts[i], &tm)) { parsed = 1; break; }
     469              :     }
     470         1491 :     if (!parsed) return strdup(date);
     471              : 
     472              :     /* Save tm_gmtoff before calling timegm(): timegm() normalises the struct
     473              :      * and resets tm_gmtoff to 0.  timegm() treats the fields as UTC, so
     474              :      * subtracting the original offset converts to true UTC. */
     475         1490 :     long gmtoff = tm.tm_gmtoff;
     476         1490 :     time_t utc = timegm(&tm) - gmtoff;
     477         1490 :     if (utc == (time_t)-1) return strdup(date);
     478              : 
     479              :     struct tm local;
     480         1490 :     localtime_r(&utc, &local);
     481              : 
     482         1490 :     char *buf = malloc(17);   /* "YYYY-MM-DD HH:MM\0" */
     483         1490 :     if (!buf) return NULL;
     484         1490 :     if (strftime(buf, 17, "%Y-%m-%d %H:%M", &local) == 0) {
     485            0 :         free(buf);
     486            0 :         return strdup(date);
     487              :     }
     488         1490 :     return buf;
     489              : }
     490              : 
     491              : /* ── HTML part extractor ────────────────────────────────────────────── */
     492              : 
     493              : static char *html_from_part(const char *part);
     494              : 
     495           41 : static char *html_from_multipart(const char *msg, const char *ctype) {
     496           41 :     const char *b = strcasestr(ctype, "boundary=");
     497           41 :     if (!b) return NULL;
     498           41 :     b += strlen("boundary=");
     499              : 
     500           41 :     char boundary[512] = {0};
     501           41 :     if (*b == '"') {
     502           40 :         b++;
     503           40 :         const char *end = strchr(b, '"');
     504           40 :         if (!end) return NULL;
     505           40 :         snprintf(boundary, sizeof(boundary), "%.*s", (int)(end - b), b);
     506              :     } else {
     507            1 :         size_t i = 0;
     508            9 :         while (*b && *b != ';' && *b != ' ' && *b != '\r' && *b != '\n' &&
     509              :                i < sizeof(boundary) - 1)
     510            8 :             boundary[i++] = *b++;
     511            1 :         boundary[i] = '\0';
     512              :     }
     513           41 :     if (!boundary[0]) return NULL;
     514              : 
     515              :     char delim[520];
     516           41 :     snprintf(delim, sizeof(delim), "--%s", boundary);
     517           41 :     size_t dlen = strlen(delim);
     518              : 
     519           41 :     const char *p = strstr(msg, delim);
     520           87 :     while (p) {
     521           87 :         if (p[dlen] == '-' && p[dlen+1] == '-') break; /* end boundary */
     522           80 :         p = strchr(p + dlen, '\n');
     523           80 :         if (!p) break;
     524           80 :         p++;
     525           80 :         const char *next = strstr(p, delim);
     526           80 :         if (!next) break;
     527           80 :         size_t partlen = (size_t)(next - p);
     528           80 :         char *part = strndup(p, partlen);
     529           80 :         if (!part) break;
     530           80 :         char *result = html_from_part(part);
     531           80 :         free(part);
     532           80 :         if (result) return result;
     533           46 :         p = next; /* keep p pointing at delimiter for next iteration */
     534              :     }
     535            7 :     return NULL;
     536              : }
     537              : 
     538          144 : static char *html_from_part(const char *part) {
     539          144 :     char *ctype   = mime_get_header(part, "Content-Type");
     540          144 :     char *enc     = mime_get_header(part, "Content-Transfer-Encoding");
     541          144 :     char *charset = extract_charset(ctype);
     542          144 :     const char *body = body_start(part);
     543          144 :     char *result = NULL;
     544              : 
     545          144 :     if (ctype && strncasecmp(ctype, "text/html", 9) == 0) {
     546           36 :         if (body) {
     547           36 :             char *raw = decode_transfer(body, strlen(body), enc);
     548           36 :             if (raw) {
     549           36 :                 result = charset_to_utf8(raw, charset);
     550           36 :                 free(raw);
     551              :             }
     552              :         }
     553          108 :     } else if (ctype && strncasecmp(ctype, "multipart/", 10) == 0) {
     554           41 :         result = html_from_multipart(part, ctype);
     555              :     }
     556              : 
     557          144 :     free(ctype); free(enc); free(charset);
     558          144 :     return result;
     559              : }
     560              : 
     561              : /* ── Public API ─────────────────────────────────────────────────────── */
     562              : 
     563           42 : char *mime_get_text_body(const char *msg) {
     564           42 :     if (!msg) return NULL;
     565           41 :     return text_from_part(msg);
     566              : }
     567              : 
     568           65 : char *mime_get_html_part(const char *msg) {
     569           65 :     if (!msg) return NULL;
     570           64 :     return html_from_part(msg);
     571              : }
     572              : 
     573              : /* ── Attachment extraction ──────────────────────────────────────────── */
     574              : 
     575              : /* Extract a MIME header parameter value, e.g. filename="foo.pdf" or name=bar.
     576              :  * Handles quoted and unquoted values.  Returns malloc'd string or NULL. */
     577          131 : static char *extract_param(const char *header, const char *param) {
     578          131 :     if (!header || !param) return NULL;
     579              :     char search[64];
     580          131 :     snprintf(search, sizeof(search), "%s=", param);
     581          131 :     const char *p = strcasestr(header, search);
     582          131 :     if (!p) return NULL;
     583           75 :     p += strlen(search);
     584           75 :     if (*p == '"') {
     585           71 :         p++;
     586           71 :         const char *end = strchr(p, '"');
     587           71 :         if (!end) return NULL;
     588           71 :         return strndup(p, (size_t)(end - p));
     589              :     }
     590              :     /* unquoted value: ends at ';', whitespace, or end-of-string */
     591            4 :     const char *end = p;
     592           44 :     while (*end && *end != ';' && *end != ' ' && *end != '\t' &&
     593           84 :            *end != '\r' && *end != '\n')
     594           40 :         end++;
     595            4 :     if (end == p) return NULL;
     596            4 :     return strndup(p, (size_t)(end - p));
     597              : }
     598              : 
     599              : /* Sanitise a filename: strip directory separators and leading dots. */
     600           72 : static char *sanitise_filename(const char *name) {
     601           72 :     if (!name || !*name) return NULL;
     602              :     /* take only the basename portion */
     603           72 :     const char *base = name;
     604          740 :     for (const char *p = name; *p; p++)
     605          668 :         if (*p == '/' || *p == '\\') base = p + 1;
     606           72 :     if (!*base) return NULL;
     607           72 :     char *s = strdup(base);
     608           72 :     if (!s) return NULL;
     609              :     /* strip leading dots (hidden files / directory traversal) */
     610           72 :     char *p = s;
     611           72 :     while (*p == '.') p++;
     612           72 :     if (!*p) { free(s); return strdup("attachment"); }
     613           72 :     if (p != s) memmove(s, p, strlen(p) + 1);
     614           72 :     return s;
     615              : }
     616              : 
     617              : /* Dynamic array for building the attachment list */
     618              : typedef struct { MimeAttachment *data; int count; int cap; } AttachList;
     619              : 
     620           73 : static int alist_push(AttachList *al, MimeAttachment att) {
     621           73 :     if (al->count >= al->cap) {
     622           45 :         int newcap = al->cap ? al->cap * 2 : 4;
     623           45 :         MimeAttachment *tmp = realloc(al->data,
     624           45 :                                       (size_t)newcap * sizeof(MimeAttachment));
     625           45 :         if (!tmp) return -1;
     626           45 :         al->data = tmp;
     627           45 :         al->cap  = newcap;
     628              :     }
     629           73 :     al->data[al->count++] = att;
     630           73 :     return 0;
     631              : }
     632              : 
     633              : /* Forward declaration */
     634              : static void collect_parts(const char *msg, AttachList *al, int *unnamed_idx);
     635              : 
     636              : /* Walk a multipart body and collect attachments from each sub-part. */
     637           48 : static void collect_multipart_attachments(const char *msg, const char *ctype,
     638              :                                           AttachList *al, int *idx) {
     639           48 :     const char *b = strcasestr(ctype, "boundary=");
     640           48 :     if (!b) return;
     641           48 :     b += strlen("boundary=");
     642              : 
     643           48 :     char boundary[512] = {0};
     644           48 :     if (*b == '"') {
     645           47 :         b++;
     646           47 :         const char *end = strchr(b, '"');
     647           47 :         if (!end) return;
     648           47 :         snprintf(boundary, sizeof(boundary), "%.*s", (int)(end - b), b);
     649              :     } else {
     650            1 :         size_t i = 0;
     651            5 :         while (*b && *b != ';' && *b != ' ' && *b != '\r' && *b != '\n' &&
     652              :                i < sizeof(boundary) - 1)
     653            4 :             boundary[i++] = *b++;
     654            1 :         boundary[i] = '\0';
     655              :     }
     656           48 :     if (!boundary[0]) return;
     657              : 
     658              :     char delim[520];
     659           48 :     snprintf(delim, sizeof(delim), "--%s", boundary);
     660           48 :     size_t dlen = strlen(delim);
     661              : 
     662           48 :     const char *p = strstr(msg, delim);
     663          142 :     while (p) {
     664          142 :         p = strchr(p + dlen, '\n');
     665          142 :         if (!p) break;
     666          142 :         p++;
     667              : 
     668          142 :         const char *next = strstr(p, delim);
     669          142 :         if (!next) break;
     670              : 
     671          142 :         size_t partlen = (size_t)(next - p);
     672          142 :         char *part = strndup(p, partlen);
     673          142 :         if (!part) break;
     674          142 :         collect_parts(part, al, idx);
     675          142 :         free(part);
     676              : 
     677          142 :         p = next + dlen;
     678          142 :         if (p[0] == '-' && p[1] == '-') break;
     679           94 :         p = strchr(p, '\n');
     680           94 :         if (p) p++;
     681              :     }
     682              : }
     683              : 
     684              : /* Examine one MIME part (headers + body) and add to al if it is an attachment. */
     685          206 : static void collect_parts(const char *msg, AttachList *al, int *unnamed_idx) {
     686          206 :     char *ctype = mime_get_header(msg, "Content-Type");
     687          206 :     char *disp  = mime_get_header(msg, "Content-Disposition");
     688          206 :     char *enc   = mime_get_header(msg, "Content-Transfer-Encoding");
     689              : 
     690              :     /* Recurse into multipart containers */
     691          206 :     if (ctype && strncasecmp(ctype, "multipart/", 10) == 0) {
     692           48 :         collect_multipart_attachments(msg, ctype, al, unnamed_idx);
     693           48 :         free(ctype); free(disp); free(enc);
     694          133 :         return;
     695              :     }
     696              : 
     697              :     /* Determine filename from Content-Disposition or Content-Type name= */
     698          158 :     char *filename = NULL;
     699          158 :     int explicit_attach = 0;
     700          158 :     if (disp) {
     701           76 :         if (strncasecmp(disp, "attachment", 10) == 0) explicit_attach = 1;
     702           76 :         filename = extract_param(disp, "filename");
     703              :         /* RFC 5987: filename*=charset''encoded — simplified: strip trailing * */
     704           76 :         if (!filename) filename = extract_param(disp, "filename*");
     705              :     }
     706          158 :     if (!filename && ctype)
     707           52 :         filename = extract_param(ctype, "name");
     708              : 
     709              :     /* Skip non-attachment text and multipart parts unless explicitly marked */
     710          158 :     if (!explicit_attach) {
     711           84 :         if (!filename) {
     712           82 :             free(ctype); free(disp); free(enc);
     713           82 :             return;  /* no filename → body part, skip */
     714              :         }
     715              :         /* text/plain and text/html without attachment disposition are body parts */
     716            2 :         if (ctype && (strncasecmp(ctype, "text/plain", 10) == 0 ||
     717            1 :                       strncasecmp(ctype, "text/html",   9) == 0)) {
     718            2 :             free(ctype); free(disp); free(enc); free(filename);
     719            2 :             return;
     720              :         }
     721              :     }
     722              : 
     723           74 :     const char *body = body_start(msg);
     724           74 :     if (!body) {
     725            1 :         free(ctype); free(disp); free(enc); free(filename);
     726            1 :         return;
     727              :     }
     728              : 
     729              :     /* Decode body content; for base64 capture exact decoded byte count. */
     730           73 :     size_t data_size = 0;
     731              :     unsigned char *data;
     732           73 :     if (enc && strcasecmp(enc, "base64") == 0)
     733           67 :         data = (unsigned char *)decode_base64(body, strlen(body), &data_size);
     734              :     else {
     735            6 :         data = (unsigned char *)decode_transfer(body, strlen(body), enc);
     736            6 :         data_size = data ? strlen((char *)data) : 0;
     737              :     }
     738              : 
     739              :     /* Sanitise / generate filename */
     740           73 :     char *safe_name = NULL;
     741           73 :     if (filename) {
     742           72 :         char *decoded = mime_decode_words(filename);
     743           72 :         free(filename);
     744           72 :         safe_name = sanitise_filename(decoded ? decoded : "");
     745           72 :         free(decoded);
     746              :     }
     747           73 :     if (!safe_name) {
     748              :         char gen[32];
     749            1 :         snprintf(gen, sizeof(gen), "attachment-%d.bin", ++(*unnamed_idx));
     750            1 :         safe_name = strdup(gen);
     751              :     }
     752              : 
     753           73 :     MimeAttachment att = {0};
     754           73 :     att.filename     = safe_name;
     755           73 :     att.content_type = ctype ? strdup(ctype) : strdup("application/octet-stream");
     756           73 :     att.data         = data;
     757           73 :     att.size         = data_size;
     758              : 
     759           73 :     if (alist_push(al, att) < 0) {
     760            0 :         free(att.filename); free(att.content_type); free(att.data);
     761              :     }
     762              : 
     763           73 :     free(ctype); free(disp); free(enc);
     764              : }
     765              : 
     766           65 : MimeAttachment *mime_list_attachments(const char *msg, int *count_out) {
     767           65 :     if (!msg || !count_out) { if (count_out) *count_out = 0; return NULL; }
     768           64 :     AttachList al = {NULL, 0, 0};
     769           64 :     int idx = 0;
     770           64 :     collect_parts(msg, &al, &idx);
     771           64 :     *count_out = al.count;
     772           64 :     if (al.count == 0) { free(al.data); return NULL; }
     773           45 :     return al.data;
     774              : }
     775              : 
     776           52 : void mime_free_attachments(MimeAttachment *list, int count) {
     777           52 :     if (!list) return;
     778           82 :     for (int i = 0; i < count; i++) {
     779           49 :         free(list[i].filename);
     780           49 :         free(list[i].content_type);
     781           49 :         free(list[i].data);
     782              :     }
     783           33 :     free(list);
     784              : }
     785              : 
     786           12 : int mime_save_attachment(const MimeAttachment *att, const char *dest_path) {
     787           12 :     if (!att || !dest_path || !att->data) return -1;
     788           22 :     RAII_FILE FILE *f = fopen(dest_path, "wb");
     789           11 :     if (!f) return -1;
     790              :     /* Write the full decoded buffer; for base64 the NUL terminator is not
     791              :      * part of the content — use att->size if accurate, else strlen fallback. */
     792           11 :     size_t n = att->size > 0 ? att->size : strlen((char *)att->data);
     793           11 :     size_t written = fwrite(att->data, 1, n, f);
     794           11 :     return (written != n) ? -1 : 0;
     795              : }
     796              : 
     797            4 : char *mime_extract_imap_literal(const char *response) {
     798            4 :     if (!response) return NULL;
     799            3 :     const char *brace = strchr(response, '{');
     800            3 :     if (!brace) return NULL;
     801              : 
     802            2 :     char *end = NULL;
     803            2 :     long size = strtol(brace + 1, &end, 10);
     804            2 :     if (!end || *end != '}' || size <= 0) return NULL;
     805              : 
     806            2 :     const char *content = end + 1;
     807            2 :     if (*content == '\r') content++;
     808            2 :     if (*content == '\n') content++;
     809              : 
     810              :     // Safety check
     811            2 :     size_t avail = strlen(content);
     812            2 :     if (avail < (size_t)size) {
     813            1 :         return strndup(content, avail);
     814              :     }
     815              : 
     816            1 :     return strndup(content, (size_t)size);
     817              : }
        

Generated by: LCOV version 2.0-1