LCOV - code coverage report
Current view: top level - libemail/src/core - mime_util.c (source / functions) Coverage Total Hit
Test: coverage-functional.info Lines: 73.4 % 519 381
Test Date: 2026-05-07 15:53:08 Functions: 92.6 % 27 25

            Line data    Source code
       1              : #include "mime_util.h"
       2              : #include "html_render.h"
       3              : #include "raii.h"
       4              : #include <stdio.h>
       5              : #include <stdlib.h>
       6              : #include <string.h>
       7              : #include <ctype.h>
       8              : #include <time.h>
       9              : #include <iconv.h>
      10              : #include <errno.h>
      11              : 
      12              : /* ── Header extraction ──────────────────────────────────────────────── */
      13              : 
      14         6090 : char *mime_get_header(const char *msg, const char *name) {
      15         6090 :     if (!msg || !name) return NULL;
      16         6090 :     size_t nlen = strlen(name);
      17         6090 :     const char *p = msg;
      18              : 
      19        23084 :     while (p && *p) {
      20              :         /* Stop at the blank line separating headers from body. */
      21        16994 :         if (*p == '\r' || *p == '\n')
      22              :             break;
      23              : 
      24        15766 :         if (strncasecmp(p, name, nlen) == 0 && p[nlen] == ':') {
      25         4862 :             const char *val = p + nlen + 1;
      26         9724 :             while (*val == ' ' || *val == '\t') val++;
      27              : 
      28         4862 :             size_t cap = 512, n = 0;
      29         4862 :             char *result = malloc(cap);
      30         4862 :             if (!result) return NULL;
      31              : 
      32              :             /* Collect value, unfolding continuation lines */
      33       140522 :             while (*val) {
      34       140522 :                 if (*val == '\r' || *val == '\n') {
      35         5048 :                     const char *next = val;
      36         5048 :                     if (*next == '\r') next++;
      37         5048 :                     if (*next == '\n') next++;
      38         5048 :                     if (*next == ' ' || *next == '\t') {
      39              :                         /* Continuation line: skip CRLF and the leading whitespace */
      40          186 :                         val = next;
      41          372 :                         while (*val == ' ' || *val == '\t') val++;
      42              :                         /* Add a single space to separate folded content if needed */
      43          186 :                         if (n > 0 && result[n-1] != ' ') {
      44          186 :                             if (n + 1 >= cap) {
      45            0 :                                 cap *= 2;
      46            0 :                                 char *tmp = realloc(result, cap);
      47            0 :                                 if (!tmp) { free(result); return NULL; }
      48            0 :                                 result = tmp;
      49              :                             }
      50          186 :                             result[n++] = ' ';
      51              :                         }
      52          186 :                         continue;
      53              :                     } else {
      54              :                         /* Not a continuation line: we are done with this header */
      55              :                         break;
      56              :                     }
      57              :                 }
      58              : 
      59       135474 :                 if (n + 1 >= cap) {
      60            0 :                     cap *= 2;
      61            0 :                     char *tmp = realloc(result, cap);
      62            0 :                     if (!tmp) { free(result); return NULL; }
      63            0 :                     result = tmp;
      64              :                 }
      65       135474 :                 result[n++] = *val++;
      66              :             }
      67         4862 :             result[n] = '\0';
      68         4862 :             return result;
      69              :         }
      70              : 
      71              :         /* Advance to next line */
      72        10904 :         p = strchr(p, '\n');
      73        10904 :         if (p) p++;
      74              :     }
      75         1228 :     return NULL;
      76              : }
      77              : 
      78              : /* ── Base64 decoder ─────────────────────────────────────────────────── */
      79              : 
      80         1128 : static int b64val(unsigned char c) {
      81         1128 :     if (c >= 'A' && c <= 'Z') return c - 'A';
      82          620 :     if (c >= 'a' && c <= 'z') return c - 'a' + 26;
      83          260 :     if (c >= '0' && c <= '9') return c - '0' + 52;
      84          155 :     if (c == '+') return 62;
      85          155 :     if (c == '/') return 63;
      86          155 :     return -1;
      87              : }
      88              : 
      89           62 : static char *decode_base64(const char *in, size_t inlen, size_t *out_len) {
      90           62 :     size_t max = (inlen / 4 + 1) * 3 + 4;
      91           62 :     char *out = malloc(max);
      92           62 :     if (!out) return NULL;
      93           62 :     size_t n = 0;
      94           62 :     int buf = 0, bits = 0;
      95         1190 :     for (size_t i = 0; i < inlen; i++) {
      96         1128 :         int v = b64val((unsigned char)in[i]);
      97         1128 :         if (v < 0) continue;
      98          973 :         buf = (buf << 6) | v;
      99          973 :         bits += 6;
     100          973 :         if (bits >= 8) {
     101          722 :             bits -= 8;
     102          722 :             out[n++] = (char)((buf >> bits) & 0xFF);
     103              :         }
     104              :     }
     105           62 :     out[n] = '\0';
     106           62 :     if (out_len) *out_len = n;
     107           62 :     return out;
     108              : }
     109              : 
     110              : /* ── Quoted-Printable decoder ───────────────────────────────────────── */
     111              : 
     112            0 : static char *decode_qp(const char *in, size_t inlen) {
     113            0 :     char *out = malloc(inlen + 1);
     114            0 :     if (!out) return NULL;
     115            0 :     size_t n = 0, i = 0;
     116            0 :     while (i < inlen) {
     117            0 :         if (in[i] == '=' && i + 1 < inlen &&
     118            0 :             (in[i + 1] == '\r' || in[i + 1] == '\n')) {
     119              :             /* Soft line break — skip */
     120            0 :             i++;
     121            0 :             if (i < inlen && in[i] == '\r') i++;
     122            0 :             if (i < inlen && in[i] == '\n') i++;
     123            0 :         } else if (in[i] == '=' && i + 2 < inlen &&
     124            0 :                    isxdigit((unsigned char)in[i + 1]) &&
     125            0 :                    isxdigit((unsigned char)in[i + 2])) {
     126            0 :             char hex[3] = { in[i + 1], in[i + 2], '\0' };
     127            0 :             out[n++] = (char)strtol(hex, NULL, 16);
     128            0 :             i += 3;
     129              :         } else {
     130            0 :             out[n++] = in[i++];
     131              :         }
     132              :     }
     133            0 :     out[n] = '\0';
     134            0 :     return out;
     135              : }
     136              : 
     137              : /* ── Body helpers ───────────────────────────────────────────────────── */
     138              : 
     139          204 : static const char *body_start(const char *msg) {
     140          204 :     const char *p = strstr(msg, "\r\n\r\n");
     141          204 :     if (p) return p + 4;
     142            0 :     p = strstr(msg, "\n\n");
     143            0 :     if (p) return p + 2;
     144            0 :     return NULL;
     145              : }
     146              : 
     147           46 : static char *decode_transfer(const char *body, size_t len, const char *enc) {
     148           46 :     if (enc && strcasecmp(enc, "base64") == 0)
     149            0 :         return decode_base64(body, len, NULL);
     150           46 :     if (enc && strcasecmp(enc, "quoted-printable") == 0)
     151            0 :         return decode_qp(body, len);
     152           46 :     return strndup(body, len);
     153              : }
     154              : 
     155              : /* Extract the charset parameter value from a Content-Type header value.
     156              :  * E.g. "text/plain; charset=iso-8859-2" → "iso-8859-2".
     157              :  * Returns a malloc'd string or NULL if not found. */
     158          142 : static char *extract_charset(const char *ctype) {
     159          142 :     if (!ctype) return NULL;
     160          142 :     const char *p = strcasestr(ctype, "charset=");
     161          142 :     if (!p) return NULL;
     162           92 :     p += 8;
     163           92 :     if (*p == '"') p++;          /* skip optional opening quote */
     164           92 :     const char *start = p;
     165          552 :     while (*p && *p != ';' && *p != ' ' && *p != '\t' && *p != '"' && *p != '\r' && *p != '\n')
     166          460 :         p++;
     167           92 :     if (p == start) return NULL;
     168           92 :     return strndup(start, (size_t)(p - start));
     169              : }
     170              : 
     171              : /* Convert s from from_charset to UTF-8 via iconv.
     172              :  * Returns a malloc'd UTF-8 string; on failure returns strdup(s). */
     173           46 : static char *charset_to_utf8(const char *s, const char *from_charset) {
     174           46 :     if (!s) return NULL;
     175           46 :     if (!from_charset ||
     176           46 :         strcasecmp(from_charset, "utf-8")  == 0 ||
     177            0 :         strcasecmp(from_charset, "utf8")   == 0 ||
     178            0 :         strcasecmp(from_charset, "us-ascii") == 0)
     179           46 :         return strdup(s);
     180              : 
     181            0 :     iconv_t cd = iconv_open("UTF-8", from_charset);
     182            0 :     if (cd == (iconv_t)-1) return strdup(s);
     183              : 
     184            0 :     size_t in_len   = strlen(s);
     185            0 :     size_t out_size = in_len * 4 + 1;
     186            0 :     char  *out      = malloc(out_size);
     187            0 :     if (!out) { iconv_close(cd); return strdup(s); }
     188              : 
     189            0 :     char  *inp      = (char *)s;
     190            0 :     char  *outp     = out;
     191            0 :     size_t inbytes  = in_len;
     192            0 :     size_t outbytes = out_size - 1;
     193            0 :     size_t r        = iconv(cd, &inp, &inbytes, &outp, &outbytes);
     194            0 :     iconv_close(cd);
     195              : 
     196            0 :     if (r == (size_t)-1) { free(out); return strdup(s); }
     197            0 :     *outp = '\0';
     198            0 :     return out;
     199              : }
     200              : 
     201              : static char *text_from_part(const char *part);
     202              : 
     203            6 : static char *text_from_multipart(const char *msg, const char *ctype) {
     204            6 :     const char *b = strcasestr(ctype, "boundary=");
     205            6 :     if (!b) return NULL;
     206            6 :     b += strlen("boundary=");
     207              : 
     208            6 :     char boundary[512] = {0};
     209            6 :     if (*b == '"') {
     210            6 :         b++;
     211            6 :         const char *end = strchr(b, '"');
     212            6 :         if (!end) return NULL;
     213            6 :         snprintf(boundary, sizeof(boundary), "%.*s", (int)(end - b), b);
     214              :     } else {
     215            0 :         size_t i = 0;
     216            0 :         while (*b && *b != ';' && *b != ' ' && *b != '\r' && *b != '\n' &&
     217              :                i < sizeof(boundary) - 1)
     218            0 :             boundary[i++] = *b++;
     219            0 :         boundary[i] = '\0';
     220              :     }
     221            6 :     if (!boundary[0]) return NULL;
     222              : 
     223              :     char delim[520];
     224            6 :     snprintf(delim, sizeof(delim), "--%s", boundary);
     225            6 :     size_t dlen = strlen(delim);
     226              : 
     227            6 :     const char *p = strstr(msg, delim);
     228            6 :     while (p) {
     229            6 :         p = strchr(p + dlen, '\n');
     230            6 :         if (!p) break;
     231            6 :         p++;
     232              : 
     233            6 :         const char *next = strstr(p, delim);
     234            6 :         if (!next) break;
     235              : 
     236            6 :         size_t partlen = (size_t)(next - p);
     237            6 :         char *part = strndup(p, partlen);
     238            6 :         if (!part) break;
     239            6 :         char *result = text_from_part(part);
     240            6 :         free(part);
     241            6 :         if (result) return result;
     242              : 
     243            0 :         p = next + dlen;
     244            0 :         if (p[0] == '-' && p[1] == '-') break;
     245            0 :         p = strchr(p, '\n');
     246            0 :         if (p) p++;
     247              :     }
     248            0 :     return NULL;
     249              : }
     250              : 
     251           20 : static char *text_from_part(const char *part) {
     252           20 :     char *ctype   = mime_get_header(part, "Content-Type");
     253           20 :     char *enc     = mime_get_header(part, "Content-Transfer-Encoding");
     254           20 :     char *charset = extract_charset(ctype);
     255           20 :     const char *body = body_start(part);
     256           20 :     char *result = NULL;
     257              : 
     258           20 :     if (!ctype || strncasecmp(ctype, "text/plain", 10) == 0) {
     259           14 :         if (body) {
     260           14 :             char *raw = decode_transfer(body, strlen(body), enc);
     261           14 :             if (raw) {
     262           14 :                 result = charset_to_utf8(raw, charset);
     263           14 :                 free(raw);
     264              :             }
     265              :         }
     266            6 :     } else if (strncasecmp(ctype, "multipart/", 10) == 0) {
     267            6 :         result = text_from_multipart(part, ctype);
     268            0 :     } else if (strncasecmp(ctype, "text/html", 9) == 0) {
     269            0 :         if (body) {
     270            0 :             char *raw = decode_transfer(body, strlen(body), enc);
     271            0 :             if (raw) {
     272            0 :                 char *utf8 = charset_to_utf8(raw, charset);
     273            0 :                 free(raw);
     274            0 :                 if (utf8) {
     275            0 :                     result = html_render(utf8, 0, 0);
     276            0 :                     free(utf8);
     277              :                 }
     278              :             }
     279              :         }
     280              :     }
     281              : 
     282           20 :     free(ctype);
     283           20 :     free(enc);
     284           20 :     free(charset);
     285           20 :     return result;
     286              : }
     287              : 
     288              : /* ── RFC 2047 encoded-word decoder ──────────────────────────────────── */
     289              : 
     290              : /**
     291              :  * Decode the text portion of one encoded word and convert to UTF-8.
     292              :  *
     293              :  * enc == 'Q'/'q': quoted-printable variant (underscore = space).
     294              :  * enc == 'B'/'b': base64.
     295              :  * charset: the declared charset of the encoded bytes.
     296              :  *
     297              :  * Returns a malloc'd NUL-terminated UTF-8 string, or NULL on failure.
     298              :  */
     299          187 : static char *decode_encoded_word(const char *charset, char enc,
     300              :                                   const char *text, size_t text_len) {
     301          187 :     char *raw = NULL;
     302              : 
     303          187 :     if (enc == 'Q' || enc == 'q') {
     304          187 :         raw = malloc(text_len + 1);
     305          187 :         if (!raw) return NULL;
     306          187 :         size_t i = 0, j = 0;
     307         1870 :         while (i < text_len) {
     308         1683 :             if (text[i] == '_') {
     309          187 :                 raw[j++] = ' ';
     310          187 :                 i++;
     311         1496 :             } else if (text[i] == '=' && i + 2 < text_len &&
     312            0 :                        isxdigit((unsigned char)text[i + 1]) &&
     313            0 :                        isxdigit((unsigned char)text[i + 2])) {
     314            0 :                 char hex[3] = { text[i + 1], text[i + 2], '\0' };
     315            0 :                 raw[j++] = (char)strtol(hex, NULL, 16);
     316            0 :                 i += 3;
     317              :             } else {
     318         1496 :                 raw[j++] = text[i++];
     319              :             }
     320              :         }
     321          187 :         raw[j] = '\0';
     322              :     } else {
     323              :         /* B encoding */
     324            0 :         raw = decode_base64(text, text_len, NULL);
     325            0 :         if (!raw) return NULL;
     326              :     }
     327              : 
     328              :     /* If the declared charset is already UTF-8, return as-is. */
     329          187 :     if (strcasecmp(charset, "utf-8") == 0 || strcasecmp(charset, "utf8") == 0)
     330          187 :         return raw;
     331              : 
     332              :     /* Otherwise convert via iconv. */
     333            0 :     iconv_t cd = iconv_open("UTF-8", charset);
     334            0 :     if (cd == (iconv_t)-1)
     335            0 :         return raw;   /* unknown charset — return raw bytes */
     336              : 
     337            0 :     size_t raw_len   = strlen(raw);
     338            0 :     size_t out_size  = raw_len * 4 + 1;
     339            0 :     char  *utf8      = malloc(out_size);
     340            0 :     if (!utf8) { iconv_close(cd); return raw; }
     341              : 
     342            0 :     char   *inp      = raw;
     343            0 :     char   *outp     = utf8;
     344            0 :     size_t  inbytes  = raw_len;
     345            0 :     size_t  outbytes = out_size - 1;
     346            0 :     size_t  r        = iconv(cd, &inp, &inbytes, &outp, &outbytes);
     347            0 :     iconv_close(cd);
     348              : 
     349            0 :     if (r == (size_t)-1) { free(utf8); return raw; }
     350              : 
     351            0 :     *outp = '\0';
     352            0 :     free(raw);
     353            0 :     return utf8;
     354              : }
     355              : 
     356              : /**
     357              :  * Try to parse and decode one encoded word starting exactly at *pp.
     358              :  * Format: =?charset?Q|B?encoded_text?=
     359              :  *
     360              :  * On success, *pp is advanced past the closing "?=" and the decoded
     361              :  * UTF-8 string (malloc'd) is returned.
     362              :  * On failure, *pp is unchanged and NULL is returned.
     363              :  */
     364          187 : static char *try_decode_encoded_word(const char **pp) {
     365          187 :     const char *p = *pp;
     366          187 :     if (p[0] != '=' || p[1] != '?') return NULL;
     367          187 :     p += 2;
     368              : 
     369              :     /* charset */
     370          187 :     const char *cs = p;
     371         1122 :     while (*p && *p != '?') p++;
     372          187 :     if (!*p) return NULL;
     373          187 :     size_t cs_len = (size_t)(p - cs);
     374          187 :     if (cs_len == 0 || cs_len >= 64) return NULL;
     375              :     char charset[64];
     376          187 :     memcpy(charset, cs, cs_len);
     377          187 :     charset[cs_len] = '\0';
     378          187 :     p++;   /* skip ? */
     379              : 
     380              :     /* encoding indicator */
     381          187 :     char enc = *p;
     382          187 :     if (enc != 'Q' && enc != 'q' && enc != 'B' && enc != 'b') return NULL;
     383          187 :     p++;
     384          187 :     if (*p != '?') return NULL;
     385          187 :     p++;   /* skip ? */
     386              : 
     387              :     /* encoded text — ends at next ?= */
     388          187 :     const char *txt = p;
     389         1870 :     while (*p && !(*p == '?' && p[1] == '=')) p++;
     390          187 :     if (!*p) return NULL;
     391          187 :     size_t txt_len = (size_t)(p - txt);
     392          187 :     p += 2;   /* skip ?= */
     393              : 
     394          187 :     char *decoded = decode_encoded_word(charset, enc, txt, txt_len);
     395          187 :     if (!decoded) return NULL;
     396          187 :     *pp = p;
     397          187 :     return decoded;
     398              : }
     399              : 
     400         2725 : char *mime_decode_words(const char *value) {
     401         2725 :     if (!value) return NULL;
     402              : 
     403         2725 :     size_t vlen = strlen(value);
     404              :     /* Upper bound: each raw byte can expand to at most 4 UTF-8 bytes. */
     405         2725 :     size_t cap = vlen * 4 + 1;
     406         2725 :     char  *out = malloc(cap);
     407         2725 :     if (!out) return NULL;
     408              : 
     409         2725 :     size_t      n             = 0;
     410         2725 :     const char *p             = value;
     411         2725 :     int         prev_encoded  = 0;
     412              : 
     413        69482 :     while (*p) {
     414              :         /* RFC 2047 §6.2: linear whitespace between adjacent encoded words
     415              :          * must be ignored. */
     416        66757 :         if (prev_encoded && (*p == ' ' || *p == '\t')) {
     417          187 :             const char *ws = p;
     418          374 :             while (*ws == ' ' || *ws == '\t') ws++;
     419          187 :             if (ws[0] == '=' && ws[1] == '?') {
     420            0 :                 p = ws;
     421            0 :                 continue;
     422              :             }
     423              :         }
     424              : 
     425        66757 :         if (p[0] == '=' && p[1] == '?') {
     426          187 :             char *decoded = try_decode_encoded_word(&p);
     427          187 :             if (decoded) {
     428          187 :                 size_t dlen = strlen(decoded);
     429          187 :                 if (n + dlen >= cap) {
     430            0 :                     cap = n + dlen + vlen + 1;
     431            0 :                     char *tmp = realloc(out, cap);
     432            0 :                     if (!tmp) { free(decoded); break; }
     433            0 :                     out = tmp;
     434              :                 }
     435          187 :                 memcpy(out + n, decoded, dlen);
     436          187 :                 n += dlen;
     437          187 :                 free(decoded);
     438          187 :                 prev_encoded = 1;
     439          187 :                 continue;
     440              :             }
     441              :         }
     442              : 
     443        66570 :         prev_encoded = 0;
     444        66570 :         out[n++] = *p++;
     445              :     }
     446              : 
     447         2725 :     out[n] = '\0';
     448         2725 :     return out;
     449              : }
     450              : 
     451              : /* ── Date formatting ────────────────────────────────────────────────── */
     452              : 
     453         1461 : char *mime_format_date(const char *date) {
     454         1461 :     if (!date || !*date) return NULL;
     455              : 
     456              :     static const char * const fmts[] = {
     457              :         "%a, %d %b %Y %T %z",  /* "Tue, 10 Mar 2026 15:07:40 +0000"     */
     458              :         "%d %b %Y %T %z",       /* "10 Mar 2026 15:07:40 +0000"          */
     459              :         "%a, %d %b %Y %T %Z",  /* "Tue, 24 Mar 2026 16:38:21 GMT"       */
     460              :         "%d %b %Y %T %Z",       /* "24 Mar 2026 16:38:21 UTC"            */
     461              :         NULL
     462              :     };
     463              : 
     464              :     struct tm tm;
     465         1461 :     int parsed = 0;
     466         1461 :     for (int i = 0; fmts[i]; i++) {
     467         1461 :         memset(&tm, 0, sizeof(tm));
     468         1461 :         if (strptime(date, fmts[i], &tm)) { parsed = 1; break; }
     469              :     }
     470         1461 :     if (!parsed) return strdup(date);
     471              : 
     472              :     /* Save tm_gmtoff before calling timegm(): timegm() normalises the struct
     473              :      * and resets tm_gmtoff to 0.  timegm() treats the fields as UTC, so
     474              :      * subtracting the original offset converts to true UTC. */
     475         1461 :     long gmtoff = tm.tm_gmtoff;
     476         1461 :     time_t utc = timegm(&tm) - gmtoff;
     477         1461 :     if (utc == (time_t)-1) return strdup(date);
     478              : 
     479              :     struct tm local;
     480         1461 :     localtime_r(&utc, &local);
     481              : 
     482         1461 :     char *buf = malloc(17);   /* "YYYY-MM-DD HH:MM\0" */
     483         1461 :     if (!buf) return NULL;
     484         1461 :     if (strftime(buf, 17, "%Y-%m-%d %H:%M", &local) == 0) {
     485            0 :         free(buf);
     486            0 :         return strdup(date);
     487              :     }
     488         1461 :     return buf;
     489              : }
     490              : 
     491              : /* ── HTML part extractor ────────────────────────────────────────────── */
     492              : 
     493              : static char *html_from_part(const char *part);
     494              : 
     495           38 : static char *html_from_multipart(const char *msg, const char *ctype) {
     496           38 :     const char *b = strcasestr(ctype, "boundary=");
     497           38 :     if (!b) return NULL;
     498           38 :     b += strlen("boundary=");
     499              : 
     500           38 :     char boundary[512] = {0};
     501           38 :     if (*b == '"') {
     502           38 :         b++;
     503           38 :         const char *end = strchr(b, '"');
     504           38 :         if (!end) return NULL;
     505           38 :         snprintf(boundary, sizeof(boundary), "%.*s", (int)(end - b), b);
     506              :     } else {
     507            0 :         size_t i = 0;
     508            0 :         while (*b && *b != ';' && *b != ' ' && *b != '\r' && *b != '\n' &&
     509              :                i < sizeof(boundary) - 1)
     510            0 :             boundary[i++] = *b++;
     511            0 :         boundary[i] = '\0';
     512              :     }
     513           38 :     if (!boundary[0]) return NULL;
     514              : 
     515              :     char delim[520];
     516           38 :     snprintf(delim, sizeof(delim), "--%s", boundary);
     517           38 :     size_t dlen = strlen(delim);
     518              : 
     519           38 :     const char *p = strstr(msg, delim);
     520           82 :     while (p) {
     521           82 :         if (p[dlen] == '-' && p[dlen+1] == '-') break; /* end boundary */
     522           76 :         p = strchr(p + dlen, '\n');
     523           76 :         if (!p) break;
     524           76 :         p++;
     525           76 :         const char *next = strstr(p, delim);
     526           76 :         if (!next) break;
     527           76 :         size_t partlen = (size_t)(next - p);
     528           76 :         char *part = strndup(p, partlen);
     529           76 :         if (!part) break;
     530           76 :         char *result = html_from_part(part);
     531           76 :         free(part);
     532           76 :         if (result) return result;
     533           44 :         p = next; /* keep p pointing at delimiter for next iteration */
     534              :     }
     535            6 :     return NULL;
     536              : }
     537              : 
     538          122 : static char *html_from_part(const char *part) {
     539          122 :     char *ctype   = mime_get_header(part, "Content-Type");
     540          122 :     char *enc     = mime_get_header(part, "Content-Transfer-Encoding");
     541          122 :     char *charset = extract_charset(ctype);
     542          122 :     const char *body = body_start(part);
     543          122 :     char *result = NULL;
     544              : 
     545          122 :     if (ctype && strncasecmp(ctype, "text/html", 9) == 0) {
     546           32 :         if (body) {
     547           32 :             char *raw = decode_transfer(body, strlen(body), enc);
     548           32 :             if (raw) {
     549           32 :                 result = charset_to_utf8(raw, charset);
     550           32 :                 free(raw);
     551              :             }
     552              :         }
     553           90 :     } else if (ctype && strncasecmp(ctype, "multipart/", 10) == 0) {
     554           38 :         result = html_from_multipart(part, ctype);
     555              :     }
     556              : 
     557          122 :     free(ctype); free(enc); free(charset);
     558          122 :     return result;
     559              : }
     560              : 
     561              : /* ── Public API ─────────────────────────────────────────────────────── */
     562              : 
     563           14 : char *mime_get_text_body(const char *msg) {
     564           14 :     if (!msg) return NULL;
     565           14 :     return text_from_part(msg);
     566              : }
     567              : 
     568           46 : char *mime_get_html_part(const char *msg) {
     569           46 :     if (!msg) return NULL;
     570           46 :     return html_from_part(msg);
     571              : }
     572              : 
     573              : /* ── Attachment extraction ──────────────────────────────────────────── */
     574              : 
     575              : /* Extract a MIME header parameter value, e.g. filename="foo.pdf" or name=bar.
     576              :  * Handles quoted and unquoted values.  Returns malloc'd string or NULL. */
     577          101 : static char *extract_param(const char *header, const char *param) {
     578          101 :     if (!header || !param) return NULL;
     579              :     char search[64];
     580          101 :     snprintf(search, sizeof(search), "%s=", param);
     581          101 :     const char *p = strcasestr(header, search);
     582          101 :     if (!p) return NULL;
     583           62 :     p += strlen(search);
     584           62 :     if (*p == '"') {
     585           62 :         p++;
     586           62 :         const char *end = strchr(p, '"');
     587           62 :         if (!end) return NULL;
     588           62 :         return strndup(p, (size_t)(end - p));
     589              :     }
     590              :     /* unquoted value: ends at ';', whitespace, or end-of-string */
     591            0 :     const char *end = p;
     592            0 :     while (*end && *end != ';' && *end != ' ' && *end != '\t' &&
     593            0 :            *end != '\r' && *end != '\n')
     594            0 :         end++;
     595            0 :     if (end == p) return NULL;
     596            0 :     return strndup(p, (size_t)(end - p));
     597              : }
     598              : 
     599              : /* Sanitise a filename: strip directory separators and leading dots. */
     600           62 : static char *sanitise_filename(const char *name) {
     601           62 :     if (!name || !*name) return NULL;
     602              :     /* take only the basename portion */
     603           62 :     const char *base = name;
     604          631 :     for (const char *p = name; *p; p++)
     605          569 :         if (*p == '/' || *p == '\\') base = p + 1;
     606           62 :     if (!*base) return NULL;
     607           62 :     char *s = strdup(base);
     608           62 :     if (!s) return NULL;
     609              :     /* strip leading dots (hidden files / directory traversal) */
     610           62 :     char *p = s;
     611           62 :     while (*p == '.') p++;
     612           62 :     if (!*p) { free(s); return strdup("attachment"); }
     613           62 :     if (p != s) memmove(s, p, strlen(p) + 1);
     614           62 :     return s;
     615              : }
     616              : 
     617              : /* Dynamic array for building the attachment list */
     618              : typedef struct { MimeAttachment *data; int count; int cap; } AttachList;
     619              : 
     620           62 : static int alist_push(AttachList *al, MimeAttachment att) {
     621           62 :     if (al->count >= al->cap) {
     622           35 :         int newcap = al->cap ? al->cap * 2 : 4;
     623           35 :         MimeAttachment *tmp = realloc(al->data,
     624           35 :                                       (size_t)newcap * sizeof(MimeAttachment));
     625           35 :         if (!tmp) return -1;
     626           35 :         al->data = tmp;
     627           35 :         al->cap  = newcap;
     628              :     }
     629           62 :     al->data[al->count++] = att;
     630           62 :     return 0;
     631              : }
     632              : 
     633              : /* Forward declaration */
     634              : static void collect_parts(const char *msg, AttachList *al, int *unnamed_idx);
     635              : 
     636              : /* Walk a multipart body and collect attachments from each sub-part. */
     637           35 : static void collect_multipart_attachments(const char *msg, const char *ctype,
     638              :                                           AttachList *al, int *idx) {
     639           35 :     const char *b = strcasestr(ctype, "boundary=");
     640           35 :     if (!b) return;
     641           35 :     b += strlen("boundary=");
     642              : 
     643           35 :     char boundary[512] = {0};
     644           35 :     if (*b == '"') {
     645           35 :         b++;
     646           35 :         const char *end = strchr(b, '"');
     647           35 :         if (!end) return;
     648           35 :         snprintf(boundary, sizeof(boundary), "%.*s", (int)(end - b), b);
     649              :     } else {
     650            0 :         size_t i = 0;
     651            0 :         while (*b && *b != ';' && *b != ' ' && *b != '\r' && *b != '\n' &&
     652              :                i < sizeof(boundary) - 1)
     653            0 :             boundary[i++] = *b++;
     654            0 :         boundary[i] = '\0';
     655              :     }
     656           35 :     if (!boundary[0]) return;
     657              : 
     658              :     char delim[520];
     659           35 :     snprintf(delim, sizeof(delim), "--%s", boundary);
     660           35 :     size_t dlen = strlen(delim);
     661              : 
     662           35 :     const char *p = strstr(msg, delim);
     663          124 :     while (p) {
     664          124 :         p = strchr(p + dlen, '\n');
     665          124 :         if (!p) break;
     666          124 :         p++;
     667              : 
     668          124 :         const char *next = strstr(p, delim);
     669          124 :         if (!next) break;
     670              : 
     671          124 :         size_t partlen = (size_t)(next - p);
     672          124 :         char *part = strndup(p, partlen);
     673          124 :         if (!part) break;
     674          124 :         collect_parts(part, al, idx);
     675          124 :         free(part);
     676              : 
     677          124 :         p = next + dlen;
     678          124 :         if (p[0] == '-' && p[1] == '-') break;
     679           89 :         p = strchr(p, '\n');
     680           89 :         if (p) p++;
     681              :     }
     682              : }
     683              : 
     684              : /* Examine one MIME part (headers + body) and add to al if it is an attachment. */
     685          163 : static void collect_parts(const char *msg, AttachList *al, int *unnamed_idx) {
     686          163 :     char *ctype = mime_get_header(msg, "Content-Type");
     687          163 :     char *disp  = mime_get_header(msg, "Content-Disposition");
     688          163 :     char *enc   = mime_get_header(msg, "Content-Transfer-Encoding");
     689              : 
     690              :     /* Recurse into multipart containers */
     691          163 :     if (ctype && strncasecmp(ctype, "multipart/", 10) == 0) {
     692           35 :         collect_multipart_attachments(msg, ctype, al, unnamed_idx);
     693           35 :         free(ctype); free(disp); free(enc);
     694          101 :         return;
     695              :     }
     696              : 
     697              :     /* Determine filename from Content-Disposition or Content-Type name= */
     698          128 :     char *filename = NULL;
     699          128 :     int explicit_attach = 0;
     700          128 :     if (disp) {
     701           62 :         if (strncasecmp(disp, "attachment", 10) == 0) explicit_attach = 1;
     702           62 :         filename = extract_param(disp, "filename");
     703              :         /* RFC 5987: filename*=charset''encoded — simplified: strip trailing * */
     704           62 :         if (!filename) filename = extract_param(disp, "filename*");
     705              :     }
     706          128 :     if (!filename && ctype)
     707           39 :         filename = extract_param(ctype, "name");
     708              : 
     709              :     /* Skip non-attachment text and multipart parts unless explicitly marked */
     710          128 :     if (!explicit_attach) {
     711           66 :         if (!filename) {
     712           66 :             free(ctype); free(disp); free(enc);
     713           66 :             return;  /* no filename → body part, skip */
     714              :         }
     715              :         /* text/plain and text/html without attachment disposition are body parts */
     716            0 :         if (ctype && (strncasecmp(ctype, "text/plain", 10) == 0 ||
     717            0 :                       strncasecmp(ctype, "text/html",   9) == 0)) {
     718            0 :             free(ctype); free(disp); free(enc); free(filename);
     719            0 :             return;
     720              :         }
     721              :     }
     722              : 
     723           62 :     const char *body = body_start(msg);
     724           62 :     if (!body) {
     725            0 :         free(ctype); free(disp); free(enc); free(filename);
     726            0 :         return;
     727              :     }
     728              : 
     729              :     /* Decode body content; for base64 capture exact decoded byte count. */
     730           62 :     size_t data_size = 0;
     731              :     unsigned char *data;
     732           62 :     if (enc && strcasecmp(enc, "base64") == 0)
     733           62 :         data = (unsigned char *)decode_base64(body, strlen(body), &data_size);
     734              :     else {
     735            0 :         data = (unsigned char *)decode_transfer(body, strlen(body), enc);
     736            0 :         data_size = data ? strlen((char *)data) : 0;
     737              :     }
     738              : 
     739              :     /* Sanitise / generate filename */
     740           62 :     char *safe_name = NULL;
     741           62 :     if (filename) {
     742           62 :         char *decoded = mime_decode_words(filename);
     743           62 :         free(filename);
     744           62 :         safe_name = sanitise_filename(decoded ? decoded : "");
     745           62 :         free(decoded);
     746              :     }
     747           62 :     if (!safe_name) {
     748              :         char gen[32];
     749            0 :         snprintf(gen, sizeof(gen), "attachment-%d.bin", ++(*unnamed_idx));
     750            0 :         safe_name = strdup(gen);
     751              :     }
     752              : 
     753           62 :     MimeAttachment att = {0};
     754           62 :     att.filename     = safe_name;
     755           62 :     att.content_type = ctype ? strdup(ctype) : strdup("application/octet-stream");
     756           62 :     att.data         = data;
     757           62 :     att.size         = data_size;
     758              : 
     759           62 :     if (alist_push(al, att) < 0) {
     760            0 :         free(att.filename); free(att.content_type); free(att.data);
     761              :     }
     762              : 
     763           62 :     free(ctype); free(disp); free(enc);
     764              : }
     765              : 
     766           39 : MimeAttachment *mime_list_attachments(const char *msg, int *count_out) {
     767           39 :     if (!msg || !count_out) { if (count_out) *count_out = 0; return NULL; }
     768           39 :     AttachList al = {NULL, 0, 0};
     769           39 :     int idx = 0;
     770           39 :     collect_parts(msg, &al, &idx);
     771           39 :     *count_out = al.count;
     772           39 :     if (al.count == 0) { free(al.data); return NULL; }
     773           35 :     return al.data;
     774              : }
     775              : 
     776           27 : void mime_free_attachments(MimeAttachment *list, int count) {
     777           27 :     if (!list) return;
     778           61 :     for (int i = 0; i < count; i++) {
     779           38 :         free(list[i].filename);
     780           38 :         free(list[i].content_type);
     781           38 :         free(list[i].data);
     782              :     }
     783           23 :     free(list);
     784              : }
     785              : 
     786            9 : int mime_save_attachment(const MimeAttachment *att, const char *dest_path) {
     787            9 :     if (!att || !dest_path || !att->data) return -1;
     788           18 :     RAII_FILE FILE *f = fopen(dest_path, "wb");
     789            9 :     if (!f) return -1;
     790              :     /* Write the full decoded buffer; for base64 the NUL terminator is not
     791              :      * part of the content — use att->size if accurate, else strlen fallback. */
     792            9 :     size_t n = att->size > 0 ? att->size : strlen((char *)att->data);
     793            9 :     size_t written = fwrite(att->data, 1, n, f);
     794            9 :     return (written != n) ? -1 : 0;
     795              : }
     796              : 
     797            0 : char *mime_extract_imap_literal(const char *response) {
     798            0 :     if (!response) return NULL;
     799            0 :     const char *brace = strchr(response, '{');
     800            0 :     if (!brace) return NULL;
     801              : 
     802            0 :     char *end = NULL;
     803            0 :     long size = strtol(brace + 1, &end, 10);
     804            0 :     if (!end || *end != '}' || size <= 0) return NULL;
     805              : 
     806            0 :     const char *content = end + 1;
     807            0 :     if (*content == '\r') content++;
     808            0 :     if (*content == '\n') content++;
     809              : 
     810              :     // Safety check
     811            0 :     size_t avail = strlen(content);
     812            0 :     if (avail < (size_t)size) {
     813            0 :         return strndup(content, avail);
     814              :     }
     815              : 
     816            0 :     return strndup(content, (size_t)size);
     817              : }
        

Generated by: LCOV version 2.0-1