LCOV - code coverage report
Current view: top level - libemail/src/core - mime_util.c (source / functions) Coverage Total Hit
Test: coverage.info Lines: 75.9 % 528 401
Test Date: 2026-04-15 21:12:52 Functions: 85.2 % 27 23

            Line data    Source code
       1              : #include "mime_util.h"
       2              : #include "html_render.h"
       3              : #include <stdio.h>
       4              : #include <stdlib.h>
       5              : #include <string.h>
       6              : #include <ctype.h>
       7              : #include <time.h>
       8              : #include <iconv.h>
       9              : #include <errno.h>
      10              : 
      11              : /* ── Header extraction ──────────────────────────────────────────────── */
      12              : 
      13          214 : char *mime_get_header(const char *msg, const char *name) {
      14          214 :     if (!msg || !name) return NULL;
      15          212 :     size_t nlen = strlen(name);
      16          212 :     const char *p = msg;
      17              : 
      18          697 :     while (p && *p) {
      19              :         /* Stop at the blank line separating headers from body. */
      20          483 :         if (*p == '\r' || *p == '\n')
      21              :             break;
      22              : 
      23          417 :         if (strncasecmp(p, name, nlen) == 0 && p[nlen] == ':') {
      24          144 :             const char *val = p + nlen + 1;
      25          288 :             while (*val == ' ' || *val == '\t') val++;
      26              : 
      27          144 :             size_t cap = 512, n = 0;
      28          144 :             char *result = malloc(cap);
      29          144 :             if (!result) return NULL;
      30              : 
      31              :             /* Collect value, unfolding continuation lines */
      32         4588 :             while (*val) {
      33         4588 :                 if (*val == '\r' || *val == '\n') {
      34          146 :                     const char *next = val;
      35          146 :                     if (*next == '\r') next++;
      36          146 :                     if (*next == '\n') next++;
      37          146 :                     if (*next == ' ' || *next == '\t') {
      38              :                         /* Continuation line: skip CRLF and the leading whitespace */
      39            2 :                         val = next;
      40            4 :                         while (*val == ' ' || *val == '\t') val++;
      41              :                         /* Add a single space to separate folded content if needed */
      42            2 :                         if (n > 0 && result[n-1] != ' ') {
      43            2 :                             if (n + 1 >= cap) {
      44            1 :                                 cap *= 2;
      45            1 :                                 char *tmp = realloc(result, cap);
      46            1 :                                 if (!tmp) { free(result); return NULL; }
      47            1 :                                 result = tmp;
      48              :                             }
      49            2 :                             result[n++] = ' ';
      50              :                         }
      51            2 :                         continue;
      52              :                     } else {
      53              :                         /* Not a continuation line: we are done with this header */
      54              :                         break;
      55              :                     }
      56              :                 }
      57              : 
      58         4442 :                 if (n + 1 >= cap) {
      59            1 :                     cap *= 2;
      60            1 :                     char *tmp = realloc(result, cap);
      61            1 :                     if (!tmp) { free(result); return NULL; }
      62            1 :                     result = tmp;
      63              :                 }
      64         4442 :                 result[n++] = *val++;
      65              :             }
      66          144 :             result[n] = '\0';
      67          144 :             return result;
      68              :         }
      69              : 
      70              :         /* Advance to next line */
      71          273 :         p = strchr(p, '\n');
      72          273 :         if (p) p++;
      73              :     }
      74           68 :     return NULL;
      75              : }
      76              : 
      77              : /* ── Base64 decoder ─────────────────────────────────────────────────── */
      78              : 
      79           30 : static int b64val(unsigned char c) {
      80           30 :     if (c >= 'A' && c <= 'Z') return c - 'A';
      81           15 :     if (c >= 'a' && c <= 'z') return c - 'a' + 26;
      82            8 :     if (c >= '0' && c <= '9') return c - '0' + 52;
      83            4 :     if (c == '+') return 62;
      84            4 :     if (c == '/') return 63;
      85            4 :     return -1;
      86              : }
      87              : 
      88            2 : static char *decode_base64(const char *in, size_t inlen) {
      89            2 :     size_t max = (inlen / 4 + 1) * 3 + 4;
      90            2 :     char *out = malloc(max);
      91            2 :     if (!out) return NULL;
      92            2 :     size_t n = 0;
      93            2 :     int buf = 0, bits = 0;
      94           32 :     for (size_t i = 0; i < inlen; i++) {
      95           30 :         int v = b64val((unsigned char)in[i]);
      96           30 :         if (v < 0) continue;
      97           26 :         buf = (buf << 6) | v;
      98           26 :         bits += 6;
      99           26 :         if (bits >= 8) {
     100           19 :             bits -= 8;
     101           19 :             out[n++] = (char)((buf >> bits) & 0xFF);
     102              :         }
     103              :     }
     104            2 :     out[n] = '\0';
     105            2 :     return out;
     106              : }
     107              : 
     108              : /* ── Quoted-Printable decoder ───────────────────────────────────────── */
     109              : 
     110            2 : static char *decode_qp(const char *in, size_t inlen) {
     111            2 :     char *out = malloc(inlen + 1);
     112            2 :     if (!out) return NULL;
     113            2 :     size_t n = 0, i = 0;
     114           26 :     while (i < inlen) {
     115           24 :         if (in[i] == '=' && i + 1 < inlen &&
     116            4 :             (in[i + 1] == '\r' || in[i + 1] == '\n')) {
     117              :             /* Soft line break — skip */
     118            1 :             i++;
     119            1 :             if (i < inlen && in[i] == '\r') i++;
     120            1 :             if (i < inlen && in[i] == '\n') i++;
     121           23 :         } else if (in[i] == '=' && i + 2 < inlen &&
     122            3 :                    isxdigit((unsigned char)in[i + 1]) &&
     123            6 :                    isxdigit((unsigned char)in[i + 2])) {
     124            3 :             char hex[3] = { in[i + 1], in[i + 2], '\0' };
     125            3 :             out[n++] = (char)strtol(hex, NULL, 16);
     126            3 :             i += 3;
     127              :         } else {
     128           20 :             out[n++] = in[i++];
     129              :         }
     130              :     }
     131            2 :     out[n] = '\0';
     132            2 :     return out;
     133              : }
     134              : 
     135              : /* ── Body helpers ───────────────────────────────────────────────────── */
     136              : 
     137           39 : static const char *body_start(const char *msg) {
     138           39 :     const char *p = strstr(msg, "\r\n\r\n");
     139           39 :     if (p) return p + 4;
     140            3 :     p = strstr(msg, "\n\n");
     141            3 :     if (p) return p + 2;
     142            2 :     return NULL;
     143              : }
     144              : 
     145           22 : static char *decode_transfer(const char *body, size_t len, const char *enc) {
     146           22 :     if (enc && strcasecmp(enc, "base64") == 0)
     147            1 :         return decode_base64(body, len);
     148           21 :     if (enc && strcasecmp(enc, "quoted-printable") == 0)
     149            2 :         return decode_qp(body, len);
     150           19 :     return strndup(body, len);
     151              : }
     152              : 
     153              : /* Extract the charset parameter value from a Content-Type header value.
     154              :  * E.g. "text/plain; charset=iso-8859-2" → "iso-8859-2".
     155              :  * Returns a malloc'd string or NULL if not found. */
     156           39 : static char *extract_charset(const char *ctype) {
     157           39 :     if (!ctype) return NULL;
     158           36 :     const char *p = strcasestr(ctype, "charset=");
     159           36 :     if (!p) return NULL;
     160           11 :     p += 8;
     161           11 :     if (*p == '"') p++;          /* skip optional opening quote */
     162           11 :     const char *start = p;
     163           66 :     while (*p && *p != ';' && *p != ' ' && *p != '\t' && *p != '"' && *p != '\r' && *p != '\n')
     164           55 :         p++;
     165           11 :     if (p == start) return NULL;
     166           10 :     return strndup(start, (size_t)(p - start));
     167              : }
     168              : 
     169              : /* Convert s from from_charset to UTF-8 via iconv.
     170              :  * Returns a malloc'd UTF-8 string; on failure returns strdup(s). */
     171           22 : static char *charset_to_utf8(const char *s, const char *from_charset) {
     172           22 :     if (!s) return NULL;
     173           22 :     if (!from_charset ||
     174            9 :         strcasecmp(from_charset, "utf-8")  == 0 ||
     175            1 :         strcasecmp(from_charset, "utf8")   == 0 ||
     176            1 :         strcasecmp(from_charset, "us-ascii") == 0)
     177           21 :         return strdup(s);
     178              : 
     179            1 :     iconv_t cd = iconv_open("UTF-8", from_charset);
     180            1 :     if (cd == (iconv_t)-1) return strdup(s);
     181              : 
     182            1 :     size_t in_len   = strlen(s);
     183            1 :     size_t out_size = in_len * 4 + 1;
     184            1 :     char  *out      = malloc(out_size);
     185            1 :     if (!out) { iconv_close(cd); return strdup(s); }
     186              : 
     187            1 :     char  *inp      = (char *)s;
     188            1 :     char  *outp     = out;
     189            1 :     size_t inbytes  = in_len;
     190            1 :     size_t outbytes = out_size - 1;
     191            1 :     size_t r        = iconv(cd, &inp, &inbytes, &outp, &outbytes);
     192            1 :     iconv_close(cd);
     193              : 
     194            1 :     if (r == (size_t)-1) { free(out); return strdup(s); }
     195            1 :     *outp = '\0';
     196            1 :     return out;
     197              : }
     198              : 
     199              : static char *text_from_part(const char *part);
     200              : 
     201            3 : static char *text_from_multipart(const char *msg, const char *ctype) {
     202            3 :     const char *b = strcasestr(ctype, "boundary=");
     203            3 :     if (!b) return NULL;
     204            3 :     b += strlen("boundary=");
     205              : 
     206            3 :     char boundary[512] = {0};
     207            3 :     if (*b == '"') {
     208            1 :         b++;
     209            1 :         const char *end = strchr(b, '"');
     210            1 :         if (!end) return NULL;
     211            1 :         snprintf(boundary, sizeof(boundary), "%.*s", (int)(end - b), b);
     212              :     } else {
     213            2 :         size_t i = 0;
     214           11 :         while (*b && *b != ';' && *b != ' ' && *b != '\r' && *b != '\n' &&
     215              :                i < sizeof(boundary) - 1)
     216            9 :             boundary[i++] = *b++;
     217            2 :         boundary[i] = '\0';
     218              :     }
     219            3 :     if (!boundary[0]) return NULL;
     220              : 
     221            3 :     char delim[520];
     222            3 :     snprintf(delim, sizeof(delim), "--%s", boundary);
     223            3 :     size_t dlen = strlen(delim);
     224              : 
     225            3 :     const char *p = strstr(msg, delim);
     226            4 :     while (p) {
     227            4 :         p = strchr(p + dlen, '\n');
     228            4 :         if (!p) break;
     229            4 :         p++;
     230              : 
     231            4 :         const char *next = strstr(p, delim);
     232            4 :         if (!next) break;
     233              : 
     234            4 :         size_t partlen = (size_t)(next - p);
     235            4 :         char *part = strndup(p, partlen);
     236            4 :         if (!part) break;
     237            4 :         char *result = text_from_part(part);
     238            4 :         free(part);
     239            4 :         if (result) return result;
     240              : 
     241            2 :         p = next + dlen;
     242            2 :         if (p[0] == '-' && p[1] == '-') break;
     243            1 :         p = strchr(p, '\n');
     244            1 :         if (p) p++;
     245              :     }
     246            1 :     return NULL;
     247              : }
     248              : 
     249           20 : static char *text_from_part(const char *part) {
     250           20 :     char *ctype   = mime_get_header(part, "Content-Type");
     251           20 :     char *enc     = mime_get_header(part, "Content-Transfer-Encoding");
     252           20 :     char *charset = extract_charset(ctype);
     253           20 :     const char *body = body_start(part);
     254           20 :     char *result = NULL;
     255              : 
     256           20 :     if (!ctype || strncasecmp(ctype, "text/plain", 10) == 0) {
     257           15 :         if (body) {
     258           13 :             char *raw = decode_transfer(body, strlen(body), enc);
     259           13 :             if (raw) {
     260           13 :                 result = charset_to_utf8(raw, charset);
     261           13 :                 free(raw);
     262              :             }
     263              :         }
     264            5 :     } else if (strncasecmp(ctype, "multipart/", 10) == 0) {
     265            3 :         result = text_from_multipart(part, ctype);
     266            2 :     } else if (strncasecmp(ctype, "text/html", 9) == 0) {
     267            1 :         if (body) {
     268            1 :             char *raw = decode_transfer(body, strlen(body), enc);
     269            1 :             if (raw) {
     270            1 :                 char *utf8 = charset_to_utf8(raw, charset);
     271            1 :                 free(raw);
     272            1 :                 if (utf8) {
     273            1 :                     result = html_render(utf8, 0, 0);
     274            1 :                     free(utf8);
     275              :                 }
     276              :             }
     277              :         }
     278              :     }
     279              : 
     280           20 :     free(ctype);
     281           20 :     free(enc);
     282           20 :     free(charset);
     283           20 :     return result;
     284              : }
     285              : 
     286              : /* ── RFC 2047 encoded-word decoder ──────────────────────────────────── */
     287              : 
     288              : /**
     289              :  * Decode the text portion of one encoded word and convert to UTF-8.
     290              :  *
     291              :  * enc == 'Q'/'q': quoted-printable variant (underscore = space).
     292              :  * enc == 'B'/'b': base64.
     293              :  * charset: the declared charset of the encoded bytes.
     294              :  *
     295              :  * Returns a malloc'd NUL-terminated UTF-8 string, or NULL on failure.
     296              :  */
     297            7 : static char *decode_encoded_word(const char *charset, char enc,
     298              :                                   const char *text, size_t text_len) {
     299            7 :     char *raw = NULL;
     300              : 
     301            7 :     if (enc == 'Q' || enc == 'q') {
     302            6 :         raw = malloc(text_len + 1);
     303            6 :         if (!raw) return NULL;
     304            6 :         size_t i = 0, j = 0;
     305           66 :         while (i < text_len) {
     306           60 :             if (text[i] == '_') {
     307            4 :                 raw[j++] = ' ';
     308            4 :                 i++;
     309           56 :             } else if (text[i] == '=' && i + 2 < text_len &&
     310           11 :                        isxdigit((unsigned char)text[i + 1]) &&
     311           22 :                        isxdigit((unsigned char)text[i + 2])) {
     312           11 :                 char hex[3] = { text[i + 1], text[i + 2], '\0' };
     313           11 :                 raw[j++] = (char)strtol(hex, NULL, 16);
     314           11 :                 i += 3;
     315              :             } else {
     316           45 :                 raw[j++] = text[i++];
     317              :             }
     318              :         }
     319            6 :         raw[j] = '\0';
     320              :     } else {
     321              :         /* B encoding */
     322            1 :         raw = decode_base64(text, text_len);
     323            1 :         if (!raw) return NULL;
     324              :     }
     325              : 
     326              :     /* If the declared charset is already UTF-8, return as-is. */
     327            7 :     if (strcasecmp(charset, "utf-8") == 0 || strcasecmp(charset, "utf8") == 0)
     328            5 :         return raw;
     329              : 
     330              :     /* Otherwise convert via iconv. */
     331            2 :     iconv_t cd = iconv_open("UTF-8", charset);
     332            2 :     if (cd == (iconv_t)-1)
     333            1 :         return raw;   /* unknown charset — return raw bytes */
     334              : 
     335            1 :     size_t raw_len   = strlen(raw);
     336            1 :     size_t out_size  = raw_len * 4 + 1;
     337            1 :     char  *utf8      = malloc(out_size);
     338            1 :     if (!utf8) { iconv_close(cd); return raw; }
     339              : 
     340            1 :     char   *inp      = raw;
     341            1 :     char   *outp     = utf8;
     342            1 :     size_t  inbytes  = raw_len;
     343            1 :     size_t  outbytes = out_size - 1;
     344            1 :     size_t  r        = iconv(cd, &inp, &inbytes, &outp, &outbytes);
     345            1 :     iconv_close(cd);
     346              : 
     347            1 :     if (r == (size_t)-1) { free(utf8); return raw; }
     348              : 
     349            1 :     *outp = '\0';
     350            1 :     free(raw);
     351            1 :     return utf8;
     352              : }
     353              : 
     354              : /**
     355              :  * Try to parse and decode one encoded word starting exactly at *pp.
     356              :  * Format: =?charset?Q|B?encoded_text?=
     357              :  *
     358              :  * On success, *pp is advanced past the closing "?=" and the decoded
     359              :  * UTF-8 string (malloc'd) is returned.
     360              :  * On failure, *pp is unchanged and NULL is returned.
     361              :  */
     362            7 : static char *try_decode_encoded_word(const char **pp) {
     363            7 :     const char *p = *pp;
     364            7 :     if (p[0] != '=' || p[1] != '?') return NULL;
     365            7 :     p += 2;
     366              : 
     367              :     /* charset */
     368            7 :     const char *cs = p;
     369           59 :     while (*p && *p != '?') p++;
     370            7 :     if (!*p) return NULL;
     371            7 :     size_t cs_len = (size_t)(p - cs);
     372            7 :     if (cs_len == 0 || cs_len >= 64) return NULL;
     373            7 :     char charset[64];
     374            7 :     memcpy(charset, cs, cs_len);
     375            7 :     charset[cs_len] = '\0';
     376            7 :     p++;   /* skip ? */
     377              : 
     378              :     /* encoding indicator */
     379            7 :     char enc = *p;
     380            7 :     if (enc != 'Q' && enc != 'q' && enc != 'B' && enc != 'b') return NULL;
     381            7 :     p++;
     382            7 :     if (*p != '?') return NULL;
     383            7 :     p++;   /* skip ? */
     384              : 
     385              :     /* encoded text — ends at next ?= */
     386            7 :     const char *txt = p;
     387           97 :     while (*p && !(*p == '?' && p[1] == '=')) p++;
     388            7 :     if (!*p) return NULL;
     389            7 :     size_t txt_len = (size_t)(p - txt);
     390            7 :     p += 2;   /* skip ?= */
     391              : 
     392            7 :     char *decoded = decode_encoded_word(charset, enc, txt, txt_len);
     393            7 :     if (!decoded) return NULL;
     394            7 :     *pp = p;
     395            7 :     return decoded;
     396              : }
     397              : 
     398           66 : char *mime_decode_words(const char *value) {
     399           66 :     if (!value) return NULL;
     400              : 
     401           65 :     size_t vlen = strlen(value);
     402              :     /* Upper bound: each raw byte can expand to at most 4 UTF-8 bytes. */
     403           65 :     size_t cap = vlen * 4 + 1;
     404           65 :     char  *out = malloc(cap);
     405           65 :     if (!out) return NULL;
     406              : 
     407           65 :     size_t      n             = 0;
     408           65 :     const char *p             = value;
     409           65 :     int         prev_encoded  = 0;
     410              : 
     411         1328 :     while (*p) {
     412              :         /* RFC 2047 §6.2: linear whitespace between adjacent encoded words
     413              :          * must be ignored. */
     414         1263 :         if (prev_encoded && (*p == ' ' || *p == '\t')) {
     415            2 :             const char *ws = p;
     416            4 :             while (*ws == ' ' || *ws == '\t') ws++;
     417            2 :             if (ws[0] == '=' && ws[1] == '?') {
     418            1 :                 p = ws;
     419            1 :                 continue;
     420              :             }
     421              :         }
     422              : 
     423         1262 :         if (p[0] == '=' && p[1] == '?') {
     424            7 :             char *decoded = try_decode_encoded_word(&p);
     425            7 :             if (decoded) {
     426            7 :                 size_t dlen = strlen(decoded);
     427            7 :                 if (n + dlen >= cap) {
     428            0 :                     cap = n + dlen + vlen + 1;
     429            0 :                     char *tmp = realloc(out, cap);
     430            0 :                     if (!tmp) { free(decoded); break; }
     431            0 :                     out = tmp;
     432              :                 }
     433            7 :                 memcpy(out + n, decoded, dlen);
     434            7 :                 n += dlen;
     435            7 :                 free(decoded);
     436            7 :                 prev_encoded = 1;
     437            7 :                 continue;
     438              :             }
     439              :         }
     440              : 
     441         1255 :         prev_encoded = 0;
     442         1255 :         out[n++] = *p++;
     443              :     }
     444              : 
     445           65 :     out[n] = '\0';
     446           65 :     return out;
     447              : }
     448              : 
     449              : /* ── Date formatting ────────────────────────────────────────────────── */
     450              : 
     451           41 : char *mime_format_date(const char *date) {
     452           41 :     if (!date || !*date) return NULL;
     453              : 
     454              :     static const char * const fmts[] = {
     455              :         "%a, %d %b %Y %T %z",  /* "Tue, 10 Mar 2026 15:07:40 +0000"     */
     456              :         "%d %b %Y %T %z",       /* "10 Mar 2026 15:07:40 +0000"          */
     457              :         "%a, %d %b %Y %T %Z",  /* "Tue, 24 Mar 2026 16:38:21 GMT"       */
     458              :         "%d %b %Y %T %Z",       /* "24 Mar 2026 16:38:21 UTC"            */
     459              :         NULL
     460              :     };
     461              : 
     462           40 :     struct tm tm;
     463           40 :     int parsed = 0;
     464           47 :     for (int i = 0; fmts[i]; i++) {
     465           46 :         memset(&tm, 0, sizeof(tm));
     466           46 :         if (strptime(date, fmts[i], &tm)) { parsed = 1; break; }
     467              :     }
     468           40 :     if (!parsed) return strdup(date);
     469              : 
     470              :     /* Save tm_gmtoff before calling timegm(): timegm() normalises the struct
     471              :      * and resets tm_gmtoff to 0.  timegm() treats the fields as UTC, so
     472              :      * subtracting the original offset converts to true UTC. */
     473           39 :     long gmtoff = tm.tm_gmtoff;
     474           39 :     time_t utc = timegm(&tm) - gmtoff;
     475           39 :     if (utc == (time_t)-1) return strdup(date);
     476              : 
     477           39 :     struct tm local;
     478           39 :     localtime_r(&utc, &local);
     479              : 
     480           39 :     char *buf = malloc(17);   /* "YYYY-MM-DD HH:MM\0" */
     481           39 :     if (!buf) return NULL;
     482           39 :     if (strftime(buf, 17, "%Y-%m-%d %H:%M", &local) == 0) {
     483            0 :         free(buf);
     484            0 :         return strdup(date);
     485              :     }
     486           39 :     return buf;
     487              : }
     488              : 
     489              : /* ── HTML part extractor ────────────────────────────────────────────── */
     490              : 
     491              : static char *html_from_part(const char *part);
     492              : 
     493            7 : static char *html_from_multipart(const char *msg, const char *ctype) {
     494            7 :     const char *b = strcasestr(ctype, "boundary=");
     495            7 :     if (!b) return NULL;
     496            7 :     b += strlen("boundary=");
     497              : 
     498            7 :     char boundary[512] = {0};
     499            7 :     if (*b == '"') {
     500            6 :         b++;
     501            6 :         const char *end = strchr(b, '"');
     502            6 :         if (!end) return NULL;
     503            6 :         snprintf(boundary, sizeof(boundary), "%.*s", (int)(end - b), b);
     504              :     } else {
     505            1 :         size_t i = 0;
     506            9 :         while (*b && *b != ';' && *b != ' ' && *b != '\r' && *b != '\n' &&
     507              :                i < sizeof(boundary) - 1)
     508            8 :             boundary[i++] = *b++;
     509            1 :         boundary[i] = '\0';
     510              :     }
     511            7 :     if (!boundary[0]) return NULL;
     512              : 
     513            7 :     char delim[520];
     514            7 :     snprintf(delim, sizeof(delim), "--%s", boundary);
     515            7 :     size_t dlen = strlen(delim);
     516              : 
     517            7 :     const char *p = strstr(msg, delim);
     518            9 :     while (p) {
     519            9 :         if (p[dlen] == '-' && p[dlen+1] == '-') break; /* end boundary */
     520            8 :         p = strchr(p + dlen, '\n');
     521            8 :         if (!p) break;
     522            8 :         p++;
     523            8 :         const char *next = strstr(p, delim);
     524            8 :         if (!next) break;
     525            8 :         size_t partlen = (size_t)(next - p);
     526            8 :         char *part = strndup(p, partlen);
     527            8 :         if (!part) break;
     528            8 :         char *result = html_from_part(part);
     529            8 :         free(part);
     530            8 :         if (result) return result;
     531            2 :         p = next; /* keep p pointing at delimiter for next iteration */
     532              :     }
     533            1 :     return NULL;
     534              : }
     535              : 
     536           19 : static char *html_from_part(const char *part) {
     537           19 :     char *ctype   = mime_get_header(part, "Content-Type");
     538           19 :     char *enc     = mime_get_header(part, "Content-Transfer-Encoding");
     539           19 :     char *charset = extract_charset(ctype);
     540           19 :     const char *body = body_start(part);
     541           19 :     char *result = NULL;
     542              : 
     543           19 :     if (ctype && strncasecmp(ctype, "text/html", 9) == 0) {
     544            8 :         if (body) {
     545            8 :             char *raw = decode_transfer(body, strlen(body), enc);
     546            8 :             if (raw) {
     547            8 :                 result = charset_to_utf8(raw, charset);
     548            8 :                 free(raw);
     549              :             }
     550              :         }
     551           11 :     } else if (ctype && strncasecmp(ctype, "multipart/", 10) == 0) {
     552            7 :         result = html_from_multipart(part, ctype);
     553              :     }
     554              : 
     555           19 :     free(ctype); free(enc); free(charset);
     556           19 :     return result;
     557              : }
     558              : 
     559              : /* ── Public API ─────────────────────────────────────────────────────── */
     560              : 
     561           17 : char *mime_get_text_body(const char *msg) {
     562           17 :     if (!msg) return NULL;
     563           16 :     return text_from_part(msg);
     564              : }
     565              : 
     566           12 : char *mime_get_html_part(const char *msg) {
     567           12 :     if (!msg) return NULL;
     568           11 :     return html_from_part(msg);
     569              : }
     570              : 
     571              : /* ── Attachment extraction ──────────────────────────────────────────── */
     572              : 
     573              : /* Extract a MIME header parameter value, e.g. filename="foo.pdf" or name=bar.
     574              :  * Handles quoted and unquoted values.  Returns malloc'd string or NULL. */
     575            1 : static char *extract_param(const char *header, const char *param) {
     576            1 :     if (!header || !param) return NULL;
     577            1 :     char search[64];
     578            1 :     snprintf(search, sizeof(search), "%s=", param);
     579            1 :     const char *p = strcasestr(header, search);
     580            1 :     if (!p) return NULL;
     581            0 :     p += strlen(search);
     582            0 :     if (*p == '"') {
     583            0 :         p++;
     584            0 :         const char *end = strchr(p, '"');
     585            0 :         if (!end) return NULL;
     586            0 :         return strndup(p, (size_t)(end - p));
     587              :     }
     588              :     /* unquoted value: ends at ';', whitespace, or end-of-string */
     589            0 :     const char *end = p;
     590            0 :     while (*end && *end != ';' && *end != ' ' && *end != '\t' &&
     591            0 :            *end != '\r' && *end != '\n')
     592            0 :         end++;
     593            0 :     if (end == p) return NULL;
     594            0 :     return strndup(p, (size_t)(end - p));
     595              : }
     596              : 
     597              : /* Sanitise a filename: strip directory separators and leading dots. */
     598            0 : static char *sanitise_filename(const char *name) {
     599            0 :     if (!name || !*name) return NULL;
     600              :     /* take only the basename portion */
     601            0 :     const char *base = name;
     602            0 :     for (const char *p = name; *p; p++)
     603            0 :         if (*p == '/' || *p == '\\') base = p + 1;
     604            0 :     if (!*base) return NULL;
     605            0 :     char *s = strdup(base);
     606            0 :     if (!s) return NULL;
     607              :     /* strip leading dots (hidden files / directory traversal) */
     608            0 :     char *p = s;
     609            0 :     while (*p == '.') p++;
     610            0 :     if (!*p) { free(s); return strdup("attachment"); }
     611            0 :     if (p != s) memmove(s, p, strlen(p) + 1);
     612            0 :     return s;
     613              : }
     614              : 
     615              : /* Dynamic array for building the attachment list */
     616              : typedef struct { MimeAttachment *data; int count; int cap; } AttachList;
     617              : 
     618            0 : static int alist_push(AttachList *al, MimeAttachment att) {
     619            0 :     if (al->count >= al->cap) {
     620            0 :         int newcap = al->cap ? al->cap * 2 : 4;
     621            0 :         MimeAttachment *tmp = realloc(al->data,
     622            0 :                                       (size_t)newcap * sizeof(MimeAttachment));
     623            0 :         if (!tmp) return -1;
     624            0 :         al->data = tmp;
     625            0 :         al->cap  = newcap;
     626              :     }
     627            0 :     al->data[al->count++] = att;
     628            0 :     return 0;
     629              : }
     630              : 
     631              : /* Forward declaration */
     632              : static void collect_parts(const char *msg, AttachList *al, int *unnamed_idx);
     633              : 
     634              : /* Walk a multipart body and collect attachments from each sub-part. */
     635            0 : static void collect_multipart_attachments(const char *msg, const char *ctype,
     636              :                                           AttachList *al, int *idx) {
     637            0 :     const char *b = strcasestr(ctype, "boundary=");
     638            0 :     if (!b) return;
     639            0 :     b += strlen("boundary=");
     640              : 
     641            0 :     char boundary[512] = {0};
     642            0 :     if (*b == '"') {
     643            0 :         b++;
     644            0 :         const char *end = strchr(b, '"');
     645            0 :         if (!end) return;
     646            0 :         snprintf(boundary, sizeof(boundary), "%.*s", (int)(end - b), b);
     647              :     } else {
     648            0 :         size_t i = 0;
     649            0 :         while (*b && *b != ';' && *b != ' ' && *b != '\r' && *b != '\n' &&
     650              :                i < sizeof(boundary) - 1)
     651            0 :             boundary[i++] = *b++;
     652            0 :         boundary[i] = '\0';
     653              :     }
     654            0 :     if (!boundary[0]) return;
     655              : 
     656            0 :     char delim[520];
     657            0 :     snprintf(delim, sizeof(delim), "--%s", boundary);
     658            0 :     size_t dlen = strlen(delim);
     659              : 
     660            0 :     const char *p = strstr(msg, delim);
     661            0 :     while (p) {
     662            0 :         p = strchr(p + dlen, '\n');
     663            0 :         if (!p) break;
     664            0 :         p++;
     665              : 
     666            0 :         const char *next = strstr(p, delim);
     667            0 :         if (!next) break;
     668              : 
     669            0 :         size_t partlen = (size_t)(next - p);
     670            0 :         char *part = strndup(p, partlen);
     671            0 :         if (!part) break;
     672            0 :         collect_parts(part, al, idx);
     673            0 :         free(part);
     674              : 
     675            0 :         p = next + dlen;
     676            0 :         if (p[0] == '-' && p[1] == '-') break;
     677            0 :         p = strchr(p, '\n');
     678            0 :         if (p) p++;
     679              :     }
     680              : }
     681              : 
     682              : /* Examine one MIME part (headers + body) and add to al if it is an attachment. */
     683            1 : static void collect_parts(const char *msg, AttachList *al, int *unnamed_idx) {
     684            1 :     char *ctype = mime_get_header(msg, "Content-Type");
     685            1 :     char *disp  = mime_get_header(msg, "Content-Disposition");
     686            1 :     char *enc   = mime_get_header(msg, "Content-Transfer-Encoding");
     687              : 
     688              :     /* Recurse into multipart containers */
     689            1 :     if (ctype && strncasecmp(ctype, "multipart/", 10) == 0) {
     690            0 :         collect_multipart_attachments(msg, ctype, al, unnamed_idx);
     691            0 :         free(ctype); free(disp); free(enc);
     692            0 :         return;
     693              :     }
     694              : 
     695              :     /* Determine filename from Content-Disposition or Content-Type name= */
     696            1 :     char *filename = NULL;
     697            1 :     int explicit_attach = 0;
     698            1 :     if (disp) {
     699            0 :         if (strncasecmp(disp, "attachment", 10) == 0) explicit_attach = 1;
     700            0 :         filename = extract_param(disp, "filename");
     701              :         /* RFC 5987: filename*=charset''encoded — simplified: strip trailing * */
     702            0 :         if (!filename) filename = extract_param(disp, "filename*");
     703              :     }
     704            1 :     if (!filename && ctype)
     705            1 :         filename = extract_param(ctype, "name");
     706              : 
     707              :     /* Skip non-attachment text and multipart parts unless explicitly marked */
     708            1 :     if (!explicit_attach) {
     709            1 :         if (!filename) {
     710            1 :             free(ctype); free(disp); free(enc);
     711            1 :             return;  /* no filename → body part, skip */
     712              :         }
     713              :         /* text/plain and text/html without attachment disposition are body parts */
     714            0 :         if (ctype && (strncasecmp(ctype, "text/plain", 10) == 0 ||
     715            0 :                       strncasecmp(ctype, "text/html",   9) == 0)) {
     716            0 :             free(ctype); free(disp); free(enc); free(filename);
     717            0 :             return;
     718              :         }
     719              :     }
     720              : 
     721            0 :     const char *body = body_start(msg);
     722            0 :     if (!body) {
     723            0 :         free(ctype); free(disp); free(enc); free(filename);
     724            0 :         return;
     725              :     }
     726              : 
     727              :     /* Decode body content */
     728            0 :     unsigned char *data = (unsigned char *)decode_transfer(body, strlen(body), enc);
     729            0 :     size_t data_size = data ? strlen((char *)data) : 0;
     730              :     /* For binary (base64-decoded) content the length may contain NUL bytes —
     731              :      * use the decoded output length from decode_base64, which null-terminates
     732              :      * but the real size is the base64-decoded byte count. */
     733            0 :     if (enc && strcasecmp(enc, "base64") == 0 && data) {
     734              :         /* decode_base64 returns the decoded bytes; count excludes the trailing NUL */
     735              :         /* we need actual binary size — recount via the decoded buffer length */
     736            0 :         size_t raw_enc_len = strlen(body);
     737            0 :         data_size = (raw_enc_len / 4) * 3;  /* upper bound */
     738              :         /* trim padding: accurate enough for display; file write uses full buffer */
     739              :     }
     740              : 
     741              :     /* Sanitise / generate filename */
     742            0 :     char *safe_name = NULL;
     743            0 :     if (filename) {
     744            0 :         char *decoded = mime_decode_words(filename);
     745            0 :         free(filename);
     746            0 :         safe_name = sanitise_filename(decoded ? decoded : "");
     747            0 :         free(decoded);
     748              :     }
     749            0 :     if (!safe_name) {
     750            0 :         char gen[32];
     751            0 :         snprintf(gen, sizeof(gen), "attachment-%d.bin", ++(*unnamed_idx));
     752            0 :         safe_name = strdup(gen);
     753              :     }
     754              : 
     755            0 :     MimeAttachment att = {0};
     756            0 :     att.filename     = safe_name;
     757            0 :     att.content_type = ctype ? strdup(ctype) : strdup("application/octet-stream");
     758            0 :     att.data         = data;
     759            0 :     att.size         = data_size;
     760              : 
     761            0 :     if (alist_push(al, att) < 0) {
     762            0 :         free(att.filename); free(att.content_type); free(att.data);
     763              :     }
     764              : 
     765            0 :     free(ctype); free(disp); free(enc);
     766              : }
     767              : 
     768            1 : MimeAttachment *mime_list_attachments(const char *msg, int *count_out) {
     769            1 :     if (!msg || !count_out) { if (count_out) *count_out = 0; return NULL; }
     770            1 :     AttachList al = {NULL, 0, 0};
     771            1 :     int idx = 0;
     772            1 :     collect_parts(msg, &al, &idx);
     773            1 :     *count_out = al.count;
     774            1 :     if (al.count == 0) { free(al.data); return NULL; }
     775            0 :     return al.data;
     776              : }
     777              : 
     778            1 : void mime_free_attachments(MimeAttachment *list, int count) {
     779            1 :     if (!list) return;
     780            0 :     for (int i = 0; i < count; i++) {
     781            0 :         free(list[i].filename);
     782            0 :         free(list[i].content_type);
     783            0 :         free(list[i].data);
     784              :     }
     785            0 :     free(list);
     786              : }
     787              : 
     788            0 : int mime_save_attachment(const MimeAttachment *att, const char *dest_path) {
     789            0 :     if (!att || !dest_path || !att->data) return -1;
     790            0 :     FILE *f = fopen(dest_path, "wb");
     791            0 :     if (!f) return -1;
     792              :     /* Write the full decoded buffer; for base64 the NUL terminator is not
     793              :      * part of the content — use att->size if accurate, else strlen fallback. */
     794            0 :     size_t n = att->size > 0 ? att->size : strlen((char *)att->data);
     795            0 :     size_t written = fwrite(att->data, 1, n, f);
     796            0 :     int err = (written != n) ? -1 : 0;
     797            0 :     fclose(f);
     798            0 :     return err;
     799              : }
     800              : 
     801            4 : char *mime_extract_imap_literal(const char *response) {
     802            4 :     if (!response) return NULL;
     803            3 :     const char *brace = strchr(response, '{');
     804            3 :     if (!brace) return NULL;
     805              :     
     806            2 :     char *end = NULL;
     807            2 :     long size = strtol(brace + 1, &end, 10);
     808            2 :     if (!end || *end != '}' || size <= 0) return NULL;
     809              :     
     810            2 :     const char *content = end + 1;
     811            2 :     if (*content == '\r') content++;
     812            2 :     if (*content == '\n') content++;
     813              :     
     814              :     // Safety check
     815            2 :     size_t avail = strlen(content);
     816            2 :     if (avail < (size_t)size) {
     817            1 :         return strndup(content, avail);
     818              :     }
     819              :     
     820            1 :     return strndup(content, (size_t)size);
     821              : }
        

Generated by: LCOV version 2.0-1