LCOV - code coverage report
Current view: top level - libemail/src/core - imap_util.c (source / functions) Coverage Total Hit
Test: coverage-functional.info Lines: 93.6 % 157 147
Test Date: 2026-05-07 15:53:08 Functions: 100.0 % 5 5

            Line data    Source code
       1              : #include "imap_util.h"
       2              : #include <stdio.h>
       3              : #include <stdlib.h>
       4              : #include <string.h>
       5              : #include <stdint.h>
       6              : 
       7              : /**
       8              :  * @file imap_util.c
       9              :  * @brief IMAP Modified UTF-7 encoder/decoder (RFC 3501 §5.1.3) and UID set expansion.
      10              :  */
      11              : 
      12              : /* Modified base64 alphabet: A-Z(0-25), a-z(26-51), 0-9(52-61), +(62), ,(63) */
      13          264 : static int mod64_value(char c) {
      14          264 :     if (c >= 'A' && c <= 'Z') return c - 'A';
      15          110 :     if (c >= 'a' && c <= 'z') return c - 'a' + 26;
      16           66 :     if (c >= '0' && c <= '9') return c - '0' + 52;
      17            0 :     if (c == '+') return 62;
      18            0 :     if (c == ',') return 63;
      19            0 :     return -1;
      20              : }
      21              : 
      22              : /* Encode one Unicode code point as UTF-8.  Returns bytes written (1-4). */
      23           66 : static int utf8_encode(uint32_t cp, char *out) {
      24           66 :     if (cp < 0x80) {
      25            0 :         out[0] = (char)cp;
      26            0 :         return 1;
      27              :     }
      28           66 :     if (cp < 0x800) {
      29           22 :         out[0] = (char)(0xC0 | (cp >> 6));
      30           22 :         out[1] = (char)(0x80 | (cp & 0x3F));
      31           22 :         return 2;
      32              :     }
      33           44 :     if (cp < 0x10000) {
      34           22 :         out[0] = (char)(0xE0 | (cp >> 12));
      35           22 :         out[1] = (char)(0x80 | ((cp >> 6) & 0x3F));
      36           22 :         out[2] = (char)(0x80 | (cp & 0x3F));
      37           22 :         return 3;
      38              :     }
      39           22 :     out[0] = (char)(0xF0 | (cp >> 18));
      40           22 :     out[1] = (char)(0x80 | ((cp >> 12) & 0x3F));
      41           22 :     out[2] = (char)(0x80 | ((cp >> 6) & 0x3F));
      42           22 :     out[3] = (char)(0x80 | (cp & 0x3F));
      43           22 :     return 4;
      44              : }
      45              : 
      46          205 : char *imap_utf7_decode(const char *s) {
      47          205 :     if (!s) return NULL;
      48          205 :     size_t len = strlen(s);
      49              : 
      50              :     /* Worst case: each input byte expands to at most 4 UTF-8 bytes */
      51          205 :     char *out = malloc(len * 4 + 1);
      52          205 :     if (!out) return NULL;
      53          205 :     char *dst = out;
      54              : 
      55          205 :     const char *p = s;
      56         2057 :     while (*p) {
      57         1852 :         if (*p != '&') {
      58         1764 :             *dst++ = *p++;
      59         1786 :             continue;
      60              :         }
      61           88 :         p++; /* skip '&' */
      62              : 
      63           88 :         if (*p == '-') {
      64              :             /* "&-" is a literal '&' */
      65           22 :             *dst++ = '&';
      66           22 :             p++;
      67           22 :             continue;
      68              :         }
      69              : 
      70              :         /* Decode modified-base64 run into raw bytes (UTF-16BE) */
      71              :         uint8_t bytes[256];
      72           66 :         int byte_cnt = 0;
      73           66 :         int bits = 0, bit_cnt = 0;
      74              : 
      75          330 :         while (*p && *p != '-') {
      76          264 :             int v = mod64_value(*p++);
      77          264 :             if (v < 0) break;
      78          264 :             bits = (bits << 6) | v;
      79          264 :             bit_cnt += 6;
      80          264 :             if (bit_cnt >= 8) {
      81          176 :                 bit_cnt -= 8;
      82          176 :                 if (byte_cnt < (int)sizeof(bytes))
      83          176 :                     bytes[byte_cnt++] = (uint8_t)((unsigned)bits >> bit_cnt);
      84          176 :                 bits &= (1 << bit_cnt) - 1;
      85              :             }
      86              :         }
      87           66 :         if (*p == '-') p++;
      88              : 
      89              :         /* Interpret raw bytes as UTF-16BE, emit as UTF-8 */
      90          132 :         for (int i = 0; i + 1 < byte_cnt; i += 2) {
      91           66 :             uint16_t unit = ((uint16_t)bytes[i] << 8) | bytes[i + 1];
      92              :             uint32_t cp;
      93              : 
      94           88 :             if (unit >= 0xD800 && unit <= 0xDBFF && i + 3 < byte_cnt) {
      95              :                 /* High surrogate โ€” pair with following low surrogate */
      96           22 :                 uint16_t low = ((uint16_t)bytes[i + 2] << 8) | bytes[i + 3];
      97           22 :                 if (low >= 0xDC00 && low <= 0xDFFF) {
      98           22 :                     cp = 0x10000u
      99           22 :                          + ((uint32_t)(unit - 0xD800) << 10)
     100           22 :                          + (uint32_t)(low - 0xDC00);
     101           22 :                     i += 2;
     102              :                 } else {
     103            0 :                     cp = unit; /* unpaired surrogate โ€” pass through */
     104              :                 }
     105              :             } else {
     106           44 :                 cp = unit;
     107              :             }
     108              : 
     109              :             char utf8[4];
     110           66 :             int n = utf8_encode(cp, utf8);
     111           66 :             memcpy(dst, utf8, (size_t)n);
     112           66 :             dst += n;
     113              :         }
     114              :     }
     115          205 :     *dst = '\0';
     116          205 :     return out;
     117              : }
     118              : 
     119            8 : int imap_uid_set_expand(const char *set, char (**uids_out)[17], int *count_out) {
     120            8 :     *uids_out  = NULL;
     121            8 :     *count_out = 0;
     122            8 :     if (!set || !*set) return 0;
     123              : 
     124            8 :     int cap = 32, cnt = 0;
     125            8 :     char (*uids)[17] = NULL;
     126              : 
     127            8 :     const char *p = set;
     128           24 :     while (*p) {
     129           16 :         while (*p == ' ' || *p == '\t') p++;
     130           16 :         if (!*p) break;
     131              : 
     132              :         char *end;
     133           16 :         unsigned long lo = strtoul(p, &end, 10);
     134           16 :         if (end == p) break;   /* not a number โ€” stop */
     135           16 :         p = end;
     136              : 
     137           16 :         unsigned long hi = lo;
     138           16 :         if (*p == ':') {
     139            8 :             p++;
     140            8 :             hi = strtoul(p, &end, 10);
     141            8 :             if (end == p) hi = lo;  /* malformed range */
     142            8 :             else p = end;
     143              :         }
     144              : 
     145           48 :         for (unsigned long uid = lo; uid <= hi; uid++) {
     146           32 :             if (!uids) {
     147            8 :                 uids = malloc((size_t)cap * sizeof(char[17]));
     148            8 :                 if (!uids) return -1;
     149              :             }
     150           32 :             if (cnt == cap) {
     151            0 :                 cap *= 2;
     152            0 :                 char (*tmp)[17] = realloc(uids, (size_t)cap * sizeof(char[17]));
     153            0 :                 if (!tmp) { free(uids); return -1; }
     154            0 :                 uids = tmp;
     155              :             }
     156           32 :             snprintf(uids[cnt++], 17, "%016u", (unsigned)uid);
     157              :         }
     158              : 
     159           16 :         if (*p == ',') p++;
     160              :     }
     161              : 
     162            8 :     *uids_out  = uids;
     163            8 :     *count_out = cnt;
     164            8 :     return 0;
     165              : }
     166              : 
     167          348 : char *imap_utf7_encode(const char *s) {
     168          348 :     if (!s) return NULL;
     169          348 :     size_t len = strlen(s);
     170              :     /* Upper bound: every input byte can expand to at most 8 output chars. */
     171          348 :     char *out = malloc(len * 8 + 4);
     172          348 :     if (!out) return NULL;
     173          348 :     char *dst = out;
     174              : 
     175              :     static const char mod64[] =
     176              :         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
     177              : 
     178          348 :     const unsigned char *p = (const unsigned char *)s;
     179         2826 :     while (*p) {
     180         2478 :         if (*p >= 0x20 && *p <= 0x7E && *p != '&') {
     181              :             /* Printable ASCII (except '&'): pass through */
     182         2399 :             *dst++ = (char)*p++;
     183           79 :         } else if (*p == '&') {
     184              :             /* '&' is escaped as "&-" */
     185           20 :             *dst++ = '&';
     186           20 :             *dst++ = '-';
     187           20 :             p++;
     188              :         } else {
     189              :             /* Non-ASCII run: encode as UTF-16BE in modified Base64 */
     190           59 :             *dst++ = '&';
     191           59 :             unsigned int bits = 0;
     192           59 :             int bit_cnt = 0;
     193              : 
     194          118 :             while (*p && !(*p >= 0x20 && *p <= 0x7E)) {
     195              :                 /* Decode one UTF-8 code point. */
     196              :                 uint32_t cp;
     197              :                 int seqlen;
     198           59 :                 if      (*p < 0x80) { cp = *p;         seqlen = 1; }
     199           59 :                 else if (*p < 0xC2) { cp = 0xFFFD;     seqlen = 1; }
     200           59 :                 else if (*p < 0xE0) { cp = *p & 0x1Fu; seqlen = 2; }
     201           40 :                 else if (*p < 0xF0) { cp = *p & 0x0Fu; seqlen = 3; }
     202           20 :                 else                { cp = *p & 0x07u; seqlen = 4; }
     203          178 :                 for (int i = 1; i < seqlen; i++) {
     204          119 :                     if ((p[i] & 0xC0) != 0x80) { seqlen = i; cp = 0xFFFD; break; }
     205          119 :                     cp = (cp << 6) | (p[i] & 0x3Fu);
     206              :                 }
     207           59 :                 p += seqlen;
     208              : 
     209              :                 /* Emit as UTF-16BE (BMP char or surrogate pair). */
     210              :                 uint16_t units[2];
     211              :                 int nunit;
     212           59 :                 if (cp <= 0xFFFFu) {
     213           39 :                     units[0] = (uint16_t)cp;
     214           39 :                     nunit = 1;
     215              :                 } else {
     216           20 :                     cp -= 0x10000u;
     217           20 :                     units[0] = (uint16_t)(0xD800u | (cp >> 10));
     218           20 :                     units[1] = (uint16_t)(0xDC00u | (cp & 0x3FFu));
     219           20 :                     nunit = 2;
     220              :                 }
     221              : 
     222              :                 /* Feed each byte of UTF-16BE into the Base64 bit stream. */
     223          138 :                 for (int j = 0; j < nunit; j++) {
     224           79 :                     uint8_t hi = (uint8_t)(units[j] >> 8);
     225           79 :                     uint8_t lo = (uint8_t)(units[j] & 0xFF);
     226              : 
     227           79 :                     bits = (bits << 8) | hi;
     228           79 :                     bit_cnt += 8;
     229          178 :                     while (bit_cnt >= 6) {
     230           99 :                         bit_cnt -= 6;
     231           99 :                         *dst++ = mod64[(bits >> bit_cnt) & 0x3F];
     232           99 :                         bits &= (1u << bit_cnt) - 1u;
     233              :                     }
     234              : 
     235           79 :                     bits = (bits << 8) | lo;
     236           79 :                     bit_cnt += 8;
     237          158 :                     while (bit_cnt >= 6) {
     238           79 :                         bit_cnt -= 6;
     239           79 :                         *dst++ = mod64[(bits >> bit_cnt) & 0x3F];
     240           79 :                         bits &= (1u << bit_cnt) - 1u;
     241              :                     }
     242              :                 }
     243              :             }
     244              :             /* Flush remaining bits (zero-padded to the next 6-bit boundary). */
     245           59 :             if (bit_cnt > 0)
     246           59 :                 *dst++ = mod64[(bits << (6 - bit_cnt)) & 0x3F];
     247              : 
     248           59 :             *dst++ = '-';
     249              :         }
     250              :     }
     251          348 :     *dst = '\0';
     252          348 :     return out;
     253              : }
        

Generated by: LCOV version 2.0-1