LCOV - code coverage report
Current view: top level - libemail/src/core - imap_util.c (source / functions) Coverage Total Hit
Test: coverage.info Lines: 66.1 % 127 84
Test Date: 2026-04-15 21:12:52 Functions: 100.0 % 4 4

            Line data    Source code
       1              : #include "imap_util.h"
       2              : #include <stdlib.h>
       3              : #include <string.h>
       4              : #include <stdint.h>
       5              : 
       6              : /**
       7              :  * @file imap_util.c
       8              :  * @brief IMAP Modified UTF-7 decoder (RFC 3501 §5.1.3).
       9              :  */
      10              : 
      11              : /* Modified base64 alphabet: A-Z(0-25), a-z(26-51), 0-9(52-61), +(62), ,(63) */
      12           61 : static int mod64_value(char c) {
      13           61 :     if (c >= 'A' && c <= 'Z') return c - 'A';
      14           16 :     if (c >= 'a' && c <= 'z') return c - 'a' + 26;
      15            8 :     if (c >= '0' && c <= '9') return c - '0' + 52;
      16            3 :     if (c == '+') return 62;
      17            2 :     if (c == ',') return 63;
      18            1 :     return -1;
      19              : }
      20              : 
      21              : /* Encode one Unicode code point as UTF-8.  Returns bytes written (1-4). */
      22           19 : static int utf8_encode(uint32_t cp, char *out) {
      23           19 :     if (cp < 0x80) {
      24            2 :         out[0] = (char)cp;
      25            2 :         return 1;
      26              :     }
      27           17 :     if (cp < 0x800) {
      28           13 :         out[0] = (char)(0xC0 | (cp >> 6));
      29           13 :         out[1] = (char)(0x80 | (cp & 0x3F));
      30           13 :         return 2;
      31              :     }
      32            4 :     if (cp < 0x10000) {
      33            3 :         out[0] = (char)(0xE0 | (cp >> 12));
      34            3 :         out[1] = (char)(0x80 | ((cp >> 6) & 0x3F));
      35            3 :         out[2] = (char)(0x80 | (cp & 0x3F));
      36            3 :         return 3;
      37              :     }
      38            1 :     out[0] = (char)(0xF0 | (cp >> 18));
      39            1 :     out[1] = (char)(0x80 | ((cp >> 12) & 0x3F));
      40            1 :     out[2] = (char)(0x80 | ((cp >> 6) & 0x3F));
      41            1 :     out[3] = (char)(0x80 | (cp & 0x3F));
      42            1 :     return 4;
      43              : }
      44              : 
      45           17 : char *imap_utf7_decode(const char *s) {
      46           17 :     if (!s) return NULL;
      47           16 :     size_t len = strlen(s);
      48              : 
      49              :     /* Worst case: each input byte expands to at most 4 UTF-8 bytes */
      50           16 :     char *out = malloc(len * 4 + 1);
      51           16 :     if (!out) return NULL;
      52           16 :     char *dst = out;
      53              : 
      54           16 :     const char *p = s;
      55          108 :     while (*p) {
      56           92 :         if (*p != '&') {
      57           72 :             *dst++ = *p++;
      58           72 :             continue;
      59              :         }
      60           20 :         p++; /* skip '&' */
      61              : 
      62           20 :         if (*p == '-') {
      63              :             /* "&-" is a literal '&' */
      64            1 :             *dst++ = '&';
      65            1 :             p++;
      66            1 :             continue;
      67              :         }
      68              : 
      69              :         /* Decode modified-base64 run into raw bytes (UTF-16BE) */
      70           19 :         uint8_t bytes[256];
      71           19 :         int byte_cnt = 0;
      72           19 :         int bits = 0, bit_cnt = 0;
      73              : 
      74           79 :         while (*p && *p != '-') {
      75           61 :             int v = mod64_value(*p++);
      76           61 :             if (v < 0) break;
      77           60 :             bits = (bits << 6) | v;
      78           60 :             bit_cnt += 6;
      79           60 :             if (bit_cnt >= 8) {
      80           40 :                 bit_cnt -= 8;
      81           40 :                 if (byte_cnt < (int)sizeof(bytes))
      82           40 :                     bytes[byte_cnt++] = (uint8_t)((unsigned)bits >> bit_cnt);
      83           40 :                 bits &= (1 << bit_cnt) - 1;
      84              :             }
      85              :         }
      86           19 :         if (*p == '-') p++;
      87              : 
      88              :         /* Interpret raw bytes as UTF-16BE, emit as UTF-8 */
      89           38 :         for (int i = 0; i + 1 < byte_cnt; i += 2) {
      90           19 :             uint16_t unit = ((uint16_t)bytes[i] << 8) | bytes[i + 1];
      91              :             uint32_t cp;
      92              : 
      93           21 :             if (unit >= 0xD800 && unit <= 0xDBFF && i + 3 < byte_cnt) {
      94              :                 /* High surrogate — pair with following low surrogate */
      95            2 :                 uint16_t low = ((uint16_t)bytes[i + 2] << 8) | bytes[i + 3];
      96            2 :                 if (low >= 0xDC00 && low <= 0xDFFF) {
      97            1 :                     cp = 0x10000u
      98            1 :                          + ((uint32_t)(unit - 0xD800) << 10)
      99            1 :                          + (uint32_t)(low - 0xDC00);
     100            1 :                     i += 2;
     101              :                 } else {
     102            1 :                     cp = unit; /* unpaired surrogate — pass through */
     103              :                 }
     104              :             } else {
     105           17 :                 cp = unit;
     106              :             }
     107              : 
     108           19 :             char utf8[4];
     109           19 :             int n = utf8_encode(cp, utf8);
     110           19 :             memcpy(dst, utf8, (size_t)n);
     111           19 :             dst += n;
     112              :         }
     113              :     }
     114           16 :     *dst = '\0';
     115           16 :     return out;
     116              : }
     117              : 
     118           49 : char *imap_utf7_encode(const char *s) {
     119           49 :     if (!s) return NULL;
     120           49 :     size_t len = strlen(s);
     121              :     /* Upper bound: every input byte can expand to at most 8 output chars. */
     122           49 :     char *out = malloc(len * 8 + 4);
     123           49 :     if (!out) return NULL;
     124           49 :     char *dst = out;
     125              : 
     126              :     static const char mod64[] =
     127              :         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
     128              : 
     129           49 :     const unsigned char *p = (const unsigned char *)s;
     130          350 :     while (*p) {
     131          301 :         if (*p >= 0x20 && *p <= 0x7E && *p != '&') {
     132              :             /* Printable ASCII (except '&'): pass through */
     133          301 :             *dst++ = (char)*p++;
     134            0 :         } else if (*p == '&') {
     135              :             /* '&' is escaped as "&-" */
     136            0 :             *dst++ = '&';
     137            0 :             *dst++ = '-';
     138            0 :             p++;
     139              :         } else {
     140              :             /* Non-ASCII run: encode as UTF-16BE in modified Base64 */
     141            0 :             *dst++ = '&';
     142            0 :             unsigned int bits = 0;
     143            0 :             int bit_cnt = 0;
     144              : 
     145            0 :             while (*p && !(*p >= 0x20 && *p <= 0x7E)) {
     146              :                 /* Decode one UTF-8 code point. */
     147              :                 uint32_t cp;
     148              :                 int seqlen;
     149            0 :                 if      (*p < 0x80) { cp = *p;         seqlen = 1; }
     150            0 :                 else if (*p < 0xC2) { cp = 0xFFFD;     seqlen = 1; }
     151            0 :                 else if (*p < 0xE0) { cp = *p & 0x1Fu; seqlen = 2; }
     152            0 :                 else if (*p < 0xF0) { cp = *p & 0x0Fu; seqlen = 3; }
     153            0 :                 else                { cp = *p & 0x07u; seqlen = 4; }
     154            0 :                 for (int i = 1; i < seqlen; i++) {
     155            0 :                     if ((p[i] & 0xC0) != 0x80) { seqlen = i; cp = 0xFFFD; break; }
     156            0 :                     cp = (cp << 6) | (p[i] & 0x3Fu);
     157              :                 }
     158            0 :                 p += seqlen;
     159              : 
     160              :                 /* Emit as UTF-16BE (BMP char or surrogate pair). */
     161            0 :                 uint16_t units[2];
     162              :                 int nunit;
     163            0 :                 if (cp <= 0xFFFFu) {
     164            0 :                     units[0] = (uint16_t)cp;
     165            0 :                     nunit = 1;
     166              :                 } else {
     167            0 :                     cp -= 0x10000u;
     168            0 :                     units[0] = (uint16_t)(0xD800u | (cp >> 10));
     169            0 :                     units[1] = (uint16_t)(0xDC00u | (cp & 0x3FFu));
     170            0 :                     nunit = 2;
     171              :                 }
     172              : 
     173              :                 /* Feed each byte of UTF-16BE into the Base64 bit stream. */
     174            0 :                 for (int j = 0; j < nunit; j++) {
     175            0 :                     uint8_t hi = (uint8_t)(units[j] >> 8);
     176            0 :                     uint8_t lo = (uint8_t)(units[j] & 0xFF);
     177              : 
     178            0 :                     bits = (bits << 8) | hi;
     179            0 :                     bit_cnt += 8;
     180            0 :                     while (bit_cnt >= 6) {
     181            0 :                         bit_cnt -= 6;
     182            0 :                         *dst++ = mod64[(bits >> bit_cnt) & 0x3F];
     183            0 :                         bits &= (1u << bit_cnt) - 1u;
     184              :                     }
     185              : 
     186            0 :                     bits = (bits << 8) | lo;
     187            0 :                     bit_cnt += 8;
     188            0 :                     while (bit_cnt >= 6) {
     189            0 :                         bit_cnt -= 6;
     190            0 :                         *dst++ = mod64[(bits >> bit_cnt) & 0x3F];
     191            0 :                         bits &= (1u << bit_cnt) - 1u;
     192              :                     }
     193              :                 }
     194              :             }
     195              :             /* Flush remaining bits (zero-padded to the next 6-bit boundary). */
     196            0 :             if (bit_cnt > 0)
     197            0 :                 *dst++ = mod64[(bits << (6 - bit_cnt)) & 0x3F];
     198              : 
     199            0 :             *dst++ = '-';
     200              :         }
     201              :     }
     202           49 :     *dst = '\0';
     203           49 :     return out;
     204              : }
        

Generated by: LCOV version 2.0-1