LCOV - code coverage report
Current view: top level - libemail/src/core - html_render.c (source / functions) Coverage Total Hit
Test: coverage-functional.info Lines: 91.9 % 347 319
Test Date: 2026-05-07 15:53:08 Functions: 100.0 % 30 30

            Line data    Source code
       1              : #include "html_render.h"
       2              : #include "html_parser.h"
       3              : #include "html_medium.h"
       4              : #include <stdlib.h>
       5              : #include <string.h>
       6              : #include <stdio.h>
       7              : #include <ctype.h>
       8              : #include <stdint.h>
       9              : 
      10              : /* ── List stack ──────────────────────────────────────────────────────── */
      11              : 
      12              : #define LIST_MAX 16
      13              : 
      14              : typedef struct { int is_ol; int cnt; } ListFrame;
      15              : 
      16              : /* ── Render state ────────────────────────────────────────────────────── */
      17              : 
      18              : typedef struct {
      19              :     char     *buf;
      20              :     size_t    len, cap;
      21              :     int       col;         /* visible column (ANSI not counted) */
      22              :     int       width;       /* wrap width; 0 = no wrap */
      23              :     int       ansi;        /* emit ANSI escapes? */
      24              :     int       bold;        /* depth counter */
      25              :     int       italic;
      26              :     int       uline;
      27              :     int       color_fg;    /* depth: foreground color set by parse_style */
      28              :     int       color_bg;    /* depth: background color set by parse_style */
      29              :     int       skip;        /* depth: no output (script/style) */
      30              :     int       pre;         /* depth: no wrap */
      31              :     int       pending_nl;  /* buffered newlines to emit: 0, 1, or 2 */
      32              :     int       bq;          /* blockquote depth */
      33              :     ListFrame lists[LIST_MAX];
      34              :     int       list_top;
      35              : } RS;
      36              : 
      37              : /* ── Buffer ──────────────────────────────────────────────────────────── */
      38              : 
      39        13812 : static void rs_push(RS *rs, char c) {
      40        13812 :     if (rs->len + 2 > rs->cap) {
      41           32 :         size_t nc = rs->cap ? rs->cap * 2 : 512;
      42           32 :         char *t = realloc(rs->buf, nc);
      43           32 :         if (!t) return;
      44           32 :         rs->buf = t; rs->cap = nc;
      45              :     }
      46        13812 :     rs->buf[rs->len++] = c;
      47        13812 :     rs->buf[rs->len]   = '\0';
      48              : }
      49         3820 : static void rs_str(RS *rs, const char *s) { for (; *s; s++) rs_push(rs, *s); }
      50         1664 : static void rs_write(RS *rs, const char *s, int n)
      51         8032 :     { for (int i = 0; i < n; i++) rs_push(rs, s[i]); }
      52              : 
      53              : /* ── UTF-8 helpers ───────────────────────────────────────────────────── */
      54              : 
      55         6560 : static uint32_t utf8_adv(const char **p) {
      56         6560 :     unsigned char c = (unsigned char)**p;
      57              :     uint32_t cp; int ex;
      58         6560 :     if      (c < 0x80) { cp = c;        ex = 0; }
      59           96 :     else if (c < 0xC0) { (*p)++; return 0xFFFD; }
      60           96 :     else if (c < 0xE0) { cp = c & 0x1F; ex = 1; }
      61           32 :     else if (c < 0xF0) { cp = c & 0x0F; ex = 2; }
      62            0 :     else               { cp = c & 0x07; ex = 3; }
      63         6560 :     (*p)++;
      64         6688 :     for (int i = 0; i < ex; i++) {
      65          128 :         if ((**p & 0xC0) != 0x80) return 0xFFFD;
      66          128 :         cp = (cp << 6) | (**p & 0x3F); (*p)++;
      67              :     }
      68         6560 :     return cp;
      69              : }
      70              : 
      71           32 : static int str_vis_width(const char *s) {
      72           32 :     int w = 0;
      73          352 :     while (*s) {
      74          320 :         if ((unsigned char)*s == 0x1B && s[1] == '[') {
      75            0 :             s += 2; while (*s && *s != 'm') s++; if (*s) s++;
      76            0 :             continue;
      77              :         }
      78          320 :         w += html_medium_char_width(utf8_adv(&s));
      79              :     }
      80           32 :     return w;
      81              : }
      82              : 
      83              : /* ── Newline / prefix management ─────────────────────────────────────── */
      84              : 
      85         1408 : static void emit_bq_prefix(RS *rs) {
      86         1408 :     if (rs->col == 0 && rs->bq > 0) {
      87           64 :         for (int i = 0; i < rs->bq; i++) { rs_push(rs, '>'); rs_push(rs, ' '); }
      88           32 :         rs->col = rs->bq * 2;
      89              :     }
      90         1408 : }
      91              : 
      92         1024 : static void flush_nl(RS *rs) {
      93         1024 :     if (!rs->pending_nl) return;
      94              :     /* Count consecutive newlines already at the end of the buffer so we
      95              :      * never accumulate more than pending_nl in a row. */
      96          384 :     int trailing = 0;
      97          384 :     for (int i = (int)rs->len - 1; i >= 0 && rs->buf[i] == '\n'; i--)
      98            0 :         trailing++;
      99          864 :     for (int i = trailing; i < rs->pending_nl; i++) rs_push(rs, '\n');
     100          384 :     rs->col = 0;
     101          384 :     rs->pending_nl = 0;
     102          384 :     emit_bq_prefix(rs);
     103              : }
     104              : 
     105          928 : static void req_nl(RS *rs, int n) {
     106          928 :     if (rs->pending_nl < n) rs->pending_nl = n;
     107          928 : }
     108          384 : static void block_open(RS *rs)  { if (rs->len > 0) req_nl(rs, 1); }
     109          224 : static void block_close(RS *rs) { req_nl(rs, 1); }
     110           96 : static void para_open(RS *rs)   { if (rs->len > 0) req_nl(rs, 2); }
     111           96 : static void para_close(RS *rs)  { req_nl(rs, 2); }
     112              : 
     113              : /* ── ANSI helpers ────────────────────────────────────────────────────── */
     114              : 
     115          666 : static void esc(RS *rs, const char *e) { if (rs->ansi) rs_str(rs, e); }
     116           64 : static void open_bold(RS *rs)    { if (rs->bold++   == 0) esc(rs, "\033[1m");  }
     117           86 : static void close_bold(RS *rs)   { if (--rs->bold   == 0) esc(rs, "\033[22m"); }
     118           32 : static void open_italic(RS *rs)  { if (rs->italic++ == 0) esc(rs, "\033[3m");  }
     119           54 : static void close_italic(RS *rs) { if (--rs->italic == 0) esc(rs, "\033[23m"); }
     120           32 : static void open_uline(RS *rs)   { if (rs->uline++  == 0) esc(rs, "\033[4m");  }
     121           54 : static void close_uline(RS *rs)  { if (--rs->uline  == 0) esc(rs, "\033[24m"); }
     122              : 
     123              : /* ── Inline CSS ──────────────────────────────────────────────────────── */
     124              : 
     125           66 : static int hex_val(char c) {
     126           66 :     if (c>='0'&&c<='9') return c-'0';
     127            0 :     if (c>='a'&&c<='f') return c-'a'+10;
     128            0 :     if (c>='A'&&c<='F') return c-'A'+10;
     129            0 :     return 0;
     130              : }
     131              : static const struct { const char *name; int r,g,b; } CSS_COLORS[] = {
     132              :     {"black",0,0,0},{"silver",192,192,192},{"gray",128,128,128},
     133              :     {"white",255,255,255},{"maroon",128,0,0},{"red",255,0,0},
     134              :     {"purple",128,0,128},{"fuchsia",255,0,255},{"green",0,128,0},
     135              :     {"lime",0,255,0},{"olive",128,128,0},{"yellow",255,255,0},
     136              :     {"navy",0,0,128},{"blue",0,0,255},{"teal",0,128,128},{"aqua",0,255,255},
     137              :     {NULL,0,0,0}
     138              : };
     139           66 : static void apply_color(RS *rs, const char *v, int fg) {
     140          110 :     if (!rs->ansi) return;
     141              :     /* Background colors are never emitted: they break dark-theme terminals
     142              :      * and produce unreadable combinations when the email author's palette
     143              :      * does not match the user's terminal theme. */
     144           66 :     if (!fg) return;
     145           44 :     int r=-1,g=-1,b=-1;
     146           44 :     while (*v==' ') v++;
     147           44 :     if (*v=='#') {
     148           22 :         v++;
     149           22 :         size_t len=strlen(v); while(len>0&&v[len-1]==' ') len--;
     150           22 :         if (len==6) {
     151            0 :             r=hex_val(v[0])*16+hex_val(v[1]);
     152            0 :             g=hex_val(v[2])*16+hex_val(v[3]);
     153            0 :             b=hex_val(v[4])*16+hex_val(v[5]);
     154           22 :         } else if (len==3) {
     155           22 :             r=hex_val(v[0])*17; g=hex_val(v[1])*17; b=hex_val(v[2])*17;
     156              :         }
     157              :     } else {
     158          132 :         for (int i=0; CSS_COLORS[i].name; i++) {
     159          132 :             size_t nl=strlen(CSS_COLORS[i].name);
     160          132 :             if (strncasecmp(v, CSS_COLORS[i].name, nl)==0) {
     161           22 :                 r=CSS_COLORS[i].r; g=CSS_COLORS[i].g; b=CSS_COLORS[i].b; break;
     162              :             }
     163              :         }
     164              :     }
     165           44 :     if (r<0) return;
     166              :     /* Suppress dark foreground colors (max component < 160): they are
     167              :      * unreadable on dark-theme terminals and common in newsletter HTML
     168              :      * (e.g. #333, #666, gray).  Only bright colours are emitted. */
     169           44 :     int mx = r > g ? (r > b ? r : b) : (g > b ? g : b);
     170           44 :     if (mx < 160) return;
     171           22 :     char e[32]; snprintf(e,sizeof(e),"\033[38;2;%d;%d;%dm",r,g,b);
     172           22 :     rs_str(rs, e);
     173           22 :     rs->color_fg++;
     174              : }
     175           96 : static void parse_style(RS *rs, const char *style) {
     176           96 :     if (!style || !rs->ansi) return;
     177           66 :     const char *p = style;
     178          198 :     while (*p) {
     179          198 :         while (*p==' '||*p=='\t') p++;
     180         1496 :         const char *ps=p; while(*p&&*p!=':') p++;
     181          132 :         if (!*p) break;
     182          132 :         size_t pl=(size_t)(p-ps); while(pl>0&&ps[pl-1]==' ') pl--;
     183          264 :         p++; while(*p==' ') p++;
     184          836 :         const char *vs=p; while(*p&&*p!=';') p++;
     185          132 :         size_t vl=(size_t)(p-vs); if(*p==';') p++;
     186          132 :         char prop[32]={0},val[64]={0};
     187          132 :         if(pl<sizeof(prop)) memcpy(prop,ps,pl);
     188          132 :         if(vl>0&&vl<sizeof(val)) memcpy(val,vs,vl);
     189          132 :         if      (!strcasecmp(prop,"font-weight")&&!strncasecmp(val,"bold",4))      { esc(rs,"\033[1m"); rs->bold++; }
     190          110 :         else if (!strcasecmp(prop,"font-style")&&!strncasecmp(val,"italic",6))     { esc(rs,"\033[3m"); rs->italic++; }
     191           88 :         else if (!strcasecmp(prop,"text-decoration")&&!strncasecmp(val,"underline",9)){ esc(rs,"\033[4m"); rs->uline++; }
     192           66 :         else if (!strcasecmp(prop,"color"))           apply_color(rs,val,1);
     193           22 :         else if (!strcasecmp(prop,"background-color")) apply_color(rs,val,0);
     194              :     }
     195              : }
     196              : 
     197              : /* ── Text emission with word wrap ────────────────────────────────────── */
     198              : 
     199           32 : static void emit_wrap_nl(RS *rs) {
     200              :     /* remove trailing space before the newline */
     201           32 :     if (rs->len > 0 && rs->buf[rs->len-1] == ' ') {
     202            0 :         rs->len--; rs->buf[rs->len] = '\0'; rs->col--;
     203              :     }
     204           32 :     rs_push(rs, '\n');
     205           32 :     rs->col = 0;
     206           32 :     emit_bq_prefix(rs);
     207           32 : }
     208              : 
     209          768 : static void emit_text(RS *rs, const char *text) {
     210          768 :     if (!text || !*text || rs->skip) return;
     211          768 :     flush_nl(rs);
     212          768 :     emit_bq_prefix(rs);
     213              : 
     214          768 :     if (rs->pre) {
     215          544 :         for (const char *p = text; *p; ) {
     216          512 :             if (*p == '\r') { p++; continue; }
     217          512 :             if (*p == '\n') {
     218            0 :                 rs_push(rs, '\n'); rs->col = 0;
     219            0 :                 emit_bq_prefix(rs);
     220            0 :                 p++; continue;
     221              :             }
     222          512 :             const char *s = p;
     223          512 :             rs->col += html_medium_char_width(utf8_adv(&p));
     224          512 :             rs_write(rs, s, (int)(p - s));
     225              :         }
     226           32 :         return;
     227              :     }
     228              : 
     229              :     /* Normal mode: word-wrap */
     230         2624 :     for (const char *p = text; *p; ) {
     231         1888 :         if (isspace((unsigned char)*p)) {
     232              :             /* Collapse whitespace; only emit space if not at line start */
     233          736 :             int at_start = (rs->col <= rs->bq * 2);
     234          736 :             int already_space = (rs->len > 0 && rs->buf[rs->len-1] == ' ');
     235          736 :             if (!at_start && !already_space) {
     236          704 :                 rs_push(rs, ' ');
     237          704 :                 rs->col++;
     238              :             }
     239         1504 :             while (*p && isspace((unsigned char)*p)) p++;
     240          736 :             continue;
     241              :         }
     242              : 
     243              :         /* Collect one word */
     244         1152 :         const char *ws = p;
     245         1152 :         int ww = 0;
     246         6880 :         while (*p && !isspace((unsigned char)*p)) {
     247         5728 :             const char *q = p;
     248         5728 :             ww += html_medium_char_width(utf8_adv(&p));
     249              :             (void)q;
     250              :         }
     251         1152 :         int wlen = (int)(p - ws);
     252              : 
     253              :         /* URL tokens (http://, https://, ftp://, mailto:) are always placed
     254              :          * on their own line so terminal URL-recognition works reliably.
     255              :          * They are never broken regardless of width. */
     256         1504 :         int is_url = (wlen >= 6) && (
     257          352 :             strncmp(ws, "http://",  7) == 0 ||
     258          352 :             strncmp(ws, "https://", 8) == 0 ||
     259          320 :             strncmp(ws, "ftp://",   6) == 0 ||
     260          320 :             strncmp(ws, "mailto:",  7) == 0);
     261              : 
     262         1152 :         if (is_url) {
     263           32 :             if (rs->col > rs->bq * 2) emit_wrap_nl(rs);  /* start own line */
     264           32 :             esc(rs, "\033[34m");        /* blue */
     265           32 :             rs_write(rs, ws, wlen);
     266           32 :             esc(rs, "\033[39m");        /* reset fg */
     267           32 :             rs->col += ww;
     268           32 :             rs_push(rs, '\n');          /* trailing newline: next content fresh line */
     269           32 :             rs->col = 0;
     270           32 :             emit_bq_prefix(rs);
     271           32 :             continue;
     272              :         }
     273              : 
     274              :         /* Wrap if needed (never wrap an otherwise-empty line) */
     275         1120 :         if (rs->width > 0 && rs->col > rs->bq * 2 && rs->col + ww > rs->width)
     276            0 :             emit_wrap_nl(rs);
     277              : 
     278         1120 :         rs_write(rs, ws, wlen);
     279         1120 :         rs->col += ww;
     280              :     }
     281              : }
     282              : 
     283              : /* ── Whitespace helpers ──────────────────────────────────────────────── */
     284              : 
     285              : /** Returns 1 if s contains only ASCII whitespace, U+00A0 nbsp, U+200C zwnj,
     286              :  *  U+200D zwj, or U+00AD soft-hyphen — i.e. invisible/non-printing content. */
     287           32 : static int is_blank_str(const char *s) {
     288           32 :     const unsigned char *p = (const unsigned char *)s;
     289           32 :     while (*p) {
     290           32 :         if (*p <= ' ')                                    { p++;   continue; }
     291           32 :         if (p[0]==0xC2 && p[1]==0xA0)                    { p+=2;  continue; } /* nbsp  */
     292           32 :         if (p[0]==0xC2 && p[1]==0xAD)                    { p+=2;  continue; } /* shy   */
     293           32 :         if (p[0]==0xE2 && p[1]==0x80 && p[2]==0x8C)      { p+=3;  continue; } /* zwnj  */
     294           32 :         if (p[0]==0xE2 && p[1]==0x80 && p[2]==0x8D)      { p+=3;  continue; } /* zwj   */
     295           32 :         return 0;
     296              :     }
     297            0 :     return 1;
     298              : }
     299              : 
     300              : /** Collapses runs of >1 consecutive blank lines to exactly one blank line.
     301              :  *  Also trims trailing ASCII/nbsp/zwnj whitespace from each line.
     302              :  *  Preserves up to one trailing blank line.
     303              :  *  Takes ownership of s (frees it); returns a new heap string (or s on OOM). */
     304           32 : static char *compact_lines(char *s) {
     305           32 :     if (!s) return s;
     306           32 :     size_t n = strlen(s);
     307           32 :     char *out = malloc(n + 1);
     308           32 :     if (!out) return s;
     309              : 
     310           32 :     const unsigned char *p = (const unsigned char *)s;
     311           32 :     char *q = out;
     312           32 :     int blank_pending = 0;  /* at most 1: whether a blank line is pending */
     313           32 :     int have_content  = 0;
     314              : 
     315          598 :     while (*p) {
     316          566 :         const unsigned char *ls = p;
     317        13834 :         while (*p && *p != '\n') p++;
     318          566 :         int had_nl = (*p == '\n');
     319          566 :         const unsigned char *le = p;
     320          566 :         if (had_nl) p++;
     321              : 
     322              :         /* Trim trailing invisible chars (ASCII ws, nbsp C2A0, shy C2AD, zwnj/zwj E2808C/8D) */
     323          598 :         while (le > ls) {
     324          502 :             if (le[-1] <= ' ')                                              { le--;   continue; }
     325          470 :             if (le>=ls+2 && le[-2]==0xC2 && (le[-1]==0xA0||le[-1]==0xAD)) { le-=2;  continue; }
     326          470 :             if (le>=ls+3 && le[-3]==0xE2 && le[-2]==0x80 &&
     327            0 :                 (le[-1]==0x8C||le[-1]==0x8D))                              { le-=3;  continue; }
     328          470 :             break;
     329              :         }
     330              : 
     331          566 :         if (le == ls) {  /* blank line */
     332           96 :             if (had_nl) blank_pending = 1;
     333              :         } else {         /* non-blank line */
     334          470 :             if (blank_pending && have_content) *q++ = '\n';
     335          470 :             blank_pending = 0;
     336          470 :             have_content = 1;
     337          470 :             memcpy(q, ls, (size_t)(le - ls));
     338          470 :             q += (size_t)(le - ls);
     339          470 :             if (had_nl) *q++ = '\n';
     340              :         }
     341              :     }
     342              :     /* Preserve at most one trailing blank line */
     343           32 :     if (blank_pending) *q++ = '\n';
     344              : 
     345           32 :     *q = '\0';
     346           32 :     free(s);
     347           32 :     return out;
     348              : }
     349              : 
     350              : /* ── Tag open / close ────────────────────────────────────────────────── */
     351              : 
     352              : static void traverse(RS *rs, const HtmlNode *node);
     353              : 
     354          864 : static void tag_open(RS *rs, const HtmlNode *node) {
     355          864 :     if (!node->tag) return;
     356          864 :     const char *t = node->tag;
     357              : 
     358              :     /* Inline styles (applied before tag-specific behavior) */
     359          864 :     const char *style = html_attr_get(node, "style");
     360          864 :     if (style) parse_style(rs, style);
     361              : 
     362          864 :     if      (!strcmp(t,"b")||!strcmp(t,"strong"))  open_bold(rs);
     363          832 :     else if (!strcmp(t,"i")||!strcmp(t,"em"))       open_italic(rs);
     364          800 :     else if (!strcmp(t,"u"))                        open_uline(rs);
     365          768 :     else if (!strcmp(t,"s")||!strcmp(t,"del")||!strcmp(t,"strike")) esc(rs,"\033[9m");
     366          704 :     else if (!strcmp(t,"br"))                       req_nl(rs, 1);
     367          704 :     else if (!strcmp(t,"hr")) {
     368           32 :         block_open(rs); flush_nl(rs); emit_bq_prefix(rs);
     369           32 :         int w = rs->width > 0 ? rs->width - rs->col : 20;
     370         2584 :         for (int i = 0; i < w; i++) rs_push(rs, '-');
     371           32 :         rs->col += w;
     372           32 :         block_close(rs);
     373              :     }
     374          672 :     else if (!strcmp(t,"p"))                        para_open(rs);
     375          608 :     else if (!strcmp(t,"div")||!strcmp(t,"article")||!strcmp(t,"section")||
     376          576 :              !strcmp(t,"main")||!strcmp(t,"header")||!strcmp(t,"footer")||
     377          608 :              !strcmp(t,"nav")||!strcmp(t,"aside"))  block_open(rs);
     378          576 :     else if (t[0]=='h' && t[1]>='1' && t[1]<='6' && !t[2]) {
     379           32 :         para_open(rs); open_bold(rs);
     380              :     }
     381          544 :     else if (!strcmp(t,"ul")) {
     382           32 :         if (rs->list_top < LIST_MAX) rs->lists[rs->list_top++] = (ListFrame){0, 0};
     383           32 :         block_open(rs);
     384              :     }
     385          512 :     else if (!strcmp(t,"ol")) {
     386           32 :         if (rs->list_top < LIST_MAX) rs->lists[rs->list_top++] = (ListFrame){1, 0};
     387           32 :         block_open(rs);
     388              :     }
     389          480 :     else if (!strcmp(t,"li")) {
     390          128 :         block_open(rs);
     391          128 :         flush_nl(rs);
     392          128 :         emit_bq_prefix(rs);
     393          128 :         if (rs->list_top > 0 && rs->lists[rs->list_top-1].is_ol) {
     394           64 :             rs->lists[rs->list_top-1].cnt++;
     395              :             char buf[16];
     396           64 :             int n = snprintf(buf, sizeof(buf), "%d. ", rs->lists[rs->list_top-1].cnt);
     397           64 :             rs_str(rs, buf);
     398           64 :             rs->col += n;
     399              :         } else {
     400              :             /* U+2022 BULLET: UTF-8 E2 80 A2 */
     401           64 :             rs_push(rs,(char)0xE2); rs_push(rs,(char)0x80); rs_push(rs,(char)0xA2);
     402           64 :             rs_push(rs, ' ');
     403           64 :             rs->col += 2; /* bullet=1 col, space=1 col */
     404              :         }
     405              :     }
     406          352 :     else if (!strcmp(t,"blockquote")) {
     407           32 :         block_open(rs);
     408           32 :         rs->bq++;
     409              :         /* if something was already on the line, start fresh */
     410           32 :         if (rs->col > 0) { flush_nl(rs); } else emit_bq_prefix(rs);
     411              :     }
     412          320 :     else if (!strcmp(t,"pre"))    { block_open(rs); rs->pre++; }
     413          288 :     else if (!strcmp(t,"code"))   { /* inline: no special formatting */ }
     414          288 :     else if (!strcmp(t,"img")) {
     415           32 :         const char *alt = html_attr_get(node, "alt");
     416           32 :         if (alt && *alt && !is_blank_str(alt)) {
     417           32 :             flush_nl(rs); emit_bq_prefix(rs);
     418           32 :             rs_push(rs, '[');
     419           32 :             rs_str(rs, alt);
     420           32 :             rs_push(rs, ']');
     421           32 :             rs->col += 2 + str_vis_width(alt);
     422              :         }
     423              :     }
     424          256 :     else if (!strcmp(t,"a")) {
     425           32 :         const char *href = html_attr_get(node, "href");
     426           32 :         if (href && *href && href[0] != '#' && strncmp(href, "javascript:", 11) != 0)
     427           32 :             if (rs->ansi) { esc(rs, "\033[34m"); rs->color_fg++; }
     428              :     }
     429          224 :     else if (!strcmp(t,"td")||!strcmp(t,"th")) {
     430           64 :         if (rs->col > rs->bq * 2) { rs_push(rs, '\t'); rs->col++; }
     431              :     }
     432          160 :     else if (!strcmp(t,"tr")||!strcmp(t,"table")) block_open(rs);
     433           96 :     else if (!strcmp(t,"script")||!strcmp(t,"style")) rs->skip++;
     434           96 :     else if (!strcmp(t,"textarea")||!strcmp(t,"button")) block_open(rs);
     435           96 :     else if (!strcmp(t,"input")) {
     436            0 :         const char *val = html_attr_get(node, "value");
     437            0 :         if (val && *val) {
     438            0 :             flush_nl(rs); emit_bq_prefix(rs);
     439            0 :             rs_str(rs, val);
     440            0 :             rs->col += str_vis_width(val);
     441              :         }
     442              :     }
     443              :     /* __root__ and unknown tags: traverse children unchanged */
     444              : }
     445              : 
     446          864 : static void tag_close(RS *rs, const HtmlNode *node) {
     447          864 :     if (!node->tag) return;
     448          864 :     const char *t = node->tag;
     449              : 
     450          864 :     if      (!strcmp(t,"b")||!strcmp(t,"strong"))  close_bold(rs);
     451          832 :     else if (!strcmp(t,"i")||!strcmp(t,"em"))       close_italic(rs);
     452          800 :     else if (!strcmp(t,"u"))                        close_uline(rs);
     453          768 :     else if (!strcmp(t,"s")||!strcmp(t,"del")||!strcmp(t,"strike")) esc(rs,"\033[29m");
     454          704 :     else if (!strcmp(t,"p"))                        para_close(rs);
     455          640 :     else if (!strcmp(t,"div")||!strcmp(t,"article")||!strcmp(t,"section")||
     456          608 :              !strcmp(t,"main")||!strcmp(t,"header")||!strcmp(t,"footer")||
     457          640 :              !strcmp(t,"nav")||!strcmp(t,"aside"))  block_close(rs);
     458          608 :     else if (t[0]=='h' && t[1]>='1' && t[1]<='6' && !t[2]) {
     459           32 :         close_bold(rs); para_close(rs);
     460              :     }
     461          576 :     else if (!strcmp(t,"ul")||!strcmp(t,"ol")) {
     462           64 :         if (rs->list_top > 0) rs->list_top--;
     463           64 :         block_close(rs);
     464              :     }
     465          512 :     else if (!strcmp(t,"li"))  req_nl(rs, 1);
     466          384 :     else if (!strcmp(t,"blockquote")) {
     467           32 :         if (rs->bq > 0) rs->bq--;
     468           32 :         block_close(rs);
     469              :     }
     470          352 :     else if (!strcmp(t,"pre"))  { if (rs->pre>0) rs->pre--; block_close(rs); }
     471          320 :     else if (!strcmp(t,"a")) {
     472           32 :         const char *href = html_attr_get(node, "href");
     473           32 :         if (href && *href && href[0] != '#' &&
     474           32 :             strncmp(href, "javascript:", 11) != 0) {
     475           32 :             if (rs->ansi && rs->color_fg > 0) { esc(rs, "\033[39m"); rs->color_fg--; }
     476           32 :             esc(rs, "\033[34m");
     477           32 :             emit_text(rs, href);
     478           32 :             esc(rs, "\033[39m");
     479              :         }
     480              :     }
     481          288 :     else if (!strcmp(t,"script")||!strcmp(t,"style")) { if (rs->skip>0) rs->skip--; }
     482          288 :     else if (!strcmp(t,"tr"))   req_nl(rs, 1);
     483          256 :     else if (!strcmp(t,"table")) block_close(rs);
     484          224 :     else if (!strcmp(t,"textarea")||!strcmp(t,"button")) block_close(rs);
     485              : }
     486              : 
     487              : /* ── Tree traversal ──────────────────────────────────────────────────── */
     488              : 
     489         1600 : static void traverse(RS *rs, const HtmlNode *node) {
     490         1600 :     if (!node) return;
     491         1600 :     if (node->type == HTML_NODE_TEXT) {
     492          736 :         emit_text(rs, node->text);
     493          736 :         return;
     494              :     }
     495              :     /* Snapshot style depth so parse_style side-effects from inline
     496              :      * style= attributes are balanced even when tag_close has no handler
     497              :      * for this tag (e.g. <a>, <span>, <td> with style="…"). */
     498          864 :     int bold_sv     = rs->bold;
     499          864 :     int italic_sv   = rs->italic;
     500          864 :     int uline_sv    = rs->uline;
     501          864 :     int color_fg_sv = rs->color_fg;
     502          864 :     int color_bg_sv = rs->color_bg;
     503          864 :     tag_open(rs, node);
     504         2400 :     for (const HtmlNode *c = node->first_child; c; c = c->next_sibling)
     505         1536 :         traverse(rs, c);
     506          864 :     tag_close(rs, node);
     507              :     /* Close any depth-tracked style that tag_close left open */
     508          886 :     while (rs->uline    > uline_sv)    close_uline(rs);
     509          886 :     while (rs->italic   > italic_sv)   close_italic(rs);
     510          886 :     while (rs->bold     > bold_sv)     close_bold(rs);
     511          864 :     if (rs->ansi) {
     512          594 :         if (rs->color_fg > color_fg_sv) { esc(rs, "\033[39m"); rs->color_fg = color_fg_sv; }
     513          594 :         if (rs->color_bg > color_bg_sv) { esc(rs, "\033[49m"); rs->color_bg = color_bg_sv; }
     514              :     }
     515              : }
     516              : 
     517              : /* ── Public API ──────────────────────────────────────────────────────── */
     518              : 
     519           32 : char *html_render(const char *html, int width, int ansi) {
     520           32 :     if (!html) return NULL;
     521              : 
     522           32 :     HtmlNode *root = html_parse(html);
     523           32 :     if (!root) {
     524            0 :         char *empty = malloc(1);
     525            0 :         if (empty) *empty = '\0';
     526            0 :         return empty;
     527              :     }
     528              : 
     529              :     RS rs;
     530           32 :     memset(&rs, 0, sizeof(rs));
     531           32 :     rs.width = width;
     532           32 :     rs.ansi  = ansi;
     533              : 
     534              :     /* Traverse root's children (root itself is synthetic __root__) */
     535           96 :     for (const HtmlNode *c = root->first_child; c; c = c->next_sibling)
     536           64 :         traverse(&rs, c);
     537              : 
     538              :     /* Flush trailing newlines */
     539           32 :     flush_nl(&rs);
     540              : 
     541           32 :     html_node_free(root);
     542              : 
     543           32 :     if (!rs.buf) {
     544            0 :         char *empty = malloc(1);
     545            0 :         if (empty) *empty = '\0';
     546            0 :         return empty;
     547              :     }
     548           32 :     return compact_lines(rs.buf);
     549              : }
        

Generated by: LCOV version 2.0-1