LCOV - code coverage report
Current view: top level - libemail/src/core - html_render.c (source / functions) Coverage Total Hit
Test: coverage.info Lines: 98.6 % 347 342
Test Date: 2026-05-07 15:53:07 Functions: 100.0 % 30 30

            Line data    Source code
       1              : #include "html_render.h"
       2              : #include "html_parser.h"
       3              : #include "html_medium.h"
       4              : #include <stdlib.h>
       5              : #include <string.h>
       6              : #include <stdio.h>
       7              : #include <ctype.h>
       8              : #include <stdint.h>
       9              : 
      10              : /* ── List stack ──────────────────────────────────────────────────────── */
      11              : 
      12              : #define LIST_MAX 16
      13              : 
      14              : typedef struct { int is_ol; int cnt; } ListFrame;
      15              : 
      16              : /* ── Render state ────────────────────────────────────────────────────── */
      17              : 
      18              : typedef struct {
      19              :     char     *buf;
      20              :     size_t    len, cap;
      21              :     int       col;         /* visible column (ANSI not counted) */
      22              :     int       width;       /* wrap width; 0 = no wrap */
      23              :     int       ansi;        /* emit ANSI escapes? */
      24              :     int       bold;        /* depth counter */
      25              :     int       italic;
      26              :     int       uline;
      27              :     int       color_fg;    /* depth: foreground color set by parse_style */
      28              :     int       color_bg;    /* depth: background color set by parse_style */
      29              :     int       skip;        /* depth: no output (script/style) */
      30              :     int       pre;         /* depth: no wrap */
      31              :     int       pending_nl;  /* buffered newlines to emit: 0, 1, or 2 */
      32              :     int       bq;          /* blockquote depth */
      33              :     ListFrame lists[LIST_MAX];
      34              :     int       list_top;
      35              : } RS;
      36              : 
      37              : /* ── Buffer ──────────────────────────────────────────────────────────── */
      38              : 
      39        17106 : static void rs_push(RS *rs, char c) {
      40        17106 :     if (rs->len + 2 > rs->cap) {
      41          192 :         size_t nc = rs->cap ? rs->cap * 2 : 512;
      42          192 :         char *t = realloc(rs->buf, nc);
      43          192 :         if (!t) return;
      44          192 :         rs->buf = t; rs->cap = nc;
      45              :     }
      46        17106 :     rs->buf[rs->len++] = c;
      47        17106 :     rs->buf[rs->len]   = '\0';
      48              : }
      49         5887 : static void rs_str(RS *rs, const char *s) { for (; *s; s++) rs_push(rs, *s); }
      50         1996 : static void rs_write(RS *rs, const char *s, int n)
      51         9650 :     { for (int i = 0; i < n; i++) rs_push(rs, s[i]); }
      52              : 
      53              : /* ── UTF-8 helpers ───────────────────────────────────────────────────── */
      54              : 
      55         7843 : static uint32_t utf8_adv(const char **p) {
      56         7843 :     unsigned char c = (unsigned char)**p;
      57              :     uint32_t cp; int ex;
      58         7843 :     if      (c < 0x80) { cp = c;        ex = 0; }
      59          104 :     else if (c < 0xC0) { (*p)++; return 0xFFFD; }
      60          104 :     else if (c < 0xE0) { cp = c & 0x1F; ex = 1; }
      61           36 :     else if (c < 0xF0) { cp = c & 0x0F; ex = 2; }
      62            1 :     else               { cp = c & 0x07; ex = 3; }
      63         7843 :     (*p)++;
      64         7984 :     for (int i = 0; i < ex; i++) {
      65          141 :         if ((**p & 0xC0) != 0x80) return 0xFFFD;
      66          141 :         cp = (cp << 6) | (**p & 0x3F); (*p)++;
      67              :     }
      68         7843 :     return cp;
      69              : }
      70              : 
      71           35 : static int str_vis_width(const char *s) {
      72           35 :     int w = 0;
      73          367 :     while (*s) {
      74          332 :         if ((unsigned char)*s == 0x1B && s[1] == '[') {
      75            4 :             s += 2; while (*s && *s != 'm') s++; if (*s) s++;
      76            2 :             continue;
      77              :         }
      78          330 :         w += html_medium_char_width(utf8_adv(&s));
      79              :     }
      80           35 :     return w;
      81              : }
      82              : 
      83              : /* ── Newline / prefix management ─────────────────────────────────────── */
      84              : 
      85         1754 : static void emit_bq_prefix(RS *rs) {
      86         1754 :     if (rs->col == 0 && rs->bq > 0) {
      87           70 :         for (int i = 0; i < rs->bq; i++) { rs_push(rs, '>'); rs_push(rs, ' '); }
      88           35 :         rs->col = rs->bq * 2;
      89              :     }
      90         1754 : }
      91              : 
      92         1446 : static void flush_nl(RS *rs) {
      93         1446 :     if (!rs->pending_nl) return;
      94              :     /* Count consecutive newlines already at the end of the buffer so we
      95              :      * never accumulate more than pending_nl in a row. */
      96          444 :     int trailing = 0;
      97          444 :     for (int i = (int)rs->len - 1; i >= 0 && rs->buf[i] == '\n'; i--)
      98            0 :         trailing++;
      99          994 :     for (int i = trailing; i < rs->pending_nl; i++) rs_push(rs, '\n');
     100          444 :     rs->col = 0;
     101          444 :     rs->pending_nl = 0;
     102          444 :     emit_bq_prefix(rs);
     103              : }
     104              : 
     105         1023 : static void req_nl(RS *rs, int n) {
     106         1023 :     if (rs->pending_nl < n) rs->pending_nl = n;
     107         1023 : }
     108          456 : static void block_open(RS *rs)  { if (rs->len > 0) req_nl(rs, 1); }
     109          279 : static void block_close(RS *rs) { req_nl(rs, 1); }
     110          106 : static void para_open(RS *rs)   { if (rs->len > 0) req_nl(rs, 2); }
     111          106 : static void para_close(RS *rs)  { req_nl(rs, 2); }
     112              : 
     113              : /* ── ANSI helpers ────────────────────────────────────────────────────── */
     114              : 
     115          958 : static void esc(RS *rs, const char *e) { if (rs->ansi) rs_str(rs, e); }
     116           94 : static void open_bold(RS *rs)    { if (rs->bold++   == 0) esc(rs, "\033[1m");  }
     117          129 : static void close_bold(RS *rs)   { if (--rs->bold   == 0) esc(rs, "\033[22m"); }
     118           49 : static void open_italic(RS *rs)  { if (rs->italic++ == 0) esc(rs, "\033[3m");  }
     119           80 : static void close_italic(RS *rs) { if (--rs->italic == 0) esc(rs, "\033[23m"); }
     120           42 : static void open_uline(RS *rs)   { if (rs->uline++  == 0) esc(rs, "\033[4m");  }
     121           78 : static void close_uline(RS *rs)  { if (--rs->uline  == 0) esc(rs, "\033[24m"); }
     122              : 
     123              : /* ── Inline CSS ──────────────────────────────────────────────────────── */
     124              : 
     125          279 : static int hex_val(char c) {
     126          279 :     if (c>='0'&&c<='9') return c-'0';
     127           38 :     if (c>='a'&&c<='f') return c-'a'+10;
     128           38 :     if (c>='A'&&c<='F') return c-'A'+10;
     129            6 :     return 0;
     130              : }
     131              : static const struct { const char *name; int r,g,b; } CSS_COLORS[] = {
     132              :     {"black",0,0,0},{"silver",192,192,192},{"gray",128,128,128},
     133              :     {"white",255,255,255},{"maroon",128,0,0},{"red",255,0,0},
     134              :     {"purple",128,0,128},{"fuchsia",255,0,255},{"green",0,128,0},
     135              :     {"lime",0,255,0},{"olive",128,128,0},{"yellow",255,255,0},
     136              :     {"navy",0,0,128},{"blue",0,0,255},{"teal",0,128,128},{"aqua",0,255,255},
     137              :     {NULL,0,0,0}
     138              : };
     139          149 : static void apply_color(RS *rs, const char *v, int fg) {
     140          242 :     if (!rs->ansi) return;
     141              :     /* Background colors are never emitted: they break dark-theme terminals
     142              :      * and produce unreadable combinations when the email author's palette
     143              :      * does not match the user's terminal theme. */
     144          149 :     if (!fg) return;
     145          105 :     int r=-1,g=-1,b=-1;
     146          105 :     while (*v==' ') v++;
     147          105 :     if (*v=='#') {
     148           64 :         v++;
     149           64 :         size_t len=strlen(v); while(len>0&&v[len-1]==' ') len--;
     150           64 :         if (len==6) {
     151           29 :             r=hex_val(v[0])*16+hex_val(v[1]);
     152           29 :             g=hex_val(v[2])*16+hex_val(v[3]);
     153           29 :             b=hex_val(v[4])*16+hex_val(v[5]);
     154           35 :         } else if (len==3) {
     155           35 :             r=hex_val(v[0])*17; g=hex_val(v[1])*17; b=hex_val(v[2])*17;
     156              :         }
     157              :     } else {
     158          279 :         for (int i=0; CSS_COLORS[i].name; i++) {
     159          279 :             size_t nl=strlen(CSS_COLORS[i].name);
     160          279 :             if (strncasecmp(v, CSS_COLORS[i].name, nl)==0) {
     161           41 :                 r=CSS_COLORS[i].r; g=CSS_COLORS[i].g; b=CSS_COLORS[i].b; break;
     162              :             }
     163              :         }
     164              :     }
     165          105 :     if (r<0) return;
     166              :     /* Suppress dark foreground colors (max component < 160): they are
     167              :      * unreadable on dark-theme terminals and common in newsletter HTML
     168              :      * (e.g. #333, #666, gray).  Only bright colours are emitted. */
     169          105 :     int mx = r > g ? (r > b ? r : b) : (g > b ? g : b);
     170          105 :     if (mx < 160) return;
     171           56 :     char e[32]; snprintf(e,sizeof(e),"\033[38;2;%d;%d;%dm",r,g,b);
     172           56 :     rs_str(rs, e);
     173           56 :     rs->color_fg++;
     174              : }
     175          191 : static void parse_style(RS *rs, const char *style) {
     176          191 :     if (!style || !rs->ansi) return;
     177          160 :     const char *p = style;
     178          411 :     while (*p) {
     179          317 :         while (*p==' '||*p=='\t') p++;
     180         2715 :         const char *ps=p; while(*p&&*p!=':') p++;
     181          251 :         if (!*p) break;
     182          251 :         size_t pl=(size_t)(p-ps); while(pl>0&&ps[pl-1]==' ') pl--;
     183          383 :         p++; while(*p==' ') p++;
     184         1643 :         const char *vs=p; while(*p&&*p!=';') p++;
     185          251 :         size_t vl=(size_t)(p-vs); if(*p==';') p++;
     186          251 :         char prop[32]={0},val[64]={0};
     187          251 :         if(pl<sizeof(prop)) memcpy(prop,ps,pl);
     188          251 :         if(vl>0&&vl<sizeof(val)) memcpy(val,vs,vl);
     189          251 :         if      (!strcasecmp(prop,"font-weight")&&!strncasecmp(val,"bold",4))      { esc(rs,"\033[1m"); rs->bold++; }
     190          216 :         else if (!strcasecmp(prop,"font-style")&&!strncasecmp(val,"italic",6))     { esc(rs,"\033[3m"); rs->italic++; }
     191          185 :         else if (!strcasecmp(prop,"text-decoration")&&!strncasecmp(val,"underline",9)){ esc(rs,"\033[4m"); rs->uline++; }
     192          149 :         else if (!strcasecmp(prop,"color"))           apply_color(rs,val,1);
     193           44 :         else if (!strcasecmp(prop,"background-color")) apply_color(rs,val,0);
     194              :     }
     195              : }
     196              : 
     197              : /* ── Text emission with word wrap ────────────────────────────────────── */
     198              : 
     199           44 : static void emit_wrap_nl(RS *rs) {
     200              :     /* remove trailing space before the newline */
     201           44 :     if (rs->len > 0 && rs->buf[rs->len-1] == ' ') {
     202            9 :         rs->len--; rs->buf[rs->len] = '\0'; rs->col--;
     203              :     }
     204           44 :     rs_push(rs, '\n');
     205           44 :     rs->col = 0;
     206           44 :     emit_bq_prefix(rs);
     207           44 : }
     208              : 
     209         1012 : static void emit_text(RS *rs, const char *text) {
     210         1012 :     if (!text || !*text || rs->skip) return;
     211         1012 :     flush_nl(rs);
     212         1012 :     emit_bq_prefix(rs);
     213              : 
     214         1012 :     if (rs->pre) {
     215          571 :         for (const char *p = text; *p; ) {
     216          537 :             if (*p == '\r') { p++; continue; }
     217          537 :             if (*p == '\n') {
     218            2 :                 rs_push(rs, '\n'); rs->col = 0;
     219            2 :                 emit_bq_prefix(rs);
     220            2 :                 p++; continue;
     221              :             }
     222          535 :             const char *s = p;
     223          535 :             rs->col += html_medium_char_width(utf8_adv(&p));
     224          535 :             rs_write(rs, s, (int)(p - s));
     225              :         }
     226           34 :         return;
     227              :     }
     228              : 
     229              :     /* Normal mode: word-wrap */
     230         3275 :     for (const char *p = text; *p; ) {
     231         2297 :         if (isspace((unsigned char)*p)) {
     232              :             /* Collapse whitespace; only emit space if not at line start */
     233          836 :             int at_start = (rs->col <= rs->bq * 2);
     234          836 :             int already_space = (rs->len > 0 && rs->buf[rs->len-1] == ' ');
     235          836 :             if (!at_start && !already_space) {
     236          797 :                 rs_push(rs, ' ');
     237          797 :                 rs->col++;
     238              :             }
     239         1704 :             while (*p && isspace((unsigned char)*p)) p++;
     240          836 :             continue;
     241              :         }
     242              : 
     243              :         /* Collect one word */
     244         1461 :         const char *ws = p;
     245         1461 :         int ww = 0;
     246         8439 :         while (*p && !isspace((unsigned char)*p)) {
     247         6978 :             const char *q = p;
     248         6978 :             ww += html_medium_char_width(utf8_adv(&p));
     249              :             (void)q;
     250              :         }
     251         1461 :         int wlen = (int)(p - ws);
     252              : 
     253              :         /* URL tokens (http://, https://, ftp://, mailto:) are always placed
     254              :          * on their own line so terminal URL-recognition works reliably.
     255              :          * They are never broken regardless of width. */
     256         1878 :         int is_url = (wlen >= 6) && (
     257          417 :             strncmp(ws, "http://",  7) == 0 ||
     258          414 :             strncmp(ws, "https://", 8) == 0 ||
     259          372 :             strncmp(ws, "ftp://",   6) == 0 ||
     260          372 :             strncmp(ws, "mailto:",  7) == 0);
     261              : 
     262         1461 :         if (is_url) {
     263           45 :             if (rs->col > rs->bq * 2) emit_wrap_nl(rs);  /* start own line */
     264           45 :             esc(rs, "\033[34m");        /* blue */
     265           45 :             rs_write(rs, ws, wlen);
     266           45 :             esc(rs, "\033[39m");        /* reset fg */
     267           45 :             rs->col += ww;
     268           45 :             rs_push(rs, '\n');          /* trailing newline: next content fresh line */
     269           45 :             rs->col = 0;
     270           45 :             emit_bq_prefix(rs);
     271           45 :             continue;
     272              :         }
     273              : 
     274              :         /* Wrap if needed (never wrap an otherwise-empty line) */
     275         1416 :         if (rs->width > 0 && rs->col > rs->bq * 2 && rs->col + ww > rs->width)
     276            4 :             emit_wrap_nl(rs);
     277              : 
     278         1416 :         rs_write(rs, ws, wlen);
     279         1416 :         rs->col += ww;
     280              :     }
     281              : }
     282              : 
     283              : /* ── Whitespace helpers ──────────────────────────────────────────────── */
     284              : 
     285              : /** Returns 1 if s contains only ASCII whitespace, U+00A0 nbsp, U+200C zwnj,
     286              :  *  U+200D zwj, or U+00AD soft-hyphen — i.e. invisible/non-printing content. */
     287           37 : static int is_blank_str(const char *s) {
     288           37 :     const unsigned char *p = (const unsigned char *)s;
     289           41 :     while (*p) {
     290           38 :         if (*p <= ' ')                                    { p++;   continue; }
     291           36 :         if (p[0]==0xC2 && p[1]==0xA0)                    { p+=2;  continue; } /* nbsp  */
     292           35 :         if (p[0]==0xC2 && p[1]==0xAD)                    { p+=2;  continue; } /* shy   */
     293           35 :         if (p[0]==0xE2 && p[1]==0x80 && p[2]==0x8C)      { p+=3;  continue; } /* zwnj  */
     294           34 :         if (p[0]==0xE2 && p[1]==0x80 && p[2]==0x8D)      { p+=3;  continue; } /* zwj   */
     295           34 :         return 0;
     296              :     }
     297            3 :     return 1;
     298              : }
     299              : 
     300              : /** Collapses runs of >1 consecutive blank lines to exactly one blank line.
     301              :  *  Also trims trailing ASCII/nbsp/zwnj whitespace from each line.
     302              :  *  Preserves up to one trailing blank line.
     303              :  *  Takes ownership of s (frees it); returns a new heap string (or s on OOM). */
     304          192 : static char *compact_lines(char *s) {
     305          192 :     if (!s) return s;
     306          192 :     size_t n = strlen(s);
     307          192 :     char *out = malloc(n + 1);
     308          192 :     if (!out) return s;
     309              : 
     310          192 :     const unsigned char *p = (const unsigned char *)s;
     311          192 :     char *q = out;
     312          192 :     int blank_pending = 0;  /* at most 1: whether a blank line is pending */
     313          192 :     int have_content  = 0;
     314              : 
     315          989 :     while (*p) {
     316          797 :         const unsigned char *ls = p;
     317        17253 :         while (*p && *p != '\n') p++;
     318          797 :         int had_nl = (*p == '\n');
     319          797 :         const unsigned char *le = p;
     320          797 :         if (had_nl) p++;
     321              : 
     322              :         /* Trim trailing invisible chars (ASCII ws, nbsp C2A0, shy C2AD, zwnj/zwj E2808C/8D) */
     323          832 :         while (le > ls) {
     324          725 :             if (le[-1] <= ' ')                                              { le--;   continue; }
     325          690 :             if (le>=ls+2 && le[-2]==0xC2 && (le[-1]==0xA0||le[-1]==0xAD)) { le-=2;  continue; }
     326          690 :             if (le>=ls+3 && le[-3]==0xE2 && le[-2]==0x80 &&
     327            0 :                 (le[-1]==0x8C||le[-1]==0x8D))                              { le-=3;  continue; }
     328          690 :             break;
     329              :         }
     330              : 
     331          797 :         if (le == ls) {  /* blank line */
     332          107 :             if (had_nl) blank_pending = 1;
     333              :         } else {         /* non-blank line */
     334          690 :             if (blank_pending && have_content) *q++ = '\n';
     335          690 :             blank_pending = 0;
     336          690 :             have_content = 1;
     337          690 :             memcpy(q, ls, (size_t)(le - ls));
     338          690 :             q += (size_t)(le - ls);
     339          690 :             if (had_nl) *q++ = '\n';
     340              :         }
     341              :     }
     342              :     /* Preserve at most one trailing blank line */
     343          192 :     if (blank_pending) *q++ = '\n';
     344              : 
     345          192 :     *q = '\0';
     346          192 :     free(s);
     347          192 :     return out;
     348              : }
     349              : 
     350              : /* ── Tag open / close ────────────────────────────────────────────────── */
     351              : 
     352              : static void traverse(RS *rs, const HtmlNode *node);
     353              : 
     354         1119 : static void tag_open(RS *rs, const HtmlNode *node) {
     355         1119 :     if (!node->tag) return;
     356         1119 :     const char *t = node->tag;
     357              : 
     358              :     /* Inline styles (applied before tag-specific behavior) */
     359         1119 :     const char *style = html_attr_get(node, "style");
     360         1119 :     if (style) parse_style(rs, style);
     361              : 
     362         1119 :     if      (!strcmp(t,"b")||!strcmp(t,"strong"))  open_bold(rs);
     363         1059 :     else if (!strcmp(t,"i")||!strcmp(t,"em"))       open_italic(rs);
     364         1010 :     else if (!strcmp(t,"u"))                        open_uline(rs);
     365          968 :     else if (!strcmp(t,"s")||!strcmp(t,"del")||!strcmp(t,"strike")) esc(rs,"\033[9m");
     366          896 :     else if (!strcmp(t,"br"))                       req_nl(rs, 1);
     367          895 :     else if (!strcmp(t,"hr")) {
     368           33 :         block_open(rs); flush_nl(rs); emit_bq_prefix(rs);
     369           33 :         int w = rs->width > 0 ? rs->width - rs->col : 20;
     370         2605 :         for (int i = 0; i < w; i++) rs_push(rs, '-');
     371           33 :         rs->col += w;
     372           33 :         block_close(rs);
     373              :     }
     374          862 :     else if (!strcmp(t,"p"))                        para_open(rs);
     375          790 :     else if (!strcmp(t,"div")||!strcmp(t,"article")||!strcmp(t,"section")||
     376          724 :              !strcmp(t,"main")||!strcmp(t,"header")||!strcmp(t,"footer")||
     377          790 :              !strcmp(t,"nav")||!strcmp(t,"aside"))  block_open(rs);
     378          724 :     else if (t[0]=='h' && t[1]>='1' && t[1]<='6' && !t[2]) {
     379           34 :         para_open(rs); open_bold(rs);
     380              :     }
     381          690 :     else if (!strcmp(t,"ul")) {
     382           36 :         if (rs->list_top < LIST_MAX) rs->lists[rs->list_top++] = (ListFrame){0, 0};
     383           36 :         block_open(rs);
     384              :     }
     385          654 :     else if (!strcmp(t,"ol")) {
     386           34 :         if (rs->list_top < LIST_MAX) rs->lists[rs->list_top++] = (ListFrame){1, 0};
     387           34 :         block_open(rs);
     388              :     }
     389          620 :     else if (!strcmp(t,"li")) {
     390          136 :         block_open(rs);
     391          136 :         flush_nl(rs);
     392          136 :         emit_bq_prefix(rs);
     393          136 :         if (rs->list_top > 0 && rs->lists[rs->list_top-1].is_ol) {
     394           67 :             rs->lists[rs->list_top-1].cnt++;
     395              :             char buf[16];
     396           67 :             int n = snprintf(buf, sizeof(buf), "%d. ", rs->lists[rs->list_top-1].cnt);
     397           67 :             rs_str(rs, buf);
     398           67 :             rs->col += n;
     399              :         } else {
     400              :             /* U+2022 BULLET: UTF-8 E2 80 A2 */
     401           69 :             rs_push(rs,(char)0xE2); rs_push(rs,(char)0x80); rs_push(rs,(char)0xA2);
     402           69 :             rs_push(rs, ' ');
     403           69 :             rs->col += 2; /* bullet=1 col, space=1 col */
     404              :         }
     405              :     }
     406          484 :     else if (!strcmp(t,"blockquote")) {
     407           35 :         block_open(rs);
     408           35 :         rs->bq++;
     409              :         /* if something was already on the line, start fresh */
     410           35 :         if (rs->col > 0) { flush_nl(rs); } else emit_bq_prefix(rs);
     411              :     }
     412          449 :     else if (!strcmp(t,"pre"))    { block_open(rs); rs->pre++; }
     413          415 :     else if (!strcmp(t,"code"))   { /* inline: no special formatting */ }
     414          415 :     else if (!strcmp(t,"img")) {
     415           37 :         const char *alt = html_attr_get(node, "alt");
     416           37 :         if (alt && *alt && !is_blank_str(alt)) {
     417           34 :             flush_nl(rs); emit_bq_prefix(rs);
     418           34 :             rs_push(rs, '[');
     419           34 :             rs_str(rs, alt);
     420           34 :             rs_push(rs, ']');
     421           34 :             rs->col += 2 + str_vis_width(alt);
     422              :         }
     423              :     }
     424          378 :     else if (!strcmp(t,"a")) {
     425           45 :         const char *href = html_attr_get(node, "href");
     426           45 :         if (href && *href && href[0] != '#' && strncmp(href, "javascript:", 11) != 0)
     427           41 :             if (rs->ansi) { esc(rs, "\033[34m"); rs->color_fg++; }
     428              :     }
     429          333 :     else if (!strcmp(t,"td")||!strcmp(t,"th")) {
     430           79 :         if (rs->col > rs->bq * 2) { rs_push(rs, '\t'); rs->col++; }
     431              :     }
     432          254 :     else if (!strcmp(t,"tr")||!strcmp(t,"table")) block_open(rs);
     433          172 :     else if (!strcmp(t,"script")||!strcmp(t,"style")) rs->skip++;
     434          172 :     else if (!strcmp(t,"textarea")||!strcmp(t,"button")) block_open(rs);
     435          172 :     else if (!strcmp(t,"input")) {
     436            1 :         const char *val = html_attr_get(node, "value");
     437            1 :         if (val && *val) {
     438            1 :             flush_nl(rs); emit_bq_prefix(rs);
     439            1 :             rs_str(rs, val);
     440            1 :             rs->col += str_vis_width(val);
     441              :         }
     442              :     }
     443              :     /* __root__ and unknown tags: traverse children unchanged */
     444              : }
     445              : 
     446         1119 : static void tag_close(RS *rs, const HtmlNode *node) {
     447         1119 :     if (!node->tag) return;
     448         1119 :     const char *t = node->tag;
     449              : 
     450         1119 :     if      (!strcmp(t,"b")||!strcmp(t,"strong"))  close_bold(rs);
     451         1059 :     else if (!strcmp(t,"i")||!strcmp(t,"em"))       close_italic(rs);
     452         1010 :     else if (!strcmp(t,"u"))                        close_uline(rs);
     453          968 :     else if (!strcmp(t,"s")||!strcmp(t,"del")||!strcmp(t,"strike")) esc(rs,"\033[29m");
     454          896 :     else if (!strcmp(t,"p"))                        para_close(rs);
     455          824 :     else if (!strcmp(t,"div")||!strcmp(t,"article")||!strcmp(t,"section")||
     456          758 :              !strcmp(t,"main")||!strcmp(t,"header")||!strcmp(t,"footer")||
     457          824 :              !strcmp(t,"nav")||!strcmp(t,"aside"))  block_close(rs);
     458          758 :     else if (t[0]=='h' && t[1]>='1' && t[1]<='6' && !t[2]) {
     459           34 :         close_bold(rs); para_close(rs);
     460              :     }
     461          724 :     else if (!strcmp(t,"ul")||!strcmp(t,"ol")) {
     462           70 :         if (rs->list_top > 0) rs->list_top--;
     463           70 :         block_close(rs);
     464              :     }
     465          654 :     else if (!strcmp(t,"li"))  req_nl(rs, 1);
     466          518 :     else if (!strcmp(t,"blockquote")) {
     467           35 :         if (rs->bq > 0) rs->bq--;
     468           35 :         block_close(rs);
     469              :     }
     470          483 :     else if (!strcmp(t,"pre"))  { if (rs->pre>0) rs->pre--; block_close(rs); }
     471          449 :     else if (!strcmp(t,"a")) {
     472           45 :         const char *href = html_attr_get(node, "href");
     473           45 :         if (href && *href && href[0] != '#' &&
     474           42 :             strncmp(href, "javascript:", 11) != 0) {
     475           41 :             if (rs->ansi && rs->color_fg > 0) { esc(rs, "\033[39m"); rs->color_fg--; }
     476           41 :             esc(rs, "\033[34m");
     477           41 :             emit_text(rs, href);
     478           41 :             esc(rs, "\033[39m");
     479              :         }
     480              :     }
     481          404 :     else if (!strcmp(t,"script")||!strcmp(t,"style")) { if (rs->skip>0) rs->skip--; }
     482          404 :     else if (!strcmp(t,"tr"))   req_nl(rs, 1);
     483          363 :     else if (!strcmp(t,"table")) block_close(rs);
     484          322 :     else if (!strcmp(t,"textarea")||!strcmp(t,"button")) block_close(rs);
     485              : }
     486              : 
     487              : /* ── Tree traversal ──────────────────────────────────────────────────── */
     488              : 
     489         2090 : static void traverse(RS *rs, const HtmlNode *node) {
     490         2090 :     if (!node) return;
     491         2090 :     if (node->type == HTML_NODE_TEXT) {
     492          971 :         emit_text(rs, node->text);
     493          971 :         return;
     494              :     }
     495              :     /* Snapshot style depth so parse_style side-effects from inline
     496              :      * style= attributes are balanced even when tag_close has no handler
     497              :      * for this tag (e.g. <a>, <span>, <td> with style="…"). */
     498         1119 :     int bold_sv     = rs->bold;
     499         1119 :     int italic_sv   = rs->italic;
     500         1119 :     int uline_sv    = rs->uline;
     501         1119 :     int color_fg_sv = rs->color_fg;
     502         1119 :     int color_bg_sv = rs->color_bg;
     503         1119 :     tag_open(rs, node);
     504         2907 :     for (const HtmlNode *c = node->first_child; c; c = c->next_sibling)
     505         1788 :         traverse(rs, c);
     506         1119 :     tag_close(rs, node);
     507              :     /* Close any depth-tracked style that tag_close left open */
     508         1155 :     while (rs->uline    > uline_sv)    close_uline(rs);
     509         1150 :     while (rs->italic   > italic_sv)   close_italic(rs);
     510         1154 :     while (rs->bold     > bold_sv)     close_bold(rs);
     511         1119 :     if (rs->ansi) {
     512          800 :         if (rs->color_fg > color_fg_sv) { esc(rs, "\033[39m"); rs->color_fg = color_fg_sv; }
     513          800 :         if (rs->color_bg > color_bg_sv) { esc(rs, "\033[49m"); rs->color_bg = color_bg_sv; }
     514              :     }
     515              : }
     516              : 
     517              : /* ── Public API ──────────────────────────────────────────────────────── */
     518              : 
     519          199 : char *html_render(const char *html, int width, int ansi) {
     520          199 :     if (!html) return NULL;
     521              : 
     522          198 :     HtmlNode *root = html_parse(html);
     523          198 :     if (!root) {
     524            0 :         char *empty = malloc(1);
     525            0 :         if (empty) *empty = '\0';
     526            0 :         return empty;
     527              :     }
     528              : 
     529              :     RS rs;
     530          198 :     memset(&rs, 0, sizeof(rs));
     531          198 :     rs.width = width;
     532          198 :     rs.ansi  = ansi;
     533              : 
     534              :     /* Traverse root's children (root itself is synthetic __root__) */
     535          500 :     for (const HtmlNode *c = root->first_child; c; c = c->next_sibling)
     536          302 :         traverse(&rs, c);
     537              : 
     538              :     /* Flush trailing newlines */
     539          198 :     flush_nl(&rs);
     540              : 
     541          198 :     html_node_free(root);
     542              : 
     543          198 :     if (!rs.buf) {
     544            6 :         char *empty = malloc(1);
     545            6 :         if (empty) *empty = '\0';
     546            6 :         return empty;
     547              :     }
     548          192 :     return compact_lines(rs.buf);
     549              : }
        

Generated by: LCOV version 2.0-1