LCOV - code coverage report
Current view: top level - libemail/src/core - html_render.c (source / functions) Coverage Total Hit
Test: coverage.info Lines: 98.5 % 341 336
Test Date: 2026-04-15 21:12:52 Functions: 100.0 % 30 30

            Line data    Source code
#include "html_render.h"
#include "html_parser.h"
#include "html_medium.h"
#include <ctype.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
       9              : 
      10              : /* ── List stack ──────────────────────────────────────────────────────── */
      11              : 
/* Capacity of the list stack (RS.lists): deepest <ul>/<ol> nesting that
 * gets its own frame. */
#define LIST_MAX 16

/* One open list. is_ol is 1 for <ol> and 0 for <ul>; cnt is the number of
 * the most recently emitted ordered item (incremented for each <li>). */
typedef struct { int is_ol; int cnt; } ListFrame;
      15              : 
      16              : /* ── Render state ────────────────────────────────────────────────────── */
      17              : 
/* Mutable state threaded through a single html_render() call. The "depth"
 * fields are counters so that nested tags only toggle output at the
 * outermost level. */
typedef struct {
    char     *buf;         /* output buffer (realloc-grown, NUL-terminated) */
    size_t    len, cap;    /* bytes used (excluding the NUL) / bytes allocated */
    int       col;         /* visible column (ANSI not counted) */
    int       width;       /* wrap width; 0 = no wrap */
    int       ansi;        /* emit ANSI escapes? */
    int       bold;        /* depth counter */
    int       italic;      /* depth counter */
    int       uline;       /* depth counter */
    int       color_fg;    /* depth: foreground color set by parse_style */
    int       color_bg;    /* depth: background color set by parse_style */
    int       skip;        /* depth: no output (script/style) */
    int       pre;         /* depth: no wrap */
    int       pending_nl;  /* buffered newlines to emit: 0, 1, or 2 */
    int       bq;          /* blockquote depth */
    ListFrame lists[LIST_MAX]; /* stack of open <ul>/<ol> lists */
    int       list_top;    /* number of frames in use in lists[] */
} RS;
      36              : 
      37              : /* ── Buffer ──────────────────────────────────────────────────────────── */
      38              : 
      39         3362 : static void rs_push(RS *rs, char c) {
      40         3362 :     if (rs->len + 2 > rs->cap) {
      41          161 :         size_t nc = rs->cap ? rs->cap * 2 : 512;
      42          161 :         char *t = realloc(rs->buf, nc);
      43          161 :         if (!t) return;
      44          161 :         rs->buf = t; rs->cap = nc;
      45              :     }
      46         3362 :     rs->buf[rs->len++] = c;
      47         3362 :     rs->buf[rs->len]   = '\0';
      48              : }
      49         1863 : static void rs_str(RS *rs, const char *s) { for (; *s; s++) rs_push(rs, *s); }
      50          408 : static void rs_write(RS *rs, const char *s, int n)
      51         1856 :     { for (int i = 0; i < n; i++) rs_push(rs, s[i]); }
      52              : 
      53              : /* ── UTF-8 helpers ───────────────────────────────────────────────────── */
      54              : 
      55         1445 : static uint32_t utf8_adv(const char **p) {
      56         1445 :     unsigned char c = (unsigned char)**p;
      57              :     uint32_t cp; int ex;
      58         1445 :     if      (c < 0x80) { cp = c;        ex = 0; }
      59            8 :     else if (c < 0xC0) { (*p)++; return 0xFFFD; }
      60            8 :     else if (c < 0xE0) { cp = c & 0x1F; ex = 1; }
      61            4 :     else if (c < 0xF0) { cp = c & 0x0F; ex = 2; }
      62            1 :     else               { cp = c & 0x07; ex = 3; }
      63         1445 :     (*p)++;
      64         1458 :     for (int i = 0; i < ex; i++) {
      65           13 :         if ((**p & 0xC0) != 0x80) return 0xFFFD;
      66           13 :         cp = (cp << 6) | (**p & 0x3F); (*p)++;
      67              :     }
      68         1445 :     return cp;
      69              : }
      70              : 
      71            3 : static int str_vis_width(const char *s) {
      72            3 :     int w = 0;
      73           15 :     while (*s) {
      74           12 :         if ((unsigned char)*s == 0x1B && s[1] == '[') {
      75            4 :             s += 2; while (*s && *s != 'm') s++; if (*s) s++;
      76            2 :             continue;
      77              :         }
      78           10 :         w += html_medium_char_width(utf8_adv(&s));
      79              :     }
      80            3 :     return w;
      81              : }
      82              : 
      83              : /* ── Newline / prefix management ─────────────────────────────────────── */
      84              : 
      85          410 : static void emit_bq_prefix(RS *rs) {
      86          410 :     if (rs->col == 0 && rs->bq > 0) {
      87            6 :         for (int i = 0; i < rs->bq; i++) { rs_push(rs, '>'); rs_push(rs, ' '); }
      88            3 :         rs->col = rs->bq * 2;
      89              :     }
      90          410 : }
      91              : 
      92          459 : static void flush_nl(RS *rs) {
      93          459 :     if (!rs->pending_nl) return;
      94              :     /* Count consecutive newlines already at the end of the buffer so we
      95              :      * never accumulate more than pending_nl in a row. */
      96           92 :     int trailing = 0;
      97           92 :     for (int i = (int)rs->len - 1; i >= 0 && rs->buf[i] == '\n'; i--)
      98            0 :         trailing++;
      99          194 :     for (int i = trailing; i < rs->pending_nl; i++) rs_push(rs, '\n');
     100           92 :     rs->col = 0;
     101           92 :     rs->pending_nl = 0;
     102           92 :     emit_bq_prefix(rs);
     103              : }
     104              : 
     105          127 : static void req_nl(RS *rs, int n) {
     106          127 :     if (rs->pending_nl < n) rs->pending_nl = n;
     107          127 : }
     108           72 : static void block_open(RS *rs)  { if (rs->len > 0) req_nl(rs, 1); }
     109           55 : static void block_close(RS *rs) { req_nl(rs, 1); }
     110           10 : static void para_open(RS *rs)   { if (rs->len > 0) req_nl(rs, 2); }
     111           10 : static void para_close(RS *rs)  { req_nl(rs, 2); }
     112              : 
     113              : /* ── ANSI helpers ────────────────────────────────────────────────────── */
     114              : 
     115          242 : static void esc(RS *rs, const char *e) { if (rs->ansi) rs_str(rs, e); }
     116           34 : static void open_bold(RS *rs)    { if (rs->bold++   == 0) esc(rs, "\033[1m");  }
     117           47 : static void close_bold(RS *rs)   { if (--rs->bold   == 0) esc(rs, "\033[22m"); }
     118           17 : static void open_italic(RS *rs)  { if (rs->italic++ == 0) esc(rs, "\033[3m");  }
     119           26 : static void close_italic(RS *rs) { if (--rs->italic == 0) esc(rs, "\033[23m"); }
     120           10 : static void open_uline(RS *rs)   { if (rs->uline++  == 0) esc(rs, "\033[4m");  }
     121           24 : static void close_uline(RS *rs)  { if (--rs->uline  == 0) esc(rs, "\033[24m"); }
     122              : 
     123              : /* ── Inline CSS ──────────────────────────────────────────────────────── */
     124              : 
     125          213 : static int hex_val(char c) {
     126          213 :     if (c>='0'&&c<='9') return c-'0';
     127           38 :     if (c>='a'&&c<='f') return c-'a'+10;
     128           38 :     if (c>='A'&&c<='F') return c-'A'+10;
     129            6 :     return 0;
     130              : }
/* Named colors recognised in inline "color:" declarations, with their RGB
 * components (0..255). The table holds 16 names and is terminated by an
 * entry whose name is NULL. */
static const struct { const char *name; int r,g,b; } CSS_COLORS[] = {
    {"black",0,0,0},{"silver",192,192,192},{"gray",128,128,128},
    {"white",255,255,255},{"maroon",128,0,0},{"red",255,0,0},
    {"purple",128,0,128},{"fuchsia",255,0,255},{"green",0,128,0},
    {"lime",0,255,0},{"olive",128,128,0},{"yellow",255,255,0},
    {"navy",0,0,128},{"blue",0,0,255},{"teal",0,128,128},{"aqua",0,255,255},
    {NULL,0,0,0}
};
     139           83 : static void apply_color(RS *rs, const char *v, int fg) {
     140           83 :     if (!rs->ansi) return;
     141              :     /* Background colors are never emitted: they break dark-theme terminals
     142              :      * and produce unreadable combinations when the email author's palette
     143              :      * does not match the user's terminal theme. */
     144           83 :     if (!fg) return;
     145           61 :     int r=-1,g=-1,b=-1;
     146           61 :     while (*v==' ') v++;
     147           61 :     if (*v=='#') {
     148           42 :         v++;
     149           42 :         size_t len=strlen(v); while(len>0&&v[len-1]==' ') len--;
     150           42 :         if (len==6) {
     151           29 :             r=hex_val(v[0])*16+hex_val(v[1]);
     152           29 :             g=hex_val(v[2])*16+hex_val(v[3]);
     153           29 :             b=hex_val(v[4])*16+hex_val(v[5]);
     154           13 :         } else if (len==3) {
     155           13 :             r=hex_val(v[0])*17; g=hex_val(v[1])*17; b=hex_val(v[2])*17;
     156              :         }
     157              :     } else {
     158          147 :         for (int i=0; CSS_COLORS[i].name; i++) {
     159          147 :             size_t nl=strlen(CSS_COLORS[i].name);
     160          147 :             if (strncasecmp(v, CSS_COLORS[i].name, nl)==0) {
     161           19 :                 r=CSS_COLORS[i].r; g=CSS_COLORS[i].g; b=CSS_COLORS[i].b; break;
     162              :             }
     163              :         }
     164              :     }
     165           61 :     if (r<0) return;
     166              :     /* Suppress dark foreground colors (max component < 160): they are
     167              :      * unreadable on dark-theme terminals and common in newsletter HTML
     168              :      * (e.g. #333, #666, gray).  Only bright colours are emitted. */
     169           61 :     int mx = r > g ? (r > b ? r : b) : (g > b ? g : b);
     170           61 :     if (mx < 160) return;
     171           34 :     char e[32]; snprintf(e,sizeof(e),"\033[38;2;%d;%d;%dm",r,g,b);
     172           34 :     rs_str(rs, e);
     173           34 :     rs->color_fg++;
     174              : }
     175           95 : static void parse_style(RS *rs, const char *style) {
     176           95 :     if (!style || !rs->ansi) return;
     177           94 :     const char *p = style;
     178          213 :     while (*p) {
     179          119 :         while (*p==' '||*p=='\t') p++;
     180         1219 :         const char *ps=p; while(*p&&*p!=':') p++;
     181          119 :         if (!*p) break;
     182          119 :         size_t pl=(size_t)(p-ps); while(pl>0&&ps[pl-1]==' ') pl--;
     183          119 :         p++; while(*p==' ') p++;
     184          807 :         const char *vs=p; while(*p&&*p!=';') p++;
     185          119 :         size_t vl=(size_t)(p-vs); if(*p==';') p++;
     186          119 :         char prop[32]={0},val[64]={0};
     187          119 :         if(pl<sizeof(prop)) memcpy(prop,ps,pl);
     188          119 :         if(vl>0&&vl<sizeof(val)) memcpy(val,vs,vl);
     189          119 :         if      (!strcasecmp(prop,"font-weight")&&!strncasecmp(val,"bold",4))      { esc(rs,"\033[1m"); rs->bold++; }
     190          106 :         else if (!strcasecmp(prop,"font-style")&&!strncasecmp(val,"italic",6))     { esc(rs,"\033[3m"); rs->italic++; }
     191           97 :         else if (!strcasecmp(prop,"text-decoration")&&!strncasecmp(val,"underline",9)){ esc(rs,"\033[4m"); rs->uline++; }
     192           83 :         else if (!strcasecmp(prop,"color"))           apply_color(rs,val,1);
     193           22 :         else if (!strcasecmp(prop,"background-color")) apply_color(rs,val,0);
     194              :     }
     195              : }
     196              : 
     197              : /* ── Text emission with word wrap ────────────────────────────────────── */
     198              : 
     199           11 : static void emit_wrap_nl(RS *rs) {
     200              :     /* remove trailing space before the newline */
     201           11 :     if (rs->len > 0 && rs->buf[rs->len-1] == ' ') {
     202            9 :         rs->len--; rs->buf[rs->len] = '\0'; rs->col--;
     203              :     }
     204           11 :     rs_push(rs, '\n');
     205           11 :     rs->col = 0;
     206           11 :     emit_bq_prefix(rs);
     207           11 : }
     208              : 
     209          280 : static void emit_text(RS *rs, const char *text) {
     210          280 :     if (!text || !*text || rs->skip) return;
     211          280 :     flush_nl(rs);
     212          280 :     emit_bq_prefix(rs);
     213              : 
     214          280 :     if (rs->pre) {
     215           27 :         for (const char *p = text; *p; ) {
     216           25 :             if (*p == '\r') { p++; continue; }
     217           25 :             if (*p == '\n') {
     218            2 :                 rs_push(rs, '\n'); rs->col = 0;
     219            2 :                 emit_bq_prefix(rs);
     220            2 :                 p++; continue;
     221              :             }
     222           23 :             const char *s = p;
     223           23 :             rs->col += html_medium_char_width(utf8_adv(&p));
     224           23 :             rs_write(rs, s, (int)(p - s));
     225              :         }
     226            2 :         return;
     227              :     }
     228              : 
     229              :     /* Normal mode: word-wrap */
     230          811 :     for (const char *p = text; *p; ) {
     231          533 :         if (isspace((unsigned char)*p)) {
     232              :             /* Collapse whitespace; only emit space if not at line start */
     233          148 :             int at_start = (rs->col <= rs->bq * 2);
     234          148 :             int already_space = (rs->len > 0 && rs->buf[rs->len-1] == ' ');
     235          148 :             if (!at_start && !already_space) {
     236          141 :                 rs_push(rs, ' ');
     237          141 :                 rs->col++;
     238              :             }
     239          300 :             while (*p && isspace((unsigned char)*p)) p++;
     240          148 :             continue;
     241              :         }
     242              : 
     243              :         /* Collect one word */
     244          385 :         const char *ws = p;
     245          385 :         int ww = 0;
     246         1797 :         while (*p && !isspace((unsigned char)*p)) {
     247         1412 :             const char *q = p;
     248         1412 :             ww += html_medium_char_width(utf8_adv(&p));
     249              :             (void)q;
     250              :         }
     251          385 :         int wlen = (int)(p - ws);
     252              : 
     253              :         /* URL tokens (http://, https://, ftp://, mailto:) are always placed
     254              :          * on their own line so terminal URL-recognition works reliably.
     255              :          * They are never broken regardless of width. */
     256          451 :         int is_url = (wlen >= 6) && (
     257           66 :             strncmp(ws, "http://",  7) == 0 ||
     258           63 :             strncmp(ws, "https://", 8) == 0 ||
     259           56 :             strncmp(ws, "ftp://",   6) == 0 ||
     260           56 :             strncmp(ws, "mailto:",  7) == 0);
     261              : 
     262          385 :         if (is_url) {
     263           10 :             if (rs->col > rs->bq * 2) emit_wrap_nl(rs);  /* start own line */
     264           10 :             rs_write(rs, ws, wlen);
     265           10 :             rs->col += ww;
     266           10 :             rs_push(rs, '\n');          /* trailing newline: next content fresh line */
     267           10 :             rs->col = 0;
     268           10 :             emit_bq_prefix(rs);
     269           10 :             continue;
     270              :         }
     271              : 
     272              :         /* Wrap if needed (never wrap an otherwise-empty line) */
     273          375 :         if (rs->width > 0 && rs->col > rs->bq * 2 && rs->col + ww > rs->width)
     274            4 :             emit_wrap_nl(rs);
     275              : 
     276          375 :         rs_write(rs, ws, wlen);
     277          375 :         rs->col += ww;
     278              :     }
     279              : }
     280              : 
     281              : /* ── Whitespace helpers ──────────────────────────────────────────────── */
     282              : 
     283              : /** Returns 1 if s contains only ASCII whitespace, U+00A0 nbsp, U+200C zwnj,
     284              :  *  U+200D zwj, or U+00AD soft-hyphen — i.e. invisible/non-printing content. */
     285            5 : static int is_blank_str(const char *s) {
     286            5 :     const unsigned char *p = (const unsigned char *)s;
     287            9 :     while (*p) {
     288            6 :         if (*p <= ' ')                                    { p++;   continue; }
     289            4 :         if (p[0]==0xC2 && p[1]==0xA0)                    { p+=2;  continue; } /* nbsp  */
     290            3 :         if (p[0]==0xC2 && p[1]==0xAD)                    { p+=2;  continue; } /* shy   */
     291            3 :         if (p[0]==0xE2 && p[1]==0x80 && p[2]==0x8C)      { p+=3;  continue; } /* zwnj  */
     292            2 :         if (p[0]==0xE2 && p[1]==0x80 && p[2]==0x8D)      { p+=3;  continue; } /* zwj   */
     293            2 :         return 0;
     294              :     }
     295            3 :     return 1;
     296              : }
     297              : 
     298              : /** Collapses runs of >1 consecutive blank lines to exactly one blank line.
     299              :  *  Also trims trailing ASCII/nbsp/zwnj whitespace from each line.
     300              :  *  Preserves up to one trailing blank line.
     301              :  *  Takes ownership of s (frees it); returns a new heap string (or s on OOM). */
     302          161 : static char *compact_lines(char *s) {
     303          161 :     if (!s) return s;
     304          161 :     size_t n = strlen(s);
     305          161 :     char *out = malloc(n + 1);
     306          161 :     if (!out) return s;
     307              : 
     308          161 :     const unsigned char *p = (const unsigned char *)s;
     309          161 :     char *q = out;
     310          161 :     int blank_pending = 0;  /* at most 1: whether a blank line is pending */
     311          161 :     int have_content  = 0;
     312              : 
     313          423 :     while (*p) {
     314          262 :         const unsigned char *ls = p;
     315         3490 :         while (*p && *p != '\n') p++;
     316          262 :         int had_nl = (*p == '\n');
     317          262 :         const unsigned char *le = p;
     318          262 :         if (had_nl) p++;
     319              : 
     320              :         /* Trim trailing invisible chars (ASCII ws, nbsp C2A0, shy C2AD, zwnj/zwj E2808C/8D) */
     321          269 :         while (le > ls) {
     322          258 :             if (le[-1] <= ' ')                                              { le--;   continue; }
     323          251 :             if (le>=ls+2 && le[-2]==0xC2 && (le[-1]==0xA0||le[-1]==0xAD)) { le-=2;  continue; }
     324          251 :             if (le>=ls+3 && le[-3]==0xE2 && le[-2]==0x80 &&
     325            0 :                 (le[-1]==0x8C||le[-1]==0x8D))                              { le-=3;  continue; }
     326          251 :             break;
     327              :         }
     328              : 
     329          262 :         if (le == ls) {  /* blank line */
     330           11 :             if (had_nl) blank_pending = 1;
     331              :         } else {         /* non-blank line */
     332          251 :             if (blank_pending && have_content) *q++ = '\n';
     333          251 :             blank_pending = 0;
     334          251 :             have_content = 1;
     335          251 :             memcpy(q, ls, (size_t)(le - ls));
     336          251 :             q += (size_t)(le - ls);
     337          251 :             if (had_nl) *q++ = '\n';
     338              :         }
     339              :     }
     340              :     /* Preserve at most one trailing blank line */
     341          161 :     if (blank_pending) *q++ = '\n';
     342              : 
     343          161 :     *q = '\0';
     344          161 :     free(s);
     345          161 :     return out;
     346              : }
     347              : 
     348              : /* ── Tag open / close ────────────────────────────────────────────────── */
     349              : 
     350              : static void traverse(RS *rs, const HtmlNode *node);
     351              : 
     352          302 : static void tag_open(RS *rs, const HtmlNode *node) {
     353          302 :     if (!node->tag) return;
     354          302 :     const char *t = node->tag;
     355              : 
     356              :     /* Inline styles (applied before tag-specific behavior) */
     357          302 :     const char *style = html_attr_get(node, "style");
     358          302 :     if (style) parse_style(rs, style);
     359              : 
     360          302 :     if      (!strcmp(t,"b")||!strcmp(t,"strong"))  open_bold(rs);
     361          270 :     else if (!strcmp(t,"i")||!strcmp(t,"em"))       open_italic(rs);
     362          253 :     else if (!strcmp(t,"u"))                        open_uline(rs);
     363          243 :     else if (!strcmp(t,"s")||!strcmp(t,"del")||!strcmp(t,"strike")) esc(rs,"\033[9m");
     364          235 :     else if (!strcmp(t,"br"))                       req_nl(rs, 1);
     365          202 :     else if (!strcmp(t,"hr")) {
     366            1 :         block_open(rs); flush_nl(rs); emit_bq_prefix(rs);
     367            1 :         int w = rs->width > 0 ? rs->width - rs->col : 20;
     368           21 :         for (int i = 0; i < w; i++) rs_push(rs, '-');
     369            1 :         rs->col += w;
     370            1 :         block_close(rs);
     371              :     }
     372          201 :     else if (!strcmp(t,"p"))                        para_open(rs);
     373          193 :     else if (!strcmp(t,"div")||!strcmp(t,"article")||!strcmp(t,"section")||
     374          159 :              !strcmp(t,"main")||!strcmp(t,"header")||!strcmp(t,"footer")||
     375          193 :              !strcmp(t,"nav")||!strcmp(t,"aside"))  block_open(rs);
     376          159 :     else if (t[0]=='h' && t[1]>='1' && t[1]<='6' && !t[2]) {
     377            2 :         para_open(rs); open_bold(rs);
     378              :     }
     379          157 :     else if (!strcmp(t,"ul")) {
     380            4 :         if (rs->list_top < LIST_MAX) rs->lists[rs->list_top++] = (ListFrame){0, 0};
     381            4 :         block_open(rs);
     382              :     }
     383          153 :     else if (!strcmp(t,"ol")) {
     384            2 :         if (rs->list_top < LIST_MAX) rs->lists[rs->list_top++] = (ListFrame){1, 0};
     385            2 :         block_open(rs);
     386              :     }
     387          151 :     else if (!strcmp(t,"li")) {
     388            8 :         block_open(rs);
     389            8 :         flush_nl(rs);
     390            8 :         emit_bq_prefix(rs);
     391           11 :         if (rs->list_top > 0 && rs->lists[rs->list_top-1].is_ol) {
     392            3 :             rs->lists[rs->list_top-1].cnt++;
     393            3 :             char buf[16];
     394            3 :             int n = snprintf(buf, sizeof(buf), "%d. ", rs->lists[rs->list_top-1].cnt);
     395            3 :             rs_str(rs, buf);
     396            3 :             rs->col += n;
     397              :         } else {
     398              :             /* U+2022 BULLET: UTF-8 E2 80 A2 */
     399            5 :             rs_push(rs,(char)0xE2); rs_push(rs,(char)0x80); rs_push(rs,(char)0xA2);
     400            5 :             rs_push(rs, ' ');
     401            5 :             rs->col += 2; /* bullet=1 col, space=1 col */
     402              :         }
     403              :     }
     404          143 :     else if (!strcmp(t,"blockquote")) {
     405            3 :         block_open(rs);
     406            3 :         rs->bq++;
     407              :         /* if something was already on the line, start fresh */
     408            3 :         if (rs->col > 0) { flush_nl(rs); } else emit_bq_prefix(rs);
     409              :     }
     410          140 :     else if (!strcmp(t,"pre"))    { block_open(rs); rs->pre++; }
     411          138 :     else if (!strcmp(t,"code"))   { /* inline: no special formatting */ }
     412          138 :     else if (!strcmp(t,"img")) {
     413            5 :         const char *alt = html_attr_get(node, "alt");
     414            5 :         if (alt && *alt && !is_blank_str(alt)) {
     415            2 :             flush_nl(rs); emit_bq_prefix(rs);
     416            2 :             rs_push(rs, '[');
     417            2 :             rs_str(rs, alt);
     418            2 :             rs_push(rs, ']');
     419            2 :             rs->col += 2 + str_vis_width(alt);
     420              :         }
     421              :     }
     422          133 :     else if (!strcmp(t,"a"))      { /* render children as-is */ }
     423          121 :     else if (!strcmp(t,"td")||!strcmp(t,"th")) {
     424           15 :         if (rs->col > rs->bq * 2) { rs_push(rs, '\t'); rs->col++; }
     425              :     }
     426          106 :     else if (!strcmp(t,"tr")||!strcmp(t,"table")) block_open(rs);
     427           88 :     else if (!strcmp(t,"script")||!strcmp(t,"style")) rs->skip++;
     428           88 :     else if (!strcmp(t,"textarea")||!strcmp(t,"button")) block_open(rs);
     429           88 :     else if (!strcmp(t,"input")) {
     430            1 :         const char *val = html_attr_get(node, "value");
     431            1 :         if (val && *val) {
     432            1 :             flush_nl(rs); emit_bq_prefix(rs);
     433            1 :             rs_str(rs, val);
     434            1 :             rs->col += str_vis_width(val);
     435              :         }
     436              :     }
     437              :     /* __root__ and unknown tags: traverse children unchanged */
     438              : }
     439              : 
     440          302 : static void tag_close(RS *rs, const HtmlNode *node) {
     441          302 :     if (!node->tag) return;
     442          302 :     const char *t = node->tag;
     443              : 
     444          302 :     if      (!strcmp(t,"b")||!strcmp(t,"strong"))  close_bold(rs);
     445          270 :     else if (!strcmp(t,"i")||!strcmp(t,"em"))       close_italic(rs);
     446          253 :     else if (!strcmp(t,"u"))                        close_uline(rs);
     447          243 :     else if (!strcmp(t,"s")||!strcmp(t,"del")||!strcmp(t,"strike")) esc(rs,"\033[29m");
     448          235 :     else if (!strcmp(t,"p"))                        para_close(rs);
     449          227 :     else if (!strcmp(t,"div")||!strcmp(t,"article")||!strcmp(t,"section")||
     450          193 :              !strcmp(t,"main")||!strcmp(t,"header")||!strcmp(t,"footer")||
     451          227 :              !strcmp(t,"nav")||!strcmp(t,"aside"))  block_close(rs);
     452          193 :     else if (t[0]=='h' && t[1]>='1' && t[1]<='6' && !t[2]) {
     453            2 :         close_bold(rs); para_close(rs);
     454              :     }
     455          191 :     else if (!strcmp(t,"ul")||!strcmp(t,"ol")) {
     456            6 :         if (rs->list_top > 0) rs->list_top--;
     457            6 :         block_close(rs);
     458              :     }
     459          185 :     else if (!strcmp(t,"li"))  req_nl(rs, 1);
     460          177 :     else if (!strcmp(t,"blockquote")) {
     461            3 :         if (rs->bq > 0) rs->bq--;
     462            3 :         block_close(rs);
     463              :     }
     464          174 :     else if (!strcmp(t,"pre"))  { if (rs->pre>0) rs->pre--; block_close(rs); }
     465          172 :     else if (!strcmp(t,"a")) {
     466           12 :         const char *href = html_attr_get(node, "href");
     467           12 :         if (href && *href && href[0] != '#' &&
     468            9 :             strncmp(href, "javascript:", 11) != 0)
     469            8 :             emit_text(rs, href);
     470              :     }
     471          160 :     else if (!strcmp(t,"script")||!strcmp(t,"style")) { if (rs->skip>0) rs->skip--; }
     472          160 :     else if (!strcmp(t,"tr"))   req_nl(rs, 1);
     473          151 :     else if (!strcmp(t,"table")) block_close(rs);
     474          142 :     else if (!strcmp(t,"textarea")||!strcmp(t,"button")) block_close(rs);
     475              : }
     476              : 
     477              : /* ── Tree traversal ──────────────────────────────────────────────────── */
     478              : 
     479          574 : static void traverse(RS *rs, const HtmlNode *node) {
     480          574 :     if (!node) return;
     481          574 :     if (node->type == HTML_NODE_TEXT) {
     482          272 :         emit_text(rs, node->text);
     483          272 :         return;
     484              :     }
     485              :     /* Snapshot style depth so parse_style side-effects from inline
     486              :      * style= attributes are balanced even when tag_close has no handler
     487              :      * for this tag (e.g. <a>, <span>, <td> with style="…"). */
     488          302 :     int bold_sv     = rs->bold;
     489          302 :     int italic_sv   = rs->italic;
     490          302 :     int uline_sv    = rs->uline;
     491          302 :     int color_fg_sv = rs->color_fg;
     492          302 :     int color_bg_sv = rs->color_bg;
     493          302 :     tag_open(rs, node);
     494          633 :     for (const HtmlNode *c = node->first_child; c; c = c->next_sibling)
     495          331 :         traverse(rs, c);
     496          302 :     tag_close(rs, node);
     497              :     /* Close any depth-tracked style that tag_close left open */
     498          316 :     while (rs->uline    > uline_sv)    close_uline(rs);
     499          311 :     while (rs->italic   > italic_sv)   close_italic(rs);
     500          315 :     while (rs->bold     > bold_sv)     close_bold(rs);
     501          302 :     if (rs->ansi) {
     502          205 :         if (rs->color_fg > color_fg_sv) { esc(rs, "\033[39m"); rs->color_fg = color_fg_sv; }
     503          205 :         if (rs->color_bg > color_bg_sv) { esc(rs, "\033[49m"); rs->color_bg = color_bg_sv; }
     504              :     }
     505              : }
     506              : 
     507              : /* ── Public API ──────────────────────────────────────────────────────── */
     508              : 
     509          168 : char *html_render(const char *html, int width, int ansi) {
     510          168 :     if (!html) return NULL;
     511              : 
     512          167 :     HtmlNode *root = html_parse(html);
     513          167 :     if (!root) {
     514            0 :         char *empty = malloc(1);
     515            0 :         if (empty) *empty = '\0';
     516            0 :         return empty;
     517              :     }
     518              : 
     519          167 :     RS rs;
     520          167 :     memset(&rs, 0, sizeof(rs));
     521          167 :     rs.width = width;
     522          167 :     rs.ansi  = ansi;
     523              : 
     524              :     /* Traverse root's children (root itself is synthetic __root__) */
     525          410 :     for (const HtmlNode *c = root->first_child; c; c = c->next_sibling)
     526          243 :         traverse(&rs, c);
     527              : 
     528              :     /* Flush trailing newlines */
     529          167 :     flush_nl(&rs);
     530              : 
     531          167 :     html_node_free(root);
     532              : 
     533          167 :     if (!rs.buf) {
     534            6 :         char *empty = malloc(1);
     535            6 :         if (empty) *empty = '\0';
     536            6 :         return empty;
     537              :     }
     538          161 :     return compact_lines(rs.buf);
     539              : }
        

Generated by: LCOV version 2.0-1