Line data Source code
#include "html_render.h"
#include "html_parser.h"
#include "html_medium.h"
#include <ctype.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
9 :
10 : /* ── List stack ──────────────────────────────────────────────────────── */
11 :
12 : #define LIST_MAX 16
13 :
14 : typedef struct { int is_ol; int cnt; } ListFrame;
15 :
16 : /* ── Render state ────────────────────────────────────────────────────── */
17 :
18 : typedef struct {
19 : char *buf;
20 : size_t len, cap;
21 : int col; /* visible column (ANSI not counted) */
22 : int width; /* wrap width; 0 = no wrap */
23 : int ansi; /* emit ANSI escapes? */
24 : int bold; /* depth counter */
25 : int italic;
26 : int uline;
27 : int color_fg; /* depth: foreground color set by parse_style */
28 : int color_bg; /* depth: background color set by parse_style */
29 : int skip; /* depth: no output (script/style) */
30 : int pre; /* depth: no wrap */
31 : int pending_nl; /* buffered newlines to emit: 0, 1, or 2 */
32 : int bq; /* blockquote depth */
33 : ListFrame lists[LIST_MAX];
34 : int list_top;
35 : } RS;
36 :
37 : /* ── Buffer ──────────────────────────────────────────────────────────── */
38 :
39 3362 : static void rs_push(RS *rs, char c) {
40 3362 : if (rs->len + 2 > rs->cap) {
41 161 : size_t nc = rs->cap ? rs->cap * 2 : 512;
42 161 : char *t = realloc(rs->buf, nc);
43 161 : if (!t) return;
44 161 : rs->buf = t; rs->cap = nc;
45 : }
46 3362 : rs->buf[rs->len++] = c;
47 3362 : rs->buf[rs->len] = '\0';
48 : }
49 1863 : static void rs_str(RS *rs, const char *s) { for (; *s; s++) rs_push(rs, *s); }
50 408 : static void rs_write(RS *rs, const char *s, int n)
51 1856 : { for (int i = 0; i < n; i++) rs_push(rs, s[i]); }
52 :
53 : /* ── UTF-8 helpers ───────────────────────────────────────────────────── */
54 :
/* Decode one UTF-8 sequence starting at *p and advance *p past it.
 *
 * Returns the decoded code point, or U+FFFD when the input is malformed:
 *   - a stray continuation byte or an invalid lead byte (0xF8..0xFF)
 *     consumes exactly one byte;
 *   - a truncated sequence stops at the first byte that is not a
 *     continuation byte and leaves *p pointing at it, so a terminating
 *     NUL is never skipped;
 *   - an overlong encoding, a UTF-16 surrogate, or a value above U+10FFFF
 *     consumes the whole sequence.
 * A NUL byte at *p decodes as 0 and is consumed, so callers must test for
 * the end of the string before calling. */
static uint32_t utf8_adv(const char **p) {
    unsigned char c = (unsigned char)**p;
    uint32_t cp;
    uint32_t min;   /* smallest code point that legitimately needs this length */
    int ex;         /* number of continuation bytes expected */

    if (c < 0x80)      { cp = c;        ex = 0; min = 0; }
    else if (c < 0xC0) { (*p)++; return 0xFFFD; }
    else if (c < 0xE0) { cp = c & 0x1F; ex = 1; min = 0x80; }
    else if (c < 0xF0) { cp = c & 0x0F; ex = 2; min = 0x800; }
    else if (c < 0xF8) { cp = c & 0x07; ex = 3; min = 0x10000; }
    else               { (*p)++; return 0xFFFD; }

    (*p)++;
    for (int i = 0; i < ex; i++) {
        unsigned char b = (unsigned char)**p;
        if ((b & 0xC0) != 0x80) return 0xFFFD;
        cp = (cp << 6) | (uint32_t)(b & 0x3F);
        (*p)++;
    }

    /* Reject values that are structurally complete but not valid scalars. */
    if (cp < min || cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF))
        return 0xFFFD;
    return cp;
}
70 :
/* Sum of html_medium_char_width() over the characters of s, ignoring any
 * "ESC [ ... m" sequence (skipped through its closing 'm', or to the end of
 * the string if no 'm' follows). */
static int str_vis_width(const char *s) {
    int cols = 0;
    while (*s != '\0') {
        int is_csi = ((unsigned char)s[0] == 0x1B) && (s[1] == '[');
        if (!is_csi) {
            cols += html_medium_char_width(utf8_adv(&s));
            continue;
        }
        s += 2;
        while (*s != '\0' && *s != 'm')
            s++;
        if (*s != '\0')
            s++;
    }
    return cols;
}
82 :
83 : /* ── Newline / prefix management ─────────────────────────────────────── */
84 :
85 410 : static void emit_bq_prefix(RS *rs) {
86 410 : if (rs->col == 0 && rs->bq > 0) {
87 6 : for (int i = 0; i < rs->bq; i++) { rs_push(rs, '>'); rs_push(rs, ' '); }
88 3 : rs->col = rs->bq * 2;
89 : }
90 410 : }
91 :
92 459 : static void flush_nl(RS *rs) {
93 459 : if (!rs->pending_nl) return;
94 : /* Count consecutive newlines already at the end of the buffer so we
95 : * never accumulate more than pending_nl in a row. */
96 92 : int trailing = 0;
97 92 : for (int i = (int)rs->len - 1; i >= 0 && rs->buf[i] == '\n'; i--)
98 0 : trailing++;
99 194 : for (int i = trailing; i < rs->pending_nl; i++) rs_push(rs, '\n');
100 92 : rs->col = 0;
101 92 : rs->pending_nl = 0;
102 92 : emit_bq_prefix(rs);
103 : }
104 :
105 127 : static void req_nl(RS *rs, int n) {
106 127 : if (rs->pending_nl < n) rs->pending_nl = n;
107 127 : }
108 72 : static void block_open(RS *rs) { if (rs->len > 0) req_nl(rs, 1); }
109 55 : static void block_close(RS *rs) { req_nl(rs, 1); }
110 10 : static void para_open(RS *rs) { if (rs->len > 0) req_nl(rs, 2); }
111 10 : static void para_close(RS *rs) { req_nl(rs, 2); }
112 :
113 : /* ── ANSI helpers ────────────────────────────────────────────────────── */
114 :
115 242 : static void esc(RS *rs, const char *e) { if (rs->ansi) rs_str(rs, e); }
116 34 : static void open_bold(RS *rs) { if (rs->bold++ == 0) esc(rs, "\033[1m"); }
117 47 : static void close_bold(RS *rs) { if (--rs->bold == 0) esc(rs, "\033[22m"); }
118 17 : static void open_italic(RS *rs) { if (rs->italic++ == 0) esc(rs, "\033[3m"); }
119 26 : static void close_italic(RS *rs) { if (--rs->italic == 0) esc(rs, "\033[23m"); }
120 10 : static void open_uline(RS *rs) { if (rs->uline++ == 0) esc(rs, "\033[4m"); }
121 24 : static void close_uline(RS *rs) { if (--rs->uline == 0) esc(rs, "\033[24m"); }
122 :
123 : /* ── Inline CSS ──────────────────────────────────────────────────────── */
124 :
/* Value of one hexadecimal digit (either case).
 * Any other character yields 0. */
static int hex_val(char c) {
    if ('0' <= c && c <= '9')
        return c - '0';
    switch (c) {
    case 'a': case 'A': return 10;
    case 'b': case 'B': return 11;
    case 'c': case 'C': return 12;
    case 'd': case 'D': return 13;
    case 'e': case 'E': return 14;
    case 'f': case 'F': return 15;
    default:            return 0;
    }
}
/* Colour keywords recognised in inline styles, with their RGB components.
 * The table ends with an entry whose name is NULL. */
static const struct { const char *name; int r,g,b; } CSS_COLORS[] = {
    { .name = "black",   .r = 0,   .g = 0,   .b = 0   },
    { .name = "silver",  .r = 192, .g = 192, .b = 192 },
    { .name = "gray",    .r = 128, .g = 128, .b = 128 },
    { .name = "white",   .r = 255, .g = 255, .b = 255 },
    { .name = "maroon",  .r = 128, .g = 0,   .b = 0   },
    { .name = "red",     .r = 255, .g = 0,   .b = 0   },
    { .name = "purple",  .r = 128, .g = 0,   .b = 128 },
    { .name = "fuchsia", .r = 255, .g = 0,   .b = 255 },
    { .name = "green",   .r = 0,   .g = 128, .b = 0   },
    { .name = "lime",    .r = 0,   .g = 255, .b = 0   },
    { .name = "olive",   .r = 128, .g = 128, .b = 0   },
    { .name = "yellow",  .r = 255, .g = 255, .b = 0   },
    { .name = "navy",    .r = 0,   .g = 0,   .b = 128 },
    { .name = "blue",    .r = 0,   .g = 0,   .b = 255 },
    { .name = "teal",    .r = 0,   .g = 128, .b = 128 },
    { .name = "aqua",    .r = 0,   .g = 255, .b = 255 },
    { .name = NULL,      .r = 0,   .g = 0,   .b = 0   }
};
139 83 : static void apply_color(RS *rs, const char *v, int fg) {
140 83 : if (!rs->ansi) return;
141 : /* Background colors are never emitted: they break dark-theme terminals
142 : * and produce unreadable combinations when the email author's palette
143 : * does not match the user's terminal theme. */
144 83 : if (!fg) return;
145 61 : int r=-1,g=-1,b=-1;
146 61 : while (*v==' ') v++;
147 61 : if (*v=='#') {
148 42 : v++;
149 42 : size_t len=strlen(v); while(len>0&&v[len-1]==' ') len--;
150 42 : if (len==6) {
151 29 : r=hex_val(v[0])*16+hex_val(v[1]);
152 29 : g=hex_val(v[2])*16+hex_val(v[3]);
153 29 : b=hex_val(v[4])*16+hex_val(v[5]);
154 13 : } else if (len==3) {
155 13 : r=hex_val(v[0])*17; g=hex_val(v[1])*17; b=hex_val(v[2])*17;
156 : }
157 : } else {
158 147 : for (int i=0; CSS_COLORS[i].name; i++) {
159 147 : size_t nl=strlen(CSS_COLORS[i].name);
160 147 : if (strncasecmp(v, CSS_COLORS[i].name, nl)==0) {
161 19 : r=CSS_COLORS[i].r; g=CSS_COLORS[i].g; b=CSS_COLORS[i].b; break;
162 : }
163 : }
164 : }
165 61 : if (r<0) return;
166 : /* Suppress dark foreground colors (max component < 160): they are
167 : * unreadable on dark-theme terminals and common in newsletter HTML
168 : * (e.g. #333, #666, gray). Only bright colours are emitted. */
169 61 : int mx = r > g ? (r > b ? r : b) : (g > b ? g : b);
170 61 : if (mx < 160) return;
171 34 : char e[32]; snprintf(e,sizeof(e),"\033[38;2;%d;%d;%dm",r,g,b);
172 34 : rs_str(rs, e);
173 34 : rs->color_fg++;
174 : }
175 95 : static void parse_style(RS *rs, const char *style) {
176 95 : if (!style || !rs->ansi) return;
177 94 : const char *p = style;
178 213 : while (*p) {
179 119 : while (*p==' '||*p=='\t') p++;
180 1219 : const char *ps=p; while(*p&&*p!=':') p++;
181 119 : if (!*p) break;
182 119 : size_t pl=(size_t)(p-ps); while(pl>0&&ps[pl-1]==' ') pl--;
183 119 : p++; while(*p==' ') p++;
184 807 : const char *vs=p; while(*p&&*p!=';') p++;
185 119 : size_t vl=(size_t)(p-vs); if(*p==';') p++;
186 119 : char prop[32]={0},val[64]={0};
187 119 : if(pl<sizeof(prop)) memcpy(prop,ps,pl);
188 119 : if(vl>0&&vl<sizeof(val)) memcpy(val,vs,vl);
189 119 : if (!strcasecmp(prop,"font-weight")&&!strncasecmp(val,"bold",4)) { esc(rs,"\033[1m"); rs->bold++; }
190 106 : else if (!strcasecmp(prop,"font-style")&&!strncasecmp(val,"italic",6)) { esc(rs,"\033[3m"); rs->italic++; }
191 97 : else if (!strcasecmp(prop,"text-decoration")&&!strncasecmp(val,"underline",9)){ esc(rs,"\033[4m"); rs->uline++; }
192 83 : else if (!strcasecmp(prop,"color")) apply_color(rs,val,1);
193 22 : else if (!strcasecmp(prop,"background-color")) apply_color(rs,val,0);
194 : }
195 : }
196 :
197 : /* ── Text emission with word wrap ────────────────────────────────────── */
198 :
199 11 : static void emit_wrap_nl(RS *rs) {
200 : /* remove trailing space before the newline */
201 11 : if (rs->len > 0 && rs->buf[rs->len-1] == ' ') {
202 9 : rs->len--; rs->buf[rs->len] = '\0'; rs->col--;
203 : }
204 11 : rs_push(rs, '\n');
205 11 : rs->col = 0;
206 11 : emit_bq_prefix(rs);
207 11 : }
208 :
209 280 : static void emit_text(RS *rs, const char *text) {
210 280 : if (!text || !*text || rs->skip) return;
211 280 : flush_nl(rs);
212 280 : emit_bq_prefix(rs);
213 :
214 280 : if (rs->pre) {
215 27 : for (const char *p = text; *p; ) {
216 25 : if (*p == '\r') { p++; continue; }
217 25 : if (*p == '\n') {
218 2 : rs_push(rs, '\n'); rs->col = 0;
219 2 : emit_bq_prefix(rs);
220 2 : p++; continue;
221 : }
222 23 : const char *s = p;
223 23 : rs->col += html_medium_char_width(utf8_adv(&p));
224 23 : rs_write(rs, s, (int)(p - s));
225 : }
226 2 : return;
227 : }
228 :
229 : /* Normal mode: word-wrap */
230 811 : for (const char *p = text; *p; ) {
231 533 : if (isspace((unsigned char)*p)) {
232 : /* Collapse whitespace; only emit space if not at line start */
233 148 : int at_start = (rs->col <= rs->bq * 2);
234 148 : int already_space = (rs->len > 0 && rs->buf[rs->len-1] == ' ');
235 148 : if (!at_start && !already_space) {
236 141 : rs_push(rs, ' ');
237 141 : rs->col++;
238 : }
239 300 : while (*p && isspace((unsigned char)*p)) p++;
240 148 : continue;
241 : }
242 :
243 : /* Collect one word */
244 385 : const char *ws = p;
245 385 : int ww = 0;
246 1797 : while (*p && !isspace((unsigned char)*p)) {
247 1412 : const char *q = p;
248 1412 : ww += html_medium_char_width(utf8_adv(&p));
249 : (void)q;
250 : }
251 385 : int wlen = (int)(p - ws);
252 :
253 : /* URL tokens (http://, https://, ftp://, mailto:) are always placed
254 : * on their own line so terminal URL-recognition works reliably.
255 : * They are never broken regardless of width. */
256 451 : int is_url = (wlen >= 6) && (
257 66 : strncmp(ws, "http://", 7) == 0 ||
258 63 : strncmp(ws, "https://", 8) == 0 ||
259 56 : strncmp(ws, "ftp://", 6) == 0 ||
260 56 : strncmp(ws, "mailto:", 7) == 0);
261 :
262 385 : if (is_url) {
263 10 : if (rs->col > rs->bq * 2) emit_wrap_nl(rs); /* start own line */
264 10 : rs_write(rs, ws, wlen);
265 10 : rs->col += ww;
266 10 : rs_push(rs, '\n'); /* trailing newline: next content fresh line */
267 10 : rs->col = 0;
268 10 : emit_bq_prefix(rs);
269 10 : continue;
270 : }
271 :
272 : /* Wrap if needed (never wrap an otherwise-empty line) */
273 375 : if (rs->width > 0 && rs->col > rs->bq * 2 && rs->col + ww > rs->width)
274 4 : emit_wrap_nl(rs);
275 :
276 375 : rs_write(rs, ws, wlen);
277 375 : rs->col += ww;
278 : }
279 : }
280 :
281 : /* ── Whitespace helpers ──────────────────────────────────────────────── */
282 :
/** Reports whether s is made up solely of invisible content.
 *  Counted as invisible: every byte <= 0x20, U+00A0 (nbsp), U+00AD (soft
 *  hyphen), U+200C (zwnj) and U+200D (zwj).
 *  Returns 1 for such strings (including the empty string), 0 otherwise. */
static int is_blank_str(const char *s) {
    const unsigned char *p = (const unsigned char *)s;
    for (;;) {
        unsigned char c = p[0];
        if (c == 0)
            return 1;
        if (c <= ' ')
            p += 1;
        else if (c == 0xC2 && (p[1] == 0xA0 || p[1] == 0xAD))
            p += 2;   /* nbsp / shy */
        else if (c == 0xE2 && p[1] == 0x80 && (p[2] == 0x8C || p[2] == 0x8D))
            p += 3;   /* zwnj / zwj */
        else
            return 0;
    }
}
297 :
/* Step `le` backwards over trailing invisible content of the line [ls, le):
 * bytes <= 0x20, nbsp (C2 A0), soft hyphen (C2 AD), zwnj/zwj (E2 80 8C/8D).
 * Returns the new end of the line. */
static const unsigned char *compact_lines_trim(const unsigned char *ls,
                                               const unsigned char *le) {
    for (;;) {
        size_t n = (size_t)(le - ls);
        if (n >= 1 && le[-1] <= ' ')
            le -= 1;
        else if (n >= 2 && le[-2] == 0xC2 && (le[-1] == 0xA0 || le[-1] == 0xAD))
            le -= 2;
        else if (n >= 3 && le[-3] == 0xE2 && le[-2] == 0x80 &&
                 (le[-1] == 0x8C || le[-1] == 0x8D))
            le -= 3;
        else
            return le;
    }
}

/** Squeezes every run of blank lines down to a single blank line and strips
 *  trailing invisible characters from each line.
 *  Blank lines before the first non-blank line are dropped; at most one
 *  trailing blank line is kept.
 *  Takes ownership of s: it is freed and a new heap string is returned.
 *  If s is NULL or the allocation fails, s itself is returned unchanged. */
static char *compact_lines(char *s) {
    if (s == NULL)
        return s;
    char *out = malloc(strlen(s) + 1);
    if (out == NULL)
        return s;

    const unsigned char *cur = (const unsigned char *)s;
    char *w = out;
    int gap = 0;     /* a blank line was seen and not yet written */
    int wrote = 0;   /* some non-blank line has been written */

    while (*cur != '\0') {
        const unsigned char *start = cur;
        const unsigned char *stop = start;
        while (*stop != '\0' && *stop != '\n')
            stop++;
        int terminated = (*stop == '\n');
        cur = terminated ? stop + 1 : stop;

        size_t keep = (size_t)(compact_lines_trim(start, stop) - start);
        if (keep == 0) {
            if (terminated)
                gap = 1;
            continue;
        }

        if (gap && wrote)
            *w++ = '\n';
        gap = 0;
        wrote = 1;
        memcpy(w, start, keep);
        w += keep;
        if (terminated)
            *w++ = '\n';
    }

    /* Preserve at most one trailing blank line */
    if (gap)
        *w++ = '\n';

    *w = '\0';
    free(s);
    return out;
}
347 :
348 : /* ── Tag open / close ────────────────────────────────────────────────── */
349 :
350 : static void traverse(RS *rs, const HtmlNode *node);
351 :
352 302 : static void tag_open(RS *rs, const HtmlNode *node) {
353 302 : if (!node->tag) return;
354 302 : const char *t = node->tag;
355 :
356 : /* Inline styles (applied before tag-specific behavior) */
357 302 : const char *style = html_attr_get(node, "style");
358 302 : if (style) parse_style(rs, style);
359 :
360 302 : if (!strcmp(t,"b")||!strcmp(t,"strong")) open_bold(rs);
361 270 : else if (!strcmp(t,"i")||!strcmp(t,"em")) open_italic(rs);
362 253 : else if (!strcmp(t,"u")) open_uline(rs);
363 243 : else if (!strcmp(t,"s")||!strcmp(t,"del")||!strcmp(t,"strike")) esc(rs,"\033[9m");
364 235 : else if (!strcmp(t,"br")) req_nl(rs, 1);
365 202 : else if (!strcmp(t,"hr")) {
366 1 : block_open(rs); flush_nl(rs); emit_bq_prefix(rs);
367 1 : int w = rs->width > 0 ? rs->width - rs->col : 20;
368 21 : for (int i = 0; i < w; i++) rs_push(rs, '-');
369 1 : rs->col += w;
370 1 : block_close(rs);
371 : }
372 201 : else if (!strcmp(t,"p")) para_open(rs);
373 193 : else if (!strcmp(t,"div")||!strcmp(t,"article")||!strcmp(t,"section")||
374 159 : !strcmp(t,"main")||!strcmp(t,"header")||!strcmp(t,"footer")||
375 193 : !strcmp(t,"nav")||!strcmp(t,"aside")) block_open(rs);
376 159 : else if (t[0]=='h' && t[1]>='1' && t[1]<='6' && !t[2]) {
377 2 : para_open(rs); open_bold(rs);
378 : }
379 157 : else if (!strcmp(t,"ul")) {
380 4 : if (rs->list_top < LIST_MAX) rs->lists[rs->list_top++] = (ListFrame){0, 0};
381 4 : block_open(rs);
382 : }
383 153 : else if (!strcmp(t,"ol")) {
384 2 : if (rs->list_top < LIST_MAX) rs->lists[rs->list_top++] = (ListFrame){1, 0};
385 2 : block_open(rs);
386 : }
387 151 : else if (!strcmp(t,"li")) {
388 8 : block_open(rs);
389 8 : flush_nl(rs);
390 8 : emit_bq_prefix(rs);
391 11 : if (rs->list_top > 0 && rs->lists[rs->list_top-1].is_ol) {
392 3 : rs->lists[rs->list_top-1].cnt++;
393 3 : char buf[16];
394 3 : int n = snprintf(buf, sizeof(buf), "%d. ", rs->lists[rs->list_top-1].cnt);
395 3 : rs_str(rs, buf);
396 3 : rs->col += n;
397 : } else {
398 : /* U+2022 BULLET: UTF-8 E2 80 A2 */
399 5 : rs_push(rs,(char)0xE2); rs_push(rs,(char)0x80); rs_push(rs,(char)0xA2);
400 5 : rs_push(rs, ' ');
401 5 : rs->col += 2; /* bullet=1 col, space=1 col */
402 : }
403 : }
404 143 : else if (!strcmp(t,"blockquote")) {
405 3 : block_open(rs);
406 3 : rs->bq++;
407 : /* if something was already on the line, start fresh */
408 3 : if (rs->col > 0) { flush_nl(rs); } else emit_bq_prefix(rs);
409 : }
410 140 : else if (!strcmp(t,"pre")) { block_open(rs); rs->pre++; }
411 138 : else if (!strcmp(t,"code")) { /* inline: no special formatting */ }
412 138 : else if (!strcmp(t,"img")) {
413 5 : const char *alt = html_attr_get(node, "alt");
414 5 : if (alt && *alt && !is_blank_str(alt)) {
415 2 : flush_nl(rs); emit_bq_prefix(rs);
416 2 : rs_push(rs, '[');
417 2 : rs_str(rs, alt);
418 2 : rs_push(rs, ']');
419 2 : rs->col += 2 + str_vis_width(alt);
420 : }
421 : }
422 133 : else if (!strcmp(t,"a")) { /* render children as-is */ }
423 121 : else if (!strcmp(t,"td")||!strcmp(t,"th")) {
424 15 : if (rs->col > rs->bq * 2) { rs_push(rs, '\t'); rs->col++; }
425 : }
426 106 : else if (!strcmp(t,"tr")||!strcmp(t,"table")) block_open(rs);
427 88 : else if (!strcmp(t,"script")||!strcmp(t,"style")) rs->skip++;
428 88 : else if (!strcmp(t,"textarea")||!strcmp(t,"button")) block_open(rs);
429 88 : else if (!strcmp(t,"input")) {
430 1 : const char *val = html_attr_get(node, "value");
431 1 : if (val && *val) {
432 1 : flush_nl(rs); emit_bq_prefix(rs);
433 1 : rs_str(rs, val);
434 1 : rs->col += str_vis_width(val);
435 : }
436 : }
437 : /* __root__ and unknown tags: traverse children unchanged */
438 : }
439 :
440 302 : static void tag_close(RS *rs, const HtmlNode *node) {
441 302 : if (!node->tag) return;
442 302 : const char *t = node->tag;
443 :
444 302 : if (!strcmp(t,"b")||!strcmp(t,"strong")) close_bold(rs);
445 270 : else if (!strcmp(t,"i")||!strcmp(t,"em")) close_italic(rs);
446 253 : else if (!strcmp(t,"u")) close_uline(rs);
447 243 : else if (!strcmp(t,"s")||!strcmp(t,"del")||!strcmp(t,"strike")) esc(rs,"\033[29m");
448 235 : else if (!strcmp(t,"p")) para_close(rs);
449 227 : else if (!strcmp(t,"div")||!strcmp(t,"article")||!strcmp(t,"section")||
450 193 : !strcmp(t,"main")||!strcmp(t,"header")||!strcmp(t,"footer")||
451 227 : !strcmp(t,"nav")||!strcmp(t,"aside")) block_close(rs);
452 193 : else if (t[0]=='h' && t[1]>='1' && t[1]<='6' && !t[2]) {
453 2 : close_bold(rs); para_close(rs);
454 : }
455 191 : else if (!strcmp(t,"ul")||!strcmp(t,"ol")) {
456 6 : if (rs->list_top > 0) rs->list_top--;
457 6 : block_close(rs);
458 : }
459 185 : else if (!strcmp(t,"li")) req_nl(rs, 1);
460 177 : else if (!strcmp(t,"blockquote")) {
461 3 : if (rs->bq > 0) rs->bq--;
462 3 : block_close(rs);
463 : }
464 174 : else if (!strcmp(t,"pre")) { if (rs->pre>0) rs->pre--; block_close(rs); }
465 172 : else if (!strcmp(t,"a")) {
466 12 : const char *href = html_attr_get(node, "href");
467 12 : if (href && *href && href[0] != '#' &&
468 9 : strncmp(href, "javascript:", 11) != 0)
469 8 : emit_text(rs, href);
470 : }
471 160 : else if (!strcmp(t,"script")||!strcmp(t,"style")) { if (rs->skip>0) rs->skip--; }
472 160 : else if (!strcmp(t,"tr")) req_nl(rs, 1);
473 151 : else if (!strcmp(t,"table")) block_close(rs);
474 142 : else if (!strcmp(t,"textarea")||!strcmp(t,"button")) block_close(rs);
475 : }
476 :
477 : /* ── Tree traversal ──────────────────────────────────────────────────── */
478 :
479 574 : static void traverse(RS *rs, const HtmlNode *node) {
480 574 : if (!node) return;
481 574 : if (node->type == HTML_NODE_TEXT) {
482 272 : emit_text(rs, node->text);
483 272 : return;
484 : }
485 : /* Snapshot style depth so parse_style side-effects from inline
486 : * style= attributes are balanced even when tag_close has no handler
487 : * for this tag (e.g. <a>, <span>, <td> with style="…"). */
488 302 : int bold_sv = rs->bold;
489 302 : int italic_sv = rs->italic;
490 302 : int uline_sv = rs->uline;
491 302 : int color_fg_sv = rs->color_fg;
492 302 : int color_bg_sv = rs->color_bg;
493 302 : tag_open(rs, node);
494 633 : for (const HtmlNode *c = node->first_child; c; c = c->next_sibling)
495 331 : traverse(rs, c);
496 302 : tag_close(rs, node);
497 : /* Close any depth-tracked style that tag_close left open */
498 316 : while (rs->uline > uline_sv) close_uline(rs);
499 311 : while (rs->italic > italic_sv) close_italic(rs);
500 315 : while (rs->bold > bold_sv) close_bold(rs);
501 302 : if (rs->ansi) {
502 205 : if (rs->color_fg > color_fg_sv) { esc(rs, "\033[39m"); rs->color_fg = color_fg_sv; }
503 205 : if (rs->color_bg > color_bg_sv) { esc(rs, "\033[49m"); rs->color_bg = color_bg_sv; }
504 : }
505 : }
506 :
507 : /* ── Public API ──────────────────────────────────────────────────────── */
508 :
509 168 : char *html_render(const char *html, int width, int ansi) {
510 168 : if (!html) return NULL;
511 :
512 167 : HtmlNode *root = html_parse(html);
513 167 : if (!root) {
514 0 : char *empty = malloc(1);
515 0 : if (empty) *empty = '\0';
516 0 : return empty;
517 : }
518 :
519 167 : RS rs;
520 167 : memset(&rs, 0, sizeof(rs));
521 167 : rs.width = width;
522 167 : rs.ansi = ansi;
523 :
524 : /* Traverse root's children (root itself is synthetic __root__) */
525 410 : for (const HtmlNode *c = root->first_child; c; c = c->next_sibling)
526 243 : traverse(&rs, c);
527 :
528 : /* Flush trailing newlines */
529 167 : flush_nl(&rs);
530 :
531 167 : html_node_free(root);
532 :
533 167 : if (!rs.buf) {
534 6 : char *empty = malloc(1);
535 6 : if (empty) *empty = '\0';
536 6 : return empty;
537 : }
538 161 : return compact_lines(rs.buf);
539 : }
|