Line data Source code
1 : #include "html_render.h"
2 : #include "html_parser.h"
3 : #include "html_medium.h"
4 : #include <stdlib.h>
5 : #include <string.h>
6 : #include <stdio.h>
7 : #include <ctype.h>
8 : #include <stdint.h>
9 :
10 : /* ── List stack ──────────────────────────────────────────────────────── */
11 :
/* Maximum number of nested <ul>/<ol> lists kept on the list stack. */
#define LIST_MAX 16

/* One entry of the list stack: describes the list currently being rendered. */
typedef struct {
    int is_ol;   /* 1 for an ordered list (<ol>), 0 for an unordered one (<ul>) */
    int cnt;     /* items numbered so far; bumped for each <li> of an <ol> */
} ListFrame;
15 :
16 : /* ── Render state ────────────────────────────────────────────────────── */
17 :
18 : typedef struct {
19 : char *buf;
20 : size_t len, cap;
21 : int col; /* visible column (ANSI not counted) */
22 : int width; /* wrap width; 0 = no wrap */
23 : int ansi; /* emit ANSI escapes? */
24 : int bold; /* depth counter */
25 : int italic;
26 : int uline;
27 : int color_fg; /* depth: foreground color set by parse_style */
28 : int color_bg; /* depth: background color set by parse_style */
29 : int skip; /* depth: no output (script/style) */
30 : int pre; /* depth: no wrap */
31 : int pending_nl; /* buffered newlines to emit: 0, 1, or 2 */
32 : int bq; /* blockquote depth */
33 : ListFrame lists[LIST_MAX];
34 : int list_top;
35 : } RS;
36 :
37 : /* ── Buffer ──────────────────────────────────────────────────────────── */
38 :
39 13812 : static void rs_push(RS *rs, char c) {
40 13812 : if (rs->len + 2 > rs->cap) {
41 32 : size_t nc = rs->cap ? rs->cap * 2 : 512;
42 32 : char *t = realloc(rs->buf, nc);
43 32 : if (!t) return;
44 32 : rs->buf = t; rs->cap = nc;
45 : }
46 13812 : rs->buf[rs->len++] = c;
47 13812 : rs->buf[rs->len] = '\0';
48 : }
49 3820 : static void rs_str(RS *rs, const char *s) { for (; *s; s++) rs_push(rs, *s); }
50 1664 : static void rs_write(RS *rs, const char *s, int n)
51 8032 : { for (int i = 0; i < n; i++) rs_push(rs, s[i]); }
52 :
53 : /* ── UTF-8 helpers ───────────────────────────────────────────────────── */
54 :
/* Decodes one UTF-8 sequence starting at *p and advances *p past the bytes
 * it consumed.  Returns the decoded code point, or U+FFFD (replacement
 * character) for malformed input:
 *
 *   - stray continuation byte (0x80..0xBF) or invalid lead byte (0xF8..0xFF):
 *     exactly one byte is consumed;
 *   - missing or invalid continuation byte: *p is left on the offending byte,
 *     so a truncated sequence never reads past the terminating NUL;
 *   - overlong encoding, UTF-16 surrogate, or value above U+10FFFF: the whole
 *     sequence is consumed.
 *
 * The caller must ensure **p is not the terminating NUL; a NUL is decoded as
 * code point 0 and *p is advanced past it. */
static uint32_t utf8_adv(const char **p) {
    /* Smallest code point that legitimately needs `ex` continuation bytes. */
    static const uint32_t min_cp[4] = { 0x0, 0x80, 0x800, 0x10000 };

    unsigned char c = (unsigned char)**p;
    uint32_t cp;
    int ex;

    if (c < 0x80)      { cp = c;        ex = 0; }
    else if (c < 0xC0) { (*p)++; return 0xFFFD; }   /* stray continuation */
    else if (c < 0xE0) { cp = c & 0x1F; ex = 1; }
    else if (c < 0xF0) { cp = c & 0x0F; ex = 2; }
    else if (c < 0xF8) { cp = c & 0x07; ex = 3; }
    else               { (*p)++; return 0xFFFD; }   /* 0xF8..0xFF never valid */

    (*p)++;
    for (int i = 0; i < ex; i++) {
        unsigned char cc = (unsigned char)**p;
        if ((cc & 0xC0) != 0x80)
            return 0xFFFD;                          /* resync on this byte */
        cp = (cp << 6) | (uint32_t)(cc & 0x3F);
        (*p)++;
    }

    /* Reject encodings that RFC 3629 forbids. */
    if (cp < min_cp[ex] || cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF))
        return 0xFFFD;
    return cp;
}
70 :
/* Sums html_medium_char_width() over every code point of s, ignoring
 * escape sequences of the form ESC '[' ... 'm' (they contribute nothing).
 * An unterminated escape sequence swallows the rest of the string. */
static int str_vis_width(const char *s) {
    int cols = 0;

    while (*s != '\0') {
        int is_escape = ((unsigned char)s[0] == 0x1B) && (s[1] == '[');
        if (!is_escape) {
            cols += html_medium_char_width(utf8_adv(&s));
            continue;
        }
        /* Skip "ESC [", then everything up to and including the final 'm'. */
        s += 2;
        while (*s != '\0' && *s != 'm')
            s++;
        if (*s != '\0')
            s++;
    }
    return cols;
}
82 :
83 : /* ── Newline / prefix management ─────────────────────────────────────── */
84 :
85 1408 : static void emit_bq_prefix(RS *rs) {
86 1408 : if (rs->col == 0 && rs->bq > 0) {
87 64 : for (int i = 0; i < rs->bq; i++) { rs_push(rs, '>'); rs_push(rs, ' '); }
88 32 : rs->col = rs->bq * 2;
89 : }
90 1408 : }
91 :
92 1024 : static void flush_nl(RS *rs) {
93 1024 : if (!rs->pending_nl) return;
94 : /* Count consecutive newlines already at the end of the buffer so we
95 : * never accumulate more than pending_nl in a row. */
96 384 : int trailing = 0;
97 384 : for (int i = (int)rs->len - 1; i >= 0 && rs->buf[i] == '\n'; i--)
98 0 : trailing++;
99 864 : for (int i = trailing; i < rs->pending_nl; i++) rs_push(rs, '\n');
100 384 : rs->col = 0;
101 384 : rs->pending_nl = 0;
102 384 : emit_bq_prefix(rs);
103 : }
104 :
105 928 : static void req_nl(RS *rs, int n) {
106 928 : if (rs->pending_nl < n) rs->pending_nl = n;
107 928 : }
108 384 : static void block_open(RS *rs) { if (rs->len > 0) req_nl(rs, 1); }
109 224 : static void block_close(RS *rs) { req_nl(rs, 1); }
110 96 : static void para_open(RS *rs) { if (rs->len > 0) req_nl(rs, 2); }
111 96 : static void para_close(RS *rs) { req_nl(rs, 2); }
112 :
113 : /* ── ANSI helpers ────────────────────────────────────────────────────── */
114 :
115 666 : static void esc(RS *rs, const char *e) { if (rs->ansi) rs_str(rs, e); }
116 64 : static void open_bold(RS *rs) { if (rs->bold++ == 0) esc(rs, "\033[1m"); }
117 86 : static void close_bold(RS *rs) { if (--rs->bold == 0) esc(rs, "\033[22m"); }
118 32 : static void open_italic(RS *rs) { if (rs->italic++ == 0) esc(rs, "\033[3m"); }
119 54 : static void close_italic(RS *rs) { if (--rs->italic == 0) esc(rs, "\033[23m"); }
120 32 : static void open_uline(RS *rs) { if (rs->uline++ == 0) esc(rs, "\033[4m"); }
121 54 : static void close_uline(RS *rs) { if (--rs->uline == 0) esc(rs, "\033[24m"); }
122 :
123 : /* ── Inline CSS ──────────────────────────────────────────────────────── */
124 :
/* Converts one hexadecimal digit (either case) to its value 0..15.
 * Any other character yields 0. */
static int hex_val(char c) {
    int value = 0;

    if ('0' <= c && c <= '9')
        value = c - '0';
    else if ('a' <= c && c <= 'f')
        value = 10 + (c - 'a');
    else if ('A' <= c && c <= 'F')
        value = 10 + (c - 'A');

    return value;
}
131 : static const struct { const char *name; int r,g,b; } CSS_COLORS[] = {
132 : {"black",0,0,0},{"silver",192,192,192},{"gray",128,128,128},
133 : {"white",255,255,255},{"maroon",128,0,0},{"red",255,0,0},
134 : {"purple",128,0,128},{"fuchsia",255,0,255},{"green",0,128,0},
135 : {"lime",0,255,0},{"olive",128,128,0},{"yellow",255,255,0},
136 : {"navy",0,0,128},{"blue",0,0,255},{"teal",0,128,128},{"aqua",0,255,255},
137 : {NULL,0,0,0}
138 : };
139 66 : static void apply_color(RS *rs, const char *v, int fg) {
140 110 : if (!rs->ansi) return;
141 : /* Background colors are never emitted: they break dark-theme terminals
142 : * and produce unreadable combinations when the email author's palette
143 : * does not match the user's terminal theme. */
144 66 : if (!fg) return;
145 44 : int r=-1,g=-1,b=-1;
146 44 : while (*v==' ') v++;
147 44 : if (*v=='#') {
148 22 : v++;
149 22 : size_t len=strlen(v); while(len>0&&v[len-1]==' ') len--;
150 22 : if (len==6) {
151 0 : r=hex_val(v[0])*16+hex_val(v[1]);
152 0 : g=hex_val(v[2])*16+hex_val(v[3]);
153 0 : b=hex_val(v[4])*16+hex_val(v[5]);
154 22 : } else if (len==3) {
155 22 : r=hex_val(v[0])*17; g=hex_val(v[1])*17; b=hex_val(v[2])*17;
156 : }
157 : } else {
158 132 : for (int i=0; CSS_COLORS[i].name; i++) {
159 132 : size_t nl=strlen(CSS_COLORS[i].name);
160 132 : if (strncasecmp(v, CSS_COLORS[i].name, nl)==0) {
161 22 : r=CSS_COLORS[i].r; g=CSS_COLORS[i].g; b=CSS_COLORS[i].b; break;
162 : }
163 : }
164 : }
165 44 : if (r<0) return;
166 : /* Suppress dark foreground colors (max component < 160): they are
167 : * unreadable on dark-theme terminals and common in newsletter HTML
168 : * (e.g. #333, #666, gray). Only bright colours are emitted. */
169 44 : int mx = r > g ? (r > b ? r : b) : (g > b ? g : b);
170 44 : if (mx < 160) return;
171 22 : char e[32]; snprintf(e,sizeof(e),"\033[38;2;%d;%d;%dm",r,g,b);
172 22 : rs_str(rs, e);
173 22 : rs->color_fg++;
174 : }
175 96 : static void parse_style(RS *rs, const char *style) {
176 96 : if (!style || !rs->ansi) return;
177 66 : const char *p = style;
178 198 : while (*p) {
179 198 : while (*p==' '||*p=='\t') p++;
180 1496 : const char *ps=p; while(*p&&*p!=':') p++;
181 132 : if (!*p) break;
182 132 : size_t pl=(size_t)(p-ps); while(pl>0&&ps[pl-1]==' ') pl--;
183 264 : p++; while(*p==' ') p++;
184 836 : const char *vs=p; while(*p&&*p!=';') p++;
185 132 : size_t vl=(size_t)(p-vs); if(*p==';') p++;
186 132 : char prop[32]={0},val[64]={0};
187 132 : if(pl<sizeof(prop)) memcpy(prop,ps,pl);
188 132 : if(vl>0&&vl<sizeof(val)) memcpy(val,vs,vl);
189 132 : if (!strcasecmp(prop,"font-weight")&&!strncasecmp(val,"bold",4)) { esc(rs,"\033[1m"); rs->bold++; }
190 110 : else if (!strcasecmp(prop,"font-style")&&!strncasecmp(val,"italic",6)) { esc(rs,"\033[3m"); rs->italic++; }
191 88 : else if (!strcasecmp(prop,"text-decoration")&&!strncasecmp(val,"underline",9)){ esc(rs,"\033[4m"); rs->uline++; }
192 66 : else if (!strcasecmp(prop,"color")) apply_color(rs,val,1);
193 22 : else if (!strcasecmp(prop,"background-color")) apply_color(rs,val,0);
194 : }
195 : }
196 :
197 : /* ── Text emission with word wrap ────────────────────────────────────── */
198 :
199 32 : static void emit_wrap_nl(RS *rs) {
200 : /* remove trailing space before the newline */
201 32 : if (rs->len > 0 && rs->buf[rs->len-1] == ' ') {
202 0 : rs->len--; rs->buf[rs->len] = '\0'; rs->col--;
203 : }
204 32 : rs_push(rs, '\n');
205 32 : rs->col = 0;
206 32 : emit_bq_prefix(rs);
207 32 : }
208 :
209 768 : static void emit_text(RS *rs, const char *text) {
210 768 : if (!text || !*text || rs->skip) return;
211 768 : flush_nl(rs);
212 768 : emit_bq_prefix(rs);
213 :
214 768 : if (rs->pre) {
215 544 : for (const char *p = text; *p; ) {
216 512 : if (*p == '\r') { p++; continue; }
217 512 : if (*p == '\n') {
218 0 : rs_push(rs, '\n'); rs->col = 0;
219 0 : emit_bq_prefix(rs);
220 0 : p++; continue;
221 : }
222 512 : const char *s = p;
223 512 : rs->col += html_medium_char_width(utf8_adv(&p));
224 512 : rs_write(rs, s, (int)(p - s));
225 : }
226 32 : return;
227 : }
228 :
229 : /* Normal mode: word-wrap */
230 2624 : for (const char *p = text; *p; ) {
231 1888 : if (isspace((unsigned char)*p)) {
232 : /* Collapse whitespace; only emit space if not at line start */
233 736 : int at_start = (rs->col <= rs->bq * 2);
234 736 : int already_space = (rs->len > 0 && rs->buf[rs->len-1] == ' ');
235 736 : if (!at_start && !already_space) {
236 704 : rs_push(rs, ' ');
237 704 : rs->col++;
238 : }
239 1504 : while (*p && isspace((unsigned char)*p)) p++;
240 736 : continue;
241 : }
242 :
243 : /* Collect one word */
244 1152 : const char *ws = p;
245 1152 : int ww = 0;
246 6880 : while (*p && !isspace((unsigned char)*p)) {
247 5728 : const char *q = p;
248 5728 : ww += html_medium_char_width(utf8_adv(&p));
249 : (void)q;
250 : }
251 1152 : int wlen = (int)(p - ws);
252 :
253 : /* URL tokens (http://, https://, ftp://, mailto:) are always placed
254 : * on their own line so terminal URL-recognition works reliably.
255 : * They are never broken regardless of width. */
256 1504 : int is_url = (wlen >= 6) && (
257 352 : strncmp(ws, "http://", 7) == 0 ||
258 352 : strncmp(ws, "https://", 8) == 0 ||
259 320 : strncmp(ws, "ftp://", 6) == 0 ||
260 320 : strncmp(ws, "mailto:", 7) == 0);
261 :
262 1152 : if (is_url) {
263 32 : if (rs->col > rs->bq * 2) emit_wrap_nl(rs); /* start own line */
264 32 : esc(rs, "\033[34m"); /* blue */
265 32 : rs_write(rs, ws, wlen);
266 32 : esc(rs, "\033[39m"); /* reset fg */
267 32 : rs->col += ww;
268 32 : rs_push(rs, '\n'); /* trailing newline: next content fresh line */
269 32 : rs->col = 0;
270 32 : emit_bq_prefix(rs);
271 32 : continue;
272 : }
273 :
274 : /* Wrap if needed (never wrap an otherwise-empty line) */
275 1120 : if (rs->width > 0 && rs->col > rs->bq * 2 && rs->col + ww > rs->width)
276 0 : emit_wrap_nl(rs);
277 :
278 1120 : rs_write(rs, ws, wlen);
279 1120 : rs->col += ww;
280 : }
281 : }
282 :
283 : /* ── Whitespace helpers ──────────────────────────────────────────────── */
284 :
/** Tells whether s holds nothing a reader could see.
 *  Returns 1 when every byte sequence in s is one of: a byte <= 0x20 (ASCII
 *  whitespace and control characters), U+00A0 nbsp, U+00AD soft hyphen,
 *  U+200C zwnj or U+200D zwj.  Returns 0 as soon as anything else is found.
 *  The empty string counts as blank. */
static int is_blank_str(const char *s) {
    const unsigned char *b = (const unsigned char *)s;

    for (;;) {
        if (b[0] == '\0')
            return 1;

        if (b[0] <= ' ')
            b += 1;
        else if (b[0] == 0xC2 && (b[1] == 0xA0 || b[1] == 0xAD))
            b += 2;                         /* nbsp / shy */
        else if (b[0] == 0xE2 && b[1] == 0x80 && (b[2] == 0x8C || b[2] == 0x8D))
            b += 3;                         /* zwnj / zwj */
        else
            return 0;
    }
}
299 :
/* Moves `end` backwards over trailing invisible content of [begin, end):
 * bytes <= 0x20, nbsp (C2 A0), soft hyphen (C2 AD), zwnj/zwj (E2 80 8C/8D).
 * Returns the new end of the visible part. */
static const unsigned char *compact_rtrim(const unsigned char *begin,
                                          const unsigned char *end) {
    while (end > begin) {
        size_t avail = (size_t)(end - begin);

        if (end[-1] <= ' ') {
            end -= 1;
        } else if (avail >= 2 && end[-2] == 0xC2 &&
                   (end[-1] == 0xA0 || end[-1] == 0xAD)) {
            end -= 2;
        } else if (avail >= 3 && end[-3] == 0xE2 && end[-2] == 0x80 &&
                   (end[-1] == 0x8C || end[-1] == 0x8D)) {
            end -= 3;
        } else {
            break;
        }
    }
    return end;
}

/** Normalises blank lines and trailing whitespace in rendered output.
 *  - Each line loses its trailing invisible characters.
 *  - Blank lines before the first content line are dropped.
 *  - A run of blank lines between content collapses to one blank line.
 *  - Blank lines at the very end collapse to at most one.
 *  Takes ownership of s: on success s is freed and a new heap string is
 *  returned.  Returns NULL for NULL input, and s itself if allocation fails. */
static char *compact_lines(char *s) {
    if (s == NULL)
        return NULL;

    size_t total = strlen(s);
    char *result = malloc(total + 1);   /* output never outgrows the input */
    if (result == NULL)
        return s;

    const unsigned char *cur = (const unsigned char *)s;
    char *dst = result;
    int gap = 0;          /* a blank line is waiting to be written */
    int seen_text = 0;    /* at least one content line was written */

    while (*cur != '\0') {
        /* Delimit the current line. */
        const unsigned char *line = cur;
        const unsigned char *eol = cur;
        while (*eol != '\0' && *eol != '\n')
            eol++;
        int terminated = (*eol == '\n');
        cur = terminated ? eol + 1 : eol;

        size_t keep = (size_t)(compact_rtrim(line, eol) - line);
        if (keep == 0) {
            if (terminated)
                gap = 1;
            continue;
        }

        if (gap && seen_text)
            *dst++ = '\n';
        gap = 0;
        seen_text = 1;

        memcpy(dst, line, keep);
        dst += keep;
        if (terminated)
            *dst++ = '\n';
    }

    /* Preserve at most one trailing blank line. */
    if (gap)
        *dst++ = '\n';

    *dst = '\0';
    free(s);
    return result;
}
349 :
350 : /* ── Tag open / close ────────────────────────────────────────────────── */
351 :
352 : static void traverse(RS *rs, const HtmlNode *node);
353 :
354 864 : static void tag_open(RS *rs, const HtmlNode *node) {
355 864 : if (!node->tag) return;
356 864 : const char *t = node->tag;
357 :
358 : /* Inline styles (applied before tag-specific behavior) */
359 864 : const char *style = html_attr_get(node, "style");
360 864 : if (style) parse_style(rs, style);
361 :
362 864 : if (!strcmp(t,"b")||!strcmp(t,"strong")) open_bold(rs);
363 832 : else if (!strcmp(t,"i")||!strcmp(t,"em")) open_italic(rs);
364 800 : else if (!strcmp(t,"u")) open_uline(rs);
365 768 : else if (!strcmp(t,"s")||!strcmp(t,"del")||!strcmp(t,"strike")) esc(rs,"\033[9m");
366 704 : else if (!strcmp(t,"br")) req_nl(rs, 1);
367 704 : else if (!strcmp(t,"hr")) {
368 32 : block_open(rs); flush_nl(rs); emit_bq_prefix(rs);
369 32 : int w = rs->width > 0 ? rs->width - rs->col : 20;
370 2584 : for (int i = 0; i < w; i++) rs_push(rs, '-');
371 32 : rs->col += w;
372 32 : block_close(rs);
373 : }
374 672 : else if (!strcmp(t,"p")) para_open(rs);
375 608 : else if (!strcmp(t,"div")||!strcmp(t,"article")||!strcmp(t,"section")||
376 576 : !strcmp(t,"main")||!strcmp(t,"header")||!strcmp(t,"footer")||
377 608 : !strcmp(t,"nav")||!strcmp(t,"aside")) block_open(rs);
378 576 : else if (t[0]=='h' && t[1]>='1' && t[1]<='6' && !t[2]) {
379 32 : para_open(rs); open_bold(rs);
380 : }
381 544 : else if (!strcmp(t,"ul")) {
382 32 : if (rs->list_top < LIST_MAX) rs->lists[rs->list_top++] = (ListFrame){0, 0};
383 32 : block_open(rs);
384 : }
385 512 : else if (!strcmp(t,"ol")) {
386 32 : if (rs->list_top < LIST_MAX) rs->lists[rs->list_top++] = (ListFrame){1, 0};
387 32 : block_open(rs);
388 : }
389 480 : else if (!strcmp(t,"li")) {
390 128 : block_open(rs);
391 128 : flush_nl(rs);
392 128 : emit_bq_prefix(rs);
393 128 : if (rs->list_top > 0 && rs->lists[rs->list_top-1].is_ol) {
394 64 : rs->lists[rs->list_top-1].cnt++;
395 : char buf[16];
396 64 : int n = snprintf(buf, sizeof(buf), "%d. ", rs->lists[rs->list_top-1].cnt);
397 64 : rs_str(rs, buf);
398 64 : rs->col += n;
399 : } else {
400 : /* U+2022 BULLET: UTF-8 E2 80 A2 */
401 64 : rs_push(rs,(char)0xE2); rs_push(rs,(char)0x80); rs_push(rs,(char)0xA2);
402 64 : rs_push(rs, ' ');
403 64 : rs->col += 2; /* bullet=1 col, space=1 col */
404 : }
405 : }
406 352 : else if (!strcmp(t,"blockquote")) {
407 32 : block_open(rs);
408 32 : rs->bq++;
409 : /* if something was already on the line, start fresh */
410 32 : if (rs->col > 0) { flush_nl(rs); } else emit_bq_prefix(rs);
411 : }
412 320 : else if (!strcmp(t,"pre")) { block_open(rs); rs->pre++; }
413 288 : else if (!strcmp(t,"code")) { /* inline: no special formatting */ }
414 288 : else if (!strcmp(t,"img")) {
415 32 : const char *alt = html_attr_get(node, "alt");
416 32 : if (alt && *alt && !is_blank_str(alt)) {
417 32 : flush_nl(rs); emit_bq_prefix(rs);
418 32 : rs_push(rs, '[');
419 32 : rs_str(rs, alt);
420 32 : rs_push(rs, ']');
421 32 : rs->col += 2 + str_vis_width(alt);
422 : }
423 : }
424 256 : else if (!strcmp(t,"a")) {
425 32 : const char *href = html_attr_get(node, "href");
426 32 : if (href && *href && href[0] != '#' && strncmp(href, "javascript:", 11) != 0)
427 32 : if (rs->ansi) { esc(rs, "\033[34m"); rs->color_fg++; }
428 : }
429 224 : else if (!strcmp(t,"td")||!strcmp(t,"th")) {
430 64 : if (rs->col > rs->bq * 2) { rs_push(rs, '\t'); rs->col++; }
431 : }
432 160 : else if (!strcmp(t,"tr")||!strcmp(t,"table")) block_open(rs);
433 96 : else if (!strcmp(t,"script")||!strcmp(t,"style")) rs->skip++;
434 96 : else if (!strcmp(t,"textarea")||!strcmp(t,"button")) block_open(rs);
435 96 : else if (!strcmp(t,"input")) {
436 0 : const char *val = html_attr_get(node, "value");
437 0 : if (val && *val) {
438 0 : flush_nl(rs); emit_bq_prefix(rs);
439 0 : rs_str(rs, val);
440 0 : rs->col += str_vis_width(val);
441 : }
442 : }
443 : /* __root__ and unknown tags: traverse children unchanged */
444 : }
445 :
446 864 : static void tag_close(RS *rs, const HtmlNode *node) {
447 864 : if (!node->tag) return;
448 864 : const char *t = node->tag;
449 :
450 864 : if (!strcmp(t,"b")||!strcmp(t,"strong")) close_bold(rs);
451 832 : else if (!strcmp(t,"i")||!strcmp(t,"em")) close_italic(rs);
452 800 : else if (!strcmp(t,"u")) close_uline(rs);
453 768 : else if (!strcmp(t,"s")||!strcmp(t,"del")||!strcmp(t,"strike")) esc(rs,"\033[29m");
454 704 : else if (!strcmp(t,"p")) para_close(rs);
455 640 : else if (!strcmp(t,"div")||!strcmp(t,"article")||!strcmp(t,"section")||
456 608 : !strcmp(t,"main")||!strcmp(t,"header")||!strcmp(t,"footer")||
457 640 : !strcmp(t,"nav")||!strcmp(t,"aside")) block_close(rs);
458 608 : else if (t[0]=='h' && t[1]>='1' && t[1]<='6' && !t[2]) {
459 32 : close_bold(rs); para_close(rs);
460 : }
461 576 : else if (!strcmp(t,"ul")||!strcmp(t,"ol")) {
462 64 : if (rs->list_top > 0) rs->list_top--;
463 64 : block_close(rs);
464 : }
465 512 : else if (!strcmp(t,"li")) req_nl(rs, 1);
466 384 : else if (!strcmp(t,"blockquote")) {
467 32 : if (rs->bq > 0) rs->bq--;
468 32 : block_close(rs);
469 : }
470 352 : else if (!strcmp(t,"pre")) { if (rs->pre>0) rs->pre--; block_close(rs); }
471 320 : else if (!strcmp(t,"a")) {
472 32 : const char *href = html_attr_get(node, "href");
473 32 : if (href && *href && href[0] != '#' &&
474 32 : strncmp(href, "javascript:", 11) != 0) {
475 32 : if (rs->ansi && rs->color_fg > 0) { esc(rs, "\033[39m"); rs->color_fg--; }
476 32 : esc(rs, "\033[34m");
477 32 : emit_text(rs, href);
478 32 : esc(rs, "\033[39m");
479 : }
480 : }
481 288 : else if (!strcmp(t,"script")||!strcmp(t,"style")) { if (rs->skip>0) rs->skip--; }
482 288 : else if (!strcmp(t,"tr")) req_nl(rs, 1);
483 256 : else if (!strcmp(t,"table")) block_close(rs);
484 224 : else if (!strcmp(t,"textarea")||!strcmp(t,"button")) block_close(rs);
485 : }
486 :
487 : /* ── Tree traversal ──────────────────────────────────────────────────── */
488 :
489 1600 : static void traverse(RS *rs, const HtmlNode *node) {
490 1600 : if (!node) return;
491 1600 : if (node->type == HTML_NODE_TEXT) {
492 736 : emit_text(rs, node->text);
493 736 : return;
494 : }
495 : /* Snapshot style depth so parse_style side-effects from inline
496 : * style= attributes are balanced even when tag_close has no handler
497 : * for this tag (e.g. <a>, <span>, <td> with style="…"). */
498 864 : int bold_sv = rs->bold;
499 864 : int italic_sv = rs->italic;
500 864 : int uline_sv = rs->uline;
501 864 : int color_fg_sv = rs->color_fg;
502 864 : int color_bg_sv = rs->color_bg;
503 864 : tag_open(rs, node);
504 2400 : for (const HtmlNode *c = node->first_child; c; c = c->next_sibling)
505 1536 : traverse(rs, c);
506 864 : tag_close(rs, node);
507 : /* Close any depth-tracked style that tag_close left open */
508 886 : while (rs->uline > uline_sv) close_uline(rs);
509 886 : while (rs->italic > italic_sv) close_italic(rs);
510 886 : while (rs->bold > bold_sv) close_bold(rs);
511 864 : if (rs->ansi) {
512 594 : if (rs->color_fg > color_fg_sv) { esc(rs, "\033[39m"); rs->color_fg = color_fg_sv; }
513 594 : if (rs->color_bg > color_bg_sv) { esc(rs, "\033[49m"); rs->color_bg = color_bg_sv; }
514 : }
515 : }
516 :
517 : /* ── Public API ──────────────────────────────────────────────────────── */
518 :
519 32 : char *html_render(const char *html, int width, int ansi) {
520 32 : if (!html) return NULL;
521 :
522 32 : HtmlNode *root = html_parse(html);
523 32 : if (!root) {
524 0 : char *empty = malloc(1);
525 0 : if (empty) *empty = '\0';
526 0 : return empty;
527 : }
528 :
529 : RS rs;
530 32 : memset(&rs, 0, sizeof(rs));
531 32 : rs.width = width;
532 32 : rs.ansi = ansi;
533 :
534 : /* Traverse root's children (root itself is synthetic __root__) */
535 96 : for (const HtmlNode *c = root->first_child; c; c = c->next_sibling)
536 64 : traverse(&rs, c);
537 :
538 : /* Flush trailing newlines */
539 32 : flush_nl(&rs);
540 :
541 32 : html_node_free(root);
542 :
543 32 : if (!rs.buf) {
544 0 : char *empty = malloc(1);
545 0 : if (empty) *empty = '\0';
546 0 : return empty;
547 : }
548 32 : return compact_lines(rs.buf);
549 : }
|