Line data Source code
1 : #include "html_render.h"
2 : #include "html_parser.h"
3 : #include "html_medium.h"
4 : #include <stdlib.h>
5 : #include <string.h>
6 : #include <stdio.h>
7 : #include <ctype.h>
8 : #include <stdint.h>
9 :
10 : /* ── List stack ──────────────────────────────────────────────────────── */
11 :
/* Maximum number of nested <ul>/<ol> frames tracked at once. */
#define LIST_MAX 16

/* One open list: is_ol is 1 for <ol> and 0 for <ul>; cnt is the number of
 * the most recently emitted item of an ordered list. */
typedef struct {
    int is_ol;
    int cnt;
} ListFrame;

/* ── Render state ────────────────────────────────────────────────────── */

typedef struct {
    char     *buf;              /* NUL-terminated output buffer (heap, grown on demand) */
    size_t    len;              /* bytes used in buf, excluding the terminator */
    size_t    cap;              /* bytes allocated for buf */
    int       col;              /* visible column (ANSI not counted) */
    int       width;            /* wrap width; 0 = no wrap */
    int       ansi;             /* emit ANSI escapes? */
    int       bold;             /* depth counter */
    int       italic;           /* depth counter */
    int       uline;            /* depth counter */
    int       color_fg;         /* depth: foreground color set by parse_style */
    int       color_bg;         /* depth: background color set by parse_style */
    int       skip;             /* depth: no output (script/style) */
    int       pre;              /* depth: no wrap */
    int       pending_nl;       /* buffered newlines to emit: 0, 1, or 2 */
    int       bq;               /* blockquote depth */
    ListFrame lists[LIST_MAX];  /* stack of open lists */
    int       list_top;         /* number of frames in use in lists[] */
} RS;
36 :
37 : /* ── Buffer ──────────────────────────────────────────────────────────── */
38 :
39 17106 : static void rs_push(RS *rs, char c) {
40 17106 : if (rs->len + 2 > rs->cap) {
41 192 : size_t nc = rs->cap ? rs->cap * 2 : 512;
42 192 : char *t = realloc(rs->buf, nc);
43 192 : if (!t) return;
44 192 : rs->buf = t; rs->cap = nc;
45 : }
46 17106 : rs->buf[rs->len++] = c;
47 17106 : rs->buf[rs->len] = '\0';
48 : }
49 5887 : static void rs_str(RS *rs, const char *s) { for (; *s; s++) rs_push(rs, *s); }
50 1996 : static void rs_write(RS *rs, const char *s, int n)
51 9650 : { for (int i = 0; i < n; i++) rs_push(rs, s[i]); }
52 :
53 : /* ── UTF-8 helpers ───────────────────────────────────────────────────── */
54 :
/**
 * Decode one UTF-8 sequence at *p and advance *p past the bytes consumed.
 *
 * Returns the decoded code point, or U+FFFD for malformed input:
 *   - a stray continuation byte (0x80..0xBF) or an invalid lead byte
 *     (0xF8..0xFF): exactly one byte is consumed;
 *   - a sequence cut short by a non-continuation byte (including the
 *     terminating NUL): the bytes read so far are consumed and *p is left
 *     on the offending byte, so a NUL terminator is never skipped;
 *   - an overlong encoding, a UTF-16 surrogate (U+D800..U+DFFF) or a value
 *     above U+10FFFF: the whole sequence is consumed.
 *
 * The caller must not call this with **p == '\0'; the single-byte path
 * always advances by one byte.
 */
static uint32_t utf8_adv(const char **p) {
    const unsigned char *s = (const unsigned char *)*p;
    unsigned char lead = s[0];
    uint32_t cp;
    uint32_t min_cp;    /* smallest code point that legitimately needs this length */
    int extra;          /* number of continuation bytes expected */

    if (lead < 0x80) {
        *p += 1;
        return lead;
    }
    if (lead < 0xC0 || lead >= 0xF8) {
        /* continuation byte in lead position, or a byte UTF-8 never uses */
        *p += 1;
        return 0xFFFD;
    }
    if (lead < 0xE0)      { cp = lead & 0x1F; extra = 1; min_cp = 0x80; }
    else if (lead < 0xF0) { cp = lead & 0x0F; extra = 2; min_cp = 0x800; }
    else                  { cp = lead & 0x07; extra = 3; min_cp = 0x10000; }

    s++;
    for (int i = 0; i < extra; i++, s++) {
        if ((*s & 0xC0) != 0x80) {
            *p = (const char *)s;
            return 0xFFFD;
        }
        cp = (cp << 6) | (uint32_t)(*s & 0x3F);
    }
    *p = (const char *)s;

    if (cp < min_cp || cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF))
        return 0xFFFD;
    return cp;
}
70 :
/* Sum of html_medium_char_width() over the characters of s, ignoring
 * escape sequences of the form ESC '[' ... 'm'. An unterminated escape
 * swallows the rest of the string. */
static int str_vis_width(const char *s) {
    int total = 0;
    const char *cur = s;

    while (*cur != '\0') {
        int starts_escape = ((unsigned char)cur[0] == 0x1B) && (cur[1] == '[');
        if (!starts_escape) {
            total += html_medium_char_width(utf8_adv(&cur));
            continue;
        }
        cur += 2;
        while (*cur != '\0' && *cur != 'm')
            cur++;
        if (*cur != '\0')
            cur++;                      /* step over the closing 'm' */
    }
    return total;
}
82 :
83 : /* ── Newline / prefix management ─────────────────────────────────────── */
84 :
85 1754 : static void emit_bq_prefix(RS *rs) {
86 1754 : if (rs->col == 0 && rs->bq > 0) {
87 70 : for (int i = 0; i < rs->bq; i++) { rs_push(rs, '>'); rs_push(rs, ' '); }
88 35 : rs->col = rs->bq * 2;
89 : }
90 1754 : }
91 :
92 1446 : static void flush_nl(RS *rs) {
93 1446 : if (!rs->pending_nl) return;
94 : /* Count consecutive newlines already at the end of the buffer so we
95 : * never accumulate more than pending_nl in a row. */
96 444 : int trailing = 0;
97 444 : for (int i = (int)rs->len - 1; i >= 0 && rs->buf[i] == '\n'; i--)
98 0 : trailing++;
99 994 : for (int i = trailing; i < rs->pending_nl; i++) rs_push(rs, '\n');
100 444 : rs->col = 0;
101 444 : rs->pending_nl = 0;
102 444 : emit_bq_prefix(rs);
103 : }
104 :
105 1023 : static void req_nl(RS *rs, int n) {
106 1023 : if (rs->pending_nl < n) rs->pending_nl = n;
107 1023 : }
108 456 : static void block_open(RS *rs) { if (rs->len > 0) req_nl(rs, 1); }
109 279 : static void block_close(RS *rs) { req_nl(rs, 1); }
110 106 : static void para_open(RS *rs) { if (rs->len > 0) req_nl(rs, 2); }
111 106 : static void para_close(RS *rs) { req_nl(rs, 2); }
112 :
113 : /* ── ANSI helpers ────────────────────────────────────────────────────── */
114 :
115 958 : static void esc(RS *rs, const char *e) { if (rs->ansi) rs_str(rs, e); }
116 94 : static void open_bold(RS *rs) { if (rs->bold++ == 0) esc(rs, "\033[1m"); }
117 129 : static void close_bold(RS *rs) { if (--rs->bold == 0) esc(rs, "\033[22m"); }
118 49 : static void open_italic(RS *rs) { if (rs->italic++ == 0) esc(rs, "\033[3m"); }
119 80 : static void close_italic(RS *rs) { if (--rs->italic == 0) esc(rs, "\033[23m"); }
120 42 : static void open_uline(RS *rs) { if (rs->uline++ == 0) esc(rs, "\033[4m"); }
121 78 : static void close_uline(RS *rs) { if (--rs->uline == 0) esc(rs, "\033[24m"); }
122 :
123 : /* ── Inline CSS ──────────────────────────────────────────────────────── */
124 :
/* Numeric value (0..15) of a hexadecimal digit in either case.
 * Any other character yields 0. */
static int hex_val(char c) {
    int value = 0;
    if ('0' <= c && c <= '9')
        value = c - '0';
    else if ('a' <= c && c <= 'f')
        value = 10 + (c - 'a');
    else if ('A' <= c && c <= 'F')
        value = 10 + (c - 'A');
    return value;
}
131 : static const struct { const char *name; int r,g,b; } CSS_COLORS[] = {
132 : {"black",0,0,0},{"silver",192,192,192},{"gray",128,128,128},
133 : {"white",255,255,255},{"maroon",128,0,0},{"red",255,0,0},
134 : {"purple",128,0,128},{"fuchsia",255,0,255},{"green",0,128,0},
135 : {"lime",0,255,0},{"olive",128,128,0},{"yellow",255,255,0},
136 : {"navy",0,0,128},{"blue",0,0,255},{"teal",0,128,128},{"aqua",0,255,255},
137 : {NULL,0,0,0}
138 : };
139 149 : static void apply_color(RS *rs, const char *v, int fg) {
140 242 : if (!rs->ansi) return;
141 : /* Background colors are never emitted: they break dark-theme terminals
142 : * and produce unreadable combinations when the email author's palette
143 : * does not match the user's terminal theme. */
144 149 : if (!fg) return;
145 105 : int r=-1,g=-1,b=-1;
146 105 : while (*v==' ') v++;
147 105 : if (*v=='#') {
148 64 : v++;
149 64 : size_t len=strlen(v); while(len>0&&v[len-1]==' ') len--;
150 64 : if (len==6) {
151 29 : r=hex_val(v[0])*16+hex_val(v[1]);
152 29 : g=hex_val(v[2])*16+hex_val(v[3]);
153 29 : b=hex_val(v[4])*16+hex_val(v[5]);
154 35 : } else if (len==3) {
155 35 : r=hex_val(v[0])*17; g=hex_val(v[1])*17; b=hex_val(v[2])*17;
156 : }
157 : } else {
158 279 : for (int i=0; CSS_COLORS[i].name; i++) {
159 279 : size_t nl=strlen(CSS_COLORS[i].name);
160 279 : if (strncasecmp(v, CSS_COLORS[i].name, nl)==0) {
161 41 : r=CSS_COLORS[i].r; g=CSS_COLORS[i].g; b=CSS_COLORS[i].b; break;
162 : }
163 : }
164 : }
165 105 : if (r<0) return;
166 : /* Suppress dark foreground colors (max component < 160): they are
167 : * unreadable on dark-theme terminals and common in newsletter HTML
168 : * (e.g. #333, #666, gray). Only bright colours are emitted. */
169 105 : int mx = r > g ? (r > b ? r : b) : (g > b ? g : b);
170 105 : if (mx < 160) return;
171 56 : char e[32]; snprintf(e,sizeof(e),"\033[38;2;%d;%d;%dm",r,g,b);
172 56 : rs_str(rs, e);
173 56 : rs->color_fg++;
174 : }
175 191 : static void parse_style(RS *rs, const char *style) {
176 191 : if (!style || !rs->ansi) return;
177 160 : const char *p = style;
178 411 : while (*p) {
179 317 : while (*p==' '||*p=='\t') p++;
180 2715 : const char *ps=p; while(*p&&*p!=':') p++;
181 251 : if (!*p) break;
182 251 : size_t pl=(size_t)(p-ps); while(pl>0&&ps[pl-1]==' ') pl--;
183 383 : p++; while(*p==' ') p++;
184 1643 : const char *vs=p; while(*p&&*p!=';') p++;
185 251 : size_t vl=(size_t)(p-vs); if(*p==';') p++;
186 251 : char prop[32]={0},val[64]={0};
187 251 : if(pl<sizeof(prop)) memcpy(prop,ps,pl);
188 251 : if(vl>0&&vl<sizeof(val)) memcpy(val,vs,vl);
189 251 : if (!strcasecmp(prop,"font-weight")&&!strncasecmp(val,"bold",4)) { esc(rs,"\033[1m"); rs->bold++; }
190 216 : else if (!strcasecmp(prop,"font-style")&&!strncasecmp(val,"italic",6)) { esc(rs,"\033[3m"); rs->italic++; }
191 185 : else if (!strcasecmp(prop,"text-decoration")&&!strncasecmp(val,"underline",9)){ esc(rs,"\033[4m"); rs->uline++; }
192 149 : else if (!strcasecmp(prop,"color")) apply_color(rs,val,1);
193 44 : else if (!strcasecmp(prop,"background-color")) apply_color(rs,val,0);
194 : }
195 : }
196 :
197 : /* ── Text emission with word wrap ────────────────────────────────────── */
198 :
199 44 : static void emit_wrap_nl(RS *rs) {
200 : /* remove trailing space before the newline */
201 44 : if (rs->len > 0 && rs->buf[rs->len-1] == ' ') {
202 9 : rs->len--; rs->buf[rs->len] = '\0'; rs->col--;
203 : }
204 44 : rs_push(rs, '\n');
205 44 : rs->col = 0;
206 44 : emit_bq_prefix(rs);
207 44 : }
208 :
209 1012 : static void emit_text(RS *rs, const char *text) {
210 1012 : if (!text || !*text || rs->skip) return;
211 1012 : flush_nl(rs);
212 1012 : emit_bq_prefix(rs);
213 :
214 1012 : if (rs->pre) {
215 571 : for (const char *p = text; *p; ) {
216 537 : if (*p == '\r') { p++; continue; }
217 537 : if (*p == '\n') {
218 2 : rs_push(rs, '\n'); rs->col = 0;
219 2 : emit_bq_prefix(rs);
220 2 : p++; continue;
221 : }
222 535 : const char *s = p;
223 535 : rs->col += html_medium_char_width(utf8_adv(&p));
224 535 : rs_write(rs, s, (int)(p - s));
225 : }
226 34 : return;
227 : }
228 :
229 : /* Normal mode: word-wrap */
230 3275 : for (const char *p = text; *p; ) {
231 2297 : if (isspace((unsigned char)*p)) {
232 : /* Collapse whitespace; only emit space if not at line start */
233 836 : int at_start = (rs->col <= rs->bq * 2);
234 836 : int already_space = (rs->len > 0 && rs->buf[rs->len-1] == ' ');
235 836 : if (!at_start && !already_space) {
236 797 : rs_push(rs, ' ');
237 797 : rs->col++;
238 : }
239 1704 : while (*p && isspace((unsigned char)*p)) p++;
240 836 : continue;
241 : }
242 :
243 : /* Collect one word */
244 1461 : const char *ws = p;
245 1461 : int ww = 0;
246 8439 : while (*p && !isspace((unsigned char)*p)) {
247 6978 : const char *q = p;
248 6978 : ww += html_medium_char_width(utf8_adv(&p));
249 : (void)q;
250 : }
251 1461 : int wlen = (int)(p - ws);
252 :
253 : /* URL tokens (http://, https://, ftp://, mailto:) are always placed
254 : * on their own line so terminal URL-recognition works reliably.
255 : * They are never broken regardless of width. */
256 1878 : int is_url = (wlen >= 6) && (
257 417 : strncmp(ws, "http://", 7) == 0 ||
258 414 : strncmp(ws, "https://", 8) == 0 ||
259 372 : strncmp(ws, "ftp://", 6) == 0 ||
260 372 : strncmp(ws, "mailto:", 7) == 0);
261 :
262 1461 : if (is_url) {
263 45 : if (rs->col > rs->bq * 2) emit_wrap_nl(rs); /* start own line */
264 45 : esc(rs, "\033[34m"); /* blue */
265 45 : rs_write(rs, ws, wlen);
266 45 : esc(rs, "\033[39m"); /* reset fg */
267 45 : rs->col += ww;
268 45 : rs_push(rs, '\n'); /* trailing newline: next content fresh line */
269 45 : rs->col = 0;
270 45 : emit_bq_prefix(rs);
271 45 : continue;
272 : }
273 :
274 : /* Wrap if needed (never wrap an otherwise-empty line) */
275 1416 : if (rs->width > 0 && rs->col > rs->bq * 2 && rs->col + ww > rs->width)
276 4 : emit_wrap_nl(rs);
277 :
278 1416 : rs_write(rs, ws, wlen);
279 1416 : rs->col += ww;
280 : }
281 : }
282 :
283 : /* ── Whitespace helpers ──────────────────────────────────────────────── */
284 :
/** Tells whether s has no visible content.
 *  Returns 1 when every character of s is a byte <= ' ' (ASCII whitespace
 *  and control characters), U+00A0 nbsp, U+00AD soft hyphen, U+200C zwnj
 *  or U+200D zwj; the empty string counts as blank. Returns 0 at the first
 *  character that is anything else. */
static int is_blank_str(const char *s) {
    const unsigned char *u = (const unsigned char *)s;

    for (;;) {
        unsigned char c = u[0];
        if (c == '\0')
            return 1;
        if (c <= ' ') {
            u += 1;
            continue;
        }
        /* C2 A0 = nbsp, C2 AD = soft hyphen */
        if (c == 0xC2 && (u[1] == 0xA0 || u[1] == 0xAD)) {
            u += 2;
            continue;
        }
        /* E2 80 8C = zwnj, E2 80 8D = zwj */
        if (c == 0xE2 && u[1] == 0x80 && (u[2] == 0x8C || u[2] == 0x8D)) {
            u += 3;
            continue;
        }
        return 0;
    }
}
299 :
/** Tidies rendered output line by line.
 *  - Trailing invisible characters are removed from every line: bytes
 *    <= ' ', nbsp (C2 A0), soft hyphen (C2 AD), zwnj/zwj (E2 80 8C/8D).
 *  - Lines that are empty after trimming are blank lines. Any run of them
 *    between two content lines becomes exactly one blank line, blank lines
 *    before the first content line are dropped, and at most one blank
 *    line is kept at the very end.
 *  Takes ownership of s: it is freed and a new heap string is returned.
 *  If s is NULL, or the new buffer cannot be allocated, s itself is
 *  returned unchanged. */
static char *compact_lines(char *s) {
    if (s == NULL)
        return s;
    /* The result is never longer than the input. */
    char *out = malloc(strlen(s) + 1);
    if (out == NULL)
        return s;

    const unsigned char *cur = (const unsigned char *)s;
    char *w = out;
    int owe_blank = 0;      /* a blank line was seen and not yet written */
    int wrote_any = 0;      /* at least one content line has been written */

    while (*cur != '\0') {
        /* Find the extent of this line. */
        const unsigned char *start = cur;
        const unsigned char *nl =
            (const unsigned char *)strchr((const char *)cur, '\n');
        int terminated = (nl != NULL);
        const unsigned char *end;
        if (terminated) {
            end = nl;
            cur = nl + 1;
        } else {
            end = cur + strlen((const char *)cur);
            cur = end;
        }

        /* Strip trailing invisible characters. */
        for (;;) {
            size_t left = (size_t)(end - start);
            if (left >= 1 && end[-1] <= ' ') {
                end -= 1;
                continue;
            }
            if (left >= 2 && end[-2] == 0xC2 &&
                (end[-1] == 0xA0 || end[-1] == 0xAD)) {
                end -= 2;
                continue;
            }
            if (left >= 3 && end[-3] == 0xE2 && end[-2] == 0x80 &&
                (end[-1] == 0x8C || end[-1] == 0x8D)) {
                end -= 3;
                continue;
            }
            break;
        }

        size_t keep = (size_t)(end - start);
        if (keep == 0) {
            /* Blank line: remember it; an unterminated one is dropped. */
            if (terminated)
                owe_blank = 1;
            continue;
        }

        if (owe_blank && wrote_any)
            *w++ = '\n';
        owe_blank = 0;
        wrote_any = 1;
        memcpy(w, start, keep);
        w += keep;
        if (terminated)
            *w++ = '\n';
    }

    /* Preserve at most one trailing blank line */
    if (owe_blank)
        *w++ = '\n';
    *w = '\0';

    free(s);
    return out;
}
349 :
350 : /* ── Tag open / close ────────────────────────────────────────────────── */
351 :
352 : static void traverse(RS *rs, const HtmlNode *node);
353 :
354 1119 : static void tag_open(RS *rs, const HtmlNode *node) {
355 1119 : if (!node->tag) return;
356 1119 : const char *t = node->tag;
357 :
358 : /* Inline styles (applied before tag-specific behavior) */
359 1119 : const char *style = html_attr_get(node, "style");
360 1119 : if (style) parse_style(rs, style);
361 :
362 1119 : if (!strcmp(t,"b")||!strcmp(t,"strong")) open_bold(rs);
363 1059 : else if (!strcmp(t,"i")||!strcmp(t,"em")) open_italic(rs);
364 1010 : else if (!strcmp(t,"u")) open_uline(rs);
365 968 : else if (!strcmp(t,"s")||!strcmp(t,"del")||!strcmp(t,"strike")) esc(rs,"\033[9m");
366 896 : else if (!strcmp(t,"br")) req_nl(rs, 1);
367 895 : else if (!strcmp(t,"hr")) {
368 33 : block_open(rs); flush_nl(rs); emit_bq_prefix(rs);
369 33 : int w = rs->width > 0 ? rs->width - rs->col : 20;
370 2605 : for (int i = 0; i < w; i++) rs_push(rs, '-');
371 33 : rs->col += w;
372 33 : block_close(rs);
373 : }
374 862 : else if (!strcmp(t,"p")) para_open(rs);
375 790 : else if (!strcmp(t,"div")||!strcmp(t,"article")||!strcmp(t,"section")||
376 724 : !strcmp(t,"main")||!strcmp(t,"header")||!strcmp(t,"footer")||
377 790 : !strcmp(t,"nav")||!strcmp(t,"aside")) block_open(rs);
378 724 : else if (t[0]=='h' && t[1]>='1' && t[1]<='6' && !t[2]) {
379 34 : para_open(rs); open_bold(rs);
380 : }
381 690 : else if (!strcmp(t,"ul")) {
382 36 : if (rs->list_top < LIST_MAX) rs->lists[rs->list_top++] = (ListFrame){0, 0};
383 36 : block_open(rs);
384 : }
385 654 : else if (!strcmp(t,"ol")) {
386 34 : if (rs->list_top < LIST_MAX) rs->lists[rs->list_top++] = (ListFrame){1, 0};
387 34 : block_open(rs);
388 : }
389 620 : else if (!strcmp(t,"li")) {
390 136 : block_open(rs);
391 136 : flush_nl(rs);
392 136 : emit_bq_prefix(rs);
393 136 : if (rs->list_top > 0 && rs->lists[rs->list_top-1].is_ol) {
394 67 : rs->lists[rs->list_top-1].cnt++;
395 : char buf[16];
396 67 : int n = snprintf(buf, sizeof(buf), "%d. ", rs->lists[rs->list_top-1].cnt);
397 67 : rs_str(rs, buf);
398 67 : rs->col += n;
399 : } else {
400 : /* U+2022 BULLET: UTF-8 E2 80 A2 */
401 69 : rs_push(rs,(char)0xE2); rs_push(rs,(char)0x80); rs_push(rs,(char)0xA2);
402 69 : rs_push(rs, ' ');
403 69 : rs->col += 2; /* bullet=1 col, space=1 col */
404 : }
405 : }
406 484 : else if (!strcmp(t,"blockquote")) {
407 35 : block_open(rs);
408 35 : rs->bq++;
409 : /* if something was already on the line, start fresh */
410 35 : if (rs->col > 0) { flush_nl(rs); } else emit_bq_prefix(rs);
411 : }
412 449 : else if (!strcmp(t,"pre")) { block_open(rs); rs->pre++; }
413 415 : else if (!strcmp(t,"code")) { /* inline: no special formatting */ }
414 415 : else if (!strcmp(t,"img")) {
415 37 : const char *alt = html_attr_get(node, "alt");
416 37 : if (alt && *alt && !is_blank_str(alt)) {
417 34 : flush_nl(rs); emit_bq_prefix(rs);
418 34 : rs_push(rs, '[');
419 34 : rs_str(rs, alt);
420 34 : rs_push(rs, ']');
421 34 : rs->col += 2 + str_vis_width(alt);
422 : }
423 : }
424 378 : else if (!strcmp(t,"a")) {
425 45 : const char *href = html_attr_get(node, "href");
426 45 : if (href && *href && href[0] != '#' && strncmp(href, "javascript:", 11) != 0)
427 41 : if (rs->ansi) { esc(rs, "\033[34m"); rs->color_fg++; }
428 : }
429 333 : else if (!strcmp(t,"td")||!strcmp(t,"th")) {
430 79 : if (rs->col > rs->bq * 2) { rs_push(rs, '\t'); rs->col++; }
431 : }
432 254 : else if (!strcmp(t,"tr")||!strcmp(t,"table")) block_open(rs);
433 172 : else if (!strcmp(t,"script")||!strcmp(t,"style")) rs->skip++;
434 172 : else if (!strcmp(t,"textarea")||!strcmp(t,"button")) block_open(rs);
435 172 : else if (!strcmp(t,"input")) {
436 1 : const char *val = html_attr_get(node, "value");
437 1 : if (val && *val) {
438 1 : flush_nl(rs); emit_bq_prefix(rs);
439 1 : rs_str(rs, val);
440 1 : rs->col += str_vis_width(val);
441 : }
442 : }
443 : /* __root__ and unknown tags: traverse children unchanged */
444 : }
445 :
446 1119 : static void tag_close(RS *rs, const HtmlNode *node) {
447 1119 : if (!node->tag) return;
448 1119 : const char *t = node->tag;
449 :
450 1119 : if (!strcmp(t,"b")||!strcmp(t,"strong")) close_bold(rs);
451 1059 : else if (!strcmp(t,"i")||!strcmp(t,"em")) close_italic(rs);
452 1010 : else if (!strcmp(t,"u")) close_uline(rs);
453 968 : else if (!strcmp(t,"s")||!strcmp(t,"del")||!strcmp(t,"strike")) esc(rs,"\033[29m");
454 896 : else if (!strcmp(t,"p")) para_close(rs);
455 824 : else if (!strcmp(t,"div")||!strcmp(t,"article")||!strcmp(t,"section")||
456 758 : !strcmp(t,"main")||!strcmp(t,"header")||!strcmp(t,"footer")||
457 824 : !strcmp(t,"nav")||!strcmp(t,"aside")) block_close(rs);
458 758 : else if (t[0]=='h' && t[1]>='1' && t[1]<='6' && !t[2]) {
459 34 : close_bold(rs); para_close(rs);
460 : }
461 724 : else if (!strcmp(t,"ul")||!strcmp(t,"ol")) {
462 70 : if (rs->list_top > 0) rs->list_top--;
463 70 : block_close(rs);
464 : }
465 654 : else if (!strcmp(t,"li")) req_nl(rs, 1);
466 518 : else if (!strcmp(t,"blockquote")) {
467 35 : if (rs->bq > 0) rs->bq--;
468 35 : block_close(rs);
469 : }
470 483 : else if (!strcmp(t,"pre")) { if (rs->pre>0) rs->pre--; block_close(rs); }
471 449 : else if (!strcmp(t,"a")) {
472 45 : const char *href = html_attr_get(node, "href");
473 45 : if (href && *href && href[0] != '#' &&
474 42 : strncmp(href, "javascript:", 11) != 0) {
475 41 : if (rs->ansi && rs->color_fg > 0) { esc(rs, "\033[39m"); rs->color_fg--; }
476 41 : esc(rs, "\033[34m");
477 41 : emit_text(rs, href);
478 41 : esc(rs, "\033[39m");
479 : }
480 : }
481 404 : else if (!strcmp(t,"script")||!strcmp(t,"style")) { if (rs->skip>0) rs->skip--; }
482 404 : else if (!strcmp(t,"tr")) req_nl(rs, 1);
483 363 : else if (!strcmp(t,"table")) block_close(rs);
484 322 : else if (!strcmp(t,"textarea")||!strcmp(t,"button")) block_close(rs);
485 : }
486 :
487 : /* ── Tree traversal ──────────────────────────────────────────────────── */
488 :
489 2090 : static void traverse(RS *rs, const HtmlNode *node) {
490 2090 : if (!node) return;
491 2090 : if (node->type == HTML_NODE_TEXT) {
492 971 : emit_text(rs, node->text);
493 971 : return;
494 : }
495 : /* Snapshot style depth so parse_style side-effects from inline
496 : * style= attributes are balanced even when tag_close has no handler
497 : * for this tag (e.g. <a>, <span>, <td> with style="…"). */
498 1119 : int bold_sv = rs->bold;
499 1119 : int italic_sv = rs->italic;
500 1119 : int uline_sv = rs->uline;
501 1119 : int color_fg_sv = rs->color_fg;
502 1119 : int color_bg_sv = rs->color_bg;
503 1119 : tag_open(rs, node);
504 2907 : for (const HtmlNode *c = node->first_child; c; c = c->next_sibling)
505 1788 : traverse(rs, c);
506 1119 : tag_close(rs, node);
507 : /* Close any depth-tracked style that tag_close left open */
508 1155 : while (rs->uline > uline_sv) close_uline(rs);
509 1150 : while (rs->italic > italic_sv) close_italic(rs);
510 1154 : while (rs->bold > bold_sv) close_bold(rs);
511 1119 : if (rs->ansi) {
512 800 : if (rs->color_fg > color_fg_sv) { esc(rs, "\033[39m"); rs->color_fg = color_fg_sv; }
513 800 : if (rs->color_bg > color_bg_sv) { esc(rs, "\033[49m"); rs->color_bg = color_bg_sv; }
514 : }
515 : }
516 :
517 : /* ── Public API ──────────────────────────────────────────────────────── */
518 :
519 199 : char *html_render(const char *html, int width, int ansi) {
520 199 : if (!html) return NULL;
521 :
522 198 : HtmlNode *root = html_parse(html);
523 198 : if (!root) {
524 0 : char *empty = malloc(1);
525 0 : if (empty) *empty = '\0';
526 0 : return empty;
527 : }
528 :
529 : RS rs;
530 198 : memset(&rs, 0, sizeof(rs));
531 198 : rs.width = width;
532 198 : rs.ansi = ansi;
533 :
534 : /* Traverse root's children (root itself is synthetic __root__) */
535 500 : for (const HtmlNode *c = root->first_child; c; c = c->next_sibling)
536 302 : traverse(&rs, c);
537 :
538 : /* Flush trailing newlines */
539 198 : flush_nl(&rs);
540 :
541 198 : html_node_free(root);
542 :
543 198 : if (!rs.buf) {
544 6 : char *empty = malloc(1);
545 6 : if (empty) *empty = '\0';
546 6 : return empty;
547 : }
548 192 : return compact_lines(rs.buf);
549 : }
|