Statistics
| Branch: | Tag: | Revision:

root / libsylph / html.c @ 8d7dcace

History | View | Annotate | Download (15.8 KB)

1
/*
2
 * LibSylph -- E-Mail client library
3
 * Copyright (C) 1999-2011 Hiroyuki Yamamoto
4
 *
5
 * This library is free software; you can redistribute it and/or
6
 * modify it under the terms of the GNU Lesser General Public
7
 * License as published by the Free Software Foundation; either
8
 * version 2.1 of the License, or (at your option) any later version.
9
 *
10
 * This library is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
 * Lesser General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU Lesser General Public
16
 * License along with this library; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
18
 */
19

    
20
#include <glib.h>
21
#include <stdio.h>
22
#include <string.h>
23
#include <ctype.h>
24

    
25
#include "html.h"
26
#include "codeconv.h"
27
#include "utils.h"
28

    
29
#define HTMLBUFSIZE        8192
30
#define HR_STR                "------------------------------------------------"
31

    
32
typedef struct _HTMLSymbol        HTMLSymbol;
33

    
34
struct _HTMLSymbol
35
{
36
        gchar *const key;
37
        gchar *const val;
38
};
39

    
40
static HTMLSymbol symbol_list[] = {
41
        {"&lt;"    , "<"},
42
        {"&gt;"    , ">"},
43
        {"&amp;"   , "&"},
44
        {"&quot;"  , "\""}
45
};
46

    
47
/* &#160; - &#255; */
48
static HTMLSymbol latin_symbol_list[] = {
49
        {"&nbsp;"  , " "},
50
        /* {"&nbsp;"  , "\302\240"}, */
51
        {"&iexcl;" , "\302\241"},
52
        {"&cent;"  , "\302\242"},
53
        {"&pound;" , "\302\243"},
54
        {"&curren;", "\302\244"},
55
        {"&yen;"   , "\302\245"},
56
        {"&brvbar;", "\302\246"},
57
        {"&sect;"  , "\302\247"},
58
        {"&uml;"   , "\302\250"},
59
        {"&copy;"  , "\302\251"},
60
        {"&ordf;"  , "\302\252"},
61
        {"&laquo;" , "\302\253"},
62
        {"&not;"   , "\302\254"},
63
        {"&shy;"   , "\302\255"},
64
        {"&reg;"   , "\302\256"},
65
        {"&macr;"  , "\302\257"},
66
        {"&deg;"   , "\302\260"},
67
        {"&plusm;" , "\302\261"},
68
        {"&sup2;"  , "\302\262"},
69
        {"&sup3;"  , "\302\263"},
70
        {"&acute;" , "\302\264"},
71
        {"&micro;" , "\302\265"},
72
        {"&para;"  , "\302\266"},
73
        {"&middot;", "\302\267"},
74
        {"&cedil;" , "\302\270"},
75
        {"&sup1;"  , "\302\271"},
76
        {"&ordm;"  , "\302\272"},
77
        {"&raquo;" , "\302\273"},
78
        {"&frac14;", "\302\274"},
79
        {"&frac12;", "\302\275"},
80
        {"&frac34;", "\302\276"},
81
        {"&iquest;", "\302\277"},
82

    
83
        {"&Agrave;", "\303\200"},
84
        {"&Aacute;", "\303\201"},
85
        {"&Acirc;" , "\303\202"},
86
        {"&Atilde;", "\303\203"},
87
        {"&Auml;"  , "\303\204"},
88
        {"&Aring;" , "\303\205"},
89
        {"&AElig;" , "\303\206"},
90
        {"&Ccedil;", "\303\207"},
91
        {"&Egrave;", "\303\210"},
92
        {"&Eacute;", "\303\211"},
93
        {"&Ecirc;" , "\303\212"},
94
        {"&Euml;"  , "\303\213"},
95
        {"&Igrave;", "\303\214"},
96
        {"&Iacute;", "\303\215"},
97
        {"&Icirc;" , "\303\216"},
98
        {"&Iuml;"  , "\303\217"},
99
        {"&ETH;"   , "\303\220"},
100
        {"&Ntilde;", "\303\221"},
101
        {"&Ograve;", "\303\222"},
102
        {"&Oacute;", "\303\223"},
103
        {"&Ocirc;" , "\303\224"},
104
        {"&Otilde;", "\303\225"},
105
        {"&Ouml;"  , "\303\226"},
106
        {"&times;" , "\303\227"},
107
        {"&Oslash;", "\303\230"},
108
        {"&Ugrave;", "\303\231"},
109
        {"&Uacute;", "\303\232"},
110
        {"&Ucirc;" , "\303\233"},
111
        {"&Uuml;"  , "\303\234"},
112
        {"&Yacute;", "\303\235"},
113
        {"&THORN;" , "\303\236"},
114
        {"&szlig;" , "\303\237"},
115
        {"&agrave;", "\303\240"},
116
        {"&aacute;", "\303\241"},
117
        {"&acirc;" , "\303\242"},
118
        {"&atilde;", "\303\243"},
119
        {"&auml;"  , "\303\244"},
120
        {"&aring;" , "\303\245"},
121
        {"&aelig;" , "\303\246"},
122
        {"&ccedil;", "\303\247"},
123
        {"&egrave;", "\303\250"},
124
        {"&eacute;", "\303\251"},
125
        {"&ecirc;" , "\303\252"},
126
        {"&euml;"  , "\303\253"},
127
        {"&igrave;", "\303\254"},
128
        {"&iacute;", "\303\255"},
129
        {"&icirc;" , "\303\256"},
130
        {"&iuml;"  , "\303\257"},
131
        {"&eth;"   , "\303\260"},
132
        {"&ntilde;", "\303\261"},
133
        {"&ograve;", "\303\262"},
134
        {"&oacute;", "\303\263"},
135
        {"&ocirc;" , "\303\264"},
136
        {"&otilde;", "\303\265"},
137
        {"&ouml;"  , "\303\266"},
138
        {"&divide;", "\303\267"},
139
        {"&oslash;", "\303\270"},
140
        {"&ugrave;", "\303\271"},
141
        {"&uacute;", "\303\272"},
142
        {"&ucirc;" , "\303\273"},
143
        {"&uuml;"  , "\303\274"},
144
        {"&yacute;", "\303\275"},
145
        {"&thorn;" , "\303\276"},
146
        {"&yuml;"  , "\303\277"}
147
};
148

    
149
static HTMLSymbol other_symbol_list[] = {
150
        /* Non-standard? */
151
        {"&#133;"  , "..."},
152
        {"&#146;"  , "'"},
153
        {"&#150;"  , "-"},
154
        {"&#153;"  , "\xe2\x84\xa2"},
155
        {"&#156;"  , "\xc5\x93"},
156

    
157
        /* Symbolic characters */
158
        {"&trade;" , "\xe2\x84\xa2"},
159

    
160
        /* Latin extended */
161
        {"&OElig;" , "\xc5\x92"},
162
        {"&oelig;" , "\xc5\x93"},
163
        {"&Scaron;", "\xc5\xa0"},
164
        {"&scaron;", "\xc5\xa1"},
165
        {"&Yuml;"  , "\xc5\xb8"},
166
        {"&circ;"  , "\xcb\x86"},
167
        {"&tilde;" , "\xcb\x9c"},
168
        {"&fnof;"  , "\xc6\x92"},
169
};
170

    
171
static GHashTable *default_symbol_table;
172

    
173
static HTMLState html_read_line                (HTMLParser        *parser);
174

    
175
static void html_append_char                (HTMLParser        *parser,
176
                                         gchar                 ch);
177
static void html_append_str                (HTMLParser        *parser,
178
                                         const gchar        *str,
179
                                         gint                 len);
180

    
181
static gchar *html_find_char                (HTMLParser        *parser,
182
                                         gchar                 ch);
183
static gchar *html_find_str                (HTMLParser        *parser,
184
                                         const gchar        *str);
185
static gchar *html_find_str_case        (HTMLParser        *parser,
186
                                         const gchar        *str);
187

    
188
static HTMLState html_parse_tag                (HTMLParser        *parser);
189
static void html_parse_special                (HTMLParser        *parser);
190
static void html_get_parenthesis        (HTMLParser        *parser,
191
                                         gchar                *buf,
192
                                         gint                 len);
193

    
194

    
195
/*
 * Allocates a parser that reads HTML from `fp' and converts each input
 * line with `conv'.  Returns NULL (through g_return_val_if_fail) when
 * either argument is NULL.
 *
 * The first call also builds the file-wide entity table; every parser
 * created afterwards shares that same table.
 */
HTMLParser *html_parser_new(FILE *fp, CodeConverter *conv)
{
        HTMLParser *parser;

        g_return_val_if_fail(fp != NULL, NULL);
        g_return_val_if_fail(conv != NULL, NULL);

        parser = g_new0(HTMLParser, 1);

        /* input side */
        parser->fp = fp;
        parser->conv = conv;
        parser->buf = g_string_new(NULL);
        parser->bufp = parser->buf->str;

        /* output side */
        parser->str = g_string_new(NULL);
        parser->state = HTML_NORMAL;
        parser->href = NULL;

        /* formatting flags: output starts on a fresh, empty line */
        parser->newline = TRUE;
        parser->empty_line = TRUE;
        parser->space = FALSE;
        parser->pre = FALSE;

/* Registers every key/val pair of a static HTMLSymbol array in `table'. */
#define SYMBOL_TABLE_ADD(table, list) \
        do { \
                guint idx_; \
 \
                for (idx_ = 0; idx_ < G_N_ELEMENTS(list); idx_++) \
                        g_hash_table_insert((table), (list)[idx_].key, \
                                            (list)[idx_].val); \
        } while (0)

        if (default_symbol_table == NULL) {
                default_symbol_table =
                        g_hash_table_new(g_str_hash, g_str_equal);
                SYMBOL_TABLE_ADD(default_symbol_table, symbol_list);
                SYMBOL_TABLE_ADD(default_symbol_table, latin_symbol_list);
                SYMBOL_TABLE_ADD(default_symbol_table, other_symbol_list);
        }

#undef SYMBOL_TABLE_ADD

        parser->symbol_table = default_symbol_table;

        return parser;
}
237

    
238
/*
 * Releases a parser created by html_parser_new(): its two string buffers,
 * the current href and the parser structure itself.
 *
 * The FILE, the CodeConverter and the shared symbol table are not touched
 * here.  Passing NULL is a no-op, matching html_free_tag().
 */
void html_parser_destroy(HTMLParser *parser)
{
        if (!parser) return;

        g_string_free(parser->str, TRUE);
        g_string_free(parser->buf, TRUE);
        g_free(parser->href);
        g_free(parser);
}
245

    
246
/*
 * Produces the next chunk of plain text from the HTML input.
 *
 * The output buffer is cleared, then input is consumed until either the
 * buffered input is exhausted or a '<' is reached after some text has
 * already been produced (the tag is left for the next call).  A tag at
 * the very start of a chunk is processed immediately.
 *
 * Returns the parser-owned output string, or NULL once the input buffer
 * is empty and no further line can be read.
 */
const gchar *html_parse(HTMLParser *parser)
{
        parser->state = HTML_NORMAL;
        g_string_truncate(parser->str, 0);

        /* Input buffer fully consumed: drop it and fetch a new line. */
        if (*parser->bufp == '\0') {
                g_string_truncate(parser->buf, 0);
                parser->bufp = parser->buf->str;
                if (html_read_line(parser) == HTML_EOF)
                        return NULL;
        }

        while (*parser->bufp != '\0') {
                const gchar c = *parser->bufp;

                if (c == '<') {
                        /* Hand back pending text before touching the tag. */
                        if (parser->str->len != 0)
                                return parser->str->str;
                        html_parse_tag(parser);
                        continue;
                }

                if (c == '&') {
                        html_parse_special(parser);
                        continue;
                }

                if (c == ' ' || c == '\t' || c == '\r' || c == '\n') {
                        /* Treat CR LF as a single character. */
                        if (c == '\r' && parser->bufp[1] == '\n')
                                parser->bufp++;

                        if (!parser->pre) {
                                /*
                                 * Outside <pre>, whitespace collapses to one
                                 * pending space, dropped at a line start.
                                 */
                                if (!parser->newline)
                                        parser->space = TRUE;
                                parser->bufp++;
                                continue;
                        }
                        /* Inside <pre> whitespace is copied like any char. */
                }

                html_append_char(parser, *parser->bufp++);
        }

        return parser->str->str;
}
291

    
292
/*
 * Reads one chunk (at most HTMLBUFSIZE - 1 bytes, up to a newline) from
 * the input file, converts it and appends it to the input buffer.
 *
 * Returns HTML_EOF (also stored in parser->state) when fgets() yields
 * nothing, HTML_CONV_FAILED when conv_convert() returned NULL and the
 * conv_utf8todisp() result was appended instead, HTML_NORMAL otherwise.
 */
static HTMLState html_read_line(HTMLParser *parser)
{
        gchar line[HTMLBUFSIZE];
        gchar *converted;
        gint offset;
        HTMLState result = HTML_NORMAL;

        if (fgets(line, sizeof(line), parser->fp) == NULL) {
                parser->state = HTML_EOF;
                return HTML_EOF;
        }

        converted = conv_convert(parser->conv, line);
        if (converted == NULL) {
                converted = conv_utf8todisp(line, NULL);
                result = HTML_CONV_FAILED;
        }

        /*
         * Appending may move the GString storage, so remember the read
         * position as an offset and re-anchor bufp afterwards.
         */
        offset = parser->bufp - parser->buf->str;
        g_string_append(parser->buf, converted);
        g_free(converted);
        parser->bufp = parser->buf->str + offset;

        return result;
}
325

    
326
/*
 * Appends one character to the output, preceded by the pending collapsed
 * space if there is one (outside <pre> only), and updates the
 * newline / empty_line flags from the new end of the output.
 */
static void html_append_char(HTMLParser *parser, gchar ch)
{
        GString *out = parser->str;

        if (parser->space && !parser->pre) {
                g_string_append_c(out, ' ');
                parser->space = FALSE;
        }

        g_string_append_c(out, ch);

        if (ch != '\n') {
                parser->newline = FALSE;
                parser->empty_line = FALSE;
                return;
        }

        parser->newline = TRUE;
        /* Two consecutive newlines mean the last output line is empty. */
        if (out->len > 1 && out->str[out->len - 2] == '\n')
                parser->empty_line = TRUE;
        else
                parser->empty_line = FALSE;
}
345

    
346
/*
 * Appends a string to the output.  `len' < 0 appends the whole
 * NUL-terminated string, `len' > 0 appends exactly that many bytes and
 * `len' == 0 appends nothing.
 *
 * A pending collapsed space (outside <pre>) is flushed first, even when
 * `len' is 0; in that case the newline / empty_line flags are left alone.
 */
static void html_append_str(HTMLParser *parser, const gchar *str, gint len)
{
        GString *out = parser->str;
        gboolean ends_with_nl;

        if (parser->space && !parser->pre) {
                g_string_append_c(out, ' ');
                parser->space = FALSE;
        }

        if (len == 0)
                return;

        if (len > 0)
                g_string_append_len(out, str, len);
        else
                g_string_append(out, str);

        ends_with_nl = (out->len > 0 && out->str[out->len - 1] == '\n');

        if (!ends_with_nl) {
                parser->newline = FALSE;
                parser->empty_line = FALSE;
                return;
        }

        parser->newline = TRUE;
        /* Two trailing newlines mean the last output line is empty. */
        if (out->len > 1 && out->str[out->len - 2] == '\n')
                parser->empty_line = TRUE;
        else
                parser->empty_line = FALSE;
}
369

    
370
/*
 * Searches for `ch' from the current read position, pulling in further
 * input lines until it is found.  Returns a pointer into the input
 * buffer, or NULL if the input ends first.
 */
static gchar *html_find_char(HTMLParser *parser, gchar ch)
{
        for (;;) {
                gchar *hit = strchr(parser->bufp, ch);

                if (hit != NULL)
                        return hit;
                if (html_read_line(parser) == HTML_EOF)
                        return NULL;
        }
}
381

    
382
/*
 * Searches for `str' (case-sensitive) from the current read position,
 * pulling in further input lines until it is found.  Returns a pointer
 * into the input buffer, or NULL if the input ends first.
 */
static gchar *html_find_str(HTMLParser *parser, const gchar *str)
{
        for (;;) {
                gchar *hit = strstr(parser->bufp, str);

                if (hit != NULL)
                        return hit;
                if (html_read_line(parser) == HTML_EOF)
                        return NULL;
        }
}
393

    
394
/*
 * Case-insensitive variant of html_find_str(): searches with
 * strcasestr(), pulling in further input lines until `str' is found.
 * Returns a pointer into the input buffer, or NULL if the input ends
 * first.
 */
static gchar *html_find_str_case(HTMLParser *parser, const gchar *str)
{
        for (;;) {
                gchar *hit = strcasestr(parser->bufp, str);

                if (hit != NULL)
                        return hit;
                if (html_read_line(parser) == HTML_EOF)
                        return NULL;
        }
}
405

    
406
static HTMLTag *html_get_tag(const gchar *str)
407
{
408
        HTMLTag *tag;
409
        gchar *tmp;
410
        gchar *tmpp;
411

    
412
        g_return_val_if_fail(str != NULL, NULL);
413

    
414
        if (*str == '\0' || *str == '!') return NULL;
415

    
416
        tmp = g_strdup(str);
417

    
418
        tag = g_new0(HTMLTag, 1);
419

    
420
        for (tmpp = tmp; *tmpp != '\0' && !g_ascii_isspace(*tmpp); tmpp++) {
421
                if (tmpp > tmp && *tmpp == '/') {
422
                        *tmpp = '\0';
423
                        break;
424
                }
425
        }
426

    
427
        if (*tmpp == '\0') {
428
                g_strdown(tmp);
429
                tag->name = tmp;
430
                return tag;
431
        } else {
432
                *tmpp++ = '\0';
433
                g_strdown(tmp);
434
                tag->name = g_strdup(tmp);
435
        }
436

    
437
        while (*tmpp != '\0') {
438
                HTMLAttr *attr;
439
                gchar *attr_name;
440
                gchar *attr_value;
441
                gchar *p;
442
                gchar quote;
443

    
444
                while (g_ascii_isspace(*tmpp)) tmpp++;
445
                if (tmpp > tmp && *tmpp == '/')
446
                        break;
447
                attr_name = tmpp;
448

    
449
                while (*tmpp != '\0' && !g_ascii_isspace(*tmpp) &&
450
                       *tmpp != '=')
451
                        tmpp++;
452
                if (*tmpp != '\0' && *tmpp != '=') {
453
                        *tmpp++ = '\0';
454
                        while (g_ascii_isspace(*tmpp)) tmpp++;
455
                }
456

    
457
                if (*tmpp == '=') {
458
                        *tmpp++ = '\0';
459
                        while (g_ascii_isspace(*tmpp)) tmpp++;
460

    
461
                        if (*tmpp == '"' || *tmpp == '\'') {
462
                                /* name="value" */
463
                                quote = *tmpp;
464
                                tmpp++;
465
                                attr_value = tmpp;
466
                                if ((p = strchr(attr_value, quote)) == NULL) {
467
                                        g_warning("html_get_tag(): syntax error in tag: '%s'\n", str);
468
                                        break;
469
                                }
470
                                tmpp = p;
471
                                *tmpp++ = '\0';
472
                                while (g_ascii_isspace(*tmpp)) tmpp++;
473
                        } else {
474
                                /* name=value */
475
                                attr_value = tmpp;
476
                                while (*tmpp != '\0' && !g_ascii_isspace(*tmpp)) tmpp++;
477
                                if (*tmpp != '\0')
478
                                        *tmpp++ = '\0';
479
                        }
480
                } else
481
                        attr_value = "";
482

    
483
                g_strchomp(attr_name);
484
                g_strdown(attr_name);
485
                attr = g_new(HTMLAttr, 1);
486
                attr->name = g_strdup(attr_name);
487
                attr->value = g_strdup(attr_value);
488
                tag->attr = g_list_append(tag->attr, attr);
489
        }
490

    
491
        g_free(tmp);
492

    
493
        return tag;
494
}
495

    
496
/*
 * Frees a tag returned by html_get_tag(): its name, every attribute
 * (name, value and the HTMLAttr itself), the attribute list and the tag
 * structure.  NULL is accepted and ignored.
 */
static void html_free_tag(HTMLTag *tag)
{
        GList *node;

        if (tag == NULL)
                return;

        g_free(tag->name);

        for (node = tag->attr; node != NULL; node = node->next) {
                HTMLAttr *attr = (HTMLAttr *)node->data;

                g_free(attr->name);
                g_free(attr->value);
                g_free(attr);
        }
        g_list_free(tag->attr);

        g_free(tag);
}
510

    
511
/* Ends the current output line unless the output already sits at a line start. */
static void html_tag_break_line(HTMLParser *parser)
{
        if (!parser->newline) {
                parser->space = FALSE;
                html_append_char(parser, '\n');
        }
}

/* Ends the current output line and leaves one empty line after it, unless the last line is already empty. */
static void html_tag_blank_line(HTMLParser *parser)
{
        if (!parser->empty_line) {
                parser->space = FALSE;
                if (!parser->newline)
                        html_append_char(parser, '\n');
                html_append_char(parser, '\n');
        }
}

/*
 * Consumes the tag at the current read position and applies its effect
 * on the text output (line breaks, rulers, <pre> mode, link target).
 *
 * Returns the resulting state, which is also stored in parser->state.
 * HTML_UNKNOWN is returned for anything that is not handled here,
 * including comments and <style>/<script> blocks, which are skipped by
 * html_get_parenthesis().
 */
static HTMLState html_parse_tag(HTMLParser *parser)
{
        gchar buf[HTMLBUFSIZE];
        HTMLTag *tag;
        const gchar *name;

        html_get_parenthesis(parser, buf, sizeof(buf));

        tag = html_get_tag(buf);

        parser->state = HTML_UNKNOWN;
        if (!tag) return HTML_UNKNOWN;

        name = tag->name;

        if (!strcmp(name, "br")) {
                parser->space = FALSE;
                html_append_char(parser, '\n');
                parser->state = HTML_BR;
        } else if (!strcmp(name, "a")) {
                GList *cur;

                /* Only an anchor carrying href changes the state. */
                for (cur = tag->attr; cur != NULL; cur = cur->next) {
                        HTMLAttr *attr = (HTMLAttr *)cur->data;

                        if (attr && !strcmp(attr->name, "href")) {
                                g_free(parser->href);
                                parser->href = g_strdup(attr->value);
                                parser->state = HTML_HREF;
                                break;
                        }
                }
        } else if (!strcmp(name, "/a")) {
                g_free(parser->href);
                parser->href = NULL;
                parser->state = HTML_NORMAL;
        } else if (!strcmp(name, "p")) {
                /* A paragraph always discards the pending space. */
                parser->space = FALSE;
                html_tag_blank_line(parser);
                parser->state = HTML_PAR;
        } else if (!strcmp(name, "pre")) {
                parser->pre = TRUE;
                parser->state = HTML_PRE;
        } else if (!strcmp(name, "/pre")) {
                parser->pre = FALSE;
                parser->state = HTML_NORMAL;
        } else if (!strcmp(name, "hr")) {
                html_tag_break_line(parser);
                html_append_str(parser, HR_STR "\n", -1);
                parser->state = HTML_HR;
        } else if (!strcmp(name, "div")    ||
                   !strcmp(name, "ul")     ||
                   !strcmp(name, "li")     ||
                   !strcmp(name, "table")  ||
                   !strcmp(name, "tr")     ||
                   (name[0] == 'h' && g_ascii_isdigit(name[1]))) {
                /* Block-level opening tags start on a fresh line. */
                html_tag_break_line(parser);
                parser->state = HTML_NORMAL;
        } else if (!strcmp(name, "/table") ||
                   (name[0] == '/' &&
                    name[1] == 'h' &&
                    g_ascii_isdigit(name[2]))) {
                /*
                 * End of a table or heading ("/h1" .. "/h6"): leave a
                 * blank line.  The digit is the third character; testing
                 * name[1] here could never match because it must be 'h'.
                 */
                html_tag_blank_line(parser);
                parser->state = HTML_NORMAL;
        } else if (!strcmp(name, "/div")   ||
                   !strcmp(name, "/ul")    ||
                   !strcmp(name, "/li")) {
                html_tag_break_line(parser);
                parser->state = HTML_NORMAL;
        }

        html_free_tag(tag);

        return parser->state;
}
600

    
601
/*
 * Handles the '&' at the current read position.
 *
 * An entity of at most 8 characters ('&' and ';' included) is looked up
 * in the symbol table; if absent, a decimal reference (&#NNN;) is
 * decoded to UTF-8.  On success the replacement is appended and the
 * state becomes HTML_NORMAL.
 *
 * If no ';' follows within that length, only the literal '&' is output
 * (state HTML_NORMAL).  A terminated but unrecognised entity, or a
 * numeric reference that is 0 or not a valid Unicode code point, is
 * copied verbatim and the state stays HTML_UNKNOWN.
 */
static void html_parse_special(HTMLParser *parser)
{
        gchar symbol_name[9];
        gint n;
        const gchar *val;

        parser->state = HTML_UNKNOWN;
        g_return_if_fail(*parser->bufp == '&');

        /*
         * &foo; -- find the terminating ';'.  The scan stops once the
         * entity would not fit into symbol_name, instead of running to
         * the end of the buffer.
         */
        for (n = 0; n <= 7 && parser->bufp[n] != '\0' &&
                    parser->bufp[n] != ';'; n++)
                ;
        if (n > 7 || parser->bufp[n] != ';') {
                /* output literal `&' */
                html_append_char(parser, *parser->bufp++);
                parser->state = HTML_NORMAL;
                return;
        }

        /*
         * Copy "&...;" (n + 1 characters).  strncpy2() is assumed to
         * copy at most size - 1 bytes and NUL-terminate -- see utils.h.
         */
        strncpy2(symbol_name, parser->bufp, n + 2);
        parser->bufp += n + 1;

        if ((val = g_hash_table_lookup(parser->symbol_table, symbol_name))
            != NULL) {
                html_append_str(parser, val, -1);
                parser->state = HTML_NORMAL;
                return;
        } else if (symbol_name[1] == '#' && g_ascii_isdigit(symbol_name[2])) {
                gint ch;

                ch = atoi(symbol_name + 2);
                if (ch > 0 && ch < 128 && g_ascii_isprint(ch)) {
                        html_append_char(parser, ch);
                        parser->state = HTML_NORMAL;
                        return;
                } else if (ch > 0 && g_unichar_validate((gunichar)ch)) {
                        /*
                         * ISO 10646 to UTF-8.  Code point 0 would embed a
                         * NUL byte in the output and surrogates would give
                         * invalid UTF-8, so both are excluded above.
                         */
                        gchar buf[6];
                        gint len;

                        len = g_unichar_to_utf8((gunichar)ch, buf);
                        if (len > 0) {
                                html_append_str(parser, buf, len);
                                parser->state = HTML_NORMAL;
                                return;
                        }
                }
        }

        /* Unknown entity: pass its text through unchanged. */
        html_append_str(parser, symbol_name, -1);
}
651

    
652
static void html_get_parenthesis(HTMLParser *parser, gchar *buf, gint len)
653
{
654
        gchar *p;
655

    
656
        buf[0] = '\0';
657
        g_return_if_fail(*parser->bufp == '<');
658

    
659
        /* ignore comment / CSS / script stuff */
660
        if (!strncmp(parser->bufp, "<!--", 4)) {
661
                parser->bufp += 4;
662
                if ((p = html_find_str(parser, "-->")) != NULL)
663
                        parser->bufp = p + 3;
664
                return;
665
        }
666
        if (!g_ascii_strncasecmp(parser->bufp, "<style", 6)) {
667
                parser->bufp += 6;
668
                if ((p = html_find_str_case(parser, "</style")) != NULL) {
669
                        parser->bufp = p + 7;
670
                        if ((p = html_find_char(parser, '>')) != NULL)
671
                                parser->bufp = p + 1;
672
                }
673
                return;
674
        }
675
        if (!g_ascii_strncasecmp(parser->bufp, "<script", 7)) {
676
                parser->bufp += 7;
677
                if ((p = html_find_str_case(parser, "</script")) != NULL) {
678
                        parser->bufp = p + 8;
679
                        if ((p = html_find_char(parser, '>')) != NULL)
680
                                parser->bufp = p + 1;
681
                }
682
                return;
683
        }
684

    
685
        parser->bufp++;
686
        if ((p = html_find_char(parser, '>')) == NULL)
687
                return;
688

    
689
        strncpy2(buf, parser->bufp, MIN(p - parser->bufp + 1, len));
690
        g_strstrip(buf);
691
        parser->bufp = p + 1;
692
}