| 1 |
1 |
/*
|
| 2 |
2 |
* LibSylph -- E-Mail client library
|
| 3 |
|
* Copyright (C) 1999-2010 Hiroyuki Yamamoto
|
|
3 |
* Copyright (C) 1999-2011 Hiroyuki Yamamoto
|
| 4 |
4 |
*
|
| 5 |
5 |
* This library is free software; you can redistribute it and/or
|
| 6 |
6 |
* modify it under the terms of the GNU Lesser General Public
|
| ... | ... | |
| 171 |
171 |
static GHashTable *default_symbol_table;
|
| 172 |
172 |
|
| 173 |
173 |
static HTMLState html_read_line (HTMLParser *parser);
|
|
174 |
|
| 174 |
175 |
static void html_append_char (HTMLParser *parser,
|
| 175 |
176 |
gchar ch);
|
| 176 |
177 |
static void html_append_str (HTMLParser *parser,
|
| 177 |
178 |
const gchar *str,
|
| 178 |
179 |
gint len);
|
|
180 |
|
|
181 |
static gchar *html_find_char (HTMLParser *parser,
|
|
182 |
gchar ch);
|
|
183 |
static gchar *html_find_str (HTMLParser *parser,
|
|
184 |
const gchar *str);
|
|
185 |
static gchar *html_find_str_case (HTMLParser *parser,
|
|
186 |
const gchar *str);
|
|
187 |
|
| 179 |
188 |
static HTMLState html_parse_tag (HTMLParser *parser);
|
| 180 |
189 |
static void html_parse_special (HTMLParser *parser);
|
| 181 |
190 |
static void html_get_parenthesis (HTMLParser *parser,
|
| ... | ... | |
| 358 |
367 |
parser->newline = FALSE;
|
| 359 |
368 |
}
|
| 360 |
369 |
|
|
370 |
/*
 * Search for the character `ch` starting at parser->bufp.
 *
 * Whenever the search fails, html_read_line() is called and the search is
 * retried from the (re-read) parser->bufp.  The loop ends when a match is
 * found or when html_read_line() reports HTML_EOF.
 *
 * Returns a pointer to the matching character, or NULL if HTML_EOF was
 * reached first.  This function does not itself assign parser->bufp; the
 * caller decides how far to advance.
 */
static gchar *html_find_char(HTMLParser *parser, gchar ch)
{
	gchar *found;

	for (;;) {
		found = strchr(parser->bufp, ch);
		if (found != NULL)
			return found;

		/* nothing in the current text: ask for more input, stop at EOF */
		if (html_read_line(parser) == HTML_EOF)
			return NULL;
	}
}
|
|
381 |
|
|
382 |
/*
 * Search for the string `str` (exact, case-sensitive match via strstr())
 * starting at parser->bufp.
 *
 * Whenever the search fails, html_read_line() is called and the search is
 * retried from the (re-read) parser->bufp.  The loop ends when a match is
 * found or when html_read_line() reports HTML_EOF.
 *
 * Returns a pointer to the start of the match, or NULL if HTML_EOF was
 * reached first.  This function does not itself assign parser->bufp; the
 * caller decides how far to advance.
 */
static gchar *html_find_str(HTMLParser *parser, const gchar *str)
{
	gchar *found;

	for (;;) {
		found = strstr(parser->bufp, str);
		if (found != NULL)
			return found;

		/* nothing in the current text: ask for more input, stop at EOF */
		if (html_read_line(parser) == HTML_EOF)
			return NULL;
	}
}
|
|
393 |
|
|
394 |
/*
 * Search for the string `str`, ignoring case (via strcasestr()), starting
 * at parser->bufp.
 *
 * Whenever the search fails, html_read_line() is called and the search is
 * retried from the (re-read) parser->bufp.  The loop ends when a match is
 * found or when html_read_line() reports HTML_EOF.
 *
 * Returns a pointer to the start of the match, or NULL if HTML_EOF was
 * reached first.  This function does not itself assign parser->bufp; the
 * caller decides how far to advance.
 */
static gchar *html_find_str_case(HTMLParser *parser, const gchar *str)
{
	gchar *found;

	for (;;) {
		found = strcasestr(parser->bufp, str);
		if (found != NULL)
			return found;

		/* nothing in the current text: ask for more input, stop at EOF */
		if (html_read_line(parser) == HTML_EOF)
			return NULL;
	}
}
|
|
405 |
|
| 361 |
406 |
static HTMLTag *html_get_tag(const gchar *str)
|
| 362 |
407 |
{
|
| 363 |
408 |
HTMLTag *tag;
|
| ... | ... | |
| 614 |
659 |
/* ignore comment / CSS / script stuff */
|
| 615 |
660 |
if (!strncmp(parser->bufp, "<!--", 4)) {
|
| 616 |
661 |
parser->bufp += 4;
|
| 617 |
|
while ((p = strstr(parser->bufp, "-->")) == NULL)
|
| 618 |
|
if (html_read_line(parser) == HTML_EOF) return;
|
| 619 |
|
parser->bufp = p + 3;
|
|
662 |
if ((p = html_find_str(parser, "-->")) != NULL)
|
|
663 |
parser->bufp = p + 3;
|
| 620 |
664 |
return;
|
| 621 |
665 |
}
|
| 622 |
666 |
if (!g_ascii_strncasecmp(parser->bufp, "<style", 6)) {
|
| 623 |
667 |
parser->bufp += 6;
|
| 624 |
|
while ((p = strcasestr(parser->bufp, "</style")) == NULL)
|
| 625 |
|
if (html_read_line(parser) == HTML_EOF) return;
|
| 626 |
|
parser->bufp = p + 7;
|
| 627 |
|
while ((p = strchr(parser->bufp, '>')) == NULL)
|
| 628 |
|
if (html_read_line(parser) == HTML_EOF) return;
|
| 629 |
|
parser->bufp = p + 1;
|
|
668 |
if ((p = html_find_str_case(parser, "</style")) != NULL) {
|
|
669 |
parser->bufp = p + 7;
|
|
670 |
if ((p = html_find_char(parser, '>')) != NULL)
|
|
671 |
parser->bufp = p + 1;
|
|
672 |
}
|
| 630 |
673 |
return;
|
| 631 |
674 |
}
|
| 632 |
675 |
if (!g_ascii_strncasecmp(parser->bufp, "<script", 7)) {
|
| 633 |
676 |
parser->bufp += 7;
|
| 634 |
|
while ((p = strcasestr(parser->bufp, "</script")) == NULL)
|
| 635 |
|
if (html_read_line(parser) == HTML_EOF) return;
|
| 636 |
|
parser->bufp = p + 8;
|
| 637 |
|
while ((p = strchr(parser->bufp, '>')) == NULL)
|
| 638 |
|
if (html_read_line(parser) == HTML_EOF) return;
|
| 639 |
|
parser->bufp = p + 1;
|
|
677 |
if ((p = html_find_str_case(parser, "</script")) != NULL) {
|
|
678 |
parser->bufp = p + 8;
|
|
679 |
if ((p = html_find_char(parser, '>')) != NULL)
|
|
680 |
parser->bufp = p + 1;
|
|
681 |
}
|
| 640 |
682 |
return;
|
| 641 |
683 |
}
|
| 642 |
684 |
|
| 643 |
685 |
parser->bufp++;
|
| 644 |
|
while ((p = strchr(parser->bufp, '>')) == NULL)
|
| 645 |
|
if (html_read_line(parser) == HTML_EOF) return;
|
|
686 |
if ((p = html_find_char(parser, '>')) == NULL)
|
|
687 |
return;
|
| 646 |
688 |
|
| 647 |
689 |
strncpy2(buf, parser->bufp, MIN(p - parser->bufp + 1, len));
|
| 648 |
690 |
g_strstrip(buf);
|