Revision 2936 libsylph/html.c
| html.c (revision 2936) | ||
|---|---|---|
| 1 | 1 |
/* |
| 2 | 2 |
* LibSylph -- E-Mail client library |
| 3 |
* Copyright (C) 1999-2010 Hiroyuki Yamamoto
|
|
| 3 |
* Copyright (C) 1999-2011 Hiroyuki Yamamoto
|
|
| 4 | 4 |
* |
| 5 | 5 |
* This library is free software; you can redistribute it and/or |
| 6 | 6 |
* modify it under the terms of the GNU Lesser General Public |
| ... | ... | |
| 171 | 171 |
static GHashTable *default_symbol_table; |
| 172 | 172 |
|
| 173 | 173 |
static HTMLState html_read_line (HTMLParser *parser); |
| 174 |
|
|
| 174 | 175 |
static void html_append_char (HTMLParser *parser, |
| 175 | 176 |
gchar ch); |
| 176 | 177 |
static void html_append_str (HTMLParser *parser, |
| 177 | 178 |
const gchar *str, |
| 178 | 179 |
gint len); |
| 180 |
|
|
| 181 |
static gchar *html_find_char (HTMLParser *parser, |
|
| 182 |
gchar ch); |
|
| 183 |
static gchar *html_find_str (HTMLParser *parser, |
|
| 184 |
const gchar *str); |
|
| 185 |
static gchar *html_find_str_case (HTMLParser *parser, |
|
| 186 |
const gchar *str); |
|
| 187 |
|
|
| 179 | 188 |
static HTMLState html_parse_tag (HTMLParser *parser); |
| 180 | 189 |
static void html_parse_special (HTMLParser *parser); |
| 181 | 190 |
static void html_get_parenthesis (HTMLParser *parser, |
| ... | ... | |
| 358 | 367 |
parser->newline = FALSE; |
| 359 | 368 |
} |
| 360 | 369 |
|
| 370 |
/*
 * html_find_char:
 * @parser: parser whose current read position (parser->bufp) is searched.
 * @ch: character to look for.
 *
 * Searches from parser->bufp for the first occurrence of @ch.  Each time
 * the search fails, html_read_line() is called and the search is retried
 * from parser->bufp (presumably html_read_line() supplies more input to
 * the buffer -- confirm against its definition).
 *
 * Returns: pointer to the matching character, or NULL once
 * html_read_line() reports HTML_EOF without a match having been found.
 * parser->bufp is not assigned by this function itself.
 */
static gchar *html_find_char(HTMLParser *parser, gchar ch)
{
	for (;;) {
		gchar *found = strchr(parser->bufp, ch);

		if (found != NULL)
			return found;

		/* nothing in the current data: ask for more, give up at EOF */
		if (html_read_line(parser) == HTML_EOF)
			return NULL;
	}
}
|
| 381 |
|
|
| 382 |
/*
 * html_find_str:
 * @parser: parser whose current read position (parser->bufp) is searched.
 * @str: NUL-terminated string to look for (case-sensitive, via strstr()).
 *
 * Searches from parser->bufp for the first occurrence of @str.  Each time
 * the search fails, html_read_line() is called and the search is retried
 * from parser->bufp (presumably html_read_line() supplies more input to
 * the buffer -- confirm against its definition).
 *
 * Returns: pointer to the start of the match, or NULL once
 * html_read_line() reports HTML_EOF without a match having been found.
 * parser->bufp is not assigned by this function itself.
 */
static gchar *html_find_str(HTMLParser *parser, const gchar *str)
{
	for (;;) {
		gchar *found = strstr(parser->bufp, str);

		if (found != NULL)
			return found;

		/* nothing in the current data: ask for more, give up at EOF */
		if (html_read_line(parser) == HTML_EOF)
			return NULL;
	}
}
|
| 393 |
|
|
| 394 |
/*
 * html_find_str_case:
 * @parser: parser whose current read position (parser->bufp) is searched.
 * @str: NUL-terminated string to look for, ignoring case (via strcasestr()).
 *
 * Case-insensitive counterpart of html_find_str().  Searches from
 * parser->bufp for the first occurrence of @str.  Each time the search
 * fails, html_read_line() is called and the search is retried from
 * parser->bufp (presumably html_read_line() supplies more input to the
 * buffer -- confirm against its definition).
 *
 * Returns: pointer to the start of the match, or NULL once
 * html_read_line() reports HTML_EOF without a match having been found.
 * parser->bufp is not assigned by this function itself.
 */
static gchar *html_find_str_case(HTMLParser *parser, const gchar *str)
{
	for (;;) {
		gchar *found = strcasestr(parser->bufp, str);

		if (found != NULL)
			return found;

		/* nothing in the current data: ask for more, give up at EOF */
		if (html_read_line(parser) == HTML_EOF)
			return NULL;
	}
}
|
| 405 |
|
|
| 361 | 406 |
static HTMLTag *html_get_tag(const gchar *str) |
| 362 | 407 |
{
|
| 363 | 408 |
HTMLTag *tag; |
| ... | ... | |
| 614 | 659 |
/* ignore comment / CSS / script stuff */ |
| 615 | 660 |
if (!strncmp(parser->bufp, "<!--", 4)) {
|
| 616 | 661 |
parser->bufp += 4; |
| 617 |
while ((p = strstr(parser->bufp, "-->")) == NULL) |
|
| 618 |
if (html_read_line(parser) == HTML_EOF) return; |
|
| 619 |
parser->bufp = p + 3; |
|
| 662 |
if ((p = html_find_str(parser, "-->")) != NULL) |
|
| 663 |
parser->bufp = p + 3; |
|
| 620 | 664 |
return; |
| 621 | 665 |
} |
| 622 | 666 |
if (!g_ascii_strncasecmp(parser->bufp, "<style", 6)) {
|
| 623 | 667 |
parser->bufp += 6; |
| 624 |
while ((p = strcasestr(parser->bufp, "</style")) == NULL) |
|
| 625 |
if (html_read_line(parser) == HTML_EOF) return; |
|
| 626 |
parser->bufp = p + 7; |
|
| 627 |
while ((p = strchr(parser->bufp, '>')) == NULL) |
|
| 628 |
if (html_read_line(parser) == HTML_EOF) return; |
|
| 629 |
parser->bufp = p + 1; |
|
| 668 |
if ((p = html_find_str_case(parser, "</style")) != NULL) {
|
|
| 669 |
parser->bufp = p + 7; |
|
| 670 |
if ((p = html_find_char(parser, '>')) != NULL) |
|
| 671 |
parser->bufp = p + 1; |
|
| 672 |
} |
|
| 630 | 673 |
return; |
| 631 | 674 |
} |
| 632 | 675 |
if (!g_ascii_strncasecmp(parser->bufp, "<script", 7)) {
|
| 633 | 676 |
parser->bufp += 7; |
| 634 |
while ((p = strcasestr(parser->bufp, "</script")) == NULL) |
|
| 635 |
if (html_read_line(parser) == HTML_EOF) return; |
|
| 636 |
parser->bufp = p + 8; |
|
| 637 |
while ((p = strchr(parser->bufp, '>')) == NULL) |
|
| 638 |
if (html_read_line(parser) == HTML_EOF) return; |
|
| 639 |
parser->bufp = p + 1; |
|
| 677 |
if ((p = html_find_str_case(parser, "</script")) != NULL) {
|
|
| 678 |
parser->bufp = p + 8; |
|
| 679 |
if ((p = html_find_char(parser, '>')) != NULL) |
|
| 680 |
parser->bufp = p + 1; |
|
| 681 |
} |
|
| 640 | 682 |
return; |
| 641 | 683 |
} |
| 642 | 684 |
|
| 643 | 685 |
parser->bufp++; |
| 644 |
while ((p = strchr(parser->bufp, '>')) == NULL)
|
|
| 645 |
if (html_read_line(parser) == HTML_EOF) return;
|
|
| 686 |
if ((p = html_find_char(parser, '>')) == NULL)
|
|
| 687 |
return; |
|
| 646 | 688 |
|
| 647 | 689 |
strncpy2(buf, parser->bufp, MIN(p - parser->bufp + 1, len)); |
| 648 | 690 |
g_strstrip(buf); |
Also available in: Unified diff