Statistics
| Branch: | Tag: | Revision:

root / libsylph / html.c @ aebfd4cc

History | View | Annotate | Download (15.1 KB)

1
/*
2
 * LibSylph -- E-Mail client library
3
 * Copyright (C) 1999-2011 Hiroyuki Yamamoto
4
 */
5

    
6
#include <glib.h>
7
#include <stdio.h>
8
#include <string.h>
9
#include <ctype.h>
10

    
11
#include "html.h"
12
#include "codeconv.h"
13
#include "utils.h"
14

    
15
/* Size in bytes of the fixed scratch buffers used for input lines and tags. */
#define HTMLBUFSIZE 8192

/* Horizontal rule text written to the output for an <hr> element. */
#define HR_STR "------------------------------------------------"
17

    
18
typedef struct _HTMLSymbol        HTMLSymbol;
19

    
20
struct _HTMLSymbol
21
{
22
        gchar *const key;
23
        gchar *const val;
24
};
25

    
26
static HTMLSymbol symbol_list[] = {
27
        {"&lt;"    , "<"},
28
        {"&gt;"    , ">"},
29
        {"&amp;"   , "&"},
30
        {"&quot;"  , "\""}
31
};
32

    
33
/* &#160; - &#255; */
34
static HTMLSymbol latin_symbol_list[] = {
35
        {"&nbsp;"  , " "},
36
        /* {"&nbsp;"  , "\302\240"}, */
37
        {"&iexcl;" , "\302\241"},
38
        {"&cent;"  , "\302\242"},
39
        {"&pound;" , "\302\243"},
40
        {"&curren;", "\302\244"},
41
        {"&yen;"   , "\302\245"},
42
        {"&brvbar;", "\302\246"},
43
        {"&sect;"  , "\302\247"},
44
        {"&uml;"   , "\302\250"},
45
        {"&copy;"  , "\302\251"},
46
        {"&ordf;"  , "\302\252"},
47
        {"&laquo;" , "\302\253"},
48
        {"&not;"   , "\302\254"},
49
        {"&shy;"   , "\302\255"},
50
        {"&reg;"   , "\302\256"},
51
        {"&macr;"  , "\302\257"},
52
        {"&deg;"   , "\302\260"},
53
        {"&plusm;" , "\302\261"},
54
        {"&sup2;"  , "\302\262"},
55
        {"&sup3;"  , "\302\263"},
56
        {"&acute;" , "\302\264"},
57
        {"&micro;" , "\302\265"},
58
        {"&para;"  , "\302\266"},
59
        {"&middot;", "\302\267"},
60
        {"&cedil;" , "\302\270"},
61
        {"&sup1;"  , "\302\271"},
62
        {"&ordm;"  , "\302\272"},
63
        {"&raquo;" , "\302\273"},
64
        {"&frac14;", "\302\274"},
65
        {"&frac12;", "\302\275"},
66
        {"&frac34;", "\302\276"},
67
        {"&iquest;", "\302\277"},
68

    
69
        {"&Agrave;", "\303\200"},
70
        {"&Aacute;", "\303\201"},
71
        {"&Acirc;" , "\303\202"},
72
        {"&Atilde;", "\303\203"},
73
        {"&Auml;"  , "\303\204"},
74
        {"&Aring;" , "\303\205"},
75
        {"&AElig;" , "\303\206"},
76
        {"&Ccedil;", "\303\207"},
77
        {"&Egrave;", "\303\210"},
78
        {"&Eacute;", "\303\211"},
79
        {"&Ecirc;" , "\303\212"},
80
        {"&Euml;"  , "\303\213"},
81
        {"&Igrave;", "\303\214"},
82
        {"&Iacute;", "\303\215"},
83
        {"&Icirc;" , "\303\216"},
84
        {"&Iuml;"  , "\303\217"},
85
        {"&ETH;"   , "\303\220"},
86
        {"&Ntilde;", "\303\221"},
87
        {"&Ograve;", "\303\222"},
88
        {"&Oacute;", "\303\223"},
89
        {"&Ocirc;" , "\303\224"},
90
        {"&Otilde;", "\303\225"},
91
        {"&Ouml;"  , "\303\226"},
92
        {"&times;" , "\303\227"},
93
        {"&Oslash;", "\303\230"},
94
        {"&Ugrave;", "\303\231"},
95
        {"&Uacute;", "\303\232"},
96
        {"&Ucirc;" , "\303\233"},
97
        {"&Uuml;"  , "\303\234"},
98
        {"&Yacute;", "\303\235"},
99
        {"&THORN;" , "\303\236"},
100
        {"&szlig;" , "\303\237"},
101
        {"&agrave;", "\303\240"},
102
        {"&aacute;", "\303\241"},
103
        {"&acirc;" , "\303\242"},
104
        {"&atilde;", "\303\243"},
105
        {"&auml;"  , "\303\244"},
106
        {"&aring;" , "\303\245"},
107
        {"&aelig;" , "\303\246"},
108
        {"&ccedil;", "\303\247"},
109
        {"&egrave;", "\303\250"},
110
        {"&eacute;", "\303\251"},
111
        {"&ecirc;" , "\303\252"},
112
        {"&euml;"  , "\303\253"},
113
        {"&igrave;", "\303\254"},
114
        {"&iacute;", "\303\255"},
115
        {"&icirc;" , "\303\256"},
116
        {"&iuml;"  , "\303\257"},
117
        {"&eth;"   , "\303\260"},
118
        {"&ntilde;", "\303\261"},
119
        {"&ograve;", "\303\262"},
120
        {"&oacute;", "\303\263"},
121
        {"&ocirc;" , "\303\264"},
122
        {"&otilde;", "\303\265"},
123
        {"&ouml;"  , "\303\266"},
124
        {"&divide;", "\303\267"},
125
        {"&oslash;", "\303\270"},
126
        {"&ugrave;", "\303\271"},
127
        {"&uacute;", "\303\272"},
128
        {"&ucirc;" , "\303\273"},
129
        {"&uuml;"  , "\303\274"},
130
        {"&yacute;", "\303\275"},
131
        {"&thorn;" , "\303\276"},
132
        {"&yuml;"  , "\303\277"}
133
};
134

    
135
static HTMLSymbol other_symbol_list[] = {
136
        /* Non-standard? */
137
        {"&#133;"  , "..."},
138
        {"&#146;"  , "'"},
139
        {"&#150;"  , "-"},
140
        {"&#153;"  , "\xe2\x84\xa2"},
141
        {"&#156;"  , "\xc5\x93"},
142

    
143
        /* Symbolic characters */
144
        {"&trade;" , "\xe2\x84\xa2"},
145

    
146
        /* Latin extended */
147
        {"&OElig;" , "\xc5\x92"},
148
        {"&oelig;" , "\xc5\x93"},
149
        {"&Scaron;", "\xc5\xa0"},
150
        {"&scaron;", "\xc5\xa1"},
151
        {"&Yuml;"  , "\xc5\xb8"},
152
        {"&circ;"  , "\xcb\x86"},
153
        {"&tilde;" , "\xcb\x9c"},
154
        {"&fnof;"  , "\xc6\x92"},
155
};
156

    
157
static GHashTable *default_symbol_table;
158

    
159
static HTMLState html_read_line                (HTMLParser        *parser);
160

    
161
static void html_append_char                (HTMLParser        *parser,
162
                                         gchar                 ch);
163
static void html_append_str                (HTMLParser        *parser,
164
                                         const gchar        *str,
165
                                         gint                 len);
166

    
167
static gchar *html_find_char                (HTMLParser        *parser,
168
                                         gchar                 ch);
169
static gchar *html_find_str                (HTMLParser        *parser,
170
                                         const gchar        *str);
171
static gchar *html_find_str_case        (HTMLParser        *parser,
172
                                         const gchar        *str);
173

    
174
static HTMLState html_parse_tag                (HTMLParser        *parser);
175
static void html_parse_special                (HTMLParser        *parser);
176
static void html_get_parenthesis        (HTMLParser        *parser,
177
                                         gchar                *buf,
178
                                         gint                 len);
179

    
180

    
181
/*
 * Creates a parser that reads HTML from fp and converts each input line
 * with conv.
 *
 * Returns NULL (through g_return_val_if_fail) when fp or conv is NULL.
 * The entity table is created on the first call and shared by all parsers
 * created afterwards. Release the parser with html_parser_destroy().
 */
HTMLParser *html_parser_new(FILE *fp, CodeConverter *conv)
{
        HTMLParser *parser;

        g_return_val_if_fail(fp != NULL, NULL);
        g_return_val_if_fail(conv != NULL, NULL);

/* Inserts every key/val pair of a static HTMLSymbol array into table. */
#define SYMBOL_TABLE_ADD(table, list) \
        do { \
                gint idx_; \
 \
                for (idx_ = 0; idx_ < sizeof(list) / sizeof(list[0]); idx_++) \
                        g_hash_table_insert(table, list[idx_].key, \
                                            list[idx_].val); \
        } while (0)

        if (default_symbol_table == NULL) {
                default_symbol_table =
                        g_hash_table_new(g_str_hash, g_str_equal);
                SYMBOL_TABLE_ADD(default_symbol_table, symbol_list);
                SYMBOL_TABLE_ADD(default_symbol_table, latin_symbol_list);
                SYMBOL_TABLE_ADD(default_symbol_table, other_symbol_list);
        }

#undef SYMBOL_TABLE_ADD

        parser = g_new0(HTMLParser, 1);

        /* input side */
        parser->fp = fp;
        parser->conv = conv;
        parser->buf = g_string_new(NULL);
        parser->bufp = parser->buf->str;

        /* output side */
        parser->str = g_string_new(NULL);
        parser->state = HTML_NORMAL;
        parser->href = NULL;

        /* whitespace tracking: the output starts on a fresh, empty line */
        parser->newline = TRUE;
        parser->empty_line = TRUE;
        parser->space = FALSE;
        parser->pre = FALSE;

        parser->symbol_table = default_symbol_table;

        return parser;
}
223

    
224
/*
 * Frees a parser created by html_parser_new(): both string buffers, the
 * stored href and the parser structure itself.
 *
 * The FILE, the code converter and the shared symbol table are not
 * released here. Passing NULL is a no-op, matching g_free() and
 * html_free_tag().
 */
void html_parser_destroy(HTMLParser *parser)
{
        if (!parser) return;

        g_string_free(parser->str, TRUE);
        g_string_free(parser->buf, TRUE);
        g_free(parser->href);
        g_free(parser);
}
231

    
232
/*
 * Produces the next chunk of plain text from the HTML input.
 *
 * The output buffer is cleared and parser->state is reset to HTML_NORMAL
 * on entry. When the input buffer is exhausted a new line is read first;
 * NULL is returned if that read hits end of file. Scanning stops at the
 * end of the buffered input, or in front of a tag once some text has
 * already been collected (the tag is handled by the next call).
 *
 * Returns parser->str->str, which is owned by the parser and overwritten
 * by the next call.
 */
const gchar *html_parse(HTMLParser *parser)
{
        parser->state = HTML_NORMAL;
        g_string_truncate(parser->str, 0);

        if (*parser->bufp == '\0') {
                g_string_truncate(parser->buf, 0);
                parser->bufp = parser->buf->str;
                if (html_read_line(parser) == HTML_EOF)
                        return NULL;
        }

        for (;;) {
                const gchar c = *parser->bufp;

                if (c == '\0')
                        break;

                if (c == '<') {
                        /* pending text is handed out before the tag */
                        if (parser->str->len != 0)
                                return parser->str->str;
                        html_parse_tag(parser);
                        continue;
                }

                if (c == '&') {
                        html_parse_special(parser);
                        continue;
                }

                if (c == ' ' || c == '\t' || c == '\r' || c == '\n') {
                        /* treat CR LF as a single LF */
                        if (c == '\r' && parser->bufp[1] == '\n')
                                parser->bufp++;

                        if (!parser->pre) {
                                /*
                                 * Outside <pre> whitespace collapses to one
                                 * pending space, dropped at line start.
                                 */
                                if (!parser->newline)
                                        parser->space = TRUE;

                                parser->bufp++;
                                continue;
                        }
                        /* inside <pre> the character is copied verbatim */
                }

                /* re-read the byte: bufp may have moved past a CR above */
                html_append_char(parser, *parser->bufp++);
        }

        return parser->str->str;
}
277

    
278
/*
 * Reads one chunk (at most HTMLBUFSIZE - 1 bytes, up to a newline) from
 * parser->fp, converts it and appends it to parser->buf.
 *
 * Returns HTML_EOF (also stored in parser->state) when nothing could be
 * read, HTML_CONV_FAILED when conv_convert() returned NULL and the text
 * from conv_utf8todisp() was appended instead, and HTML_NORMAL otherwise.
 */
static HTMLState html_read_line(HTMLParser *parser)
{
        gchar line[HTMLBUFSIZE];
        gchar *converted;
        gint offset;
        HTMLState result = HTML_NORMAL;

        if (fgets(line, sizeof(line), parser->fp) == NULL) {
                parser->state = HTML_EOF;
                return HTML_EOF;
        }

        converted = conv_convert(parser->conv, line);
        if (converted == NULL) {
                converted = conv_utf8todisp(line, NULL);
                result = HTML_CONV_FAILED;
        }

        /*
         * Keep the read position as an offset: appending can move
         * parser->buf->str, so bufp is rebuilt afterwards.
         */
        offset = parser->bufp - parser->buf->str;
        g_string_append(parser->buf, converted);
        g_free(converted);
        parser->bufp = parser->buf->str + offset;

        return result;
}
311

    
312
/*
 * Appends ch to the output, preceded by the pending collapsed space if
 * there is one (never inside <pre>), and updates the line flags:
 * newline is set when ch is '\n', empty_line when that '\n' directly
 * follows another '\n' in the output.
 */
static void html_append_char(HTMLParser *parser, gchar ch)
{
        GString *out = parser->str;
        gboolean is_nl = (ch == '\n');

        if (parser->space && !parser->pre) {
                g_string_append_c(out, ' ');
                parser->space = FALSE;
        }

        g_string_append_c(out, ch);

        parser->newline = is_nl;
        parser->empty_line =
                is_nl && out->len > 1 && out->str[out->len - 2] == '\n';
}
331

    
332
/*
 * Appends str to the output, preceded by the pending collapsed space if
 * there is one (never inside <pre>).
 *
 * len > 0 appends exactly len bytes, len < 0 appends the whole
 * NUL-terminated string, and len == 0 appends nothing and leaves the
 * line flags untouched (the pending space is still flushed).
 * Afterwards newline / empty_line reflect whether the output ends in one
 * or two '\n' characters.
 */
static void html_append_str(HTMLParser *parser, const gchar *str, gint len)
{
        GString *out = parser->str;
        gboolean ends_nl;

        if (parser->space && !parser->pre) {
                g_string_append_c(out, ' ');
                parser->space = FALSE;
        }

        if (len == 0)
                return;

        if (len > 0)
                g_string_append_len(out, str, len);
        else
                g_string_append(out, str);

        ends_nl = out->len > 0 && out->str[out->len - 1] == '\n';

        parser->newline = ends_nl;
        parser->empty_line =
                ends_nl && out->len > 1 && out->str[out->len - 2] == '\n';
}
355

    
356
/*
 * Finds the first occurrence of ch at or after parser->bufp, reading
 * further input lines until it shows up.
 *
 * Returns a pointer into the parser's input buffer, or NULL if end of
 * file is reached first. parser->bufp itself is not advanced.
 */
static gchar *html_find_char(HTMLParser *parser, gchar ch)
{
        for (;;) {
                gchar *hit = strchr(parser->bufp, ch);

                if (hit != NULL)
                        return hit;
                if (html_read_line(parser) == HTML_EOF)
                        return NULL;
        }
}
367

    
368
/*
 * Finds the first case-sensitive occurrence of str at or after
 * parser->bufp, reading further input lines until it shows up.
 *
 * Returns a pointer into the parser's input buffer, or NULL if end of
 * file is reached first. parser->bufp itself is not advanced.
 */
static gchar *html_find_str(HTMLParser *parser, const gchar *str)
{
        for (;;) {
                gchar *hit = strstr(parser->bufp, str);

                if (hit != NULL)
                        return hit;
                if (html_read_line(parser) == HTML_EOF)
                        return NULL;
        }
}
379

    
380
/*
 * Case-insensitive variant of html_find_str(): finds the first match of
 * str at or after parser->bufp using strcasestr(), reading further input
 * lines until it shows up.
 *
 * Returns a pointer into the parser's input buffer, or NULL if end of
 * file is reached first. parser->bufp itself is not advanced.
 */
static gchar *html_find_str_case(HTMLParser *parser, const gchar *str)
{
        for (;;) {
                gchar *hit = strcasestr(parser->bufp, str);

                if (hit != NULL)
                        return hit;
                if (html_read_line(parser) == HTML_EOF)
                        return NULL;
        }
}
391

    
392
static HTMLTag *html_get_tag(const gchar *str)
393
{
394
        HTMLTag *tag;
395
        gchar *tmp;
396
        gchar *tmpp;
397

    
398
        g_return_val_if_fail(str != NULL, NULL);
399

    
400
        if (*str == '\0' || *str == '!') return NULL;
401

    
402
        tmp = g_strdup(str);
403

    
404
        tag = g_new0(HTMLTag, 1);
405

    
406
        for (tmpp = tmp; *tmpp != '\0' && !g_ascii_isspace(*tmpp); tmpp++) {
407
                if (tmpp > tmp && *tmpp == '/') {
408
                        *tmpp = '\0';
409
                        break;
410
                }
411
        }
412

    
413
        if (*tmpp == '\0') {
414
                g_strdown(tmp);
415
                tag->name = tmp;
416
                return tag;
417
        } else {
418
                *tmpp++ = '\0';
419
                g_strdown(tmp);
420
                tag->name = g_strdup(tmp);
421
        }
422

    
423
        while (*tmpp != '\0') {
424
                HTMLAttr *attr;
425
                gchar *attr_name;
426
                gchar *attr_value;
427
                gchar *p;
428
                gchar quote;
429

    
430
                while (g_ascii_isspace(*tmpp)) tmpp++;
431
                if (tmpp > tmp && *tmpp == '/')
432
                        break;
433
                attr_name = tmpp;
434

    
435
                while (*tmpp != '\0' && !g_ascii_isspace(*tmpp) &&
436
                       *tmpp != '=')
437
                        tmpp++;
438
                if (*tmpp != '\0' && *tmpp != '=') {
439
                        *tmpp++ = '\0';
440
                        while (g_ascii_isspace(*tmpp)) tmpp++;
441
                }
442

    
443
                if (*tmpp == '=') {
444
                        *tmpp++ = '\0';
445
                        while (g_ascii_isspace(*tmpp)) tmpp++;
446

    
447
                        if (*tmpp == '"' || *tmpp == '\'') {
448
                                /* name="value" */
449
                                quote = *tmpp;
450
                                tmpp++;
451
                                attr_value = tmpp;
452
                                if ((p = strchr(attr_value, quote)) == NULL) {
453
                                        g_warning("html_get_tag(): syntax error in tag: '%s'\n", str);
454
                                        break;
455
                                }
456
                                tmpp = p;
457
                                *tmpp++ = '\0';
458
                                while (g_ascii_isspace(*tmpp)) tmpp++;
459
                        } else {
460
                                /* name=value */
461
                                attr_value = tmpp;
462
                                while (*tmpp != '\0' && !g_ascii_isspace(*tmpp)) tmpp++;
463
                                if (*tmpp != '\0')
464
                                        *tmpp++ = '\0';
465
                        }
466
                } else
467
                        attr_value = "";
468

    
469
                g_strchomp(attr_name);
470
                g_strdown(attr_name);
471
                attr = g_new(HTMLAttr, 1);
472
                attr->name = g_strdup(attr_name);
473
                attr->value = g_strdup(attr_value);
474
                tag->attr = g_list_append(tag->attr, attr);
475
        }
476

    
477
        g_free(tmp);
478

    
479
        return tag;
480
}
481

    
482
/*
 * Releases a tag returned by html_get_tag(): its name, every attribute
 * (name, value and the HTMLAttr itself) and the attribute list.
 * NULL is accepted and ignored.
 */
static void html_free_tag(HTMLTag *tag)
{
        GList *head;

        if (tag == NULL)
                return;

        g_free(tag->name);

        /* pop attributes off the front of the list one at a time */
        while ((head = tag->attr) != NULL) {
                HTMLAttr *attr = (HTMLAttr *)head->data;

                tag->attr = g_list_remove(head, attr);
                g_free(attr->name);
                g_free(attr->value);
                g_free(attr);
        }

        g_free(tag);
}
496

    
497
/*
 * Consumes the tag (or comment / style / script section) at parser->bufp
 * and applies its effect on the text output.
 *
 *   br                       line break
 *   a href=... / /a          remember / forget the link target in
 *                            parser->href
 *   p, /table, /h<digit>     make sure a blank line precedes what follows
 *   pre / /pre               switch verbatim whitespace handling on / off
 *   hr                       HR_STR on a line of its own
 *   div, ul, li, table, tr,
 *   h<digit>, /div, /ul, /li start a new line if not already on one
 *
 * Everything else is dropped without output. Tag text beyond
 * HTMLBUFSIZE - 1 bytes is cut off before it is interpreted.
 *
 * Returns the resulting parser->state; HTML_UNKNOWN for unrecognised or
 * skipped markup.
 */
static HTMLState html_parse_tag(HTMLParser *parser)
{
        gchar buf[HTMLBUFSIZE];
        HTMLTag *tag;

        html_get_parenthesis(parser, buf, sizeof(buf));

        tag = html_get_tag(buf);

        parser->state = HTML_UNKNOWN;
        if (!tag) return HTML_UNKNOWN;

        if (!strcmp(tag->name, "br")) {
                parser->space = FALSE;
                html_append_char(parser, '\n');
                parser->state = HTML_BR;
        } else if (!strcmp(tag->name, "a")) {
                GList *cur;

                for (cur = tag->attr; cur != NULL; cur = cur->next) {
                        HTMLAttr *attr = (HTMLAttr *)cur->data;

                        if (attr && !strcmp(attr->name, "href")) {
                                g_free(parser->href);
                                parser->href = g_strdup(attr->value);
                                parser->state = HTML_HREF;
                                break;
                        }
                }
        } else if (!strcmp(tag->name, "/a")) {
                g_free(parser->href);
                parser->href = NULL;
                parser->state = HTML_NORMAL;
        } else if (!strcmp(tag->name, "p")) {
                parser->space = FALSE;
                if (!parser->empty_line) {
                        if (!parser->newline) html_append_char(parser, '\n');
                        html_append_char(parser, '\n');
                }
                parser->state = HTML_PAR;
        } else if (!strcmp(tag->name, "pre")) {
                parser->pre = TRUE;
                parser->state = HTML_PRE;
        } else if (!strcmp(tag->name, "/pre")) {
                parser->pre = FALSE;
                parser->state = HTML_NORMAL;
        } else if (!strcmp(tag->name, "hr")) {
                if (!parser->newline) {
                        parser->space = FALSE;
                        html_append_char(parser, '\n');
                }
                html_append_str(parser, HR_STR "\n", -1);
                parser->state = HTML_HR;
        } else if (!strcmp(tag->name, "div")    ||
                   !strcmp(tag->name, "ul")     ||
                   !strcmp(tag->name, "li")     ||
                   !strcmp(tag->name, "table")  ||
                   !strcmp(tag->name, "tr")     ||
                   (tag->name[0] == 'h' && g_ascii_isdigit(tag->name[1]))) {
                if (!parser->newline) {
                        parser->space = FALSE;
                        html_append_char(parser, '\n');
                }
                parser->state = HTML_NORMAL;
        } else if (!strcmp(tag->name, "/table") ||
                   (tag->name[0] == '/' &&
                    tag->name[1] == 'h' &&
                    /*
                     * The digit of "/hN" is at index 2. Testing index 1
                     * again could never succeed, so closing headings were
                     * silently ignored.
                     */
                    g_ascii_isdigit(tag->name[2]))) {
                if (!parser->empty_line) {
                        parser->space = FALSE;
                        if (!parser->newline) html_append_char(parser, '\n');
                        html_append_char(parser, '\n');
                }
                parser->state = HTML_NORMAL;
        } else if (!strcmp(tag->name, "/div")   ||
                   !strcmp(tag->name, "/ul")    ||
                   !strcmp(tag->name, "/li")) {
                if (!parser->newline) {
                        parser->space = FALSE;
                        html_append_char(parser, '\n');
                }
                parser->state = HTML_NORMAL;
        }

        html_free_tag(tag);

        return parser->state;
}
586

    
587
/*
 * Decodes the character reference starting at parser->bufp (which must
 * point at '&') and appends the result to the output.
 *
 * - If no ';' follows within the part of the input buffered so far, or
 *   the reference including '&' and ';' does not fit symbol_name, only
 *   the '&' is emitted and parsing resumes right after it.
 * - References present in the symbol table are replaced by their value.
 * - Decimal (&#NNN;) and hexadecimal (&#xHH; / &#XHH;) references are
 *   emitted as the character itself (printable ASCII) or as its UTF-8
 *   encoding. Code point 0 and values that are not valid Unicode
 *   characters (e.g. surrogates) are not encoded, since they would put a
 *   NUL byte or malformed UTF-8 into the output.
 * - Anything else is copied through unchanged and parser->state is left
 *   at HTML_UNKNOWN; on every other path it ends up HTML_NORMAL.
 */
static void html_parse_special(HTMLParser *parser)
{
        gchar symbol_name[9];
        gint n;
        const gchar *val;

        parser->state = HTML_UNKNOWN;
        g_return_if_fail(*parser->bufp == '&');

        /* &foo; -- n becomes the index of the terminating ';' */
        for (n = 0; parser->bufp[n] != '\0' && parser->bufp[n] != ';'; n++)
                ;
        /* n + 1 characters plus the terminating NUL must fit symbol_name */
        if (n + 2 > (gint)sizeof(symbol_name) || parser->bufp[n] != ';') {
                /* output literal `&' */
                html_append_char(parser, *parser->bufp++);
                parser->state = HTML_NORMAL;
                return;
        }
        /* NOTE(review): relies on strncpy2() NUL-terminating within n + 2 */
        strncpy2(symbol_name, parser->bufp, n + 2);
        parser->bufp += n + 1;

        val = g_hash_table_lookup(parser->symbol_table, symbol_name);
        if (val != NULL) {
                html_append_str(parser, val, -1);
                parser->state = HTML_NORMAL;
                return;
        }

        if (symbol_name[1] == '#') {
                gunichar ch = 0;
                gboolean numeric = FALSE;

                if (g_ascii_isdigit(symbol_name[2])) {
                        /* decimal; at most 5 digits fit, so no overflow */
                        ch = (gunichar)atoi(symbol_name + 2);
                        numeric = TRUE;
                } else if ((symbol_name[2] == 'x' || symbol_name[2] == 'X') &&
                           g_ascii_isxdigit(symbol_name[3])) {
                        /* hexadecimal; at most 4 digits fit */
                        const gchar *p;

                        for (p = symbol_name + 3; g_ascii_isxdigit(*p); p++)
                                ch = ch * 16 + g_ascii_xdigit_value(*p);
                        numeric = TRUE;
                }

                if (numeric && ch < 128 && g_ascii_isprint(ch)) {
                        html_append_char(parser, (gchar)ch);
                        parser->state = HTML_NORMAL;
                        return;
                }

                if (numeric && ch != 0 && g_unichar_validate(ch)) {
                        /* ISO 10646 to UTF-8 */
                        gchar buf[6];
                        gint len;

                        len = g_unichar_to_utf8(ch, buf);
                        if (len > 0) {
                                html_append_str(parser, buf, len);
                                parser->state = HTML_NORMAL;
                                return;
                        }
                }
        }

        /* unknown reference: pass it through as written */
        html_append_str(parser, symbol_name, -1);
}
637

    
638
/*
 * Extracts the text between the '<' at parser->bufp and the next '>'
 * into buf (capacity len), stripped of surrounding whitespace, and moves
 * parser->bufp past the '>'.
 *
 * Comments ("<!-- ... -->") and style / script sections (through the '>'
 * of their closing tag) are skipped instead, leaving buf empty. buf is
 * also left empty when no closing '>' exists before end of file. Tag
 * text that does not fit into buf is truncated.
 */
static void html_get_parenthesis(HTMLParser *parser, gchar *buf, gint len)
{
        /* elements whose whole content is discarded */
        static const struct {
                const gchar *open;
                const gchar *close;
        } skipped[] = {
                {"<style",  "</style"},
                {"<script", "</script"}
        };
        gchar *end;
        guint i;

        buf[0] = '\0';
        g_return_if_fail(*parser->bufp == '<');

        /* ignore comment / CSS / script stuff */
        if (strncmp(parser->bufp, "<!--", 4) == 0) {
                parser->bufp += 4;
                end = html_find_str(parser, "-->");
                if (end != NULL)
                        parser->bufp = end + 3;
                return;
        }

        for (i = 0; i < sizeof(skipped) / sizeof(skipped[0]); i++) {
                const gsize open_len = strlen(skipped[i].open);
                const gsize close_len = strlen(skipped[i].close);

                if (g_ascii_strncasecmp(parser->bufp, skipped[i].open,
                                        open_len) != 0)
                        continue;

                parser->bufp += open_len;
                end = html_find_str_case(parser, skipped[i].close);
                if (end != NULL) {
                        parser->bufp = end + close_len;
                        end = html_find_char(parser, '>');
                        if (end != NULL)
                                parser->bufp = end + 1;
                }
                return;
        }

        /* ordinary tag: copy everything up to, not including, '>' */
        parser->bufp++;
        end = html_find_char(parser, '>');
        if (end == NULL)
                return;

        strncpy2(buf, parser->bufp, MIN(end - parser->bufp + 1, len));
        g_strstrip(buf);
        parser->bufp = end + 1;
}