Statistics
| Revision:

root / src / xml.c @ 322

History | View | Annotate | Download (12.7 kB)

1
/*
2
 * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
3
 * Copyright (C) 1999-2005 Hiroyuki Yamamoto
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
 */
19
20
#include <glib.h>
21
#include <stdio.h>
22
#include <string.h>
23
#include <ctype.h>
24
25
#include "xml.h"
26
#include "main.h"
27
#include "utils.h"
28
#include "codeconv.h"
29
30
/*
 * When SPARSE_MEMORY is defined, tag names and attribute names are interned
 * in one file-wide string table instead of being duplicated per tag/attr.
 * Without it, the XML_STRING_* macros fall back to g_strdup()/g_free().
 */
#define SPARSE_MEMORY

#ifdef SPARSE_MEMORY

#include "stringtable.h"

/* Shared table; created on first use by xml_string_table_create(). */
static StringTable *xml_string_table;

/* Create the shared string table unless it already exists. */
static void xml_string_table_create(void)
{
        if (!xml_string_table)
                xml_string_table = string_table_new();
}

#define XML_STRING_ADD(str) string_table_insert_string(xml_string_table, (str))
#define XML_STRING_FREE(str) string_table_free_string(xml_string_table, (str))
#define XML_STRING_TABLE_CREATE() xml_string_table_create()

#else /* !SPARSE_MEMORY */

#define XML_STRING_ADD(str) g_strdup(str)
#define XML_STRING_FREE(str) g_free(str)
#define XML_STRING_TABLE_CREATE()

#endif /* SPARSE_MEMORY */
61
62
static void xml_free_tag        (XMLTag                *tag);
63
static gint xml_get_parenthesis        (XMLFile        *file,
64
                                 gchar                *buf,
65
                                 gint                 len);
66
67
XMLFile *xml_open_file(const gchar *path)
68
{
69
        XMLFile *newfile;
70
71
        g_return_val_if_fail(path != NULL, NULL);
72
73
        XML_STRING_TABLE_CREATE();
74
75
        newfile = g_new(XMLFile, 1);
76
77
        newfile->fp = fopen(path, "rb");
78
        if (!newfile->fp) {
79
                g_free(newfile);
80
                return NULL;
81
        }
82
83
        newfile->buf = g_string_new(NULL);
84
        newfile->bufp = newfile->buf->str;
85
86
        newfile->dtd = NULL;
87
        newfile->encoding = NULL;
88
        newfile->tag_stack = NULL;
89
        newfile->level = 0;
90
        newfile->is_empty_element = FALSE;
91
92
        return newfile;
93
}
94
95
/*
 * Release everything owned by `file`: the stream (if open), the line buffer,
 * the DTD and encoding strings, every tag still on the tag stack, and the
 * XMLFile structure itself. Does nothing but log when `file` is NULL.
 */
void xml_close_file(XMLFile *file)
{
        g_return_if_fail(file != NULL);

        if (file->fp != NULL)
                fclose(file->fp);

        /* Unwind whatever tags are still on the stack. */
        for (; file->tag_stack != NULL; )
                xml_pop_tag(file);

        g_string_free(file->buf, TRUE);
        g_free(file->dtd);
        g_free(file->encoding);
        g_free(file);
}
111
112
/*
 * Recursively turn the tags read from `file` into GNode children of `parent`
 * (or into a new root node when `parent` is NULL).
 *
 * `level` is the nesting depth of the caller; parsing at this depth stops as
 * soon as the file's level drops below it (the enclosing tag was closed),
 * stays equal to it (reported as a parse error), or the next tag cannot be
 * parsed. Returns the node created last at this depth, or NULL if none was.
 */
static GNode *xml_build_tree(XMLFile *file, GNode *parent, guint level)
{
        GNode *last = NULL;

        while (xml_parse_next_tag(file) == 0) {
                XMLTag *cur;
                XMLNode *payload;

                if (file->level < level)
                        break;
                if (file->level == level) {
                        g_warning("xml_build_tree(): Parse error\n");
                        break;
                }

                cur = xml_get_current_tag(file);
                if (cur == NULL)
                        break;

                /* The node owns a private copy of the tag plus its text. */
                payload = xml_node_new(xml_copy_tag(cur), NULL);
                payload->element = xml_get_element(file);

                last = parent ? g_node_append_data(parent, payload)
                              : g_node_new(payload);

                /* Descend: collect the children of the tag just opened. */
                xml_build_tree(file, last, file->level);
                if (file->level == 0)
                        break;
        }

        return last;
}
140
141
GNode *xml_parse_file(const gchar *path)
142
{
143
        XMLFile *file;
144
        GNode *node;
145
146
        file = xml_open_file(path);
147
        g_return_val_if_fail(file != NULL, NULL);
148
149
        xml_get_dtd(file);
150
151
        node = xml_build_tree(file, NULL, file->level);
152
153
        xml_close_file(file);
154
155
#if defined(SPARSE_MEMORY)
156
        if (debug_mode)
157
                string_table_get_stats(xml_string_table);
158
#endif
159
160
        return node;
161
}
162
163
/*
 * Read the next <...> construct and accept it as the XML declaration if it
 * starts with '?', then contains "xml", then "version", then another '?'
 * (keyword matches are case-insensitive).
 *
 * On success the declaration text is stored in file->dtd and file->encoding
 * is set from its encoding="..." attribute, or to CS_INTERNAL when there is
 * none. Returns 0 on success, -1 if nothing could be read or the construct
 * does not look like a declaration.
 */
gint xml_get_dtd(XMLFile *file)
{
        gchar buf[XMLBUFSIZE];
        gchar *p = NULL;

        if (xml_get_parenthesis(file, buf, sizeof(buf)) < 0)
                return -1;

        /* Each step only runs when the previous one matched. */
        if (buf[0] == '?')
                p = strcasestr(buf + 1, "xml");
        if (p != NULL)
                p = strcasestr(p + 3, "version");
        if (p != NULL)
                p = strchr(p + 7, '?');

        if (p == NULL) {
                g_warning("Can't get xml dtd\n");
                return -1;
        }

        file->dtd = g_strdup(buf);

        p = strcasestr(buf, "encoding=\"");
        if (p != NULL) {
                /* Skip `encoding=` so p sits on the opening quote. */
                p += 9;
                /* presumably leaves the unquoted value at p -- confirm
                 * against extract_quote() in utils */
                extract_quote(p, '"');
                file->encoding = g_strdup(p);
        } else {
                file->encoding = g_strdup(CS_INTERNAL);
        }

        return 0;
}
188
189
gint xml_parse_next_tag(XMLFile *file)
190
{
191
        gchar buf[XMLBUFSIZE];
192
        guchar *bufp = buf;
193
        gchar *tag_str;
194
        XMLTag *tag;
195
        gint len;
196
197
        if (file->is_empty_element == TRUE) {
198
                file->is_empty_element = FALSE;
199
                xml_pop_tag(file);
200
                return 0;
201
        }
202
203
        if (xml_get_parenthesis(file, buf, sizeof(buf)) < 0) {
204
                g_warning("xml_parse_next_tag(): Can't parse next tag\n");
205
                return -1;
206
        }
207
208
        /* end-tag */
209
        if (buf[0] == '/') {
210
                if (strcmp(xml_get_current_tag(file)->tag, buf + 1) != 0) {
211
                        g_warning("xml_parse_next_tag(): Tag name mismatch: %s\n", buf);
212
                        return -1;
213
                }
214
                xml_pop_tag(file);
215
                return 0;
216
        }
217
218
        tag = xml_tag_new(NULL);
219
        xml_push_tag(file, tag);
220
221
        len = strlen(buf);
222
        if (len > 0 && buf[len - 1] == '/') {
223
                file->is_empty_element = TRUE;
224
                buf[len - 1] = '\0';
225
                g_strchomp(buf);
226
        }
227
        if (strlen(buf) == 0) {
228
                g_warning("xml_parse_next_tag(): Tag name is empty\n");
229
                return -1;
230
        }
231
232
        while (*bufp != '\0' && !isspace(*bufp)) bufp++;
233
        if (*bufp == '\0') {
234
                tag_str = conv_codeset_strdup(buf, file->encoding, CS_INTERNAL);
235
                if (tag_str) {
236
                        tag->tag = XML_STRING_ADD(tag_str);
237
                        g_free(tag_str);
238
                } else
239
                        tag->tag = XML_STRING_ADD(buf);
240
                return 0;
241
        } else {
242
                *bufp++ = '\0';
243
                tag_str = conv_codeset_strdup(buf, file->encoding, CS_INTERNAL);
244
                if (tag_str) {
245
                        tag->tag = XML_STRING_ADD(tag_str);
246
                        g_free(tag_str);
247
                } else
248
                        tag->tag = XML_STRING_ADD(buf);
249
        }
250
251
        /* parse attributes ( name=value ) */
252
        while (*bufp) {
253
                XMLAttr *attr;
254
                gchar *attr_name;
255
                gchar *attr_value;
256
                gchar *utf8_attr_name;
257
                gchar *utf8_attr_value;
258
                gchar *p;
259
                gchar quote;
260
261
                while (isspace(*bufp)) bufp++;
262
                attr_name = bufp;
263
                if ((p = strchr(attr_name, '=')) == NULL) {
264
                        g_warning("xml_parse_next_tag(): Syntax error in tag\n");
265
                        return -1;
266
                }
267
                bufp = p;
268
                *bufp++ = '\0';
269
                while (isspace(*bufp)) bufp++;
270
271
                if (*bufp != '"' && *bufp != '\'') {
272
                        g_warning("xml_parse_next_tag(): Syntax error in tag\n");
273
                        return -1;
274
                }
275
                quote = *bufp;
276
                bufp++;
277
                attr_value = bufp;
278
                if ((p = strchr(attr_value, quote)) == NULL) {
279
                        g_warning("xml_parse_next_tag(): Syntax error in tag\n");
280
                        return -1;
281
                }
282
                bufp = p;
283
                *bufp++ = '\0';
284
285
                g_strchomp(attr_name);
286
                xml_unescape_str(attr_value);
287
                utf8_attr_name = conv_codeset_strdup
288
                        (attr_name, file->encoding, CS_INTERNAL);
289
                utf8_attr_value = conv_codeset_strdup
290
                        (attr_value, file->encoding, CS_INTERNAL);
291
                if (!utf8_attr_name)
292
                        utf8_attr_name = g_strdup(attr_name);
293
                if (!utf8_attr_value)
294
                        utf8_attr_value = g_strdup(attr_value);
295
296
                attr = xml_attr_new(utf8_attr_name, utf8_attr_value);
297
                xml_tag_add_attr(tag, attr);
298
299
                g_free(utf8_attr_value);
300
                g_free(utf8_attr_name);
301
        }
302
303
        return 0;
304
}
305
306
/*
 * Make `tag` the current (innermost) tag of `file` and increase the nesting
 * level by one. A NULL `tag` is rejected and leaves `file` unchanged.
 */
void xml_push_tag(XMLFile *file, XMLTag *tag)
{
        g_return_if_fail(tag != NULL);

        file->level += 1;
        file->tag_stack = g_list_prepend(file->tag_stack, tag);
}
313
314
/*
 * Remove the current (innermost) tag from the tag stack of `file`, free it,
 * and decrease the nesting level by one. Does nothing when the stack is
 * empty.
 */
void xml_pop_tag(XMLFile *file)
{
        XMLTag *tag;

        if (!file->tag_stack) return;

        tag = (XMLTag *)file->tag_stack->data;

        /* Unlink first: the pointer is the search key for g_list_remove()
         * and must not be used for that after it has been freed. */
        file->tag_stack = g_list_remove(file->tag_stack, tag);
        file->level--;

        xml_free_tag(tag);
}
326
327
/*
 * Return the tag on top of the tag stack of `file`, or NULL when the stack
 * is empty. The returned tag is not copied.
 */
XMLTag *xml_get_current_tag(XMLFile *file)
{
        GList *top = file->tag_stack;

        return top ? (XMLTag *)top->data : NULL;
}
334
335
/*
 * Return the attribute list of the current tag of `file`, or NULL when no
 * tag is open. The list is not copied.
 */
GList *xml_get_current_tag_attr(XMLFile *file)
{
        XMLTag *cur = xml_get_current_tag(file);

        return cur ? cur->attr : NULL;
}
344
345
/*
 * Return the text between the read cursor and the next '<', with surrounding
 * whitespace stripped, entities unescaped and the result converted from the
 * file's encoding to CS_INTERNAL (the unconverted text is returned when the
 * conversion yields NULL). The cursor is left on the '<'.
 *
 * Returns a newly allocated string, or NULL when no '<' can be read, when the
 * cursor is already on a '<', or when the text is empty after stripping.
 */
gchar *xml_get_element(XMLFile *file)
{
        gchar *lt;
        gchar *raw;
        gchar *converted;

        /* Pull in more lines until a '<' shows up after the cursor. */
        for (;;) {
                lt = strchr(file->bufp, '<');
                if (lt != NULL)
                        break;
                if (xml_read_line(file) < 0)
                        return NULL;
        }

        if (lt == file->bufp)
                return NULL;

        raw = g_strndup(file->bufp, lt - file->bufp);
        /* this is not XML1.0 strict */
        g_strstrip(raw);
        xml_unescape_str(raw);

        /* Consume the text and drop it from the buffer. */
        file->bufp = lt;
        xml_truncate_buf(file);

        if (raw[0] == '\0') {
                g_free(raw);
                return NULL;
        }

        converted = conv_codeset_strdup(raw, file->encoding, CS_INTERNAL);
        if (converted == NULL)
                return raw;

        g_free(raw);
        return converted;
}
377
378
/*
 * Read one more chunk (at most XMLBUFSIZE - 1 bytes, up to and including a
 * newline) from the file and append it to the line buffer.
 * Returns 0 on success, -1 when fgets() delivers nothing.
 */
gint xml_read_line(XMLFile *file)
{
        gchar line[XMLBUFSIZE];
        gint offset;

        if (!fgets(line, sizeof(line), file->fp))
                return -1;

        /* Keep the cursor as an offset across the append, then re-derive
         * the pointer from the buffer's current storage. */
        offset = file->bufp - file->buf->str;
        g_string_append(file->buf, line);
        file->bufp = file->buf->str + offset;

        return 0;
}
394
395
/*
 * Discard everything in the line buffer that lies before the read cursor and
 * reset the cursor to the start of the buffer. No-op when the cursor is
 * already at the start.
 */
void xml_truncate_buf(XMLFile *file)
{
        gint consumed = file->bufp - file->buf->str;

        if (consumed <= 0)
                return;

        g_string_erase(file->buf, 0, consumed);
        file->bufp = file->buf->str;
}
405
406
/*
 * Return TRUE when `file` has a current tag whose name equals `name`
 * (case-sensitive), FALSE otherwise.
 */
gboolean xml_compare_tag(XMLFile *file, const gchar *name)
{
        XMLTag *cur = xml_get_current_tag(file);

        if (cur == NULL)
                return FALSE;

        return (strcmp(cur->tag, name) == 0) ? TRUE : FALSE;
}
417
418
/*
 * Allocate an XMLNode that refers to `tag` (stored as given, not copied) and
 * holds its own copy of `text` as element content.
 */
XMLNode *xml_node_new(XMLTag *tag, const gchar *text)
{
        XMLNode *fresh = g_new(XMLNode, 1);

        fresh->element = g_strdup(text);
        fresh->tag = tag;

        return fresh;
}
428
429
XMLTag *xml_tag_new(const gchar *tag)
430
{
431
        XMLTag *new_tag;
432
433
        new_tag = g_new(XMLTag, 1);
434
        if (tag)
435
                new_tag->tag = XML_STRING_ADD(tag);
436
        else
437
                new_tag->tag = NULL;
438
        new_tag->attr = NULL;
439
440
        return new_tag;
441
}
442
443
XMLAttr *xml_attr_new(const gchar *name, const gchar *value)
444
{
445
        XMLAttr *new_attr;
446
447
        new_attr = g_new(XMLAttr, 1);
448
        new_attr->name = XML_STRING_ADD(name);
449
        new_attr->value = g_strdup(value);
450
451
        return new_attr;
452
}
453
454
/* Append `attr` (stored as given, not copied) to the attribute list of `tag`. */
void xml_tag_add_attr(XMLTag *tag, XMLAttr *attr)
{
        GList *extended = g_list_append(tag->attr, attr);

        tag->attr = extended;
}
458
459
/*
 * Return a new tag with the same name as `tag` and a copy of each of its
 * attributes, in the same order.
 */
XMLTag *xml_copy_tag(XMLTag *tag)
{
        XMLTag *dup = xml_tag_new(tag->tag);
        GList *cur = tag->attr;

        while (cur != NULL) {
                xml_tag_add_attr(dup, xml_copy_attr((XMLAttr *)cur->data));
                cur = cur->next;
        }

        return dup;
}
473
474
/* Return a new attribute with the same name and value as `attr`. */
XMLAttr *xml_copy_attr(XMLAttr *attr)
{
        XMLAttr *dup = xml_attr_new(attr->name, attr->value);

        return dup;
}
478
479
/*
 * Replace the five predefined XML entities (&lt; &gt; &amp; &apos; &quot;)
 * in `str` with the characters they stand for, in place.
 *
 * A '&' with no ';' after it is reported with a warning and left alone; any
 * other "&...;" sequence is left unchanged. A character produced by a
 * replacement is not scanned again, so "&amp;lt;" becomes "&lt;".
 *
 * Entities are compared directly inside `str`, so no temporary copy is made
 * and the function cannot fail part-way through. Always returns 0.
 */
gint xml_unescape_str(gchar *str)
{
        static const struct {
                const gchar *text;
                gint len;
                gchar ch;
        } entities[] = {
                { "&lt;",   4, '<'  },
                { "&gt;",   4, '>'  },
                { "&amp;",  5, '&'  },
                { "&apos;", 6, '\'' },
                { "&quot;", 6, '\"' }
        };
        const gint n_entities = sizeof(entities) / sizeof(entities[0]);
        gchar *start;
        gchar *end;
        gchar *p = str;

        while ((start = strchr(p, '&')) != NULL) {
                gint len;
                gint i;

                if ((end = strchr(start + 1, ';')) == NULL) {
                        g_warning("Unescaped `&' appeared\n");
                        p = start + 1;
                        continue;
                }

                /* Length of the candidate including '&' and ';'. */
                len = end - start + 1;

                for (i = 0; i < n_entities; i++) {
                        if (len == entities[i].len &&
                            memcmp(start, entities[i].text, len) == 0)
                                break;
                }
                if (i == n_entities) {
                        /* Not one of the known entities: skip past it. */
                        p = end + 1;
                        continue;
                }

                /* Overwrite the '&' and pull the tail (with its NUL) up. */
                *start = entities[i].ch;
                memmove(start + 1, end + 1, strlen(end + 1) + 1);
                p = start + 1;
        }

        return 0;
}
523
524
/*
 * Write `str` to `fp`, replacing < > & ' " with &lt; &gt; &amp; &apos;
 * &quot;. A NULL `str` writes nothing.
 *
 * Returns -1 when `fp` is NULL and 0 otherwise; the results of the
 * individual write calls are not checked.
 */
gint xml_file_put_escape_str(FILE *fp, const gchar *str)
{
        const gchar *cur;

        g_return_val_if_fail(fp != NULL, -1);

        if (str == NULL)
                return 0;

        for (cur = str; *cur != '\0'; cur++) {
                const gchar *entity = NULL;

                if (*cur == '<')
                        entity = "&lt;";
                else if (*cur == '>')
                        entity = "&gt;";
                else if (*cur == '&')
                        entity = "&amp;";
                else if (*cur == '\'')
                        entity = "&apos;";
                else if (*cur == '\"')
                        entity = "&quot;";

                if (entity != NULL)
                        fputs(entity, fp);
                else
                        fputc(*cur, fp);
        }

        return 0;
}
556
557
gint xml_file_put_xml_decl(FILE *fp)
558
{
559
        g_return_val_if_fail(fp != NULL, -1);
560
561
        fprintf(fp, "<?xml version=\"1.0\" encoding=\"%s\"?>\n", CS_INTERNAL);
562
        return 0;
563
}
564
565
/*
 * Write one node to `fp` as a single line: "<name attr="value"...>text</name>"
 * when the node has element text, "<name attr="value"... />" when it has
 * none. Attribute values and element text are escaped; the tag name and
 * attribute names are written as they are.
 *
 * Returns -1 when `fp` or `node` is NULL and 0 otherwise.
 */
gint xml_file_put_node(FILE *fp, XMLNode *node)
{
        XMLTag *tag;
        GList *item;

        g_return_val_if_fail(fp != NULL, -1);
        g_return_val_if_fail(node != NULL, -1);

        tag = node->tag;
        fprintf(fp, "<%s", tag->tag);

        for (item = tag->attr; item != NULL; item = item->next) {
                XMLAttr *attr = (XMLAttr *)item->data;

                fprintf(fp, " %s=\"", attr->name);
                xml_file_put_escape_str(fp, attr->value);
                fputs("\"", fp);
        }

        if (!node->element) {
                /* No text: emit a self-closing tag. */
                fputs(" />\n", fp);
                return 0;
        }

        fputs(">", fp);
        xml_file_put_escape_str(fp, node->element);
        fprintf(fp, "</%s>\n", tag->tag);

        return 0;
}
591
592
/* Free `node` together with its tag and element text; NULL is ignored. */
void xml_free_node(XMLNode *node)
{
        if (node != NULL) {
                xml_free_tag(node->tag);
                g_free(node->element);
                g_free(node);
        }
}
600
601
/*
 * g_node_traverse() callback used by xml_free_tree(): frees the XMLNode
 * stored in `node`. `data` is unused. Always returns FALSE.
 */
static gboolean xml_free_func(GNode *node, gpointer data)
{
        xml_free_node((XMLNode *)node->data);

        return FALSE;
}
608
609
/*
 * Free a tree built from XMLNode data: first the XMLNode attached to every
 * node at and below `node`, then the GNode structure itself.
 * A NULL `node` is rejected.
 */
void xml_free_tree(GNode *node)
{
        g_return_if_fail(node != NULL);

        /* Pass 1: release the payloads. */
        g_node_traverse(node, G_PRE_ORDER, G_TRAVERSE_ALL, -1,
                        xml_free_func, NULL);

        /* Pass 2: release the nodes. */
        g_node_destroy(node);
}
618
619
/*
 * Free `tag`, its name and all of its attributes (name, value and the
 * XMLAttr itself). NULL is ignored.
 */
static void xml_free_tag(XMLTag *tag)
{
        if (!tag) return;

        /* xml_tag_new(NULL) leaves the name unset; do not hand a NULL key to
         * XML_STRING_FREE (whether the string table accepts NULL is not
         * visible from this file). */
        if (tag->tag != NULL)
                XML_STRING_FREE(tag->tag);

        while (tag->attr != NULL) {
                XMLAttr *attr = (XMLAttr *)tag->attr->data;

                /* Unlink before freeing so the list is never searched with
                 * a pointer to freed memory. */
                tag->attr = g_list_remove(tag->attr, attr);

                XML_STRING_FREE(attr->name);
                g_free(attr->value);
                g_free(attr);
        }

        g_free(tag);
}
633
634
/*
 * Copy the text between the next '<' and the following '>' into `buf`
 * (capacity `len`), strip surrounding whitespace from it, and move the read
 * cursor past the '>'. More lines are read from the file as needed.
 *
 * `buf` is set to the empty string first. Returns 0 on success and -1 when
 * the input ends before a complete <...> has been seen.
 */
static gint xml_get_parenthesis(XMLFile *file, gchar *buf, gint len)
{
        gchar *lt;
        gchar *gt;

        buf[0] = '\0';

        /* Locate the opening bracket and step over it. */
        for (;;) {
                lt = strchr(file->bufp, '<');
                if (lt != NULL)
                        break;
                if (xml_read_line(file) < 0)
                        return -1;
        }
        file->bufp = lt + 1;

        /* Locate the closing bracket. */
        for (;;) {
                gt = strchr(file->bufp, '>');
                if (gt != NULL)
                        break;
                if (xml_read_line(file) < 0)
                        return -1;
        }

        /* The size passed is the content length plus one, capped at `len`;
         * presumably strncpy2() counts the terminating NUL in that size --
         * confirm against utils. */
        strncpy2(buf, file->bufp, MIN(gt - file->bufp + 1, len));
        g_strstrip(buf);

        file->bufp = gt + 1;
        xml_truncate_buf(file);

        return 0;
}