Statistics
| Revision:

root / libsylph / xml.c @ 1005

History | View | Annotate | Download (12.7 kB)

1
/*
2
 * LibSylph -- E-Mail client library
3
 * Copyright (C) 1999-2005 Hiroyuki Yamamoto
4
 *
5
 * This library is free software; you can redistribute it and/or
6
 * modify it under the terms of the GNU Lesser General Public
7
 * License as published by the Free Software Foundation; either
8
 * version 2.1 of the License, or (at your option) any later version.
9
 *
10
 * This library is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
 * Lesser General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU Lesser General Public
16
 * License along with this library; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
18
 */
19
20
#include <glib.h>
21
#include <stdio.h>
22
#include <string.h>
23
#include <ctype.h>
24
25
#include "xml.h"
26
#include "utils.h"
27
#include "codeconv.h"
28
29
#define SPARSE_MEMORY
30
/* if this is defined all attr.names and tag.names are stored
31
 * in a hash table */
32
#if defined(SPARSE_MEMORY)
33
#include "stringtable.h" 
34
35
static StringTable *xml_string_table;
36
37
static void xml_string_table_create(void)
38
{
39
        if (xml_string_table == NULL)
40
                xml_string_table = string_table_new();
41
}
42
/* Pass STR to string_table_insert_string() on the shared table. */
#define XML_STRING_ADD(str) \
        string_table_insert_string(xml_string_table, (str))

/* Pass STR to string_table_free_string() on the shared table. */
#define XML_STRING_FREE(str) \
        string_table_free_string(xml_string_table, (str))

/* Make sure the shared table exists before it is used. */
#define XML_STRING_TABLE_CREATE() \
        xml_string_table_create()
49
50
#else /* !SPARSE_MEMORY */
51
52
/* Without SPARSE_MEMORY every name is an ordinary g_strdup() copy. */
#define XML_STRING_ADD(str) \
        g_strdup(str)

/* ... and is released with g_free(). */
#define XML_STRING_FREE(str) \
        g_free(str)

/* Nothing to set up in this configuration. */
#define XML_STRING_TABLE_CREATE()
58
59
#endif /* SPARSE_MEMORY */
60
61
static void xml_free_tag        (XMLTag                *tag);
62
static gint xml_get_parenthesis        (XMLFile        *file,
63
                                 gchar                *buf,
64
                                 gint                 len);
65
66
XMLFile *xml_open_file(const gchar *path)
67
{
68
        XMLFile *newfile;
69
70
        g_return_val_if_fail(path != NULL, NULL);
71
72
        XML_STRING_TABLE_CREATE();
73
74
        newfile = g_new(XMLFile, 1);
75
76
        newfile->fp = g_fopen(path, "rb");
77
        if (!newfile->fp) {
78
                g_free(newfile);
79
                return NULL;
80
        }
81
82
        newfile->buf = g_string_new(NULL);
83
        newfile->bufp = newfile->buf->str;
84
85
        newfile->dtd = NULL;
86
        newfile->encoding = NULL;
87
        newfile->tag_stack = NULL;
88
        newfile->level = 0;
89
        newfile->is_empty_element = FALSE;
90
91
        return newfile;
92
}
93
94
/*
 * Close FILE's stream (if any) and free everything the XMLFile owns:
 * the read buffer, the dtd and encoding strings, every tag still on the
 * stack, and finally the XMLFile itself.
 */
void xml_close_file(XMLFile *file)
{
        g_return_if_fail(file != NULL);

        if (file->fp != NULL) {
                fclose(file->fp);
        }

        g_string_free(file->buf, TRUE);
        g_free(file->dtd);
        g_free(file->encoding);

        /* xml_pop_tag() frees the top tag and shortens the stack. */
        while (file->tag_stack != NULL) {
                xml_pop_tag(file);
        }

        g_free(file);
}
110
111
/*
 * Recursively turn the tags read from FILE into a GNode tree.
 *
 * Each start tag found below nesting depth LEVEL becomes an XMLNode (a copy
 * of the current tag plus the text that follows it) which is either made a
 * new root (PARENT == NULL) or appended to PARENT; its children are then
 * built by a recursive call.  The loop ends when parsing fails, when the
 * depth drops below LEVEL, or when the depth is back at 0.
 *
 * Returns the node created last by this invocation, or NULL if none was
 * created.
 */
static GNode *xml_build_tree(XMLFile *file, GNode *parent, guint level)
{
        GNode *last = NULL;

        while (xml_parse_next_tag(file) == 0) {
                XMLTag *current;
                XMLNode *payload;

                if (file->level < level)
                        break;
                if (file->level == level) {
                        g_warning("xml_build_tree(): Parse error\n");
                        break;
                }

                current = xml_get_current_tag(file);
                if (current == NULL)
                        break;

                payload = xml_node_new(xml_copy_tag(current), NULL);
                payload->element = xml_get_element(file);

                if (parent != NULL)
                        last = g_node_append_data(parent, payload);
                else
                        last = g_node_new(payload);

                /* Children of the node just created. */
                xml_build_tree(file, last, file->level);

                if (file->level == 0)
                        break;
        }

        return last;
}
139
140
GNode *xml_parse_file(const gchar *path)
141
{
142
        XMLFile *file;
143
        GNode *node;
144
145
        file = xml_open_file(path);
146
        g_return_val_if_fail(file != NULL, NULL);
147
148
        xml_get_dtd(file);
149
150
        node = xml_build_tree(file, NULL, file->level);
151
152
        xml_close_file(file);
153
154
#if defined(SPARSE_MEMORY)
155
        if (get_debug_mode())
156
                string_table_get_stats(xml_string_table);
157
#endif
158
159
        return node;
160
}
161
162
/*
 * Read the first "<...>" construct from FILE and check that it looks like
 * an XML declaration: it must start with '?', then contain "xml",
 * "version" and a closing '?' in that order (case-insensitive keywords).
 *
 * On success the declaration text is stored in file->dtd, and
 * file->encoding is set from the encoding="..." pseudo-attribute, or to
 * CS_INTERNAL when there is none.
 *
 * Returns 0 on success, -1 when nothing could be read or the construct is
 * not an XML declaration.
 */
gint xml_get_dtd(XMLFile *file)
{
        gchar buf[XMLBUFSIZE];
        gchar *p;

        if (xml_get_parenthesis(file, buf, sizeof(buf)) < 0)
                return -1;

        if (buf[0] != '?' ||
            (p = strcasestr(buf + 1, "xml")) == NULL ||
            (p = strcasestr(p + 3, "version")) == NULL ||
            strchr(p + 7, '?') == NULL) {
                g_warning("Can't get xml dtd\n");
                return -1;
        }

        file->dtd = g_strdup(buf);

        p = strcasestr(buf, "encoding=\"");
        if (p != NULL) {
                /* Skip "encoding=" (9 chars) so p rests on the opening quote. */
                p += 9;
                /* NOTE(review): extract_quote() is expected to leave the bare
                 * value at p -- confirm against utils. */
                extract_quote(p, '"');
                file->encoding = g_strdup(p);
        } else {
                file->encoding = g_strdup(CS_INTERNAL);
        }

        return 0;
}
187
188
gint xml_parse_next_tag(XMLFile *file)
189
{
190
        gchar buf[XMLBUFSIZE];
191
        gchar *bufp = buf;
192
        gchar *tag_str;
193
        XMLTag *tag;
194
        gint len;
195
196
        if (file->is_empty_element == TRUE) {
197
                file->is_empty_element = FALSE;
198
                xml_pop_tag(file);
199
                return 0;
200
        }
201
202
        if (xml_get_parenthesis(file, buf, sizeof(buf)) < 0) {
203
                g_warning("xml_parse_next_tag(): Can't parse next tag\n");
204
                return -1;
205
        }
206
207
        /* end-tag */
208
        if (buf[0] == '/') {
209
                if (strcmp(xml_get_current_tag(file)->tag, buf + 1) != 0) {
210
                        g_warning("xml_parse_next_tag(): Tag name mismatch: %s\n", buf);
211
                        return -1;
212
                }
213
                xml_pop_tag(file);
214
                return 0;
215
        }
216
217
        tag = xml_tag_new(NULL);
218
        xml_push_tag(file, tag);
219
220
        len = strlen(buf);
221
        if (len > 0 && buf[len - 1] == '/') {
222
                file->is_empty_element = TRUE;
223
                buf[len - 1] = '\0';
224
                g_strchomp(buf);
225
        }
226
        if (strlen(buf) == 0) {
227
                g_warning("xml_parse_next_tag(): Tag name is empty\n");
228
                return -1;
229
        }
230
231
        while (*bufp != '\0' && !g_ascii_isspace(*bufp)) bufp++;
232
        if (*bufp == '\0') {
233
                tag_str = conv_codeset_strdup(buf, file->encoding, CS_INTERNAL);
234
                if (tag_str) {
235
                        tag->tag = XML_STRING_ADD(tag_str);
236
                        g_free(tag_str);
237
                } else
238
                        tag->tag = XML_STRING_ADD(buf);
239
                return 0;
240
        } else {
241
                *bufp++ = '\0';
242
                tag_str = conv_codeset_strdup(buf, file->encoding, CS_INTERNAL);
243
                if (tag_str) {
244
                        tag->tag = XML_STRING_ADD(tag_str);
245
                        g_free(tag_str);
246
                } else
247
                        tag->tag = XML_STRING_ADD(buf);
248
        }
249
250
        /* parse attributes ( name=value ) */
251
        while (*bufp) {
252
                XMLAttr *attr;
253
                gchar *attr_name;
254
                gchar *attr_value;
255
                gchar *utf8_attr_name;
256
                gchar *utf8_attr_value;
257
                gchar *p;
258
                gchar quote;
259
260
                while (g_ascii_isspace(*bufp)) bufp++;
261
                attr_name = bufp;
262
                if ((p = strchr(attr_name, '=')) == NULL) {
263
                        g_warning("xml_parse_next_tag(): Syntax error in tag\n");
264
                        return -1;
265
                }
266
                bufp = p;
267
                *bufp++ = '\0';
268
                while (g_ascii_isspace(*bufp)) bufp++;
269
270
                if (*bufp != '"' && *bufp != '\'') {
271
                        g_warning("xml_parse_next_tag(): Syntax error in tag\n");
272
                        return -1;
273
                }
274
                quote = *bufp;
275
                bufp++;
276
                attr_value = bufp;
277
                if ((p = strchr(attr_value, quote)) == NULL) {
278
                        g_warning("xml_parse_next_tag(): Syntax error in tag\n");
279
                        return -1;
280
                }
281
                bufp = p;
282
                *bufp++ = '\0';
283
284
                g_strchomp(attr_name);
285
                xml_unescape_str(attr_value);
286
                utf8_attr_name = conv_codeset_strdup
287
                        (attr_name, file->encoding, CS_INTERNAL);
288
                utf8_attr_value = conv_codeset_strdup
289
                        (attr_value, file->encoding, CS_INTERNAL);
290
                if (!utf8_attr_name)
291
                        utf8_attr_name = g_strdup(attr_name);
292
                if (!utf8_attr_value)
293
                        utf8_attr_value = g_strdup(attr_value);
294
295
                attr = xml_attr_new(utf8_attr_name, utf8_attr_value);
296
                xml_tag_add_attr(tag, attr);
297
298
                g_free(utf8_attr_value);
299
                g_free(utf8_attr_name);
300
        }
301
302
        return 0;
303
}
304
305
/*
 * Push TAG onto FILE's tag stack (the head of the list is the top) and
 * increase the nesting level by one.  A NULL TAG is rejected.
 */
void xml_push_tag(XMLFile *file, XMLTag *tag)
{
        g_return_if_fail(tag != NULL);

        file->level += 1;
        file->tag_stack = g_list_prepend(file->tag_stack, tag);
}
312
313
/*
 * Remove the tag on top of FILE's tag stack, free it, and decrease the
 * nesting level by one.  Does nothing when the stack is empty.
 */
void xml_pop_tag(XMLFile *file)
{
        XMLTag *top;

        if (file->tag_stack == NULL)
                return;

        top = (XMLTag *)file->tag_stack->data;

        /* Unlink first so the list never holds a pointer to freed memory. */
        file->tag_stack = g_list_remove(file->tag_stack, top);
        xml_free_tag(top);

        file->level -= 1;
}
325
326
/* Return the tag on top of FILE's tag stack, or NULL if the stack is empty. */
XMLTag *xml_get_current_tag(XMLFile *file)
{
        return file->tag_stack ? (XMLTag *)file->tag_stack->data : NULL;
}
333
334
/*
 * Return the attribute list of the tag on top of FILE's tag stack, or NULL
 * if the stack is empty.  The list is the tag's own list, not a copy.
 */
GList *xml_get_current_tag_attr(XMLFile *file)
{
        XMLTag *current = xml_get_current_tag(file);

        return current ? current->attr : NULL;
}
343
344
/*
 * Return the text between the current read position and the next '<',
 * with surrounding whitespace stripped, entities unescaped and the result
 * converted from file->encoding to CS_INTERNAL (or copied as-is when the
 * conversion yields NULL).  The consumed text is dropped from the buffer.
 *
 * Returns a newly allocated string, or NULL when input ends before a '<'
 * is found, when '<' is the very next character, or when the text is empty
 * after stripping.
 */
gchar *xml_get_element(XMLFile *file)
{
        gchar *lt;
        gchar *raw;
        gchar *result;

        /* Keep pulling lines in until a '<' is buffered. */
        for (;;) {
                lt = strchr(file->bufp, '<');
                if (lt != NULL)
                        break;
                if (xml_read_line(file) < 0)
                        return NULL;
        }

        if (lt == file->bufp)
                return NULL;

        raw = g_strndup(file->bufp, lt - file->bufp);
        /* this is not XML1.0 strict */
        g_strstrip(raw);
        xml_unescape_str(raw);

        file->bufp = lt;
        xml_truncate_buf(file);

        if (*raw == '\0') {
                g_free(raw);
                return NULL;
        }

        result = conv_codeset_strdup(raw, file->encoding, CS_INTERNAL);
        if (result == NULL)
                result = g_strdup(raw);
        g_free(raw);

        return result;
}
376
377
/*
 * Read one fgets() chunk (at most XMLBUFSIZE - 1 bytes) from FILE's stream
 * and append it to the read buffer.
 *
 * Returns 0 on success, -1 when fgets() returns NULL.
 */
gint xml_read_line(XMLFile *file)
{
        gchar chunk[XMLBUFSIZE];
        gint offset;

        if (fgets(chunk, sizeof(chunk), file->fp) == NULL)
                return -1;

        /*
         * bufp is kept as an offset across the append and rebuilt from
         * buf->str afterwards, since appending may give the GString a new
         * storage address.
         */
        offset = file->bufp - file->buf->str;
        g_string_append(file->buf, chunk);
        file->bufp = file->buf->str + offset;

        return 0;
}
393
394
/*
 * Drop everything before the current read position from FILE's buffer and
 * reset the read position to the start of what remains.
 */
void xml_truncate_buf(XMLFile *file)
{
        gint consumed = file->bufp - file->buf->str;

        if (consumed <= 0)
                return;

        g_string_erase(file->buf, 0, consumed);
        file->bufp = file->buf->str;
}
404
405
/*
 * Return TRUE when FILE has a current tag and its name equals NAME
 * (exact strcmp match); FALSE otherwise.
 */
gboolean xml_compare_tag(XMLFile *file, const gchar *name)
{
        XMLTag *current = xml_get_current_tag(file);

        if (current == NULL)
                return FALSE;

        return strcmp(current->tag, name) == 0 ? TRUE : FALSE;
}
416
417
/*
 * Allocate an XMLNode that refers to TAG directly (no copy is made) and
 * holds a g_strdup() copy of TEXT as its element text (NULL stays NULL).
 */
XMLNode *xml_node_new(XMLTag *tag, const gchar *text)
{
        XMLNode *created = g_new(XMLNode, 1);

        created->element = g_strdup(text);
        created->tag = tag;

        return created;
}
427
428
XMLTag *xml_tag_new(const gchar *tag)
429
{
430
        XMLTag *new_tag;
431
432
        new_tag = g_new(XMLTag, 1);
433
        if (tag)
434
                new_tag->tag = XML_STRING_ADD(tag);
435
        else
436
                new_tag->tag = NULL;
437
        new_tag->attr = NULL;
438
439
        return new_tag;
440
}
441
442
XMLAttr *xml_attr_new(const gchar *name, const gchar *value)
443
{
444
        XMLAttr *new_attr;
445
446
        new_attr = g_new(XMLAttr, 1);
447
        new_attr->name = XML_STRING_ADD(name);
448
        new_attr->value = g_strdup(value);
449
450
        return new_attr;
451
}
452
453
/*
 * Append ATTR to the end of TAG's attribute list.  The pointer itself is
 * stored; no copy of ATTR is made.
 */
void xml_tag_add_attr(XMLTag *tag, XMLAttr *attr)
{
        GList *attrs = tag->attr;

        tag->attr = g_list_append(attrs, attr);
}
457
458
/*
 * Return a new XMLTag with the same name as TAG and a copy of each of its
 * attributes, in the same order.
 */
XMLTag *xml_copy_tag(XMLTag *tag)
{
        XMLTag *duplicate = xml_tag_new(tag->tag);
        GList *cur = tag->attr;

        while (cur != NULL) {
                xml_tag_add_attr(duplicate,
                                 xml_copy_attr((XMLAttr *)cur->data));
                cur = cur->next;
        }

        return duplicate;
}
472
473
/* Return a new XMLAttr built from ATTR's name and value. */
XMLAttr *xml_copy_attr(XMLAttr *attr)
{
        const gchar *name = attr->name;
        const gchar *value = attr->value;

        return xml_attr_new(name, value);
}
477
478
/*
 * Replace the five predefined XML entities (&lt; &gt; &amp; &apos; &quot;)
 * in STR with the characters they stand for.  STR is modified in place and
 * can only get shorter.
 *
 * A '&' with no ';' anywhere after it triggers a warning and is left alone.
 * A '&' that does not start one of the known entities is also left alone;
 * scanning then resumes right after that '&', so a valid entity that
 * follows a stray '&' (as in "a & b &lt; c") is still unescaped.
 *
 * Entities are matched in place against a small table, so no temporary
 * copy is made per candidate.  Always returns 0.
 */
gint xml_unescape_str(gchar *str)
{
        static const struct {
                const gchar *ref;       /* entity text, '&' through ';' */
                size_t len;             /* strlen(ref) */
                gchar ch;               /* replacement character */
        } entities[] = {
                { "&lt;",   4, '<'  },
                { "&gt;",   4, '>'  },
                { "&amp;",  5, '&'  },
                { "&apos;", 6, '\'' },
                { "&quot;", 6, '\"' },
        };
        const size_t n_entities = sizeof(entities) / sizeof(entities[0]);
        gchar *p = str;
        gchar *start;
        gchar *end;
        size_t len;
        size_t i;

        while ((start = strchr(p, '&')) != NULL) {
                if ((end = strchr(start + 1, ';')) == NULL) {
                        g_warning("Unescaped `&' appeared\n");
                        p = start + 1;
                        continue;
                }

                /* Length of the candidate, including '&' and ';'. */
                len = (size_t)(end - start) + 1;

                for (i = 0; i < n_entities; i++) {
                        if (len == entities[i].len &&
                            strncmp(start, entities[i].ref, len) == 0)
                                break;
                }

                if (i < n_entities) {
                        /* Overwrite '&' and close the gap, NUL included. */
                        *start = entities[i].ch;
                        memmove(start + 1, end + 1, strlen(end + 1) + 1);
                }

                /*
                 * Either way continue right after this position: a
                 * replaced character is never rescanned, and an unknown
                 * '&' does not hide an entity that starts before its ';'.
                 */
                p = start + 1;
        }

        return 0;
}
522
523
/*
 * Write STR to FP, replacing '<', '>', '&', '\'' and '"' with the
 * corresponding predefined XML entities.  A NULL STR writes nothing.
 *
 * Returns 0 on success, -1 when FP is NULL or a write fails; writing stops
 * at the first failure.
 */
gint xml_file_put_escape_str(FILE *fp, const gchar *str)
{
        const gchar *p;
        int result;

        g_return_val_if_fail(fp != NULL, -1);

        if (!str) return 0;

        for (p = str; *p != '\0'; p++) {
                switch (*p) {
                case '<':
                        result = fputs("&lt;", fp);
                        break;
                case '>':
                        result = fputs("&gt;", fp);
                        break;
                case '&':
                        result = fputs("&amp;", fp);
                        break;
                case '\'':
                        result = fputs("&apos;", fp);
                        break;
                case '\"':
                        result = fputs("&quot;", fp);
                        break;
                default:
                        result = fputc(*p, fp);
                        break;
                }

                /* fputs() and fputc() both report failure as EOF. */
                if (result == EOF)
                        return -1;
        }

        return 0;
}
555
556
gint xml_file_put_xml_decl(FILE *fp)
557
{
558
        g_return_val_if_fail(fp != NULL, -1);
559
560
        fprintf(fp, "<?xml version=\"1.0\" encoding=\"%s\"?>\n", CS_INTERNAL);
561
        return 0;
562
}
563
564
/*
 * Write NODE to FP as a single XML element followed by a newline.
 *
 * The tag name and attribute names are written verbatim; attribute values
 * and the element text are escaped with xml_file_put_escape_str().  A node
 * whose element text is NULL is written as an empty element
 * ("<name ... />"), otherwise as "<name ...>text</name>".  Child nodes are
 * not written by this function.
 *
 * Returns 0 on success, -1 when FP or NODE is NULL or any write fails.
 */
gint xml_file_put_node(FILE *fp, XMLNode *node)
{
        GList *cur;

        g_return_val_if_fail(fp != NULL, -1);
        g_return_val_if_fail(node != NULL, -1);

        if (fprintf(fp, "<%s", node->tag->tag) < 0)
                return -1;

        for (cur = node->tag->attr; cur != NULL; cur = cur->next) {
                XMLAttr *attr = (XMLAttr *)cur->data;

                if (fprintf(fp, " %s=\"", attr->name) < 0 ||
                    xml_file_put_escape_str(fp, attr->value) < 0 ||
                    fputs("\"", fp) == EOF)
                        return -1;
        }

        if (node->element) {
                if (fputs(">", fp) == EOF ||
                    xml_file_put_escape_str(fp, node->element) < 0 ||
                    fprintf(fp, "</%s>\n", node->tag->tag) < 0)
                        return -1;
        } else {
                if (fputs(" />\n", fp) == EOF)
                        return -1;
        }

        return 0;
}
590
591
/* Free NODE together with its tag and element text.  NULL is accepted. */
void xml_free_node(XMLNode *node)
{
        if (node == NULL)
                return;

        g_free(node->element);
        xml_free_tag(node->tag);
        g_free(node);
}
599
600
/*
 * g_node_traverse() callback: free the XMLNode stored in NODE.  DATA is
 * unused.  Always returns FALSE, which lets the traversal continue.
 */
static gboolean xml_free_func(GNode *node, gpointer data)
{
        xml_free_node((XMLNode *)node->data);

        return FALSE;
}
607
608
/*
 * Free a tree built by this module: first the XMLNode attached to every
 * GNode at and below NODE, then the GNode structure itself.
 */
void xml_free_tree(GNode *node)
{
        g_return_if_fail(node != NULL);

        /* Depth -1 means no depth limit. */
        g_node_traverse(node, G_PRE_ORDER, G_TRAVERSE_ALL, -1,
                        xml_free_func, NULL);
        g_node_destroy(node);
}
617
618
/*
 * Free TAG: its name (through XML_STRING_FREE), every attribute with its
 * name and value, the attribute list, and the XMLTag itself.
 * NULL is accepted.
 */
static void xml_free_tag(XMLTag *tag)
{
        if (tag == NULL)
                return;

        XML_STRING_FREE(tag->tag);

        /* Detach the head attribute from the list, then release it. */
        while (tag->attr != NULL) {
                XMLAttr *head = (XMLAttr *)tag->attr->data;

                tag->attr = g_list_remove(tag->attr, head);

                XML_STRING_FREE(head->name);
                g_free(head->value);
                g_free(head);
        }

        g_free(tag);
}
632
633
/*
 * Copy the text between the next '<' and the following '>' into BUF
 * (capacity LEN), strip surrounding whitespace, and advance FILE past the
 * '>'.  More input is read as needed until both delimiters are buffered.
 *
 * BUF is set to the empty string first, so it is valid even on failure.
 * Returns 0 on success, -1 when input ends before both delimiters are
 * found.
 */
static gint xml_get_parenthesis(XMLFile *file, gchar *buf, gint len)
{
        gchar *lt;
        gchar *gt;

        buf[0] = '\0';

        for (;;) {
                lt = strchr(file->bufp, '<');
                if (lt != NULL)
                        break;
                if (xml_read_line(file) < 0)
                        return -1;
        }

        /* Content starts right after the '<'. */
        file->bufp = lt + 1;

        for (;;) {
                gt = strchr(file->bufp, '>');
                if (gt != NULL)
                        break;
                if (xml_read_line(file) < 0)
                        return -1;
        }

        /*
         * NOTE(review): the "+ 1" suggests strncpy2() counts the
         * terminating NUL in its size argument -- confirm against utils.
         * Content longer than LEN is cut to fit.
         */
        strncpy2(buf, file->bufp, MIN(gt - file->bufp + 1, len));
        g_strstrip(buf);

        file->bufp = gt + 1;
        xml_truncate_buf(file);

        return 0;
}